# blob: a60dbd084488174119da3c4a25f5f173325b510c [file] [log] [blame]
# liblouis: text.nabcc.dis
#
# Based on the braille contraction modules in BRLTTY
#
# Copyright (C) 1995-2004 by The BRLTTY Team
# Copyright (C) 2004 ViewPlus Technologies, Inc., www.viewplustech.com
# Copyright (C) 2004 Computers to Help People, Inc., www.chpi.org
# This file is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this file; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# liblouis comes with ABSOLUTELY NO WARRANTY.
# This is a description of the default text translation table used by BRLTTY.
# It's based on the North American Braille Computer Code, but defines the full
# Latin1 (ISO-8859-1) character set.
# The 95 printable characters of the standard 7-bit US-ASCII character set
# (32-126) are identical to their representations within the North American
# Braille Computer Code (these are the only characters which the NABCC actually
# defines). Characters from literary braille, symbols from The Nemeth Braille
# Code for Mathematics and Science Notation, and a bit of human imagination
# have all been combined to create an easy-to-remember, one-to-one mapping
# between each character and its braille counterpart. All possible combinations
# involving only the original 6 braille dots are used, but that only allows for
# 64 out of the required 95 character representations. The presence or absence
# of dot 7 is used to differentiate between pairs of characters which either
# are very closely related in meaning or, in a few cases where a more intuitive
# reason couldn't be found, have a very close logical relationship within the
# US-ASCII code. Dot 8 is used for control characters and accented characters.
# This table is synchronized with the Text/en-nabcc.ttb table from the BRLTTY
# project, except for the re-added no-break space mapping.
# The space and the 26 lowercase letters (a-z) are the same as in literary
# braille.
# Row format used throughout this table: `display <character> <dots>`, where
# the character is written as \xHHHH and the dots column lists the raised dot
# numbers (a lone 0, as on the SPACE row, stands for the blank cell). The text
# after the `#` on a row is a comment: decimal code, character, and name.
#Hex Dots Dec Char Description
display \x0020 0 # 32 SPACE
display \x0061 1 # 97 a LATIN SMALL LETTER A
display \x0062 12 # 98 b LATIN SMALL LETTER B
display \x0063 14 # 99 c LATIN SMALL LETTER C
display \x0064 145 # 100 d LATIN SMALL LETTER D
display \x0065 15 # 101 e LATIN SMALL LETTER E
display \x0066 124 # 102 f LATIN SMALL LETTER F
display \x0067 1245 # 103 g LATIN SMALL LETTER G
display \x0068 125 # 104 h LATIN SMALL LETTER H
display \x0069 24 # 105 i LATIN SMALL LETTER I
display \x006a 245 # 106 j LATIN SMALL LETTER J
display \x006b 13 # 107 k LATIN SMALL LETTER K
display \x006c 123 # 108 l LATIN SMALL LETTER L
display \x006d 134 # 109 m LATIN SMALL LETTER M
display \x006e 1345 # 110 n LATIN SMALL LETTER N
display \x006f 135 # 111 o LATIN SMALL LETTER O
display \x0070 1234 # 112 p LATIN SMALL LETTER P
display \x0071 12345 # 113 q LATIN SMALL LETTER Q
display \x0072 1235 # 114 r LATIN SMALL LETTER R
display \x0073 234 # 115 s LATIN SMALL LETTER S
display \x0074 2345 # 116 t LATIN SMALL LETTER T
display \x0075 136 # 117 u LATIN SMALL LETTER U
display \x0076 1236 # 118 v LATIN SMALL LETTER V
display \x0077 2456 # 119 w LATIN SMALL LETTER W
display \x0078 1346 # 120 x LATIN SMALL LETTER X
display \x0079 13456 # 121 y LATIN SMALL LETTER Y
display \x007a 1356 # 122 z LATIN SMALL LETTER Z
# The 26 uppercase letters (A-Z) are the same as their lowercase counterparts
# except that dot 7 is added (e.g. a is dot 1, so A is dots 17):
#Hex Dots Dec Char Description
display \x0041 17 # 65 A LATIN CAPITAL LETTER A
display \x0042 127 # 66 B LATIN CAPITAL LETTER B
display \x0043 147 # 67 C LATIN CAPITAL LETTER C
display \x0044 1457 # 68 D LATIN CAPITAL LETTER D
display \x0045 157 # 69 E LATIN CAPITAL LETTER E
display \x0046 1247 # 70 F LATIN CAPITAL LETTER F
display \x0047 12457 # 71 G LATIN CAPITAL LETTER G
display \x0048 1257 # 72 H LATIN CAPITAL LETTER H
display \x0049 247 # 73 I LATIN CAPITAL LETTER I
display \x004a 2457 # 74 J LATIN CAPITAL LETTER J
display \x004b 137 # 75 K LATIN CAPITAL LETTER K
display \x004c 1237 # 76 L LATIN CAPITAL LETTER L
display \x004d 1347 # 77 M LATIN CAPITAL LETTER M
display \x004e 13457 # 78 N LATIN CAPITAL LETTER N
display \x004f 1357 # 79 O LATIN CAPITAL LETTER O
display \x0050 12347 # 80 P LATIN CAPITAL LETTER P
display \x0051 123457 # 81 Q LATIN CAPITAL LETTER Q
display \x0052 12357 # 82 R LATIN CAPITAL LETTER R
display \x0053 2347 # 83 S LATIN CAPITAL LETTER S
display \x0054 23457 # 84 T LATIN CAPITAL LETTER T
display \x0055 1367 # 85 U LATIN CAPITAL LETTER U
display \x0056 12367 # 86 V LATIN CAPITAL LETTER V
display \x0057 24567 # 87 W LATIN CAPITAL LETTER W
display \x0058 13467 # 88 X LATIN CAPITAL LETTER X
display \x0059 134567 # 89 Y LATIN CAPITAL LETTER Y
display \x005a 13567 # 90 Z LATIN CAPITAL LETTER Z
# The 10 decimal digits (0-9) are the same as in the Nemeth Code. In this
# table that is the patterns of the letters a-i (for 1-9) and j (for 0) lowered
# by one row of dots (e.g. a is dot 1, so 1 is dot 2):
#Hex Dots Dec Char Description
display \x0030 356 # 48 0 DIGIT ZERO
display \x0031 2 # 49 1 DIGIT ONE
display \x0032 23 # 50 2 DIGIT TWO
display \x0033 25 # 51 3 DIGIT THREE
display \x0034 256 # 52 4 DIGIT FOUR
display \x0035 26 # 53 5 DIGIT FIVE
display \x0036 235 # 54 6 DIGIT SIX
display \x0037 2356 # 55 7 DIGIT SEVEN
display \x0038 236 # 56 8 DIGIT EIGHT
display \x0039 35 # 57 9 DIGIT NINE
# Common symbols used within mathematical expressions by popular computer
# programming languages are the same as in the Nemeth Code. Several Latin-1
# symbols later in this table reuse these patterns with dot 7 added (e.g.
# MULTIPLICATION SIGN, dots 167, is ASTERISK, dots 16, plus dot 7):
#Hex Dots Dec Char Description
display \x002e 46 # 46 . FULL STOP
display \x002b 346 # 43 + PLUS SIGN
display \x002d 36 # 45 - HYPHEN-MINUS
display \x002a 16 # 42 * ASTERISK
display \x002f 34 # 47 / SOLIDUS
display \x0028 12356 # 40 ( LEFT PARENTHESIS
display \x0029 23456 # 41 ) RIGHT PARENTHESIS
# With all of these major considerations having been taken into account,
# convenient representations were still available, and are used, for some of
# the remaining characters. Both patterns are reused with dot 7 added further
# down in this table: YEN SIGN from AMPERSAND and POUND SIGN from NUMBER SIGN.
#Hex Dots Dec Char Description
display \x0026 12346 # 38 & AMPERSAND
display \x0023 3456 # 35 # NUMBER SIGN
# The remaining characters are what they are. Dot 7 isn't used either within
# the number block (32-63) or, with the exception of the DEL control character
# (127), within the lowercase block (96-127). With the exception of the
# underscore (95), dot 7 is used for every character within the uppercase block
# (64-95). Adding dot 7 to any character within the lowercase block (96-127)
# yields its corresponding character within the uppercase block (64-95) except
# that removing dot 7 from the DEL control character yields the underscore.
# Examples: { is dots 246 and [ is dots 2467; ` is dot 4 and @ is dots 47;
# DEL is dots 4567 and _ is dots 456.
#Hex Dots Dec Char Description
display \x002c 6 # 44 , COMMA
display \x003b 56 # 59 ; SEMICOLON
display \x003a 156 # 58 : COLON
display \x0021 2346 # 33 ! EXCLAMATION MARK
display \x003f 1456 # 63 ? QUESTION MARK
display \x0022 5 # 34 " QUOTATION MARK
display \x0027 3 # 39 ' APOSTROPHE
display \x0060 4 # 96 ` GRAVE ACCENT
display \x005e 457 # 94 ^ CIRCUMFLEX ACCENT
display \x007e 45 # 126 ~ TILDE
display \x005b 2467 # 91 [ LEFT SQUARE BRACKET
display \x005d 124567 # 93 ] RIGHT SQUARE BRACKET
display \x007b 246 # 123 { LEFT CURLY BRACKET
display \x007d 12456 # 125 } RIGHT CURLY BRACKET
display \x003d 123456 # 61 = EQUALS SIGN
display \x003c 126 # 60 < LESS-THAN SIGN
display \x003e 345 # 62 > GREATER-THAN SIGN
display \x0024 1246 # 36 $ DOLLAR SIGN
display \x0025 146 # 37 % PERCENT SIGN
display \x0040 47 # 64 @ COMMERCIAL AT
display \x007c 1256 # 124 | VERTICAL LINE
display \x005c 12567 # 92 \ REVERSE SOLIDUS
display \x005f 456 # 95 _ LOW LINE
# The DEL control character (the underscore, dots 456, with dot 7 added):
#Hex Dots Dec Char Description
display \x007f 4567 # 127 ^? DELETE
# Each of the characters within the basic control character block (0-31) is the
# same as its corresponding character within both the uppercase block (64-95)
# and the lowercase block (96-127) except that dots 7 and 8 are both used
# (e.g. a is dot 1 and A is dots 17, so ^A is dots 178).
#Hex Dots Dec Char Description
display \x0000 478 # 0 ^@ NULL
display \x0001 178 # 1 ^A START OF HEADING
display \x0002 1278 # 2 ^B START OF TEXT
display \x0003 1478 # 3 ^C END OF TEXT
display \x0004 14578 # 4 ^D END OF TRANSMISSION
display \x0005 1578 # 5 ^E ENQUIRY
display \x0006 12478 # 6 ^F ACKNOWLEDGE
display \x0007 124578 # 7 ^G BELL
display \x0008 12578 # 8 ^H BACKSPACE
display \x0009 2478 # 9 ^I HORIZONTAL TABULATION
display \x000a 24578 # 10 ^J LINE FEED
display \x000b 1378 # 11 ^K VERTICAL TABULATION
display \x000c 12378 # 12 ^L FORM FEED
display \x000d 13478 # 13 ^M CARRIAGE RETURN
display \x000e 134578 # 14 ^N SHIFT OUT
display \x000f 13578 # 15 ^O SHIFT IN
display \x0010 123478 # 16 ^P DATA LINK ESCAPE
display \x0011 1234578 # 17 ^Q DEVICE CONTROL ONE
display \x0012 123578 # 18 ^R DEVICE CONTROL TWO
display \x0013 23478 # 19 ^S DEVICE CONTROL THREE
display \x0014 234578 # 20 ^T DEVICE CONTROL FOUR
display \x0015 13678 # 21 ^U NEGATIVE ACKNOWLEDGE
display \x0016 123678 # 22 ^V SYNCHRONOUS IDLE
display \x0017 245678 # 23 ^W END OF TRANSMISSION BLOCK
display \x0018 134678 # 24 ^X CANCEL
display \x0019 1345678 # 25 ^Y END OF MEDIUM
display \x001a 135678 # 26 ^Z SUBSTITUTE
display \x001b 24678 # 27 ^[ ESCAPE
display \x001c 125678 # 28 ^\ FILE SEPARATOR
display \x001d 1245678 # 29 ^] GROUP SEPARATOR
display \x001e 4578 # 30 ^^ RECORD SEPARATOR
display \x001f 45678 # 31 ^_ UNIT SEPARATOR
# Each of the characters within the extended control character block (128-159)
# is the same as its corresponding character within the basic control character
# block (0-31) except that only dot 8 is used, i.e. dot 7 is dropped
# (e.g. ^A is dots 178, so ~A is dots 18).
#Hex Dots Dec Char Description
display \x0080 48 # 128 ~@ <CONTROL>
display \x0081 18 # 129 ~A <CONTROL>
display \x0082 128 # 130 ~B BREAK PERMITTED HERE
display \x0083 148 # 131 ~C NO BREAK HERE
display \x0084 1458 # 132 ~D <CONTROL>
display \x0085 158 # 133 ~E NEXT LINE
display \x0086 1248 # 134 ~F START OF SELECTED AREA
display \x0087 12458 # 135 ~G END OF SELECTED AREA
display \x0088 1258 # 136 ~H CHARACTER TABULATION SET
display \x0089 248 # 137 ~I CHARACTER TABULATION WITH JUSTIFICATION
display \x008a 2458 # 138 ~J LINE TABULATION SET
display \x008b 138 # 139 ~K PARTIAL LINE DOWN
display \x008c 1238 # 140 ~L PARTIAL LINE UP
display \x008d 1348 # 141 ~M REVERSE LINE FEED
display \x008e 13458 # 142 ~N SINGLE SHIFT TWO
display \x008f 1358 # 143 ~O SINGLE SHIFT THREE
display \x0090 12348 # 144 ~P DEVICE CONTROL STRING
display \x0091 123458 # 145 ~Q PRIVATE USE ONE
display \x0092 12358 # 146 ~R PRIVATE USE TWO
display \x0093 2348 # 147 ~S SET TRANSMIT STATE
display \x0094 23458 # 148 ~T CANCEL CHARACTER
display \x0095 1368 # 149 ~U MESSAGE WAITING
display \x0096 12368 # 150 ~V START OF GUARDED AREA
display \x0097 24568 # 151 ~W END OF GUARDED AREA
display \x0098 13468 # 152 ~X START OF STRING
display \x0099 134568 # 153 ~Y <CONTROL>
display \x009a 13568 # 154 ~Z SINGLE CHARACTER INTRODUCER
display \x009b 2468 # 155 ~[ CONTROL SEQUENCE INTRODUCER
display \x009c 12568 # 156 ~\ STRING TERMINATOR
display \x009d 124568 # 157 ~] OPERATING SYSTEM COMMAND
display \x009e 458 # 158 ~^ PRIVACY MESSAGE
display \x009f 4568 # 159 ~_ APPLICATION PROGRAM COMMAND
# Representations for the uppercase accented letters are drawn from the
# remaining combinations which use both dots 7 and 8. The representation for a
# lowercase accented letter is the same as its uppercase counterpart except
# that dot 7 isn't used. This scheme retains the use of dot 7 as the modifier
# for a capitalized letter. The only exception to these rules is that, due to
# the nature of the Latin 1 character set, the German lowercase double-s is
# treated as though it were an uppercase y-dieresis (neither has an uppercase
# definition). These representations have been gathered, as much as possible,
# into logical groupings.
# The 5 letters with a circumflex accent (^) use the dot combinations of the
# digits [1-5] with dot 8 added; the uppercase forms also add dot 7
# (e.g. 1 is dot 2, so a-circumflex is dots 28 and A-circumflex is dots 278):
#Hex Dots Dec Char Description
display \x00c2 278 # 194 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
display \x00ca 2378 # 202 Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
display \x00ce 2578 # 206 Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
display \x00d4 25678 # 212 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
display \x00db 2678 # 219 Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
display \x00e2 28 # 226 â LATIN SMALL LETTER A WITH CIRCUMFLEX
display \x00ea 238 # 234 ê LATIN SMALL LETTER E WITH CIRCUMFLEX
display \x00ee 258 # 238 î LATIN SMALL LETTER I WITH CIRCUMFLEX
display \x00f4 2568 # 244 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
display \x00fb 268 # 251 û LATIN SMALL LETTER U WITH CIRCUMFLEX
# The 5 letters with a grave accent (`) use the dot combinations of the digits
# [6-0] (6, 7, 8, 9, 0) with dot 8 added; the uppercase forms also add dot 7
# (e.g. 6 is dots 235, so a-grave is dots 2358 and A-grave is dots 23578):
#Hex Dots Dec Char Description
display \x00c0 23578 # 192 À LATIN CAPITAL LETTER A WITH GRAVE
display \x00c8 235678 # 200 È LATIN CAPITAL LETTER E WITH GRAVE
display \x00cc 23678 # 204 Ì LATIN CAPITAL LETTER I WITH GRAVE
display \x00d2 3578 # 210 Ò LATIN CAPITAL LETTER O WITH GRAVE
display \x00d9 35678 # 217 Ù LATIN CAPITAL LETTER U WITH GRAVE
display \x00e0 2358 # 224 à LATIN SMALL LETTER A WITH GRAVE
display \x00e8 23568 # 232 è LATIN SMALL LETTER E WITH GRAVE
display \x00ec 2368 # 236 ì LATIN SMALL LETTER I WITH GRAVE
display \x00f2 358 # 242 ò LATIN SMALL LETTER O WITH GRAVE
display \x00f9 3568 # 249 ù LATIN SMALL LETTER U WITH GRAVE
# The 6 letters with an acute accent (´) use the [a-f] dot combinations with
# dots 6 and 8 added; the uppercase forms also add dot 7
# (e.g. a is dot 1, so a-acute is dots 168 and A-acute is dots 1678):
#Hex Dots Dec Char Description
display \x00c1 1678 # 193 Á LATIN CAPITAL LETTER A WITH ACUTE
display \x00c9 12678 # 201 É LATIN CAPITAL LETTER E WITH ACUTE
display \x00cd 14678 # 205 Í LATIN CAPITAL LETTER I WITH ACUTE
display \x00d3 145678 # 211 Ó LATIN CAPITAL LETTER O WITH ACUTE
display \x00da 15678 # 218 Ú LATIN CAPITAL LETTER U WITH ACUTE
display \x00dd 124678 # 221 Ý LATIN CAPITAL LETTER Y WITH ACUTE
display \x00e1 168 # 225 á LATIN SMALL LETTER A WITH ACUTE
display \x00e9 1268 # 233 é LATIN SMALL LETTER E WITH ACUTE
display \x00ed 1468 # 237 í LATIN SMALL LETTER I WITH ACUTE
display \x00f3 14568 # 243 ó LATIN SMALL LETTER O WITH ACUTE
display \x00fa 1568 # 250 ú LATIN SMALL LETTER U WITH ACUTE
display \x00fd 12468 # 253 ý LATIN SMALL LETTER Y WITH ACUTE
# The 6 letters with a dieresis accent (¨) use the [f-j] dot combinations with
# dots 3 and 6 added, and the number sign (because it fits the sequence
# reasonably well). In every case dot 8 is added as well, and the uppercase
# forms also add dot 7 (e.g. f is dots 124, so a-dieresis is dots 123468 and
# A-dieresis is dots 1234678; y-dieresis is the number sign, dots 3456, plus
# dot 8):
#Hex Dots Dec Char Description
display \x00c4 1234678 # 196 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
display \x00cb 12345678 # 203 Ë LATIN CAPITAL LETTER E WITH DIAERESIS
display \x00cf 1235678 # 207 Ï LATIN CAPITAL LETTER I WITH DIAERESIS
display \x00d6 234678 # 214 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
display \x00dc 2345678 # 220 Ü LATIN CAPITAL LETTER U WITH DIAERESIS
display \x00e4 123468 # 228 ä LATIN SMALL LETTER A WITH DIAERESIS
display \x00eb 1234568 # 235 ë LATIN SMALL LETTER E WITH DIAERESIS
display \x00ef 123568 # 239 ï LATIN SMALL LETTER I WITH DIAERESIS
display \x00f6 23468 # 246 ö LATIN SMALL LETTER O WITH DIAERESIS
display \x00fc 234568 # 252 ü LATIN SMALL LETTER U WITH DIAERESIS
display \x00ff 34568 # 255 ÿ LATIN SMALL LETTER Y WITH DIAERESIS
# There is no uppercase y-dieresis in the Latin1 character set. The German
# lowercase double-s, which also doesn't have an uppercase counterpart in the
# Latin1 character set, uses its representation (lowercase y-dieresis is dots
# 34568; adding dot 7 gives dots 345678):
#Hex Dots Dec Char Description
display \x00df 345678 # 223 ß LATIN SMALL LETTER SHARP S
# The remaining accented letters are listed below. As in the groups above,
# every pattern uses dot 8, and each lowercase form is its uppercase form
# without dot 7 (e.g. A-tilde is dots 578 and a-tilde is dots 58):
#Hex Dots Dec Char Description
display \x00c3 578 # 195 Ã LATIN CAPITAL LETTER A WITH TILDE
display \x00d1 4678 # 209 Ñ LATIN CAPITAL LETTER N WITH TILDE
display \x00d5 5678 # 213 Õ LATIN CAPITAL LETTER O WITH TILDE
display \x00c5 34578 # 197 Å LATIN CAPITAL LETTER A WITH RING ABOVE
display \x00c7 34678 # 199 Ç LATIN CAPITAL LETTER C WITH CEDILLA
display \x00d8 3478 # 216 Ø LATIN CAPITAL LETTER O WITH STROKE
display \x00c6 378 # 198 Æ LATIN CAPITAL LETTER AE
display \x00d0 678 # 208 Ð LATIN CAPITAL LETTER ETH
display \x00de 3678 # 222 Þ LATIN CAPITAL LETTER THORN
display \x00e3 58 # 227 ã LATIN SMALL LETTER A WITH TILDE
display \x00f1 468 # 241 ñ LATIN SMALL LETTER N WITH TILDE
display \x00f5 568 # 245 õ LATIN SMALL LETTER O WITH TILDE
display \x00e5 3458 # 229 å LATIN SMALL LETTER A WITH RING ABOVE
display \x00e7 3468 # 231 ç LATIN SMALL LETTER C WITH CEDILLA
display \x00f8 348 # 248 ø LATIN SMALL LETTER O WITH STROKE
display \x00e6 38 # 230 æ LATIN SMALL LETTER AE
display \x00f0 68 # 240 ð LATIN SMALL LETTER ETH
display \x00fe 368 # 254 þ LATIN SMALL LETTER THORN
# Some characters are the same as other characters which they resemble but with
# dot 7 added (e.g. < is dots 126 and the left double angle quotation mark is
# dots 1267; 1 is dot 2 and SUPERSCRIPT ONE is dots 27; $ is dots 1246 and
# CENT SIGN is dots 12467). Note that BROKEN BAR (dots 1567) is not exactly
# VERTICAL LINE (dots 1256) plus dot 7; that pattern, dots 12567, is REVERSE
# SOLIDUS.
#Hex Dots Dec Char Description
display \x00ad 367 # 173 ­ SOFT HYPHEN
display \x00ab 1267 # 171 « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
display \x00bb 3457 # 187 » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
display \x00a6 1567 # 166 ¦ BROKEN BAR
display \x00b9 27 # 185 ¹ SUPERSCRIPT ONE
display \x00b2 237 # 178 ² SUPERSCRIPT TWO
display \x00b3 257 # 179 ³ SUPERSCRIPT THREE
display \x00b1 3467 # 177 ± PLUS-MINUS SIGN
display \x00d7 167 # 215 × MULTIPLICATION SIGN
display \x00f7 347 # 247 ÷ DIVISION SIGN
display \x00b7 467 # 183 · MIDDLE DOT
display \x00a1 23467 # 161 ¡ INVERTED EXCLAMATION MARK
display \x00bf 14567 # 191 ¿ INVERTED QUESTION MARK
display \x00a2 12467 # 162 ¢ CENT SIGN
display \x00a3 34567 # 163 £ POUND SIGN
# A few more characters follow this same convention but their relationships
# to their base characters are a bit obscure: CURRENCY SIGN is PERCENT SIGN
# (dots 146) plus dot 7, and YEN SIGN is AMPERSAND (dots 12346) plus dot 7.
#Hex Dots Dec Char Description
display \x00a4 1467 # 164 ¤ CURRENCY SIGN
display \x00a5 123467 # 165 ¥ YEN SIGN
# Some characters are represented by the first letters of their names lowered
# by one row of dots. NOT SIGN, PILCROW SIGN, REGISTERED SIGN and SECTION SIGN
# are n, p, r and s with every dot moved down one row (1->2, 2->3, 3->7, 4->5,
# 5->6); e.g. n is dots 1345, so NOT SIGN is dots 2567.
# NOTE(review): COPYRIGHT SIGN (dots 23567) and DEGREE SIGN (dots 3567) are not
# c (dots 14) or d (dots 145) shifted this way - confirm the intended
# derivation against the BRLTTY table before changing either row.
#Hex Dots Dec Char Description
display \x00ac 2567 # 172 ¬ NOT SIGN
display \x00b6 2357 # 182 ¶ PILCROW SIGN
display \x00a9 23567 # 169 © COPYRIGHT SIGN
display \x00ae 2367 # 174 ® REGISTERED SIGN
display \x00a7 357 # 167 § SECTION SIGN
display \x00b0 3567 # 176 ° DEGREE SIGN
# The three fraction characters use combinations of dots 1 and 4 (which
# progress from left to right as the value of the fraction increases) together
# with dots 2,3,5,6,7: one quarter adds dot 1, one half adds dots 1 and 4, and
# three quarters adds dot 4.
#Hex Dots Dec Char Description
display \x00bc 123567 # 188 ¼ VULGAR FRACTION ONE QUARTER
display \x00bd 1234567 # 189 ½ VULGAR FRACTION ONE HALF
display \x00be 234567 # 190 ¾ VULGAR FRACTION THREE QUARTERS
# Each of the three extended accent characters is the same as its conventional
# compose character but with dot 7 added: ACUTE ACCENT from APOSTROPHE (dot 3),
# CEDILLA from COMMA (dot 6), and DIAERESIS from QUOTATION MARK (dot 5).
#Hex Dots Dec Char Description
display \x00b4 37 # 180 ´ ACUTE ACCENT
display \x00b8 67 # 184 ¸ CEDILLA
display \x00a8 57 # 168 ¨ DIAERESIS
# The two gender symbols (the masculine and feminine ordinal indicators) are
# a lone dot 7 and a lone dot 8 respectively:
#Hex Dots Dec Char Description
display \x00ba 7 # 186 º MASCULINE ORDINAL INDICATOR
display \x00aa 8 # 170 ª FEMININE ORDINAL INDICATOR
# The three remaining characters are:
#Hex Dots Dec Char Description
display \x00af 267 # 175 ¯ MACRON
display \x00b5 567 # 181 µ MICRO SIGN
display \x00a0 78 # 160   NO-BREAK SPACE
# The nonbreaking space is dots 7 and 8 because this presents a sequence of
# nonbreaking spaces as a smooth low line segment. Per the note in the file
# header, this mapping was re-added here and is the one stated difference from
# BRLTTY's Text/en-nabcc.ttb.