# Source blob: 48eec4c7cdfd64f943ba2c3cf19c528ef14c2d63
#
# Copyright (C) 2016 Rimas Kudelis <rq@akl.lt>
#
# This file is part of liblouis.
#
# liblouis is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1 of the
# License, or (at your option) any later version.
#
# liblouis is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with liblouis. If not, see
# <http://www.gnu.org/licenses/>.
# The Lithuanian 8-dot Braille writing system is described in a decree of the Minister of
# Social Security and Labor, called "Dėl vieningos aštuonių taškų Brailio rašto sistemos
# naudojimo tvarkos aprašo patvirtinimo", which is in effect since 2011-04-13. Document
# number is A1-183. At the time of writing this file, the document was accessible at
# https://www.e-tar.lt/portal/lt/legalAct/TAR.443D667CA047 .
# The document is referred to as "the standard" below.
#
# The standard maps ISO-8859-13 character set to 8-dot Braille writing system. However,
# even though it defines different mappings for literary and computer braille modes, it
# doesn't seem like the authors had a good understanding of why these two modes exist
# and how they differ. Furthermore, the standard contains a few errors (incorrectly named
# characters as well as mapping conflicts). I would say it needs further improvements.
#
# This file is based on the standard, but does not exactly follow it. Some standard
# definitions are commented out, some changed, and some extra ones are added.
###
###
### WHITESPACE
###
# No-break space is defined as 7 in the standard. That mapping is deliberately not
# followed here: like the other whitespace characters it is rendered as a blank cell
# (0, no dots). Dot 7 is also the cell this table emits for undefined characters and
# for U+FFFD, so using it for a space would make the two indistinguishable.
space \x00a0 0 [NO-BREAK SPACE]
# The remaining whitespace characters are expected to come from the shared include.
include spaces.uti
###
### DIGITS
###
# Digits are not defined in this file; they are pulled in from the shared 8-dot
# digit table (presumably the characters 0-9 -- see the included file for details).
include digits8Dots.uti
###
### LETTERS
###
# The basic Latin letters come from the shared 8-dot letter table.
include latinLetterDef8Dots.uti
# Accented letters which are part of the Lithuanian alphabet.
# Each line defines an uppercase/lowercase pair: the two characters, then the
# uppercase and lowercase dot patterns, then a description (glyphs and Unicode names).
# In every pair below the uppercase pattern is the lowercase pattern plus dot 7.
uplow \x0104\x0105 167,16 Ąą LATIN CAPITAL LETTER A WITH OGONEK,LATIN SMALL LETTER A WITH OGONEK
uplow \x010c\x010d 1467,146 Čč LATIN CAPITAL LETTER C WITH CARON,LATIN SMALL LETTER C WITH CARON
uplow \x0118\x0119 1567,156 Ęę LATIN CAPITAL LETTER E WITH OGONEK,LATIN SMALL LETTER E WITH OGONEK
uplow \x0116\x0117 3457,345 Ėė LATIN CAPITAL LETTER E WITH DOT ABOVE,LATIN SMALL LETTER E WITH DOT ABOVE
uplow \x012e\x012f 2467,246 Įį LATIN CAPITAL LETTER I WITH OGONEK,LATIN SMALL LETTER I WITH OGONEK
uplow \x0160\x0161 23467,2346 Šš LATIN CAPITAL LETTER S WITH CARON,LATIN SMALL LETTER S WITH CARON
uplow \x0172\x0173 3467,346 Ųų LATIN CAPITAL LETTER U WITH OGONEK,LATIN SMALL LETTER U WITH OGONEK
uplow \x016a\x016b 12567,1256 Ūū LATIN CAPITAL LETTER U WITH MACRON,LATIN SMALL LETTER U WITH MACRON
uplow \x017d\x017e 1267,126 Žž LATIN CAPITAL LETTER Z WITH CARON,LATIN SMALL LETTER Z WITH CARON
###
### PUNCTUATION
###
# Punctuation marks and brackets. Each line gives the character, its dot pattern,
# and then a description (the glyph and its Unicode name).
punctuation \x0021 235 ! EXCLAMATION MARK
punctuation \x0022 4 " QUOTATION MARK
punctuation \x0027 3 ' APOSTROPHE
# The following symbol is defined as 134568 in the standard, but that is hardly useful.
# According to Unicode, it is the preferred character to use for apostrophe, hence
# defining it as one here.
punctuation \x2019 3 ’ RIGHT SINGLE QUOTATION MARK
punctuation \x0028 23567 ( LEFT PARENTHESIS
punctuation \x0029 23568 ) RIGHT PARENTHESIS
punctuation \x002c 2 , COMMA
punctuation \x002d 36 - HYPHEN-MINUS
punctuation \x002e 256 . FULL STOP
punctuation \x003a 25 : COLON
punctuation \x003b 23 ; SEMICOLON
punctuation \x003f 26 ? QUESTION MARK
punctuation \x005b 12356 [ LEFT SQUARE BRACKET
punctuation \x005d 23456 ] RIGHT SQUARE BRACKET
punctuation \x007b 24678 { LEFT CURLY BRACKET
punctuation \x007d 13578 } RIGHT CURLY BRACKET
# The following symbol is defined as 368 in the standard.
# Here it gets the same pattern as HYPHEN-MINUS (36) instead.
punctuation \x00ad 36 [SOFT HYPHEN]
# The following symbol is not defined in the standard.
# Both dashes below are given the same pattern as HYPHEN-MINUS (36).
punctuation \x2013 36 – EN DASH
punctuation \x2014 36 — EM DASH
# These are the typographically correct quotes in Lithuania.
punctuation \x201e 2367 „ DOUBLE LOW-9 QUOTATION MARK
punctuation \x201c 3567 “ LEFT DOUBLE QUOTATION MARK
# The following character is defined in the standard, because it exists in ISO-8859-13
# character set, even though it should not be used in Lithuanian texts.
punctuation \x201d 367 ” RIGHT DOUBLE QUOTATION MARK
# Let's make straight quotes smarter if we can.
# A straight quote at the start of a word gets the opening-quote pattern (2367) and
# one at the end of a word gets the closing-quote pattern (3567); elsewhere the plain
# definition above (4) applies.
prepunc \x0022 2367 " QUOTATION MARK
postpunc \x0022 3567 " QUOTATION MARK
# The following combination of smart quotes is sometimes mistakenly used due to automatic
# replacement based on English rules. This will turn them to the correct quotes.
prepunc \x201c 2367 “ LEFT DOUBLE QUOTATION MARK
postpunc \x201d 3567 ” RIGHT DOUBLE QUOTATION MARK
# Two versions of horizontal ellipsis.
# The single ellipsis character and three consecutive full stops both become
# three dot-3 cells.
punctuation \x2026 3-3-3 … HORIZONTAL ELLIPSIS
always \x002e\x002e\x002e 3-3-3 ... Three times FULL STOP
###
### MATHEMATICAL SYMBOLS
###
# Arithmetic and comparison operators. Lines starting with "# math" or "# midnum"
# are disabled definitions kept for reference; they have no effect on translation.
math \x002b 2358 + PLUS SIGN
math \x003c 2468 < LESS-THAN SIGN
math \x003d 2356 = EQUALS SIGN
math \x003e 1358 > GREATER-THAN SIGN
math \x00b1 23578 ± PLUS-MINUS SIGN
# math \x00bc 1368 ¼ VULGAR FRACTION ONE QUARTER
# math \x00bd 12368 ½ VULGAR FRACTION ONE HALF
# math \x00be 13468 ¾ VULGAR FRACTION THREE QUARTERS
# MINUS SIGN uses the same pattern as HYPHEN-MINUS (36).
math \x2212 36 − MINUS SIGN
math \x00d7 2348 × MULTIPLICATION SIGN
math \x00f7 125678 ÷ DIVISION SIGN
# math \x2215 25 ∕ DIVISION SLASH
# Parentheses are different in Math than in literature.
# The math-specific parenthesis patterns below are disabled, so the literary
# definitions from the Punctuation section are the only ones in effect.
# midnum \x0028 126 ( LEFT PARENTHESIS
# midnum \x0029 345 ) RIGHT PARENTHESIS
###
### OTHER CHARACTERS
###
# Remaining printable symbols (currency, typographic and technical signs), all
# declared with the `sign` opcode: character, dot pattern, then a description.
sign \x0023 3456 # NUMBER SIGN
sign \x0024 46 $ DOLLAR SIGN
sign \x0025 123456 % PERCENT SIGN
sign \x0026 12346 & AMPERSAND
sign \x002a 35 * ASTERISK
sign \x002f 34 / SOLIDUS
sign \x0040 34578 @ COMMERCIAL AT
sign \x005c 347 \ REVERSE SOLIDUS
sign \x005e 2568 ^ CIRCUMFLEX ACCENT
sign \x005f 4567 _ LOW LINE
sign \x0060 6 ` GRAVE ACCENT
sign \x007c 456 | VERTICAL LINE
sign \x007e 268 ~ TILDE
sign \x00a2 58 ¢ CENT SIGN
sign \x00a3 467 £ POUND SIGN
sign \x00a4 4678 ¤ CURRENCY SIGN
sign \x00a6 1456 ¦ BROKEN BAR
sign \x00a7 3468 § SECTION SIGN
sign \x00a9 123468 © COPYRIGHT SIGN
# Angle quotation marks are not commonly used in Lithuanian language.
# It's probably much more common to see them as fancy breadcrumb separators
# than anything else, thus marking them as sign, not punctuation.
sign \x00ab 5678 « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
sign \x00bb 4578 » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
sign \x00ac 5 ¬ NOT SIGN
sign \x00ae 12358 ® REGISTERED SIGN
sign \x00b0 4568 ° DEGREE SIGN
sign \x00b2 1278 ² SUPERSCRIPT TWO
sign \x00b3 1478 ³ SUPERSCRIPT THREE
sign \x00b5 1348 µ MICRO SIGN
sign \x00b6 12348 ¶ PILCROW SIGN
sign \x00b7 37 · MIDDLE DOT
sign \x00b9 178 ¹ SUPERSCRIPT ONE
###
### INDICATOR AND SPECIAL SYMBOL OPCODES
###
# The number indicator is left disabled; per its own note it is only meant for
# 6-dot output.
# numsign 3456 Number indicator in text (6-dot output only)
# The hyphen and decimal separator use the same patterns as the HYPHEN-MINUS (36)
# and COMMA (2) punctuation definitions.
hyphen \x002d 36 - HYPHEN-MINUS
decpoint \x002c 2 , DECIMAL SEPARATOR
# U+FFFD and any character not defined in this table are both shown as dot 7.
sign \xfffd 7 � REPLACEMENT CHARACTER
undefined 7 Replacement dots for characters not defined in this table
###
### ATTIC (commented out definitions)
###
# Other letters from the ISO-8859-13 repertoire.
# This set seems quite useless and poorly designed, so leaving it commented out.
# uplow \x00c4\x00e4 567,3458 Ää LATIN CAPITAL LETTER A WITH DIAERESIS,LATIN SMALL LETTER A WITH DIAERESIS
# uplow \x00c5\x00e5 34567,345678 Åå LATIN CAPITAL LETTER A WITH RING ABOVE,LATIN SMALL LETTER A WITH RING ABOVE
# uplow \x00c6\x00e6 458,38 Ææ LATIN CAPITAL LETTER AE,LATIN SMALL LETTER AE
# uplow \x00c9\x00e9 238,1268 Éé LATIN CAPITAL LETTER E WITH ACUTE,LATIN SMALL LETTER E WITH ACUTE
# uplow \x00d3\x00f3 123567,14568 Óó LATIN CAPITAL LETTER O WITH ACUTE,LATIN SMALL LETTER O WITH ACUTE
# uplow \x00d5\x00f5 267,1234567 Õõ LATIN CAPITAL LETTER O WITH TILDE,LATIN SMALL LETTER O WITH TILDE
# uplow \x00d6\x00f6 358,1234568 Öö LATIN CAPITAL LETTER O WITH DIAERESIS,LATIN SMALL LETTER O WITH DIAERESIS
# uplow \x00d8\x00f8 48,68 Øø LATIN CAPITAL LETTER O WITH STROKE,LATIN SMALL LETTER O WITH STROKE
# uplow \x00dc\x00fc 2368,12568 Üü LATIN CAPITAL LETTER U WITH DIAERESIS,LATIN SMALL LETTER U WITH DIAERESIS
# lowercase \x00df 34568 ß LATIN SMALL LETTER SHARP S
# uplow \x0100\x0101 12468,1678 Āā LATIN CAPITAL LETTER A WITH MACRON,LATIN SMALL LETTER A WITH MACRON
# uplow \x0106\x0107 15678,34678 Ćć LATIN CAPITAL LETTER C WITH ACUTE,LATIN SMALL LETTER C WITH ACUTE
# uplow \x0112\x0113 123467,1234678 Ēē LATIN CAPITAL LETTER E WITH MACRON,LATIN SMALL LETTER E WITH MACRON
# uplow \x0122\x0123 57,348 Ģģ LATIN CAPITAL LETTER G WITH CEDILLA,LATIN SMALL LETTER G WITH CEDILLA
# uplow \x012a\x012b 23468,14678 Īī LATIN CAPITAL LETTER I WITH MACRON,LATIN SMALL LETTER I WITH MACRON
# uplow \x0136\x0137 258,1468 Ķķ LATIN CAPITAL LETTER K WITH CEDILLA,LATIN SMALL LETTER K WITH CEDILLA
# uplow \x013b\x013c 23678,124568 Ļļ LATIN CAPITAL LETTER L WITH CEDILLA,LATIN SMALL LETTER L WITH CEDILLA
# uplow \x0141\x0142 35678,234568 Łł LATIN CAPITAL LETTER L WITH STROKE,LATIN SMALL LETTER L WITH STROKE
# uplow \x0143\x0144 2567,13458 Ńń LATIN CAPITAL LETTER N WITH ACUTE,LATIN SMALL LETTER N WITH ACUTE
# uplow \x0145\x0146 578,234567 Ņņ LATIN CAPITAL LETTER N WITH CEDILLA,LATIN SMALL LETTER N WITH CEDILLA
# uplow \x014c\x014d 14567,145678 Ōō LATIN CAPITAL LETTER O WITH MACRON,LATIN SMALL LETTER O WITH MACRON
# uplow \x015a\x015b 123568,1568 Śś LATIN CAPITAL LETTER S WITH ACUTE,LATIN SMALL LETTER S WITH ACUTE
# uplow \x0179\x017a 23458,12678 Źź LATIN CAPITAL LETTER Z WITH ACUTE,LATIN SMALL LETTER Z WITH ACUTE
# uplow \x017b\x017c 3568,24568 Żż LATIN CAPITAL LETTER Z WITH DOT ABOVE,LATIN SMALL LETTER Z WITH DOT ABOVE
# uplow \x0156\x0157 2357,12456 Ŗŗ LATIN CAPITAL LETTER R WITH CEDILLA,LATIN SMALL LETTER R WITH CEDILLA
# These control chars are defined by the standard, but are hardly useful.
# These official definitions are commented out below, and only whitespace characters
# are defined as 0 (no dots) in the Whitespace section at the top of this file.
# sign \x0000 3578 [NULL]
# sign \x0001 28 [START OF HEADING]
# sign \x0002 45 [START OF TEXT]
# sign \x0003 47 [END OF TEXT]
# sign \x0004 14578 [END OF TRANSMISSION]
# sign \x0005 1578 [ENQUIRY]
# sign \x0006 12478 [ACKNOWLEDGE]
# sign \x0007 124578 [BELL]
# sign \x0008 12578 [BACKSPACE]
# space \x0009 2478 [CHARACTER TABULATION]
# space \x000a 24578 [LINE FEED (LF)]
# space \x000b 1378 [LINE TABULATION]
# space \x000c 12378 [FORM FEED (FF)]
# space \x000d 13478 [CARRIAGE RETURN (CR)]
# sign \x000e 134578 [SHIFT OUT]
# sign \x000f 56 [SHIFT IN]
# sign \x0010 123478 [DATA LINK ESCAPE]
# sign \x0011 1234578 [DEVICE CONTROL ONE]
# sign \x0012 123578 [DEVICE CONTROL TWO]
# sign \x0013 23478 [DEVICE CONTROL THREE]
# sign \x0014 234578 [DEVICE CONTROL FOUR]
# sign \x0015 13678 [NEGATIVE ACKNOWLEDGE]
# sign \x0016 123678 [SYNCHRONOUS IDLE]
# sign \x0017 245678 [END OF TRANSMISSION BLOCK]
# sign \x0018 134678 [CANCEL]
# sign \x0019 1345678 [END OF MEDIUM]
# sign \x001a 135678 [SUBSTITUTE]
# sign \x001b 1235678 [ESCAPE]
# sign \x001c 3478 [INFORMATION SEPARATOR FOUR]
# sign \x001d 2345678 [INFORMATION SEPARATOR THREE]
# sign \x001e 234678 [INFORMATION SEPARATOR TWO]
# sign \x001f 45678 [INFORMATION SEPARATOR ONE]
# sign \x007f 138 [DELETE]
# sign \x0080 457
# sign \x0081 8
# sign \x0082 3678 [BREAK PERMITTED HERE]
# sign \x0083 78 [NO BREAK HERE]
# sign \x0084 236
# sign \x0085 25678 [NEXT LINE (NEL)]
# sign \x0086 168 [START OF SELECTED AREA]
# sign \x0087 357 [END OF SELECTED AREA]
# sign \x0088 378 [CHARACTER TABULATION SET]
# sign \x0089 468 [CHARACTER TABULATION WITH JUSTIFICATION]
# sign \x008a 678 [LINE TABULATION SET]
# sign \x008b 27 [PARTIAL LINE FORWARD]
# sign \x008c 237 [PARTIAL LINE BACKWARD]
# sign \x008d 257 [REVERSE LINE FEED]
# sign \x008e 478 [SINGLE SHIFT TWO]
# sign \x008f 12467 [SINGLE SHIFT THREE]
# sign \x0090 124678 [DEVICE CONTROL STRING]
# sign \x0091 568 [PRIVATE USE ONE]
# sign \x0092 1246 [PRIVATE USE TWO]
# sign \x0093 356 [SET TRANSMIT STATE]
# sign \x0094 1238 [CANCEL CHARACTER]
# sign \x0095 278 [MESSAGE WAITING]
# sign \x0096 2378 [START OF GUARDED AREA]
# sign \x0097 13568 [END OF GUARDED AREA]
# sign \x0098 2578 [START OF STRING]
# sign \x0099 1245678
# sign \x009a 2678 [SINGLE CHARACTER INTRODUCER]
# sign \x009b 123458 [CONTROL SEQUENCE INTRODUCER]
# sign \x009c 67 [STRING TERMINATOR]
# sign \x009d 124567 [OPERATING SYSTEM COMMAND]
# sign \x009e 235678 [PRIVACY MESSAGE]
# sign \x009f 12345678 [APPLICATION PROGRAM COMMAND]