blob: febde5753aa2a345db5e911cde88b375abdf2a46 [file] [log] [blame]
# liblouis Braille Translation and Back-Translation Library
#
# Copyright (C) 2017 Bert Frees
#
# This file is part of liblouis.
#
# liblouis is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 2.1 of the License, or
# (at your option) any later version.
#
# liblouis is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
#
import argparse
import csv
import fileinput
from utils import *
class Reader:
def __init__(self, files, encoding):
self.reader = csv.reader(fileinput.FileInput(files, openhook=fileinput.hook_encoded(encoding)),
delimiter='\t', quoting=csv.QUOTE_NONE)
def __iter__(self):
return self
def __next__(self):
row = next(self.reader)
if not row or row[0].startswith("#"):
return next(self)
if not len(row) == 3:
printerrln('expected 3 columns, got %s: %s' % (len(row),row))
exit(1)
if not row[0] == "":
printerrln("expected first column to be empty, got '%s'" % (row[0],))
exit(1)
maybe_chunked_text, braille = row[1:3]
maybe_chunked_text = to_lowercase(maybe_chunked_text)
text, chunked_text = read_text(maybe_chunked_text)
braille = braille if braille != "" else None
if braille != None:
if '0' in to_dot_pattern(braille).split('-'):
printerrln('invalid braille: %s' % (braille,))
exit(1)
exit_if_not(not chunked_text or validate_chunks(chunked_text))
return {'text': text,
'braille': braille,
'chunked_text': chunked_text}
def main():
parser = argparse.ArgumentParser(description="Submit entries to dictionary.")
parser.add_argument('FILE', nargs='*', default=['-'], help="TSV file with entries")
parser.add_argument('-d', '--dictionary', required=True, dest="DICTIONARY",
help="dictionary file")
parser.add_argument('-t', '--table', required=True, dest="TABLE",
help="translation table to be used for converting words to lowercase")
args = parser.parse_args()
load_table(args.TABLE)
conn, c = open_dictionary(args.DICTIONARY)
reader = Reader(args.FILE, "utf-8")
c.execute("CREATE TABLE IF NOT EXISTS dictionary (text TEXT PRIMARY KEY, braille TEXT, chunked_text TEXT, frequency INTEGER)")
for row in reader:
c.execute("INSERT OR IGNORE INTO dictionary VALUES (:text,:braille,:chunked_text,0)", row)
if c.lastrowid == 0:
c.execute("UPDATE dictionary SET braille=:braille,chunked_text=:chunked_text WHERE text=:text", row)
conn.commit()
conn.close()
if __name__ == "__main__": main()