tools/lou_maketable.d/utils.py - manifest_repos/liblouis-github - Git at Google

 # liblouis Braille Translation and Back-Translation Library
 #
 # Copyright (C) 2017 Bert Frees
 #
 # This file is part of liblouis.
 #
 # liblouis is free software: you can redistribute it and/or modify it
 # under the terms of the GNU Lesser General Public License as published
 # by the Free Software Foundation, either version 2.1 of the License, or
 # (at your option) any later version.
 #
 # liblouis is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 # Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
 # License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
 #

 from ctypes import *
 from itertools import takewhile, zip_longest, chain, tee
 # Older iterator-style names kept as aliases of the builtin zip and of
 # itertools.zip_longest; the helpers in this module refer to these aliases.
 izip = zip
 izip_longest = zip_longest

 from louis import _loader, liblouis, outlenMultiplier
 import re
 import sqlite3
 import sys

 def exit_if_not(expression):
     """Raise a bare ``RuntimeError`` unless ``expression`` is truthy."""
     if expression:
         return
     raise RuntimeError()

 # Fail at import time (RuntimeError) unless liblouis reports a character size
 # of 4 -- presumably bytes per character; confirm against the liblouis docs.
 exit_if_not(liblouis.lou_charSize() == 4)

 # ctypes signatures for the per-character helpers: both take a single wide
 # character, and toLowercase also returns one.
 liblouis.isLetter.argtypes = (c_wchar,)
 liblouis.toLowercase.argtypes = (c_wchar,)
 liblouis.toLowercase.restype = c_wchar

 def println(line=""):
     """Write ``line`` (formatted with ``%s``) plus a newline to standard output."""
     rendered = "%s\n" % line
     sys.stdout.write(rendered)

 def printerrln(line=""):
     """Write ``line`` (formatted with ``%s``) plus a newline to standard error."""
     rendered = "%s\n" % line
     sys.stderr.write(rendered)

 def validate_chunks(chunked_text):
     """Return True if ``chunked_text`` is well-formed chunk notation.

     Well-formed text is a sequence of units, where a unit is either one
     character other than ``(``, ``)`` and ``|``, or a parenthesised group of
     at least two such characters. Consecutive units may be separated by at
     most one ``|``, and the text must start and end with a unit.
     """
     # Units are matched one character (or one whole group) at a time, so every
     # input has a single possible parse. Matching runs of plain characters
     # with "+" inside a "*" repetition accepts the same strings but backtracks
     # exponentially on malformed input such as many letters followed by "(".
     unit = r"(?:[^)(|]|\([^)(|]{2,}\))"
     pattern = r"^" + unit + r"(?:\|?" + unit + r")*$"
     return re.search(pattern, chunked_text) is not None

 def print_chunks(text, hyphen_string):
     """Render ``text`` in chunk notation according to ``hyphen_string``.

     ``hyphen_string`` holds one of ``0``, ``1`` or ``x`` for each position
     between two adjacent characters of ``text``. A run of ``0`` positions is
     wrapped in parentheses, a ``1`` position becomes ``|`` and ``x`` adds
     nothing. Raises RuntimeError (via exit_if_not) when the length or the
     alphabet of ``hyphen_string`` is wrong.
     """
     exit_if_not(len(hyphen_string) == len(text) - 1 and re.search("^[01x]+$", hyphen_string))
     pieces = []
     previous = None
     # zip stops with the shorter hyphen string, so the last character of the
     # text is handled after the loop.
     for letter, mark in zip(text, hyphen_string):
         changed = mark != previous
         if changed and mark == "0":
             pieces.append("(")
         pieces.append(letter)
         if changed and previous == "0":
             pieces.append(")")
         if mark == "1":
             pieces.append("|")
         previous = mark
     pieces.append(text[len(hyphen_string)])
     if previous == "0":
         pieces.append(")")
     return "".join(pieces)

 def parse_chunks(chunked_text):
     """Split chunk notation into its plain text and a hyphen string.

     Returns ``(text, hyphen_string)`` where ``hyphen_string`` has one
     character per position between adjacent characters of ``text``: ``0``
     inside a parenthesised group, ``1`` where a ``|`` was written and ``x``
     everywhere else. Raises RuntimeError (via exit_if_not) when
     ``chunked_text`` fails validate_chunks.
     """
     exit_if_not(validate_chunks(chunked_text))
     text, _ = read_text(chunked_text)
     slots = len(text) - 1
     marks = ["x"] * slots
     seen = 0  # number of text characters consumed so far
     for symbol in chunked_text:
         if symbol == "(":
             # Provisionally mark everything from here on as inside a group.
             marks[seen:] = ["0"] * (slots - seen)
         elif symbol == ")":
             # The group ended: reset the position after its last character
             # and everything following it.
             marks[seen - 1:] = ["x"] * (slots - (seen - 1))
         elif symbol == "|":
             marks[seen - 1] = "1"
         else:
             seen += 1
             if seen > len(text):
                 break
     return text, "".join(marks)

 def read_text(maybe_chunked_text):
     """Return ``(text, chunked_text)`` for input that may use chunk notation.

     ``text`` is the input with every ``(``, ``)`` and ``|`` removed.
     ``chunked_text`` is the unmodified input when at least one of those
     characters was present, and None otherwise.
     """
     # subn strips the markers and reports how many it removed in one pass.
     text, removed = re.subn("[)(|]", "", maybe_chunked_text)
     chunked_text = maybe_chunked_text if removed else None
     return text, chunked_text

 def compare_chunks(expected_hyphen_string, actual_hyphen_string, text):
     """Interleave ``text`` with markers comparing two hyphen strings.

     For each position between characters: ``*`` when the actual string has a
     ``1`` and the expected one has ``1`` or ``x``, ``.`` when the actual
     string has a ``1`` that was not expected, ``-`` when an expected ``1`` is
     missing, and nothing otherwise. Returns the interleaved string if it
     contains ``-`` or ``.``, else None. Raises RuntimeError (via exit_if_not)
     on malformed hyphen strings.
     """
     exit_if_not(len(expected_hyphen_string) == len(text) - 1 and re.search("^[01x]+$", expected_hyphen_string))
     exit_if_not(len(actual_hyphen_string) == len(text) - 1 and re.search("^[01]+$", actual_hyphen_string))

     def classify(expected, actual):
         if actual == "1":
             return "*" if expected in "1x" else "."
         return "-" if expected == "1" else None

     markers = [classify(e, a) for e, a in zip(expected_hyphen_string, actual_hyphen_string)]
     chunk_errors = my_zip(text, markers)
     if re.search(r"[-\.]", chunk_errors):
         return chunk_errors
     return None

 # Split a string into words consisting of only letters (at least two).
 # Returns an empty list as soon as a non-letter has a "0" in the hyphen
 # string directly before or after it; the start and end of the text count
 # as "1".
 def split_into_words(text, hyphen_string):
     """Return pairs of (word, word_hyphen_string) found in ``text``.

     Each word is a run of two or more characters accepted by is_letter, paired
     with the slice of ``hyphen_string`` lying between its characters. The
     normal result is a zip iterator; the early exit returns ``[]``. Raises
     RuntimeError (via exit_if_not) on a malformed ``hyphen_string``.
     """
     exit_if_not(len(hyphen_string) == len(text) - 1 and re.search("^[01x]+$", hyphen_string))
     words = []
     word_hyphen_strings = []
     letters = []
     marks = []

     def flush():
         # Keep the pending run only if it has at least two letters; the first
         # mark is the position before the word and is dropped.
         if len(letters) > 1:
             words.append("".join(letters))
             word_hyphen_strings.append("".join(marks[1:]))
         del letters[:]
         del marks[:]

     # Pad with '1' so every character has a mark before and after it.
     neighbours = pairwise('1' + hyphen_string + '1')
     for char, (before, after) in izip(text, neighbours):
         if is_letter(char):
             letters.append(char)
             marks.append(before)
             continue
         if before not in "1x" or after not in "1x":
             return []
         flush()
     flush()
     return izip(words, word_hyphen_strings)

 # Table identifier passed to the liblouis calls below; set by load_table().
 table = None

 def load_table(new_table):
     """Store ``new_table`` in the module-level ``table`` and load it.

     A ``str`` is encoded as ASCII; any other value is converted with
     ``bytes()``. The resulting bytes are passed to ``liblouis.loadTable``.
     """
     global table
     table = new_table
     if isinstance(table, str):
         table = table.encode("ASCII")
     else:
         table = bytes(table)
     liblouis.loadTable(table)

 def is_letter(text):
     """Return True if ``liblouis.isLetter`` is truthy for every character.

     An empty ``text`` yields True.
     """
     # A generator lets all() stop at the first non-letter instead of calling
     # into liblouis for every remaining character.
     return all(liblouis.isLetter(c) for c in text)

 def to_lowercase(text):
     """Return ``text`` with each character mapped through ``liblouis.toLowercase``."""
     return "".join(map(liblouis.toLowercase, text))

 def to_dot_pattern(braille):
     """Return the ASCII dot-pattern string liblouis produces for ``braille``."""
     c_braille = create_unicode_buffer(braille)
     # Nine bytes are reserved per braille character, plus one extra so that a
     # terminating NUL always fits -- without it the buffer is zero bytes long
     # for empty input.
     # NOTE(review): assumes toDotPattern writes a NUL-terminated string of at
     # most 9 bytes per character; confirm against the C implementation.
     c_dots = create_string_buffer(9 * len(braille) + 1)
     liblouis.toDotPattern(c_braille, c_dots)
     return c_dots.value.decode('ascii')

 def hyphenate(text):
     """Return liblouis' hyphenation of ``text`` as a string of ``0``/``1``.

     Calls ``lou_hyphenate`` with the loaded ``table`` and raises RuntimeError
     (via exit_if_not) when it reports failure. The first byte of the result is
     skipped, and each remaining byte maps to ``1`` when its value is odd and
     ``0`` otherwise.
     """
     c_text = create_unicode_buffer(text)
     c_text_len = c_int(len(text))
     c_hyphen_string = create_string_buffer(len(text) + 1)
     succeeded = liblouis.lou_hyphenate(table, c_text, c_text_len, c_hyphen_string, 0)
     exit_if_not(succeeded)
     codes = c_hyphen_string.value[1:]
     return "".join("1" if int(code) % 2 else "0" for code in codes)

 def translate(text):
     """Translate ``text`` with the loaded table.

     Returns ``(braille, rules)``: the value of the output buffer and the first
     ``c_rules_len`` entries of a 16-slot pointer array handed to
     ``liblouis._lou_translate``. Raises RuntimeError (via exit_if_not) when
     the call reports failure.
     """
     max_rules = 16
     c_text = create_unicode_buffer(text)
     c_text_len = c_int(len(text))
     # Output buffer sized by the multiplier exported by the louis module.
     braille_len = len(text) * outlenMultiplier
     c_braille = create_unicode_buffer(braille_len)
     c_braille_len = c_int(braille_len)
     c_rules = (c_void_p * max_rules)()
     c_rules_len = c_int(max_rules)
     succeeded = liblouis._lou_translate(
         table, c_text, byref(c_text_len), c_braille, byref(c_braille_len),
         None, None, None, None, None, 0, c_rules, byref(c_rules_len))
     exit_if_not(succeeded)
     return c_braille.value, c_rules[0:c_rules_len.value]

 def get_rule(c_rule_pointer):
     """Return the rule behind ``c_rule_pointer`` as a tuple of its fields.

     The rule is printed by ``liblouis.printRule`` into a 128-character buffer
     and split on single spaces. Returns None when printRule reports failure.
     """
     c_rule_string = create_unicode_buffer(u"", 128)
     printed = liblouis.printRule(cast(c_rule_pointer, c_void_p), c_rule_string)
     if printed:
         return tuple(c_rule_string.value.split(" "))
     return None

 def suggest_chunks(text, braille):
     """Ask liblouis for a hyphen string that chunks ``text`` for ``braille``.

     Returns None when ``liblouis.suggestChunks`` reports failure. Otherwise
     the first and last characters of its output are dropped and the remaining
     ``0``/``1``/``x`` string, one character per position between characters
     of ``text``, is returned.
     """
     c_text = create_unicode_buffer(text)
     c_braille = create_unicode_buffer(braille)
     c_hyphen_string = create_string_buffer(len(text) + 2)
     found = liblouis.suggestChunks(c_text, c_braille, c_hyphen_string)
     if not found:
         return None
     raw = c_hyphen_string.value.decode('ascii')
     hyphen_string = raw[1:len(raw)-1]
     # Sanity check on what the C side produced.
     assert len(hyphen_string) == len(text) - 1 and re.search("^[01x]+$", hyphen_string)
     return hyphen_string

 def find_relevant_rules(text):
     """Return an iterator of rule tuples liblouis finds relevant to ``text``.

     ``liblouis.findRelevantRules`` is given 16 string buffers of 128
     characters each, followed by a None entry. The result lazily yields each
     buffer split on single spaces, stopping at the first empty or None entry.
     """
     c_text = create_unicode_buffer(text)
     max_rules = 16
     slots = [cast(create_unicode_buffer(u"", 128), c_wchar_p) for _ in range(0, max_rules)]
     slots.append(None)
     c_rules = (c_wchar_p * (max_rules + 1))(*slots)
     liblouis.findRelevantRules(c_text, c_rules)
     filled = takewhile(lambda rule: rule, c_rules)
     return map(lambda rule: tuple(rule.split(" ")), filled)

 def open_dictionary(dictionary):
     """Open the SQLite database ``dictionary``; return ``(connection, cursor)``."""
     connection = sqlite3.connect(dictionary)
     return connection, connection.cursor()

 def filterfalse(predicate, iterable):
     """Return a list of the items of ``iterable`` for which ``predicate`` is falsy."""
     rejected = []
     for item in iterable:
         if predicate(item):
             continue
         rejected.append(item)
     return rejected

 def partition(pred, iterable):
     """Split ``iterable`` by ``pred``; return ``(rejected, accepted)``.

     ``rejected`` is the list built by this module's filterfalse; ``accepted``
     is a lazy ``filter`` object over an independent copy of the input.
     """
     for_rejected, for_accepted = tee(iterable)
     rejected = filterfalse(pred, for_rejected)
     accepted = filter(pred, for_accepted)
     return rejected, accepted

 def pairwise(iterable):
     """Return an iterator over consecutive overlapping pairs of ``iterable``.

     ``s -> (s0, s1), (s1, s2), ...``; inputs with fewer than two items yield
     nothing.
     """
     leading, trailing = tee(iterable, 2)
     # Advance the second copy by one so the two copies are offset.
     next(trailing, None)
     return izip(leading, trailing)

 def my_zip(*iterables):
     """Interleave the items of ``iterables`` into one string.

     Items are taken round-robin (first of each, then second of each, ...);
     None items, including the padding for shorter iterables, are skipped.
     """
     interleaved = chain.from_iterable(izip_longest(*iterables))
     return "".join(item for item in interleaved if item is not None)

 class future:
     """Lazily evaluated value: calls ``f`` on first use and caches the result."""

     def __init__(self, f):
         self.is_realized = False
         self.f = f
         # Holds f itself until the first call replaces it with f's result.
         self.fut = f

     def __call__(self):
         """Return the cached result, computing it with ``f()`` on first call."""
         if self.is_realized:
             return self.fut
         self.fut = self.f()
         self.is_realized = True
         return self.fut
	# liblouis Braille Translation and Back-Translation Library
	#
	# Copyright (C) 2017 Bert Frees
	#
	# This file is part of liblouis.
	#
	# liblouis is free software: you can redistribute it and/or modify it
	# under the terms of the GNU Lesser General Public License as published
	# by the Free Software Foundation, either version 2.1 of the License, or
	# (at your option) any later version.
	#
	# liblouis is distributed in the hope that it will be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# Lesser General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public
	# License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
	#

	from ctypes import *
	from itertools import takewhile, zip_longest, chain, tee
	# Older iterator-style names kept as aliases of the builtin zip and of
	# itertools.zip_longest; the helpers in this module refer to these aliases.
	izip = zip
	izip_longest = zip_longest

	from louis import _loader, liblouis, outlenMultiplier
	import re
	import sqlite3
	import sys

	def exit_if_not(expression):
	    """Raise a bare ``RuntimeError`` unless ``expression`` is truthy."""
	    if not expression:
	        raise RuntimeError()

	# Fail at import time (RuntimeError) unless liblouis reports a character size
	# of 4 -- presumably bytes per character; confirm against the liblouis docs.
	exit_if_not(liblouis.lou_charSize() == 4)

	# ctypes signatures for the per-character helpers: both take a single wide
	# character, and toLowercase also returns one.
	liblouis.isLetter.argtypes = (c_wchar,)
	liblouis.toLowercase.argtypes = (c_wchar,)
	liblouis.toLowercase.restype = c_wchar

	def println(line=""):
	    """Write ``line`` (formatted with ``%s``) plus a newline to standard output."""
	    sys.stdout.write("%s\n" % line)

	def printerrln(line=""):
	    """Write ``line`` (formatted with ``%s``) plus a newline to standard error."""
	    sys.stderr.write("%s\n" % line)

	def validate_chunks(chunked_text):
	    """Return True if ``chunked_text`` is well-formed chunk notation.

	    Well-formed text is a sequence of units, where a unit is either one
	    character other than ``(``, ``)`` and ``|``, or a parenthesised group of
	    at least two such characters. Consecutive units may be separated by at
	    most one ``|``, and the text must start and end with a unit.
	    """
	    # The alternation bars must be unescaped "|"; an escaped bar only matches
	    # a literal pipe. Units are matched one character (or one whole group) at
	    # a time so that malformed input cannot trigger exponential backtracking.
	    unit = r"(?:[^)(|]|\([^)(|]{2,}\))"
	    pattern = r"^" + unit + r"(?:\|?" + unit + r")*$"
	    return re.search(pattern, chunked_text) is not None

	def print_chunks(text, hyphen_string):
	    """Render ``text`` in chunk notation according to ``hyphen_string``.

	    ``hyphen_string`` holds one of ``0``, ``1`` or ``x`` for each position
	    between two adjacent characters of ``text``. A run of ``0`` positions is
	    wrapped in parentheses, a ``1`` position becomes ``|`` and ``x`` adds
	    nothing. Raises RuntimeError (via exit_if_not) when the length or the
	    alphabet of ``hyphen_string`` is wrong.
	    """
	    exit_if_not(len(hyphen_string) == len(text) - 1 and re.search("^[01x]+$", hyphen_string))
	    chunked_text = []
	    k = 0
	    prev_c = None
	    for c in hyphen_string:
	        if c != prev_c and c == "0":
	            chunked_text.append("(")
	        chunked_text.append(text[k])
	        if c != prev_c and prev_c == "0":
	            chunked_text.append(")")
	        if c == "1":
	            # Plain pipe: a backslash here would end up in the output.
	            chunked_text.append("|")
	        prev_c = c
	        k += 1
	    # The text is one character longer than the hyphen string.
	    chunked_text.append(text[k])
	    if prev_c == "0":
	        chunked_text.append(")")
	    return "".join(chunked_text)

	def parse_chunks(chunked_text):
	    """Split chunk notation into its plain text and a hyphen string.

	    Returns ``(text, hyphen_string)`` where ``hyphen_string`` has one
	    character per position between adjacent characters of ``text``: ``0``
	    inside a parenthesised group, ``1`` where a ``|`` was written and ``x``
	    everywhere else. Raises RuntimeError (via exit_if_not) when
	    ``chunked_text`` fails validate_chunks.
	    """
	    exit_if_not(validate_chunks(chunked_text))
	    text, _ = read_text(chunked_text)
	    hyphen_string = ["x"] * (len(text) - 1)
	    k = 0  # number of text characters consumed so far
	    for c in chunked_text:
	        if c == "(":
	            # Provisionally mark everything from here on as inside a group.
	            hyphen_string[k:] = ["0"] * (len(text) - 1 - k)
	        elif c == ")":
	            # The group ended: reset the position after its last character
	            # and everything following it.
	            hyphen_string[k-1:] = ["x"] * (len(text) - 1 - (k-1))
	        elif c == "|":
	            # Single-character comparison: must be a bare pipe to ever match.
	            hyphen_string[k-1] = "1"
	        else:
	            k += 1
	            if k > len(text):
	                break
	    return text, "".join(hyphen_string)

	def read_text(maybe_chunked_text):
	    """Return ``(text, chunked_text)`` for input that may use chunk notation.

	    ``text`` is the input with every ``(``, ``)`` and ``|`` removed.
	    ``chunked_text`` is the unmodified input when at least one of those
	    characters was present, and None otherwise.
	    """
	    # subn strips the markers and reports how many it removed in one pass.
	    text, removed = re.subn("[)(|]", "", maybe_chunked_text)
	    chunked_text = maybe_chunked_text if removed else None
	    return text, chunked_text

	def compare_chunks(expected_hyphen_string, actual_hyphen_string, text):
	    """Interleave ``text`` with markers comparing two hyphen strings.

	    For each position between characters: ``*`` when the actual string has a
	    ``1`` and the expected one has ``1`` or ``x``, ``.`` when the actual
	    string has a ``1`` that was not expected, ``-`` when an expected ``1`` is
	    missing, and nothing otherwise. Returns the interleaved string if it
	    contains ``-`` or ``.``, else None. Raises RuntimeError (via exit_if_not)
	    on malformed hyphen strings.
	    """
	    exit_if_not(len(expected_hyphen_string) == len(text) - 1 and re.search("^[01x]+$", expected_hyphen_string))
	    exit_if_not(len(actual_hyphen_string) == len(text) - 1 and re.search("^[01]+$", actual_hyphen_string))
	    chunk_errors = my_zip(text,
	                          map(lambda e, a: "*" if e in "1x" and a == "1" else
	                                           "." if a == "1" else
	                                           "-" if e == "1" else None,
	                              expected_hyphen_string, actual_hyphen_string))
	    return chunk_errors if re.search(r"[-\.]", chunk_errors) else None

	# Split a string into words consisting of only letters (at least two).
	# Returns an empty list as soon as a non-letter has a "0" in the hyphen
	# string directly before or after it; the start and end of the text count
	# as "1".
	def split_into_words(text, hyphen_string):
	    """Return pairs of (word, word_hyphen_string) found in ``text``.

	    Each word is a run of two or more characters accepted by is_letter, paired
	    with the slice of ``hyphen_string`` lying between its characters. The
	    normal result is a zip iterator; the early exit returns ``[]``. Raises
	    RuntimeError (via exit_if_not) on a malformed ``hyphen_string``.
	    """
	    exit_if_not(len(hyphen_string) == len(text) - 1 and re.search("^[01x]+$", hyphen_string))
	    words = []
	    word_hyphen_strings = []
	    word = []
	    word_hyphen_string = []
	    # Pad with '1' so every character has a mark before and after it.
	    for c, (h1, h2) in izip(text, pairwise('1' + hyphen_string + '1')):
	        if is_letter(c):
	            word.append(c)
	            word_hyphen_string.append(h1)
	        elif h1 not in "1x" or h2 not in "1x":
	            return []
	        else:
	            if len(word) > 1:
	                words.append("".join(word))
	                # The first mark is the position before the word; drop it.
	                word_hyphen_strings.append("".join(word_hyphen_string[1:]))
	            word = []
	            word_hyphen_string = []
	    if len(word) > 1:
	        words.append("".join(word))
	        word_hyphen_strings.append("".join(word_hyphen_string[1:]))
	    return izip(words, word_hyphen_strings)

	# Table identifier passed to the liblouis calls below; set by load_table().
	table = None

	def load_table(new_table):
	    """Store ``new_table`` in the module-level ``table`` and load it.

	    A ``str`` is encoded as ASCII; any other value is converted with
	    ``bytes()``. The resulting bytes are passed to ``liblouis.loadTable``.
	    """
	    global table
	    table = new_table
	    table = table.encode("ASCII") if isinstance(table, str) else bytes(table)
	    liblouis.loadTable(table)

	def is_letter(text):
	    """Return True if ``liblouis.isLetter`` is truthy for every character.

	    An empty ``text`` yields True.
	    """
	    # A generator lets all() stop at the first non-letter instead of calling
	    # into liblouis for every remaining character.
	    return all(liblouis.isLetter(c) for c in text)

	def to_lowercase(text):
	    """Return ``text`` with each character mapped through ``liblouis.toLowercase``."""
	    return "".join([liblouis.toLowercase(c) for c in text])

	def to_dot_pattern(braille):
	    """Return the ASCII dot-pattern string liblouis produces for ``braille``."""
	    c_braille = create_unicode_buffer(braille)
	    # Nine bytes are reserved per braille character, plus one extra so that a
	    # terminating NUL always fits -- without it the buffer is zero bytes long
	    # for empty input.
	    # NOTE(review): assumes toDotPattern writes a NUL-terminated string of at
	    # most 9 bytes per character; confirm against the C implementation.
	    c_dots = create_string_buffer(9 * len(braille) + 1)
	    liblouis.toDotPattern(c_braille, c_dots)
	    return c_dots.value.decode('ascii')

	def hyphenate(text):
	    """Return liblouis' hyphenation of ``text`` as a string of ``0``/``1``.

	    Calls ``lou_hyphenate`` with the loaded ``table`` and raises RuntimeError
	    (via exit_if_not) when it reports failure. The first byte of the result is
	    skipped, and each remaining byte maps to ``1`` when its value is odd and
	    ``0`` otherwise.
	    """
	    c_text = create_unicode_buffer(text)
	    c_text_len = c_int(len(text))
	    c_hyphen_string = create_string_buffer(len(text) + 1)
	    exit_if_not(liblouis.lou_hyphenate(table, c_text, c_text_len, c_hyphen_string, 0))
	    return "".join(['1' if int(p) % 2 else '0' for p in c_hyphen_string.value[1:]])

	def translate(text):
	    """Translate ``text`` with the loaded table.

	    Returns ``(braille, rules)``: the value of the output buffer and the first
	    ``c_rules_len`` entries of a 16-slot pointer array handed to
	    ``liblouis._lou_translate``. Raises RuntimeError (via exit_if_not) when
	    the call reports failure.
	    """
	    c_text = create_unicode_buffer(text)
	    c_text_len = c_int(len(text))
	    # Output buffer sized by the multiplier exported by the louis module.
	    braille_len = len(text) * outlenMultiplier
	    c_braille = create_unicode_buffer(braille_len)
	    c_braille_len = c_int(braille_len)
	    max_rules = 16
	    c_rules = (c_void_p * max_rules)()
	    c_rules_len = c_int(max_rules)
	    exit_if_not(liblouis._lou_translate(table, c_text, byref(c_text_len), c_braille, byref(c_braille_len),
	                                        None, None, None, None, None, 0, c_rules, byref(c_rules_len)))
	    return c_braille.value, c_rules[0:c_rules_len.value]

	def get_rule(c_rule_pointer):
	    """Return the rule behind ``c_rule_pointer`` as a tuple of its fields.

	    The rule is printed by ``liblouis.printRule`` into a 128-character buffer
	    and split on single spaces. Returns None when printRule reports failure.
	    """
	    c_rule_string = create_unicode_buffer(u"", 128)
	    if not liblouis.printRule(cast(c_rule_pointer, c_void_p), c_rule_string):
	        return None
	    return tuple(c_rule_string.value.split(" "))

	def suggest_chunks(text, braille):
	    """Ask liblouis for a hyphen string that chunks ``text`` for ``braille``.

	    Returns None when ``liblouis.suggestChunks`` reports failure. Otherwise
	    the first and last characters of its output are dropped and the remaining
	    ``0``/``1``/``x`` string, one character per position between characters
	    of ``text``, is returned.
	    """
	    c_text = create_unicode_buffer(text)
	    c_braille = create_unicode_buffer(braille)
	    c_hyphen_string = create_string_buffer(len(text) + 2)
	    if not liblouis.suggestChunks(c_text, c_braille, c_hyphen_string):
	        return None
	    hyphen_string = c_hyphen_string.value.decode('ascii')
	    hyphen_string = hyphen_string[1:len(hyphen_string)-1]
	    # Sanity check on what the C side produced.
	    assert len(hyphen_string) == len(text) - 1 and re.search("^[01x]+$", hyphen_string)
	    return hyphen_string

	def find_relevant_rules(text):
	    """Return an iterator of rule tuples liblouis finds relevant to ``text``.

	    ``liblouis.findRelevantRules`` is given 16 string buffers of 128
	    characters each, followed by a None entry. The result lazily yields each
	    buffer split on single spaces, stopping at the first empty or None entry.
	    """
	    c_text = create_unicode_buffer(text)
	    max_rules = 16
	    c_rules = [u""] * max_rules + [None]
	    for i in range(0, max_rules):
	        c_rules[i] = create_unicode_buffer(c_rules[i], 128)
	        c_rules[i] = cast(c_rules[i], c_wchar_p)
	    c_rules = (c_wchar_p * (max_rules + 1))(*c_rules)
	    liblouis.findRelevantRules(c_text, c_rules)
	    return map(lambda x: tuple(x.split(" ")), takewhile(lambda x: x, c_rules))

	def open_dictionary(dictionary):
	    """Open the SQLite database ``dictionary``; return ``(connection, cursor)``."""
	    conn = sqlite3.connect(dictionary)
	    c = conn.cursor()
	    return conn, c

	def filterfalse(predicate, iterable):
	    """Return a list of the items of ``iterable`` for which ``predicate`` is falsy."""
	    return [x for x in iterable if not predicate(x)]

	def partition(pred, iterable):
	    """Split ``iterable`` by ``pred``; return ``(rejected, accepted)``.

	    ``rejected`` is the list built by this module's filterfalse; ``accepted``
	    is a lazy ``filter`` object over an independent copy of the input.
	    """
	    t1, t2 = tee(iterable)
	    return filterfalse(pred, t1), filter(pred, t2)

	def pairwise(iterable):
	    """Return an iterator over consecutive overlapping pairs of ``iterable``.

	    ``s -> (s0, s1), (s1, s2), ...``; inputs with fewer than two items yield
	    nothing.
	    """
	    a, b = tee(iterable)
	    # Advance the second copy by one so the two copies are offset.
	    next(b, None)
	    return izip(a, b)

	def my_zip(*iterables):
	    """Interleave the items of ``iterables`` into one string.

	    Items are taken round-robin (first of each, then second of each, ...);
	    None items, including the padding for shorter iterables, are skipped.
	    """
	    # Both unpackings matter: the iterables must be zipped against each other
	    # and the resulting tuples flattened, otherwise join() receives tuples.
	    interleaved = chain.from_iterable(izip_longest(*iterables))
	    return "".join([x for x in interleaved if x is not None])

	class future:
	    """Lazily evaluated value: calls ``f`` on first use and caches the result."""

	    def __init__(self, f):
	        self.f = f
	        # Holds f itself until the first call replaces it with f's result.
	        self.fut = f
	        self.is_realized = False

	    def __call__(self):
	        """Return the cached result, computing it with ``f()`` on first call."""
	        if not self.is_realized:
	            self.fut = self.f()
	            self.is_realized = True
	        return self.fut