| /* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published |
| by the Free Software Foundation; version 2 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software Foundation, |
| Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
| |
| #ifdef HAVE_CONFIG_H |
| # include <config.h> |
| #endif |
| |
| #include <alloca.h> |
| #include <byteswap.h> |
| #include <endian.h> |
| #include <errno.h> |
| #include <limits.h> |
| #include <obstack.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <wchar.h> |
| #include <wctype.h> |
| #include <sys/uio.h> |
| |
| #include "localedef.h" |
| #include "charmap.h" |
| #include "localeinfo.h" |
| #include "langinfo.h" |
| #include "linereader.h" |
| #include "locfile-token.h" |
| #include "locfile.h" |
| |
| #include <assert.h> |
| |
| |
| #ifdef PREDEFINED_CLASSES |
| /* These are the extra bits not in wctype.h since these are not preallocated |
| classes. */ |
| # define _ISwspecial1 (1 << 29) |
| # define _ISwspecial2 (1 << 30) |
| # define _ISwspecial3 (1 << 31) |
| #endif |
| |
| |
| /* The bit used for representing a special class. */ |
| #define BITPOS(class) ((class) - tok_upper) |
| #define BIT(class) (_ISbit (BITPOS (class))) |
| #define BITw(class) (_ISwbit (BITPOS (class))) |
| |
| #define ELEM(ctype, collection, idx, value) \ |
| *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \ |
| &ctype->collection##_act idx, value) |
| |
| |
| /* To be compatible with former implementations we for now restrict |
| the number of bits for character classes to 16. When compatibility |
| is not necessary anymore increase the number to 32. */ |
| #define char_class_t uint16_t |
| #define char_class32_t uint32_t |
| |
| |
| /* Type to describe a transliteration action. We have a possibly |
| multiple character from-string and a set of multiple character |
| to-strings. All are 32bit values since this is what is used in |
| the gconv functions. */ |
| struct translit_to_t |
| { |
| uint32_t *str; |
| |
| struct translit_to_t *next; |
| }; |
| |
| struct translit_t |
| { |
| uint32_t *from; |
| |
| const char *fname; |
| size_t lineno; |
| |
| struct translit_to_t *to; |
| |
| struct translit_t *next; |
| }; |
| |
| struct translit_ignore_t |
| { |
| uint32_t from; |
| uint32_t to; |
| uint32_t step; |
| |
| const char *fname; |
| size_t lineno; |
| |
| struct translit_ignore_t *next; |
| }; |
| |
| |
| /* Type to describe a transliteration include statement. */ |
| struct translit_include_t |
| { |
| const char *copy_locale; |
| const char *copy_repertoire; |
| |
| struct translit_include_t *next; |
| }; |
| |
| |
| /* Sparse table of uint32_t. */ |
| #define TABLE idx_table |
| #define ELEMENT uint32_t |
| #define DEFAULT ((uint32_t) ~0) |
| #define NO_FINALIZE |
| #include "3level.h" |
| |
| |
| /* The real definition of the struct for the LC_CTYPE locale. */ |
| struct locale_ctype_t |
| { |
| uint32_t *charnames; |
| size_t charnames_max; |
| size_t charnames_act; |
| /* An index lookup table, to speedup find_idx. */ |
| struct idx_table charnames_idx; |
| |
| struct repertoire_t *repertoire; |
| |
| /* We will allow up to 8 * sizeof (uint32_t) character classes. */ |
| #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t)) |
| size_t nr_charclass; |
| const char *classnames[MAX_NR_CHARCLASS]; |
| uint32_t last_class_char; |
| uint32_t class256_collection[256]; |
| uint32_t *class_collection; |
| size_t class_collection_max; |
| size_t class_collection_act; |
| uint32_t class_done; |
| uint32_t class_offset; |
| |
| struct charseq **mbdigits; |
| size_t mbdigits_act; |
| size_t mbdigits_max; |
| uint32_t *wcdigits; |
| size_t wcdigits_act; |
| size_t wcdigits_max; |
| |
| struct charseq *mboutdigits[10]; |
| uint32_t wcoutdigits[10]; |
| size_t outdigits_act; |
| |
| /* If the following number ever turns out to be too small simply |
| increase it. But I doubt it will. --drepper@gnu */ |
| #define MAX_NR_CHARMAP 16 |
| const char *mapnames[MAX_NR_CHARMAP]; |
| uint32_t *map_collection[MAX_NR_CHARMAP]; |
| uint32_t map256_collection[2][256]; |
| size_t map_collection_max[MAX_NR_CHARMAP]; |
| size_t map_collection_act[MAX_NR_CHARMAP]; |
| size_t map_collection_nr; |
| size_t last_map_idx; |
| int tomap_done[MAX_NR_CHARMAP]; |
| uint32_t map_offset; |
| |
| /* Transliteration information. */ |
| struct translit_include_t *translit_include; |
| struct translit_t *translit; |
| struct translit_ignore_t *translit_ignore; |
| uint32_t ntranslit_ignore; |
| |
| uint32_t *default_missing; |
| const char *default_missing_file; |
| size_t default_missing_lineno; |
| |
| uint32_t to_nonascii; |
| uint32_t nonascii_case; |
| |
| /* The arrays for the binary representation. */ |
| char_class_t *ctype_b; |
| char_class32_t *ctype32_b; |
| uint32_t **map_b; |
| uint32_t **map32_b; |
| uint32_t **class_b; |
| struct iovec *class_3level; |
| struct iovec *map_3level; |
| uint32_t *class_name_ptr; |
| uint32_t *map_name_ptr; |
| struct iovec width; |
| uint32_t mb_cur_max; |
| const char *codeset_name; |
| uint32_t *translit_from_idx; |
| uint32_t *translit_from_tbl; |
| uint32_t *translit_to_idx; |
| uint32_t *translit_to_tbl; |
| uint32_t translit_idx_size; |
| size_t translit_from_tbl_size; |
| size_t translit_to_tbl_size; |
| |
| struct obstack mempool; |
| }; |
| |
| |
| /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless |
| whether 'int' is 16 bit, 32 bit, or 64 bit. */ |
| #define EMPTY ((uint32_t) ~0) |
| |
| |
| #define obstack_chunk_alloc xmalloc |
| #define obstack_chunk_free free |
| |
| |
| /* Prototypes for local functions. */ |
| static void ctype_startup (struct linereader *lr, struct localedef_t *locale, |
| const struct charmap_t *charmap, |
| struct localedef_t *copy_locale, |
| int ignore_content); |
| static void ctype_class_new (struct linereader *lr, |
| struct locale_ctype_t *ctype, const char *name); |
| static void ctype_map_new (struct linereader *lr, |
| struct locale_ctype_t *ctype, |
| const char *name, const struct charmap_t *charmap); |
| static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table, |
| size_t *max, size_t *act, unsigned int idx); |
| static void set_class_defaults (struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire); |
| static void allocate_arrays (struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire); |
| |
| |
| static const char *longnames[] = |
| { |
| "zero", "one", "two", "three", "four", |
| "five", "six", "seven", "eight", "nine" |
| }; |
| static const char *uninames[] = |
| { |
| "U00000030", "U00000031", "U00000032", "U00000033", "U00000034", |
| "U00000035", "U00000036", "U00000037", "U00000038", "U00000039" |
| }; |
| static const unsigned char digits[] = "0123456789"; |
| |
| |
| static void |
| ctype_startup (struct linereader *lr, struct localedef_t *locale, |
| const struct charmap_t *charmap, |
| struct localedef_t *copy_locale, int ignore_content) |
| { |
| unsigned int cnt; |
| struct locale_ctype_t *ctype; |
| |
| if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL) |
| { |
| if (copy_locale == NULL) |
| { |
| /* Allocate the needed room. */ |
| locale->categories[LC_CTYPE].ctype = ctype = |
| (struct locale_ctype_t *) xcalloc (1, |
| sizeof (struct locale_ctype_t)); |
| |
| /* We have seen no names yet. */ |
| ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512; |
| ctype->charnames = |
| (unsigned int *) xmalloc (ctype->charnames_max |
| * sizeof (unsigned int)); |
| for (cnt = 0; cnt < 256; ++cnt) |
| ctype->charnames[cnt] = cnt; |
| ctype->charnames_act = 256; |
| idx_table_init (&ctype->charnames_idx); |
| |
| /* Fill character class information. */ |
| ctype->last_class_char = ILLEGAL_CHAR_VALUE; |
| /* The order of the following instructions determines the bit |
| positions! */ |
| ctype_class_new (lr, ctype, "upper"); |
| ctype_class_new (lr, ctype, "lower"); |
| ctype_class_new (lr, ctype, "alpha"); |
| ctype_class_new (lr, ctype, "digit"); |
| ctype_class_new (lr, ctype, "xdigit"); |
| ctype_class_new (lr, ctype, "space"); |
| ctype_class_new (lr, ctype, "print"); |
| ctype_class_new (lr, ctype, "graph"); |
| ctype_class_new (lr, ctype, "blank"); |
| ctype_class_new (lr, ctype, "cntrl"); |
| ctype_class_new (lr, ctype, "punct"); |
| ctype_class_new (lr, ctype, "alnum"); |
| #ifdef PREDEFINED_CLASSES |
| /* The following are extensions from ISO 14652. */ |
| ctype_class_new (lr, ctype, "left_to_right"); |
| ctype_class_new (lr, ctype, "right_to_left"); |
| ctype_class_new (lr, ctype, "num_terminator"); |
| ctype_class_new (lr, ctype, "num_separator"); |
| ctype_class_new (lr, ctype, "segment_separator"); |
| ctype_class_new (lr, ctype, "block_separator"); |
| ctype_class_new (lr, ctype, "direction_control"); |
| ctype_class_new (lr, ctype, "sym_swap_layout"); |
| ctype_class_new (lr, ctype, "char_shape_selector"); |
| ctype_class_new (lr, ctype, "num_shape_selector"); |
| ctype_class_new (lr, ctype, "non_spacing"); |
| ctype_class_new (lr, ctype, "non_spacing_level3"); |
| ctype_class_new (lr, ctype, "normal_connect"); |
| ctype_class_new (lr, ctype, "r_connect"); |
| ctype_class_new (lr, ctype, "no_connect"); |
| ctype_class_new (lr, ctype, "no_connect-space"); |
| ctype_class_new (lr, ctype, "vowel_connect"); |
| #endif |
| |
| ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512; |
| ctype->class_collection |
| = (uint32_t *) xcalloc (sizeof (unsigned long int), |
| ctype->class_collection_max); |
| ctype->class_collection_act = 256; |
| |
| /* Fill character map information. */ |
| ctype->last_map_idx = MAX_NR_CHARMAP; |
| ctype_map_new (lr, ctype, "toupper", charmap); |
| ctype_map_new (lr, ctype, "tolower", charmap); |
| #ifdef PREDEFINED_CLASSES |
| ctype_map_new (lr, ctype, "tosymmetric", charmap); |
| #endif |
| |
| /* Fill first 256 entries in `toXXX' arrays. */ |
| for (cnt = 0; cnt < 256; ++cnt) |
| { |
| ctype->map_collection[0][cnt] = cnt; |
| ctype->map_collection[1][cnt] = cnt; |
| #ifdef PREDEFINED_CLASSES |
| ctype->map_collection[2][cnt] = cnt; |
| #endif |
| ctype->map256_collection[0][cnt] = cnt; |
| ctype->map256_collection[1][cnt] = cnt; |
| } |
| |
| if (enc_not_ascii_compatible) |
| ctype->to_nonascii = 1; |
| |
| obstack_init (&ctype->mempool); |
| } |
| else |
| ctype = locale->categories[LC_CTYPE].ctype = |
| copy_locale->categories[LC_CTYPE].ctype; |
| } |
| } |
| |
| |
| void |
| ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap) |
| { |
| /* See POSIX.2, table 2-6 for the meaning of the following table. */ |
| #define NCLASS 12 |
| static const struct |
| { |
| const char *name; |
| const char allow[NCLASS]; |
| } |
| valid_table[NCLASS] = |
| { |
| /* The order is important. See token.h for more information. |
| M = Always, D = Default, - = Permitted, X = Mutually exclusive */ |
| { "upper", "--MX-XDDXXX-" }, |
| { "lower", "--MX-XDDXXX-" }, |
| { "alpha", "---X-XDDXXX-" }, |
| { "digit", "XXX--XDDXXX-" }, |
| { "xdigit", "-----XDDXXX-" }, |
| { "space", "XXXXX------X" }, |
| { "print", "---------X--" }, |
| { "graph", "---------X--" }, |
| { "blank", "XXXXXM-----X" }, |
| { "cntrl", "XXXXX-XX--XX" }, |
| { "punct", "XXXXX-DD-X-X" }, |
| { "alnum", "-----XDDXXX-" } |
| }; |
| size_t cnt; |
| int cls1, cls2; |
| uint32_t space_value; |
| struct charseq *space_seq; |
| struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; |
| int warned; |
| const void *key; |
| size_t len; |
| void *vdata; |
| void *curs; |
| |
| /* Now resolve copying and also handle completely missing definitions. */ |
| if (ctype == NULL) |
| { |
| const char *repertoire_name; |
| |
| /* First see whether we were supposed to copy. If yes, find the |
| actual definition. */ |
| if (locale->copy_name[LC_CTYPE] != NULL) |
| { |
| /* Find the copying locale. This has to happen transitively since |
| the locale we are copying from might also copying another one. */ |
| struct localedef_t *from = locale; |
| |
| do |
| from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE], |
| from->repertoire_name, charmap); |
| while (from->categories[LC_CTYPE].ctype == NULL |
| && from->copy_name[LC_CTYPE] != NULL); |
| |
| ctype = locale->categories[LC_CTYPE].ctype |
| = from->categories[LC_CTYPE].ctype; |
| } |
| |
| /* If there is still no definition issue an warning and create an |
| empty one. */ |
| if (ctype == NULL) |
| { |
| if (! be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| No definition for %s category found"), "LC_CTYPE")); |
| ctype_startup (NULL, locale, charmap, NULL, 0); |
| ctype = locale->categories[LC_CTYPE].ctype; |
| } |
| |
| /* Get the repertoire we have to use. */ |
| repertoire_name = locale->repertoire_name ?: repertoire_global; |
| if (repertoire_name != NULL) |
| ctype->repertoire = repertoire_read (repertoire_name); |
| } |
| |
| /* We need the name of the currently used 8-bit character set to |
| make correct conversion between this 8-bit representation and the |
| ISO 10646 character set used internally for wide characters. */ |
| ctype->codeset_name = charmap->code_set_name; |
| if (ctype->codeset_name == NULL) |
| { |
| if (! be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| No character set name specified in charmap"))); |
| ctype->codeset_name = "//UNKNOWN//"; |
| } |
| |
| /* Set default value for classes not specified. */ |
| set_class_defaults (ctype, charmap, ctype->repertoire); |
| |
| /* Check according to table. */ |
| for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) |
| { |
| uint32_t tmp = ctype->class_collection[cnt]; |
| |
| if (tmp != 0) |
| { |
| for (cls1 = 0; cls1 < NCLASS; ++cls1) |
| if ((tmp & _ISwbit (cls1)) != 0) |
| for (cls2 = 0; cls2 < NCLASS; ++cls2) |
| if (valid_table[cls1].allow[cls2] != '-') |
| { |
| int eq = (tmp & _ISwbit (cls2)) != 0; |
| switch (valid_table[cls1].allow[cls2]) |
| { |
| case 'M': |
| if (!eq) |
| { |
| uint32_t value = ctype->charnames[cnt]; |
| |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| character L'\\u%0*x' in class `%s' must be in class `%s'"), |
| value > 0xffff ? 8 : 4, |
| value, |
| valid_table[cls1].name, |
| valid_table[cls2].name)); |
| } |
| break; |
| |
| case 'X': |
| if (eq) |
| { |
| uint32_t value = ctype->charnames[cnt]; |
| |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| character L'\\u%0*x' in class `%s' must not be in class `%s'"), |
| value > 0xffff ? 8 : 4, |
| value, |
| valid_table[cls1].name, |
| valid_table[cls2].name)); |
| } |
| break; |
| |
| case 'D': |
| ctype->class_collection[cnt] |= _ISwbit (cls2); |
| break; |
| |
| default: |
| WITH_CUR_LOCALE (error (5, 0, _("\ |
| internal error in %s, line %u"), __FUNCTION__, __LINE__)); |
| } |
| } |
| } |
| } |
| |
| for (cnt = 0; cnt < 256; ++cnt) |
| { |
| uint32_t tmp = ctype->class256_collection[cnt]; |
| |
| if (tmp != 0) |
| { |
| for (cls1 = 0; cls1 < NCLASS; ++cls1) |
| if ((tmp & _ISbit (cls1)) != 0) |
| for (cls2 = 0; cls2 < NCLASS; ++cls2) |
| if (valid_table[cls1].allow[cls2] != '-') |
| { |
| int eq = (tmp & _ISbit (cls2)) != 0; |
| switch (valid_table[cls1].allow[cls2]) |
| { |
| case 'M': |
| if (!eq) |
| { |
| char buf[17]; |
| |
| snprintf (buf, sizeof buf, "\\%Zo", cnt); |
| |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| character '%s' in class `%s' must be in class `%s'"), |
| buf, |
| valid_table[cls1].name, |
| valid_table[cls2].name)); |
| } |
| break; |
| |
| case 'X': |
| if (eq) |
| { |
| char buf[17]; |
| |
| snprintf (buf, sizeof buf, "\\%Zo", cnt); |
| |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| character '%s' in class `%s' must not be in class `%s'"), |
| buf, |
| valid_table[cls1].name, |
| valid_table[cls2].name)); |
| } |
| break; |
| |
| case 'D': |
| ctype->class256_collection[cnt] |= _ISbit (cls2); |
| break; |
| |
| default: |
| WITH_CUR_LOCALE (error (5, 0, _("\ |
| internal error in %s, line %u"), __FUNCTION__, __LINE__)); |
| } |
| } |
| } |
| } |
| |
| /* ... and now test <SP> as a special case. */ |
| space_value = 32; |
| if (((cnt = BITPOS (tok_space), |
| (ELEM (ctype, class_collection, , space_value) |
| & BITw (tok_space)) == 0) |
| || (cnt = BITPOS (tok_blank), |
| (ELEM (ctype, class_collection, , space_value) |
| & BITw (tok_blank)) == 0))) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), |
| valid_table[cnt].name)); |
| } |
| else if (((cnt = BITPOS (tok_punct), |
| (ELEM (ctype, class_collection, , space_value) |
| & BITw (tok_punct)) != 0) |
| || (cnt = BITPOS (tok_graph), |
| (ELEM (ctype, class_collection, , space_value) |
| & BITw (tok_graph)) |
| != 0))) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| <SP> character must not be in class `%s'"), |
| valid_table[cnt].name)); |
| } |
| else |
| ELEM (ctype, class_collection, , space_value) |= BITw (tok_print); |
| |
| space_seq = charmap_find_value (charmap, "SP", 2); |
| if (space_seq == NULL) |
| space_seq = charmap_find_value (charmap, "space", 5); |
| if (space_seq == NULL) |
| space_seq = charmap_find_value (charmap, "U00000020", 9); |
| if (space_seq == NULL || space_seq->nbytes != 1) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| character <SP> not defined in character map"))); |
| } |
| else if (((cnt = BITPOS (tok_space), |
| (ctype->class256_collection[space_seq->bytes[0]] |
| & BIT (tok_space)) == 0) |
| || (cnt = BITPOS (tok_blank), |
| (ctype->class256_collection[space_seq->bytes[0]] |
| & BIT (tok_blank)) == 0))) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), |
| valid_table[cnt].name)); |
| } |
| else if (((cnt = BITPOS (tok_punct), |
| (ctype->class256_collection[space_seq->bytes[0]] |
| & BIT (tok_punct)) != 0) |
| || (cnt = BITPOS (tok_graph), |
| (ctype->class256_collection[space_seq->bytes[0]] |
| & BIT (tok_graph)) != 0))) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| <SP> character must not be in class `%s'"), |
| valid_table[cnt].name)); |
| } |
| else |
| ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); |
| |
| /* Check whether all single-byte characters make to their upper/lowercase |
| equivalent according to the ASCII rules. */ |
| for (cnt = 'A'; cnt <= 'Z'; ++cnt) |
| { |
| uint32_t uppval = ctype->map256_collection[0][cnt]; |
| uint32_t lowval = ctype->map256_collection[1][cnt]; |
| uint32_t lowuppval = ctype->map256_collection[0][lowval]; |
| uint32_t lowlowval = ctype->map256_collection[1][lowval]; |
| |
| if (uppval != cnt |
| || lowval != cnt + 0x20 |
| || lowuppval != cnt |
| || lowlowval != cnt + 0x20) |
| ctype->nonascii_case = 1; |
| } |
| for (cnt = 0; cnt < 256; ++cnt) |
| if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z') |
| if (ctype->map256_collection[0][cnt] != cnt |
| || ctype->map256_collection[1][cnt] != cnt) |
| ctype->nonascii_case = 1; |
| |
| /* Now that the tests are done make sure the name array contains all |
| characters which are handled in the WIDTH section of the |
| character set definition file. */ |
| if (charmap->width_rules != NULL) |
| for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) |
| { |
| unsigned char bytes[charmap->mb_cur_max]; |
| int nbytes = charmap->width_rules[cnt].from->nbytes; |
| |
| /* We have the range of character for which the width is |
| specified described using byte sequences of the multibyte |
| charset. We have to convert this to UCS4 now. And we |
| cannot simply convert the beginning and the end of the |
| sequence, we have to iterate over the byte sequence and |
| convert it for every single character. */ |
| memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); |
| |
| while (nbytes < charmap->width_rules[cnt].to->nbytes |
| || memcmp (bytes, charmap->width_rules[cnt].to->bytes, |
| nbytes) <= 0) |
| { |
| /* Find the UCS value for `bytes'. */ |
| int inner; |
| uint32_t wch; |
| struct charseq *seq |
| = charmap_find_symbol (charmap, (char *) bytes, nbytes); |
| |
| if (seq == NULL) |
| wch = ILLEGAL_CHAR_VALUE; |
| else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) |
| wch = seq->ucs4; |
| else |
| wch = repertoire_find_value (ctype->repertoire, seq->name, |
| strlen (seq->name)); |
| |
| if (wch != ILLEGAL_CHAR_VALUE) |
| /* We are only interested in the side-effects of the |
| `find_idx' call. It will add appropriate entries in |
| the name array if this is necessary. */ |
| (void) find_idx (ctype, NULL, NULL, NULL, wch); |
| |
| /* "Increment" the bytes sequence. */ |
| inner = nbytes - 1; |
| while (inner >= 0 && bytes[inner] == 0xff) |
| --inner; |
| |
| if (inner < 0) |
| { |
| /* We have to extend the byte sequence. */ |
| if (nbytes >= charmap->width_rules[cnt].to->nbytes) |
| break; |
| |
| bytes[0] = 1; |
| memset (&bytes[1], 0, nbytes); |
| ++nbytes; |
| } |
| else |
| { |
| ++bytes[inner]; |
| while (++inner < nbytes) |
| bytes[inner] = 0; |
| } |
| } |
| } |
| |
| /* Now set all the other characters of the character set to the |
| default width. */ |
| curs = NULL; |
| while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0) |
| { |
| struct charseq *data = (struct charseq *) vdata; |
| |
| if (data->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| data->ucs4 = repertoire_find_value (ctype->repertoire, |
| data->name, len); |
| |
| if (data->ucs4 != ILLEGAL_CHAR_VALUE) |
| (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4); |
| } |
| |
| /* There must be a multiple of 10 digits. */ |
| if (ctype->mbdigits_act % 10 != 0) |
| { |
| assert (ctype->mbdigits_act == ctype->wcdigits_act); |
| ctype->wcdigits_act -= ctype->mbdigits_act % 10; |
| ctype->mbdigits_act -= ctype->mbdigits_act % 10; |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| `digit' category has not entries in groups of ten"))); |
| } |
| |
| /* Check the input digits. There must be a multiple of ten available. |
| In each group it could be that one or the other character is missing. |
| In this case the whole group must be removed. */ |
| cnt = 0; |
| while (cnt < ctype->mbdigits_act) |
| { |
| size_t inner; |
| for (inner = 0; inner < 10; ++inner) |
| if (ctype->mbdigits[cnt + inner] == NULL) |
| break; |
| |
| if (inner == 10) |
| cnt += 10; |
| else |
| { |
| /* Remove the group. */ |
| memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10], |
| ((ctype->wcdigits_act - cnt - 10) |
| * sizeof (ctype->mbdigits[0]))); |
| ctype->mbdigits_act -= 10; |
| } |
| } |
| |
| /* If no input digits are given use the default. */ |
| if (ctype->mbdigits_act == 0) |
| { |
| if (ctype->mbdigits_max == 0) |
| { |
| ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, |
| 10 * sizeof (struct charseq *)); |
| ctype->mbdigits_max = 10; |
| } |
| |
| for (cnt = 0; cnt < 10; ++cnt) |
| { |
| ctype->mbdigits[cnt] = charmap_find_symbol (charmap, |
| (char *) digits + cnt, 1); |
| if (ctype->mbdigits[cnt] == NULL) |
| { |
| ctype->mbdigits[cnt] = charmap_find_symbol (charmap, |
| longnames[cnt], |
| strlen (longnames[cnt])); |
| if (ctype->mbdigits[cnt] == NULL) |
| { |
| /* Hum, this ain't good. */ |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| no input digits defined and none of the standard names in the charmap"))); |
| |
| ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, |
| sizeof (struct charseq) + 1); |
| |
| /* This is better than nothing. */ |
| ctype->mbdigits[cnt]->bytes[0] = digits[cnt]; |
| ctype->mbdigits[cnt]->nbytes = 1; |
| } |
| } |
| } |
| |
| ctype->mbdigits_act = 10; |
| } |
| |
| /* Check the wide character input digits. There must be a multiple |
| of ten available. In each group it could be that one or the other |
| character is missing. In this case the whole group must be |
| removed. */ |
| cnt = 0; |
| while (cnt < ctype->wcdigits_act) |
| { |
| size_t inner; |
| for (inner = 0; inner < 10; ++inner) |
| if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE) |
| break; |
| |
| if (inner == 10) |
| cnt += 10; |
| else |
| { |
| /* Remove the group. */ |
| memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10], |
| ((ctype->wcdigits_act - cnt - 10) |
| * sizeof (ctype->wcdigits[0]))); |
| ctype->wcdigits_act -= 10; |
| } |
| } |
| |
| /* If no input digits are given use the default. */ |
| if (ctype->wcdigits_act == 0) |
| { |
| if (ctype->wcdigits_max == 0) |
| { |
| ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, |
| 10 * sizeof (uint32_t)); |
| ctype->wcdigits_max = 10; |
| } |
| |
| for (cnt = 0; cnt < 10; ++cnt) |
| ctype->wcdigits[cnt] = L'0' + cnt; |
| |
| ctype->mbdigits_act = 10; |
| } |
| |
| /* Check the outdigits. */ |
| warned = 0; |
| for (cnt = 0; cnt < 10; ++cnt) |
| if (ctype->mboutdigits[cnt] == NULL) |
| { |
| static struct charseq replace[2]; |
| |
| if (!warned) |
| { |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| not all characters used in `outdigit' are available in the charmap"))); |
| warned = 1; |
| } |
| |
| replace[0].nbytes = 1; |
| replace[0].bytes[0] = '?'; |
| replace[0].bytes[1] = '\0'; |
| ctype->mboutdigits[cnt] = &replace[0]; |
| } |
| |
| warned = 0; |
| for (cnt = 0; cnt < 10; ++cnt) |
| if (ctype->wcoutdigits[cnt] == 0) |
| { |
| if (!warned) |
| { |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| not all characters used in `outdigit' are available in the repertoire"))); |
| warned = 1; |
| } |
| |
| ctype->wcoutdigits[cnt] = L'?'; |
| } |
| |
| /* Sort the entries in the translit_ignore list. */ |
| if (ctype->translit_ignore != NULL) |
| { |
| struct translit_ignore_t *firstp = ctype->translit_ignore; |
| struct translit_ignore_t *runp; |
| |
| ctype->ntranslit_ignore = 1; |
| |
| for (runp = firstp->next; runp != NULL; runp = runp->next) |
| { |
| struct translit_ignore_t *lastp = NULL; |
| struct translit_ignore_t *cmpp; |
| |
| ++ctype->ntranslit_ignore; |
| |
| for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next) |
| if (runp->from < cmpp->from) |
| break; |
| |
| runp->next = lastp; |
| if (lastp == NULL) |
| firstp = runp; |
| } |
| |
| ctype->translit_ignore = firstp; |
| } |
| } |
| |
| |
| void |
| ctype_output (struct localedef_t *locale, const struct charmap_t *charmap, |
| const char *output_path) |
| { |
| static const char nulbytes[4] = { 0, 0, 0, 0 }; |
| struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; |
| const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1) |
| + ctype->nr_charclass + ctype->map_collection_nr); |
| struct iovec *iov = alloca (sizeof *iov |
| * (2 + nelems + 2 * ctype->nr_charclass |
| + ctype->map_collection_nr + 4)); |
| struct locale_file data; |
| uint32_t *idx = alloca (sizeof *idx * (nelems + 1)); |
| uint32_t default_missing_len; |
| size_t elem, cnt, offset, total; |
| char *cp; |
| |
| /* Now prepare the output: Find the sizes of the table we can use. */ |
| allocate_arrays (ctype, charmap, ctype->repertoire); |
| |
| data.magic = LIMAGIC (LC_CTYPE); |
| data.n = nelems; |
| iov[0].iov_base = (void *) &data; |
| iov[0].iov_len = sizeof (data); |
| |
| iov[1].iov_base = (void *) idx; |
| iov[1].iov_len = nelems * sizeof (uint32_t); |
| |
| idx[0] = iov[0].iov_len + iov[1].iov_len; |
| offset = 0; |
| |
| for (elem = 0; elem < nelems; ++elem) |
| { |
| if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)) |
| switch (elem) |
| { |
| #define CTYPE_EMPTY(name) \ |
| case name: \ |
| iov[2 + elem + offset].iov_base = NULL; \ |
| iov[2 + elem + offset].iov_len = 0; \ |
| idx[elem + 1] = idx[elem]; \ |
| break |
| |
| CTYPE_EMPTY(_NL_CTYPE_GAP1); |
| CTYPE_EMPTY(_NL_CTYPE_GAP2); |
| CTYPE_EMPTY(_NL_CTYPE_GAP3); |
| CTYPE_EMPTY(_NL_CTYPE_GAP4); |
| CTYPE_EMPTY(_NL_CTYPE_GAP5); |
| CTYPE_EMPTY(_NL_CTYPE_GAP6); |
| |
| #define CTYPE_DATA(name, base, len) \ |
| case _NL_ITEM_INDEX (name): \ |
| iov[2 + elem + offset].iov_base = (base); \ |
| iov[2 + elem + offset].iov_len = (len); \ |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \ |
| break |
| |
| CTYPE_DATA (_NL_CTYPE_CLASS, |
| ctype->ctype_b, |
| (256 + 128) * sizeof (char_class_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_TOUPPER, |
| ctype->map_b[0], |
| (256 + 128) * sizeof (uint32_t)); |
| CTYPE_DATA (_NL_CTYPE_TOLOWER, |
| ctype->map_b[1], |
| (256 + 128) * sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_TOUPPER32, |
| ctype->map32_b[0], |
| 256 * sizeof (uint32_t)); |
| CTYPE_DATA (_NL_CTYPE_TOLOWER32, |
| ctype->map32_b[1], |
| 256 * sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_CLASS32, |
| ctype->ctype32_b, |
| 256 * sizeof (char_class32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET, |
| &ctype->class_offset, sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_MAP_OFFSET, |
| &ctype->map_offset, sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE, |
| &ctype->translit_idx_size, sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX, |
| ctype->translit_from_idx, |
| ctype->translit_idx_size * sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL, |
| ctype->translit_from_tbl, |
| ctype->translit_from_tbl_size); |
| |
| CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX, |
| ctype->translit_to_idx, |
| ctype->translit_idx_size * sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL, |
| ctype->translit_to_tbl, ctype->translit_to_tbl_size); |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES): |
| /* The class name array. */ |
| total = 0; |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset) |
| { |
| iov[2 + elem + offset].iov_base |
| = (void *) ctype->classnames[cnt]; |
| iov[2 + elem + offset].iov_len |
| = strlen (ctype->classnames[cnt]) + 1; |
| total += iov[2 + elem + offset].iov_len; |
| } |
| iov[2 + elem + offset].iov_base = (void *) nulbytes; |
| iov[2 + elem + offset].iov_len = 4 - (total % 4); |
| total += 4 - (total % 4); |
| |
| idx[elem + 1] = idx[elem] + total; |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES): |
| /* The class name array. */ |
| total = 0; |
| for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset) |
| { |
| iov[2 + elem + offset].iov_base |
| = (void *) ctype->mapnames[cnt]; |
| iov[2 + elem + offset].iov_len |
| = strlen (ctype->mapnames[cnt]) + 1; |
| total += iov[2 + elem + offset].iov_len; |
| } |
| iov[2 + elem + offset].iov_base = (void *) nulbytes; |
| iov[2 + elem + offset].iov_len = 4 - (total % 4); |
| total += 4 - (total % 4); |
| |
| idx[elem + 1] = idx[elem] + total; |
| break; |
| |
| CTYPE_DATA (_NL_CTYPE_WIDTH, |
| ctype->width.iov_base, |
| ctype->width.iov_len); |
| |
| CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX, |
| &ctype->mb_cur_max, sizeof (uint32_t)); |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME): |
| total = strlen (ctype->codeset_name) + 1; |
| if (total % 4 == 0) |
| iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name; |
| else |
| { |
| iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3); |
| memset (mempcpy (iov[2 + elem + offset].iov_base, |
| ctype->codeset_name, total), |
| '\0', 4 - (total & 3)); |
| total = (total + 3) & ~3; |
| } |
| iov[2 + elem + offset].iov_len = total; |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| |
| CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII, |
| &ctype->to_nonascii, sizeof (uint32_t)); |
| |
| CTYPE_DATA (_NL_CTYPE_NONASCII_CASE, |
| &ctype->nonascii_case, sizeof (uint32_t)); |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN): |
| iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); |
| iov[2 + elem + offset].iov_len = sizeof (uint32_t); |
| *(uint32_t *) iov[2 + elem + offset].iov_base = |
| ctype->mbdigits_act / 10; |
| idx[elem + 1] = idx[elem] + sizeof (uint32_t); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN): |
| /* Align entries. */ |
| iov[2 + elem + offset].iov_base = (void *) nulbytes; |
| iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4; |
| idx[elem] += iov[2 + elem + offset].iov_len; |
| ++offset; |
| |
| iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); |
| iov[2 + elem + offset].iov_len = sizeof (uint32_t); |
| *(uint32_t *) iov[2 + elem + offset].iov_base = |
| ctype->wcdigits_act / 10; |
| idx[elem + 1] = idx[elem] + sizeof (uint32_t); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB): |
| /* Compute the length of all possible characters. For INDIGITS |
| there might be more than one. We simply concatenate all of |
| them with a NUL byte following. The NUL byte wouldn't be |
| necessary but it makes it easier for the user. */ |
| total = 0; |
| |
| for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB); |
| cnt < ctype->mbdigits_act; cnt += 10) |
| total += ctype->mbdigits[cnt]->nbytes + 1; |
| iov[2 + elem + offset].iov_base = (char *) alloca (total); |
| iov[2 + elem + offset].iov_len = total; |
| |
| cp = iov[2 + elem + offset].iov_base; |
| for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB); |
| cnt < ctype->mbdigits_act; cnt += 10) |
| { |
| cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes, |
| ctype->mbdigits[cnt]->nbytes); |
| *cp++ = '\0'; |
| } |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB): |
| /* Compute the length of all possible characters. For INDIGITS |
| there might be more than one. We simply concatenate all of |
| them with a NUL byte following. The NUL byte wouldn't be |
| necessary but it makes it easier for the user. */ |
| cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB); |
| total = ctype->mboutdigits[cnt]->nbytes + 1; |
| iov[2 + elem + offset].iov_base = (char *) alloca (total); |
| iov[2 + elem + offset].iov_len = total; |
| |
| *(char *) mempcpy (iov[2 + elem + offset].iov_base, |
| ctype->mboutdigits[cnt]->bytes, |
| ctype->mboutdigits[cnt]->nbytes) = '\0'; |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC): |
| total = ctype->wcdigits_act / 10; |
| |
| iov[2 + elem + offset].iov_base = |
| (uint32_t *) alloca (total * sizeof (uint32_t)); |
| iov[2 + elem + offset].iov_len = total * sizeof (uint32_t); |
| |
| for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC); |
| cnt < ctype->wcdigits_act; cnt += 10) |
| ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10] |
| = ctype->wcdigits[cnt]; |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC): |
| /* Align entries. */ |
| iov[2 + elem + offset].iov_base = (void *) nulbytes; |
| iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4; |
| idx[elem] += iov[2 + elem + offset].iov_len; |
| ++offset; |
| /* FALLTRHOUGH */ |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC): |
| cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC); |
| iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt]; |
| iov[2 + elem + offset].iov_len = sizeof (uint32_t); |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN): |
| /* Align entries. */ |
| iov[2 + elem + offset].iov_base = (void *) nulbytes; |
| iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4; |
| idx[elem] += iov[2 + elem + offset].iov_len; |
| ++offset; |
| |
| default_missing_len = (ctype->default_missing |
| ? wcslen ((wchar_t *)ctype->default_missing) |
| : 0); |
| iov[2 + elem + offset].iov_base = &default_missing_len; |
| iov[2 + elem + offset].iov_len = sizeof (uint32_t); |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING): |
| iov[2 + elem + offset].iov_base = |
| ctype->default_missing ?: (uint32_t *) L""; |
| iov[2 + elem + offset].iov_len = |
| wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t); |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN): |
| /* Align entries. */ |
| iov[2 + elem + offset].iov_base = (void *) nulbytes; |
| iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4; |
| idx[elem] += iov[2 + elem + offset].iov_len; |
| ++offset; |
| |
| iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore; |
| iov[2 + elem + offset].iov_len = sizeof (uint32_t); |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE): |
| { |
| uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore |
| * 3 * sizeof (uint32_t)); |
| struct translit_ignore_t *runp; |
| |
| iov[2 + elem + offset].iov_base = ranges; |
| iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore |
| * 3 * sizeof (uint32_t)); |
| |
| for (runp = ctype->translit_ignore; runp != NULL; |
| runp = runp->next) |
| { |
| *ranges++ = runp->from; |
| *ranges++ = runp->to; |
| *ranges++ = runp->step; |
| } |
| } |
| /* Remove the following line in case a new entry is added |
| after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */ |
| if (elem < nelems) |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| break; |
| |
| default: |
| assert (! "unknown CTYPE element"); |
| } |
| else |
| { |
| /* Handle extra maps. */ |
| size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1); |
| if (nr < ctype->nr_charclass) |
| { |
| iov[2 + elem + offset].iov_base = ctype->class_b[nr]; |
| iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t); |
| idx[elem] += iov[2 + elem + offset].iov_len; |
| ++offset; |
| |
| iov[2 + elem + offset] = ctype->class_3level[nr]; |
| } |
| else |
| { |
| nr -= ctype->nr_charclass; |
| assert (nr < ctype->map_collection_nr); |
| iov[2 + elem + offset] = ctype->map_3level[nr]; |
| } |
| idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; |
| } |
| } |
| |
| assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass |
| + ctype->map_collection_nr + 4 + 2)); |
| |
| write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset, |
| iov); |
| } |
| |
| |
| /* Local functions. */ |
| static void |
| ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype, |
| const char *name) |
| { |
| size_t cnt; |
| |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) |
| if (strcmp (ctype->classnames[cnt], name) == 0) |
| break; |
| |
| if (cnt < ctype->nr_charclass) |
| { |
| lr_error (lr, _("character class `%s' already defined"), name); |
| return; |
| } |
| |
| if (ctype->nr_charclass == MAX_NR_CHARCLASS) |
| /* Exit code 2 is prescribed in P1003.2b. */ |
| WITH_CUR_LOCALE (error (2, 0, _("\ |
| implementation limit: no more than %Zd character classes allowed"), |
| MAX_NR_CHARCLASS)); |
| |
| ctype->classnames[ctype->nr_charclass++] = name; |
| } |
| |
| |
| static void |
| ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype, |
| const char *name, const struct charmap_t *charmap) |
| { |
| size_t max_chars = 0; |
| size_t cnt; |
| |
| for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) |
| { |
| if (strcmp (ctype->mapnames[cnt], name) == 0) |
| break; |
| |
| if (max_chars < ctype->map_collection_max[cnt]) |
| max_chars = ctype->map_collection_max[cnt]; |
| } |
| |
| if (cnt < ctype->map_collection_nr) |
| { |
| lr_error (lr, _("character map `%s' already defined"), name); |
| return; |
| } |
| |
| if (ctype->map_collection_nr == MAX_NR_CHARMAP) |
| /* Exit code 2 is prescribed in P1003.2b. */ |
| WITH_CUR_LOCALE (error (2, 0, _("\ |
| implementation limit: no more than %d character maps allowed"), |
| MAX_NR_CHARMAP)); |
| |
| ctype->mapnames[cnt] = name; |
| |
| if (max_chars == 0) |
| ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512; |
| else |
| ctype->map_collection_max[cnt] = max_chars; |
| |
| ctype->map_collection[cnt] = (uint32_t *) |
| xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]); |
| ctype->map_collection_act[cnt] = 256; |
| |
| ++ctype->map_collection_nr; |
| } |
| |
| |
| /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This |
| is possible if we only want to extend the name array. */ |
| static uint32_t * |
| find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, |
| size_t *act, uint32_t idx) |
| { |
| size_t cnt; |
| |
| if (idx < 256) |
| return table == NULL ? NULL : &(*table)[idx]; |
| |
| /* Use the charnames_idx lookup table instead of the slow search loop. */ |
| #if 1 |
| cnt = idx_table_get (&ctype->charnames_idx, idx); |
| if (cnt == EMPTY) |
| /* Not found. */ |
| cnt = ctype->charnames_act; |
| #else |
| for (cnt = 256; cnt < ctype->charnames_act; ++cnt) |
| if (ctype->charnames[cnt] == idx) |
| break; |
| #endif |
| |
| /* We have to distinguish two cases: the name is found or not. */ |
| if (cnt == ctype->charnames_act) |
| { |
| /* Extend the name array. */ |
| if (ctype->charnames_act == ctype->charnames_max) |
| { |
| ctype->charnames_max *= 2; |
| ctype->charnames = (uint32_t *) |
| xrealloc (ctype->charnames, |
| sizeof (uint32_t) * ctype->charnames_max); |
| } |
| ctype->charnames[ctype->charnames_act++] = idx; |
| idx_table_add (&ctype->charnames_idx, idx, cnt); |
| } |
| |
| if (table == NULL) |
| /* We have done everything we are asked to do. */ |
| return NULL; |
| |
| if (max == NULL) |
| /* The caller does not want to extend the table. */ |
| return (cnt >= *act ? NULL : &(*table)[cnt]); |
| |
| if (cnt >= *act) |
| { |
| if (cnt >= *max) |
| { |
| size_t old_max = *max; |
| do |
| *max *= 2; |
| while (*max <= cnt); |
| |
| *table = |
| (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t)); |
| memset (&(*table)[old_max], '\0', |
| (*max - old_max) * sizeof (uint32_t)); |
| } |
| |
| *act = cnt + 1; |
| } |
| |
| return &(*table)[cnt]; |
| } |
| |
| |
| static int |
| get_character (struct token *now, const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct charseq **seqp, uint32_t *wchp) |
| { |
| if (now->tok == tok_bsymbol) |
| { |
| /* This will hopefully be the normal case. */ |
| *wchp = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| *seqp = charmap_find_value (charmap, now->val.str.startmb, |
| now->val.str.lenmb); |
| } |
| else if (now->tok == tok_ucs4) |
| { |
| char utmp[10]; |
| |
| snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4); |
| *seqp = charmap_find_value (charmap, utmp, 9); |
| |
| if (*seqp == NULL) |
| *seqp = repertoire_find_seq (repertoire, now->val.ucs4); |
| |
| if (*seqp == NULL) |
| { |
| /* Compute the value in the charmap from the UCS value. */ |
| const char *symbol = repertoire_find_symbol (repertoire, |
| now->val.ucs4); |
| |
| if (symbol == NULL) |
| *seqp = NULL; |
| else |
| *seqp = charmap_find_value (charmap, symbol, strlen (symbol)); |
| |
| if (*seqp == NULL) |
| { |
| if (repertoire != NULL) |
| { |
| /* Insert a negative entry. */ |
| static const struct charseq negative |
| = { .ucs4 = ILLEGAL_CHAR_VALUE }; |
| uint32_t *newp = obstack_alloc (&repertoire->mem_pool, |
| sizeof (uint32_t)); |
| *newp = now->val.ucs4; |
| |
| insert_entry (&repertoire->seq_table, newp, |
| sizeof (uint32_t), (void *) &negative); |
| } |
| } |
| else |
| (*seqp)->ucs4 = now->val.ucs4; |
| } |
| else if ((*seqp)->ucs4 != now->val.ucs4) |
| *seqp = NULL; |
| |
| *wchp = now->val.ucs4; |
| } |
| else if (now->tok == tok_charcode) |
| { |
| /* We must map from the byte code to UCS4. */ |
| *seqp = charmap_find_symbol (charmap, now->val.str.startmb, |
| now->val.str.lenmb); |
| |
| if (*seqp == NULL) |
| *wchp = ILLEGAL_CHAR_VALUE; |
| else |
| { |
| if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name, |
| strlen ((*seqp)->name)); |
| *wchp = (*seqp)->ucs4; |
| } |
| } |
| else |
| return 1; |
| |
| return 0; |
| } |
| |
| |
| /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and |
| the .(2). counterparts. */ |
| static void |
| charclass_symbolic_ellipsis (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct token *now, |
| const char *last_str, |
| unsigned long int class256_bit, |
| unsigned long int class_bit, int base, |
| int ignore_content, int handle_digits, int step) |
| { |
| const char *nowstr = now->val.str.startmb; |
| char tmp[now->val.str.lenmb + 1]; |
| const char *cp; |
| char *endp; |
| unsigned long int from; |
| unsigned long int to; |
| |
| /* We have to compute the ellipsis values using the symbolic names. */ |
| assert (last_str != NULL); |
| |
| if (strlen (last_str) != now->val.str.lenmb) |
| { |
| invalid_range: |
| lr_error (ldfile, |
| _("`%s' and `%.*s' are not valid names for symbolic range"), |
| last_str, (int) now->val.str.lenmb, nowstr); |
| return; |
| } |
| |
| if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0) |
| /* Nothing to do, the names are the same. */ |
| return; |
| |
| for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp) |
| ; |
| |
| errno = 0; |
| from = strtoul (cp, &endp, base); |
| if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0') |
| goto invalid_range; |
| |
| to = strtoul (nowstr + (cp - last_str), &endp, base); |
| if ((to == UINT_MAX && errno == ERANGE) |
| || (endp - nowstr) != now->val.str.lenmb || from >= to) |
| goto invalid_range; |
| |
| /* OK, we have a range FROM - TO. Now we can create the symbolic names. */ |
| if (!ignore_content) |
| { |
| now->val.str.startmb = tmp; |
| while ((from += step) <= to) |
| { |
| struct charseq *seq; |
| uint32_t wch; |
| |
| sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"), |
| (int) (cp - last_str), last_str, |
| (int) (now->val.str.lenmb - (cp - last_str)), |
| from); |
| |
| get_character (now, charmap, repertoire, &seq, &wch); |
| |
| if (seq != NULL && seq->nbytes == 1) |
| /* Yep, we can store information about this byte sequence. */ |
| ctype->class256_collection[seq->bytes[0]] |= class256_bit; |
| |
| if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) |
| /* We have the UCS4 position. */ |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, wch) |= class_bit; |
| |
| if (handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max *= 2; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max *= 2; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = seq; |
| ctype->wcdigits[ctype->wcdigits_act++] = wch; |
| } |
| else if (handle_digits == 2) |
| { |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| return; |
| } |
| |
| ctype->mboutdigits[ctype->outdigits_act] = seq; |
| ctype->wcoutdigits[ctype->outdigits_act] = wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| } |
| } |
| |
| |
| /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */ |
| static void |
| charclass_ucs4_ellipsis (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct token *now, uint32_t last_wch, |
| unsigned long int class256_bit, |
| unsigned long int class_bit, int ignore_content, |
| int handle_digits, int step) |
| { |
| if (last_wch > now->val.ucs4) |
| { |
| lr_error (ldfile, _("\ |
| to-value <U%0*X> of range is smaller than from-value <U%0*X>"), |
| (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4, |
| (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch); |
| return; |
| } |
| |
| if (!ignore_content) |
| while ((last_wch += step) <= now->val.ucs4) |
| { |
| /* We have to find out whether there is a byte sequence corresponding |
| to this UCS4 value. */ |
| struct charseq *seq; |
| char utmp[10]; |
| |
| snprintf (utmp, sizeof (utmp), "U%08X", last_wch); |
| seq = charmap_find_value (charmap, utmp, 9); |
| if (seq == NULL) |
| { |
| snprintf (utmp, sizeof (utmp), "U%04X", last_wch); |
| seq = charmap_find_value (charmap, utmp, 5); |
| } |
| |
| if (seq == NULL) |
| /* Try looking in the repertoire map. */ |
| seq = repertoire_find_seq (repertoire, last_wch); |
| |
| /* If this is the first time we look for this sequence create a new |
| entry. */ |
| if (seq == NULL) |
| { |
| static const struct charseq negative |
| = { .ucs4 = ILLEGAL_CHAR_VALUE }; |
| |
| /* Find the symbolic name for this UCS4 value. */ |
| if (repertoire != NULL) |
| { |
| const char *symbol = repertoire_find_symbol (repertoire, |
| last_wch); |
| uint32_t *newp = obstack_alloc (&repertoire->mem_pool, |
| sizeof (uint32_t)); |
| *newp = last_wch; |
| |
| if (symbol != NULL) |
| /* We have a name, now search the multibyte value. */ |
| seq = charmap_find_value (charmap, symbol, strlen (symbol)); |
| |
| if (seq == NULL) |
| /* We have to create a fake entry. */ |
| seq = (struct charseq *) &negative; |
| else |
| seq->ucs4 = last_wch; |
| |
| insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t), |
| seq); |
| } |
| else |
| /* We have to create a fake entry. */ |
| seq = (struct charseq *) &negative; |
| } |
| |
| /* We have a name, now search the multibyte value. */ |
| if (seq->ucs4 == last_wch && seq->nbytes == 1) |
| /* Yep, we can store information about this byte sequence. */ |
| ctype->class256_collection[(size_t) seq->bytes[0]] |
| |= class256_bit; |
| |
| /* And of course we have the UCS4 position. */ |
| if (class_bit != 0) |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, last_wch) |= class_bit; |
| |
| if (handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max *= 2; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max *= 2; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch |
| ? seq : NULL); |
| ctype->wcdigits[ctype->wcdigits_act++] = last_wch; |
| } |
| else if (handle_digits == 2) |
| { |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| return; |
| } |
| |
| ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch |
| ? seq : NULL); |
| ctype->wcoutdigits[ctype->outdigits_act] = last_wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| } |
| |
| |
| /* Ellipsis as in `/xea/x12.../xea/x34'. */ |
| static void |
| charclass_charcode_ellipsis (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct token *now, char *last_charcode, |
| uint32_t last_charcode_len, |
| unsigned long int class256_bit, |
| unsigned long int class_bit, int ignore_content, |
| int handle_digits) |
| { |
| /* First check whether the to-value is larger. */ |
| if (now->val.charcode.nbytes != last_charcode_len) |
| { |
| lr_error (ldfile, _("\ |
| start and end character sequence of range must have the same length")); |
| return; |
| } |
| |
| if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0) |
| { |
| lr_error (ldfile, _("\ |
| to-value character sequence is smaller than from-value sequence")); |
| return; |
| } |
| |
| if (!ignore_content) |
| { |
| do |
| { |
| /* Increment the byte sequence value. */ |
| struct charseq *seq; |
| uint32_t wch; |
| int i; |
| |
| for (i = last_charcode_len - 1; i >= 0; --i) |
| if (++last_charcode[i] != 0) |
| break; |
| |
| if (last_charcode_len == 1) |
| /* Of course we have the charcode value. */ |
| ctype->class256_collection[(size_t) last_charcode[0]] |
| |= class256_bit; |
| |
| /* Find the symbolic name. */ |
| seq = charmap_find_symbol (charmap, last_charcode, |
| last_charcode_len); |
| if (seq != NULL) |
| { |
| if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| seq->ucs4 = repertoire_find_value (repertoire, seq->name, |
| strlen (seq->name)); |
| wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4; |
| |
| if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, wch) |= class_bit; |
| } |
| else |
| wch = ILLEGAL_CHAR_VALUE; |
| |
| if (handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max *= 2; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max *= 2; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| seq = xmalloc (sizeof (struct charseq) + last_charcode_len); |
| memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); |
| seq->nbytes = last_charcode_len; |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = seq; |
| ctype->wcdigits[ctype->wcdigits_act++] = wch; |
| } |
| else if (handle_digits == 2) |
| { |
| struct charseq *seq; |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| return; |
| } |
| |
| seq = xmalloc (sizeof (struct charseq) + last_charcode_len); |
| memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); |
| seq->nbytes = last_charcode_len; |
| |
| ctype->mboutdigits[ctype->outdigits_act] = seq; |
| ctype->wcoutdigits[ctype->outdigits_act] = wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| while (memcmp (last_charcode, now->val.charcode.bytes, |
| last_charcode_len) != 0); |
| } |
| } |
| |
| |
| static uint32_t * |
| find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap, |
| uint32_t wch) |
| { |
| struct translit_t *trunp = ctype->translit; |
| struct translit_ignore_t *tirunp = ctype->translit_ignore; |
| |
| while (trunp != NULL) |
| { |
| /* XXX We simplify things here. The transliterations we look |
| for are only allowed to have one character. */ |
| if (trunp->from[0] == wch && trunp->from[1] == 0) |
| { |
| /* Found it. Now look for a transliteration which can be |
| represented with the character set. */ |
| struct translit_to_t *torunp = trunp->to; |
| |
| while (torunp != NULL) |
| { |
| int i; |
| |
| for (i = 0; torunp->str[i] != 0; ++i) |
| { |
| char utmp[10]; |
| |
| snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]); |
| if (charmap_find_value (charmap, utmp, 9) == NULL) |
| /* This character cannot be represented. */ |
| break; |
| } |
| |
| if (torunp->str[i] == 0) |
| return torunp->str; |
| |
| torunp = torunp->next; |
| } |
| |
| break; |
| } |
| |
| trunp = trunp->next; |
| } |
| |
| /* Check for ignored chars. */ |
| while (tirunp != NULL) |
| { |
| if (tirunp->from <= wch && tirunp->to >= wch) |
| { |
| uint32_t wi; |
| |
| for (wi = tirunp->from; wi <= wch; wi += tirunp->step) |
| if (wi == wch) |
| return (uint32_t []) { 0 }; |
| } |
| } |
| |
| /* Nothing found. */ |
| return NULL; |
| } |
| |
| |
| uint32_t * |
| find_translit (struct localedef_t *locale, const struct charmap_t *charmap, |
| uint32_t wch) |
| { |
| struct locale_ctype_t *ctype; |
| uint32_t *result = NULL; |
| |
| assert (locale != NULL); |
| ctype = locale->categories[LC_CTYPE].ctype; |
| |
| if (ctype == NULL) |
| return NULL; |
| |
| if (ctype->translit != NULL) |
| result = find_translit2 (ctype, charmap, wch); |
| |
| if (result == NULL) |
| { |
| struct translit_include_t *irunp = ctype->translit_include; |
| |
| while (irunp != NULL && result == NULL) |
| { |
| result = find_translit (find_locale (CTYPE_LOCALE, |
| irunp->copy_locale, |
| irunp->copy_repertoire, |
| charmap), |
| charmap, wch); |
| irunp = irunp->next; |
| } |
| } |
| |
| return result; |
| } |
| |
| |
| /* Read one transliteration entry. */ |
| static uint32_t * |
| read_widestring (struct linereader *ldfile, struct token *now, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| uint32_t *wstr; |
| |
| if (now->tok == tok_default_missing) |
| /* The special name "" will denote this case. */ |
| wstr = ((uint32_t *) { 0 }); |
| else if (now->tok == tok_bsymbol) |
| { |
| /* Get the value from the repertoire. */ |
| wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); |
| wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| if (wstr[0] == ILLEGAL_CHAR_VALUE) |
| { |
| /* We cannot proceed, we don't know the UCS4 value. */ |
| free (wstr); |
| return NULL; |
| } |
| |
| wstr[1] = 0; |
| } |
| else if (now->tok == tok_ucs4) |
| { |
| wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); |
| wstr[0] = now->val.ucs4; |
| wstr[1] = 0; |
| } |
| else if (now->tok == tok_charcode) |
| { |
| /* Argh, we have to convert to the symbol name first and then to the |
| UCS4 value. */ |
| struct charseq *seq = charmap_find_symbol (charmap, |
| now->val.str.startmb, |
| now->val.str.lenmb); |
| if (seq == NULL) |
| /* Cannot find the UCS4 value. */ |
| return NULL; |
| |
| if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| seq->ucs4 = repertoire_find_value (repertoire, seq->name, |
| strlen (seq->name)); |
| if (seq->ucs4 == ILLEGAL_CHAR_VALUE) |
| /* We cannot proceed, we don't know the UCS4 value. */ |
| return NULL; |
| |
| wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); |
| wstr[0] = seq->ucs4; |
| wstr[1] = 0; |
| } |
| else if (now->tok == tok_string) |
| { |
| wstr = now->val.str.startwc; |
| if (wstr == NULL || wstr[0] == 0) |
| return NULL; |
| } |
| else |
| { |
| if (now->tok != tok_eol && now->tok != tok_eof) |
| lr_ignore_rest (ldfile, 0); |
| SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); |
| return (uint32_t *) -1l; |
| } |
| |
| return wstr; |
| } |
| |
| |
| static void |
| read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype, |
| struct token *now, const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire); |
| struct translit_t *result; |
| struct translit_to_t **top; |
| struct obstack *ob = &ctype->mempool; |
| int first; |
| int ignore; |
| |
| if (from_wstr == NULL) |
| /* There is no valid from string. */ |
| return; |
| |
| result = (struct translit_t *) obstack_alloc (ob, |
| sizeof (struct translit_t)); |
| result->from = from_wstr; |
| result->fname = ldfile->fname; |
| result->lineno = ldfile->lineno; |
| result->next = NULL; |
| result->to = NULL; |
| top = &result->to; |
| first = 1; |
| ignore = 0; |
| |
| while (1) |
| { |
| uint32_t *to_wstr; |
| |
| /* Next we have one or more transliterations. They are |
| separated by semicolons. */ |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| |
| if (!first && (now->tok == tok_semicolon || now->tok == tok_eol)) |
| { |
| /* One string read. */ |
| const uint32_t zero = 0; |
| |
| if (!ignore) |
| { |
| obstack_grow (ob, &zero, 4); |
| to_wstr = obstack_finish (ob); |
| |
| *top = obstack_alloc (ob, sizeof (struct translit_to_t)); |
| (*top)->str = to_wstr; |
| (*top)->next = NULL; |
| } |
| |
| if (now->tok == tok_eol) |
| { |
| result->next = ctype->translit; |
| ctype->translit = result; |
| return; |
| } |
| |
| if (!ignore) |
| top = &(*top)->next; |
| ignore = 0; |
| } |
| else |
| { |
| to_wstr = read_widestring (ldfile, now, charmap, repertoire); |
| if (to_wstr == (uint32_t *) -1l) |
| { |
| /* An error occurred. */ |
| obstack_free (ob, result); |
| return; |
| } |
| |
| if (to_wstr == NULL) |
| ignore = 1; |
| else |
| /* This value is usable. */ |
| obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4); |
| |
| first = 0; |
| } |
| } |
| } |
| |
| |
| static void |
| read_translit_ignore_entry (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| /* We expect a semicolon-separated list of characters we ignore. We are |
| only interested in the wide character definitions. These must be |
| single characters, possibly defining a range when an ellipsis is used. */ |
| while (1) |
| { |
| struct token *now = lr_token (ldfile, charmap, NULL, repertoire, |
| verbose); |
| struct translit_ignore_t *newp; |
| uint32_t from; |
| |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| { |
| lr_error (ldfile, |
| _("premature end of `translit_ignore' definition")); |
| return; |
| } |
| |
| if (now->tok != tok_bsymbol && now->tok != tok_ucs4) |
| { |
| lr_error (ldfile, _("syntax error")); |
| lr_ignore_rest (ldfile, 0); |
| return; |
| } |
| |
| if (now->tok == tok_ucs4) |
| from = now->val.ucs4; |
| else |
| /* Try to get the value. */ |
| from = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| |
| if (from == ILLEGAL_CHAR_VALUE) |
| { |
| lr_error (ldfile, "invalid character name"); |
| newp = NULL; |
| } |
| else |
| { |
| newp = (struct translit_ignore_t *) |
| obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t)); |
| newp->from = from; |
| newp->to = from; |
| newp->step = 1; |
| |
| newp->next = ctype->translit_ignore; |
| ctype->translit_ignore = newp; |
| } |
| |
| /* Now we expect either a semicolon, an ellipsis, or the end of the |
| line. */ |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| |
| if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2) |
| { |
| /* XXX Should we bother implementing `....'? `...' certainly |
| will not be implemented. */ |
| uint32_t to; |
| int step = now->tok == tok_ellipsis2_2 ? 2 : 1; |
| |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| { |
| lr_error (ldfile, |
| _("premature end of `translit_ignore' definition")); |
| return; |
| } |
| |
| if (now->tok != tok_bsymbol && now->tok != tok_ucs4) |
| { |
| lr_error (ldfile, _("syntax error")); |
| lr_ignore_rest (ldfile, 0); |
| return; |
| } |
| |
| if (now->tok == tok_ucs4) |
| to = now->val.ucs4; |
| else |
| /* Try to get the value. */ |
| to = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| |
| if (to == ILLEGAL_CHAR_VALUE) |
| lr_error (ldfile, "invalid character name"); |
| else |
| { |
| /* Make sure the `to'-value is larger. */ |
| if (to >= from) |
| { |
| newp->to = to; |
| newp->step = step; |
| } |
| else |
| lr_error (ldfile, _("\ |
| to-value <U%0*X> of range is smaller than from-value <U%0*X>"), |
| (to | from) < 65536 ? 4 : 8, to, |
| (to | from) < 65536 ? 4 : 8, from); |
| } |
| |
| /* And the next token. */ |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| } |
| |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| /* We are done. */ |
| return; |
| |
| if (now->tok == tok_semicolon) |
| /* Next round. */ |
| continue; |
| |
| /* If we come here something is wrong. */ |
| lr_error (ldfile, _("syntax error")); |
| lr_ignore_rest (ldfile, 0); |
| return; |
| } |
| } |
| |
| |
| /* The parser for the LC_CTYPE section of the locale definition. */ |
| void |
| ctype_read (struct linereader *ldfile, struct localedef_t *result, |
| const struct charmap_t *charmap, const char *repertoire_name, |
| int ignore_content) |
| { |
| struct repertoire_t *repertoire = NULL; |
| struct locale_ctype_t *ctype; |
| struct token *now; |
| enum token_t nowtok; |
| size_t cnt; |
| struct charseq *last_seq; |
| uint32_t last_wch = 0; |
| enum token_t last_token; |
| enum token_t ellipsis_token; |
| int step; |
| char last_charcode[16]; |
| size_t last_charcode_len = 0; |
| const char *last_str = NULL; |
| int mapidx; |
| struct localedef_t *copy_locale = NULL; |
| |
| /* Get the repertoire we have to use. */ |
| if (repertoire_name != NULL) |
| repertoire = repertoire_read (repertoire_name); |
| |
| /* The rest of the line containing `LC_CTYPE' must be free. */ |
| lr_ignore_rest (ldfile, 1); |
| |
| |
| do |
| { |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| nowtok = now->tok; |
| } |
| while (nowtok == tok_eol); |
| |
| /* If we see `copy' now we are almost done. */ |
| if (nowtok == tok_copy) |
| { |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_string) |
| { |
| SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); |
| |
| skip_category: |
| do |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok != tok_eof && now->tok != tok_end); |
| |
| if (now->tok != tok_eof |
| || (now = lr_token (ldfile, charmap, NULL, NULL, verbose), |
| now->tok == tok_eof)) |
| lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); |
| else if (now->tok != tok_lc_ctype) |
| { |
| lr_error (ldfile, _("\ |
| %1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); |
| lr_ignore_rest (ldfile, 0); |
| } |
| else |
| lr_ignore_rest (ldfile, 1); |
| |
| return; |
| } |
| |
| if (! ignore_content) |
| { |
| /* Get the locale definition. */ |
| copy_locale = load_locale (LC_CTYPE, now->val.str.startmb, |
| repertoire_name, charmap, NULL); |
| if ((copy_locale->avail & CTYPE_LOCALE) == 0) |
| { |
| /* Not yet loaded. So do it now. */ |
| if (locfile_read (copy_locale, charmap) != 0) |
| goto skip_category; |
| } |
| |
| if (copy_locale->categories[LC_CTYPE].ctype == NULL) |
| return; |
| } |
| |
| lr_ignore_rest (ldfile, 1); |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| nowtok = now->tok; |
| } |
| |
| /* Prepare the data structures. */ |
| ctype_startup (ldfile, result, charmap, copy_locale, ignore_content); |
| ctype = result->categories[LC_CTYPE].ctype; |
| |
| /* Remember the repertoire we use. */ |
| if (!ignore_content) |
| ctype->repertoire = repertoire; |
| |
| while (1) |
| { |
| unsigned long int class_bit = 0; |
| unsigned long int class256_bit = 0; |
| int handle_digits = 0; |
| |
| /* Of course we don't proceed beyond the end of file. */ |
| if (nowtok == tok_eof) |
| break; |
| |
| /* Ingore empty lines. */ |
| if (nowtok == tok_eol) |
| { |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| nowtok = now->tok; |
| continue; |
| } |
| |
| switch (nowtok) |
| { |
| case tok_charclass: |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok == tok_ident || now->tok == tok_string) |
| { |
| ctype_class_new (ldfile, ctype, now->val.str.startmb); |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| break; |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| if (now->tok != tok_eol) |
| SYNTAX_ERROR (_("\ |
| %s: syntax error in definition of new character class"), "LC_CTYPE"); |
| break; |
| |
| case tok_charconv: |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok == tok_ident || now->tok == tok_string) |
| { |
| ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| break; |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| if (now->tok != tok_eol) |
| SYNTAX_ERROR (_("\ |
| %s: syntax error in definition of new character map"), "LC_CTYPE"); |
| break; |
| |
| case tok_class: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| /* We simply forget the `class' keyword and use the following |
| operand to determine the bit. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_ident || now->tok == tok_string) |
| { |
| /* Must can be one of the predefined class names. */ |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) |
| if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0) |
| break; |
| if (cnt >= ctype->nr_charclass) |
| { |
| #ifdef PREDEFINED_CLASSES |
| if (now->val.str.lenmb == 8 |
| && memcmp ("special1", now->val.str.startmb, 8) == 0) |
| class_bit = _ISwspecial1; |
| else if (now->val.str.lenmb == 8 |
| && memcmp ("special2", now->val.str.startmb, 8) == 0) |
| class_bit = _ISwspecial2; |
| else if (now->val.str.lenmb == 8 |
| && memcmp ("special3", now->val.str.startmb, 8) == 0) |
| class_bit = _ISwspecial3; |
| else |
| #endif |
| { |
| /* OK, it's a new class. */ |
| ctype_class_new (ldfile, ctype, now->val.str.startmb); |
| |
| class_bit = _ISwbit (ctype->nr_charclass - 1); |
| } |
| } |
| else |
| { |
| class_bit = _ISwbit (cnt); |
| |
| free (now->val.str.startmb); |
| } |
| } |
| else if (now->tok == tok_digit) |
| goto handle_tok_digit; |
| else if (now->tok < tok_upper || now->tok > tok_blank) |
| goto err_label; |
| else |
| { |
| class_bit = BITw (now->tok); |
| class256_bit = BIT (now->tok); |
| } |
| |
| /* The next character must be a semicolon. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| goto err_label; |
| goto read_charclass; |
| |
| case tok_upper: |
| case tok_lower: |
| case tok_alpha: |
| case tok_alnum: |
| case tok_space: |
| case tok_cntrl: |
| case tok_punct: |
| case tok_graph: |
| case tok_print: |
| case tok_xdigit: |
| case tok_blank: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| class_bit = BITw (now->tok); |
| class256_bit = BIT (now->tok); |
| handle_digits = 0; |
| read_charclass: |
| ctype->class_done |= class_bit; |
| last_token = tok_none; |
| ellipsis_token = tok_none; |
| step = 1; |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok != tok_eol && now->tok != tok_eof) |
| { |
| uint32_t wch; |
| struct charseq *seq; |
| |
| if (ellipsis_token == tok_none) |
| { |
| if (get_character (now, charmap, repertoire, &seq, &wch)) |
| goto err_label; |
| |
| if (!ignore_content && seq != NULL && seq->nbytes == 1) |
| /* Yep, we can store information about this byte |
| sequence. */ |
| ctype->class256_collection[seq->bytes[0]] |= class256_bit; |
| |
| if (!ignore_content && wch != ILLEGAL_CHAR_VALUE |
| && class_bit != 0) |
| /* We have the UCS4 position. */ |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, wch) |= class_bit; |
| |
| last_token = now->tok; |
| /* Terminate the string. */ |
| if (last_token == tok_bsymbol) |
| { |
| now->val.str.startmb[now->val.str.lenmb] = '\0'; |
| last_str = now->val.str.startmb; |
| } |
| else |
| last_str = NULL; |
| last_seq = seq; |
| last_wch = wch; |
| memcpy (last_charcode, now->val.charcode.bytes, 16); |
| last_charcode_len = now->val.charcode.nbytes; |
| |
| if (!ignore_content && handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max += 10; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max += 10; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = seq; |
| ctype->wcdigits[ctype->wcdigits_act++] = wch; |
| } |
| else if (!ignore_content && handle_digits == 2) |
| { |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| ctype->mboutdigits[ctype->outdigits_act] = seq; |
| ctype->wcoutdigits[ctype->outdigits_act] = wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| else |
| { |
| /* Now it gets complicated. We have to resolve the |
| ellipsis problem. First we must distinguish between |
| the different kind of ellipsis and this must match the |
| tokens we have seen. */ |
| assert (last_token != tok_none); |
| |
| if (last_token != now->tok) |
| { |
| lr_error (ldfile, _("\ |
| ellipsis range must be marked by two operands of same type")); |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| if (last_token == tok_bsymbol) |
| { |
| if (ellipsis_token == tok_ellipsis3) |
| lr_error (ldfile, _("with symbolic name range values \ |
| the absolute ellipsis `...' must not be used")); |
| |
| charclass_symbolic_ellipsis (ldfile, ctype, charmap, |
| repertoire, now, last_str, |
| class256_bit, class_bit, |
| (ellipsis_token |
| == tok_ellipsis4 |
| ? 10 : 16), |
| ignore_content, |
| handle_digits, step); |
| } |
| else if (last_token == tok_ucs4) |
| { |
| if (ellipsis_token != tok_ellipsis2) |
| lr_error (ldfile, _("\ |
| with UCS range values one must use the hexadecimal symbolic ellipsis `..'")); |
| |
| charclass_ucs4_ellipsis (ldfile, ctype, charmap, |
| repertoire, now, last_wch, |
| class256_bit, class_bit, |
| ignore_content, handle_digits, |
| step); |
| } |
| else |
| { |
| assert (last_token == tok_charcode); |
| |
| if (ellipsis_token != tok_ellipsis3) |
| lr_error (ldfile, _("\ |
| with character code range values one must use the absolute ellipsis `...'")); |
| |
| charclass_charcode_ellipsis (ldfile, ctype, charmap, |
| repertoire, now, |
| last_charcode, |
| last_charcode_len, |
| class256_bit, class_bit, |
| ignore_content, |
| handle_digits); |
| } |
| |
| /* Now we have used the last value. */ |
| last_token = tok_none; |
| } |
| |
| /* Next we expect a semicolon or the end of the line. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| break; |
| |
| if (last_token != tok_none |
| && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2) |
| { |
| if (now->tok == tok_ellipsis2_2) |
| { |
| now->tok = tok_ellipsis2; |
| step = 2; |
| } |
| else if (now->tok == tok_ellipsis4_2) |
| { |
| now->tok = tok_ellipsis4; |
| step = 2; |
| } |
| |
| ellipsis_token = now->tok; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| continue; |
| } |
| |
| if (now->tok != tok_semicolon) |
| goto err_label; |
| |
| /* And get the next character. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| |
| ellipsis_token = tok_none; |
| step = 1; |
| } |
| break; |
| |
| case tok_digit: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| handle_tok_digit: |
| class_bit = _ISwdigit; |
| class256_bit = _ISdigit; |
| handle_digits = 1; |
| goto read_charclass; |
| |
| case tok_outdigit: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| if (ctype->outdigits_act != 0) |
| lr_error (ldfile, _("\ |
| %s: field `%s' declared more than once"), |
| "LC_CTYPE", "outdigit"); |
| class_bit = 0; |
| class256_bit = 0; |
| handle_digits = 2; |
| goto read_charclass; |
| |
| case tok_toupper: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| mapidx = 0; |
| goto read_mapping; |
| |
| case tok_tolower: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| mapidx = 1; |
| goto read_mapping; |
| |
| case tok_map: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| /* We simply forget the `map' keyword and use the following |
| operand to determine the mapping. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_ident || now->tok == tok_string) |
| { |
| size_t cnt; |
| |
| for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt) |
| if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) |
| break; |
| |
| if (cnt < ctype->map_collection_nr) |
| free (now->val.str.startmb); |
| else |
| /* OK, it's a new map. */ |
| ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); |
| |
| mapidx = cnt; |
| } |
| else if (now->tok < tok_toupper || now->tok > tok_tolower) |
| goto err_label; |
| else |
| mapidx = now->tok - tok_toupper; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| /* This better should be a semicolon. */ |
| if (now->tok != tok_semicolon) |
| goto err_label; |
| |
| read_mapping: |
| /* Test whether this mapping was already defined. */ |
| if (ctype->tomap_done[mapidx]) |
| { |
| lr_error (ldfile, _("duplicated definition for mapping `%s'"), |
| ctype->mapnames[mapidx]); |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| ctype->tomap_done[mapidx] = 1; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok != tok_eol && now->tok != tok_eof) |
| { |
| struct charseq *from_seq; |
| uint32_t from_wch; |
| struct charseq *to_seq; |
| uint32_t to_wch; |
| |
| /* Every pair starts with an opening brace. */ |
| if (now->tok != tok_open_brace) |
| goto err_label; |
| |
| /* Next comes the from-value. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (get_character (now, charmap, repertoire, &from_seq, |
| &from_wch) != 0) |
| goto err_label; |
| |
| /* The next is a comma. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_comma) |
| goto err_label; |
| |
| /* And the other value. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (get_character (now, charmap, repertoire, &to_seq, |
| &to_wch) != 0) |
| goto err_label; |
| |
| /* And the last thing is the closing brace. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_close_brace) |
| goto err_label; |
| |
| if (!ignore_content) |
| { |
| /* Check whether the mapping converts from an ASCII value |
| to a non-ASCII value. */ |
| if (from_seq != NULL && from_seq->nbytes == 1 |
| && isascii (from_seq->bytes[0]) |
| && to_seq != NULL && (to_seq->nbytes != 1 |
| || !isascii (to_seq->bytes[0]))) |
| ctype->to_nonascii = 1; |
| |
| if (mapidx < 2 && from_seq != NULL && to_seq != NULL |
| && from_seq->nbytes == 1 && to_seq->nbytes == 1) |
| /* We can use this value. */ |
| ctype->map256_collection[mapidx][from_seq->bytes[0]] |
| = to_seq->bytes[0]; |
| |
| if (from_wch != ILLEGAL_CHAR_VALUE |
| && to_wch != ILLEGAL_CHAR_VALUE) |
| /* Both correct values. */ |
| *find_idx (ctype, &ctype->map_collection[mapidx], |
| &ctype->map_collection_max[mapidx], |
| &ctype->map_collection_act[mapidx], |
| from_wch) = to_wch; |
| } |
| |
| /* Now comes a semicolon or the end of the line/file. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_semicolon) |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| break; |
| |
| case tok_translit_start: |
| /* Ignore the entire translit section with its peculiar syntax |
| if we don't need the input. */ |
| if (ignore_content) |
| { |
| do |
| { |
| lr_ignore_rest (ldfile, 0); |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| while (now->tok != tok_translit_end && now->tok != tok_eof); |
| |
| if (now->tok == tok_eof) |
| lr_error (ldfile, _(\ |
| "%s: `translit_start' section does not end with `translit_end'"), |
| "LC_CTYPE"); |
| |
| break; |
| } |
| |
| /* The rest of the line better should be empty. */ |
| lr_ignore_rest (ldfile, 1); |
| |
| /* We count here the number of allocated entries in the `translit' |
| array. */ |
| cnt = 0; |
| |
| ldfile->translate_strings = 1; |
| ldfile->return_widestr = 1; |
| |
| /* We proceed until we see the `translit_end' token. */ |
| while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose), |
| now->tok != tok_translit_end && now->tok != tok_eof) |
| { |
| if (now->tok == tok_eol) |
| /* Ignore empty lines. */ |
| continue; |
| |
| if (now->tok == tok_include) |
| { |
| /* We have to include locale. */ |
| const char *locale_name; |
| const char *repertoire_name; |
| struct translit_include_t *include_stmt, **include_ptr; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| /* This should be a string or an identifier. In any |
| case something to name a locale. */ |
| if (now->tok != tok_string && now->tok != tok_ident) |
| { |
| translit_syntax: |
| lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE"); |
| lr_ignore_rest (ldfile, 0); |
| continue; |
| } |
| locale_name = now->val.str.startmb; |
| |
| /* Next should be a semicolon. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| goto translit_syntax; |
| |
| /* Now the repertoire name. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if ((now->tok != tok_string && now->tok != tok_ident) |
| || now->val.str.startmb == NULL) |
| goto translit_syntax; |
| repertoire_name = now->val.str.startmb; |
| if (repertoire_name[0] == '\0') |
| /* Ignore the empty string. */ |
| repertoire_name = NULL; |
| |
| /* Save the include statement for later processing. */ |
| include_stmt = (struct translit_include_t *) |
| xmalloc (sizeof (struct translit_include_t)); |
| include_stmt->copy_locale = locale_name; |
| include_stmt->copy_repertoire = repertoire_name; |
| include_stmt->next = NULL; |
| |
| include_ptr = &ctype->translit_include; |
| while (*include_ptr != NULL) |
| include_ptr = &(*include_ptr)->next; |
| *include_ptr = include_stmt; |
| |
| /* The rest of the line must be empty. */ |
| lr_ignore_rest (ldfile, 1); |
| |
| /* Make sure the locale is read. */ |
| add_to_readlist (LC_CTYPE, locale_name, repertoire_name, |
| 1, NULL); |
| continue; |
| } |
| else if (now->tok == tok_default_missing) |
| { |
| uint32_t *wstr; |
| |
| while (1) |
| { |
| /* We expect a single character or string as the |
| argument. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| wstr = read_widestring (ldfile, now, charmap, |
| repertoire); |
| |
| if (wstr != NULL) |
| { |
| if (ctype->default_missing != NULL) |
| { |
| lr_error (ldfile, _("\ |
| %s: duplicate `default_missing' definition"), "LC_CTYPE"); |
| WITH_CUR_LOCALE (error_at_line (0, 0, |
| ctype->default_missing_file, |
| ctype->default_missing_lineno, |
| _("\ |
| previous definition was here"))); |
| } |
| else |
| { |
| ctype->default_missing = wstr; |
| ctype->default_missing_file = ldfile->fname; |
| ctype->default_missing_lineno = ldfile->lineno; |
| } |
| /* We can have more entries, ignore them. */ |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| else if (wstr == (uint32_t *) -1l) |
| /* This was an syntax error. */ |
| break; |
| |
| /* Maybe there is another replacement we can use. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| { |
| /* Nothing found. We tell the user. */ |
| lr_error (ldfile, _("\ |
| %s: no representable `default_missing' definition found"), "LC_CTYPE"); |
| break; |
| } |
| if (now->tok != tok_semicolon) |
| goto translit_syntax; |
| } |
| |
| continue; |
| } |
| else if (now->tok == tok_translit_ignore) |
| { |
| read_translit_ignore_entry (ldfile, ctype, charmap, |
| repertoire); |
| continue; |
| } |
| |
| read_translit_entry (ldfile, ctype, now, charmap, repertoire); |
| } |
| ldfile->return_widestr = 0; |
| |
| if (now->tok == tok_eof) |
| lr_error (ldfile, _(\ |
| "%s: `translit_start' section does not end with `translit_end'"), |
| "LC_CTYPE"); |
| |
| break; |
| |
| case tok_ident: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| /* This could mean one of several things. First test whether |
| it's a character class name. */ |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) |
| if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0) |
| break; |
| if (cnt < ctype->nr_charclass) |
| { |
| class_bit = _ISwbit (cnt); |
| class256_bit = cnt <= 11 ? _ISbit (cnt) : 0; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) |
| if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) |
| break; |
| if (cnt < ctype->map_collection_nr) |
| { |
| mapidx = cnt; |
| free (now->val.str.startmb); |
| goto read_mapping; |
| } |
| #ifdef PREDEFINED_CLASSES |
| if (strcmp (now->val.str.startmb, "special1") == 0) |
| { |
| class_bit = _ISwspecial1; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| if (strcmp (now->val.str.startmb, "special2") == 0) |
| { |
| class_bit = _ISwspecial2; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| if (strcmp (now->val.str.startmb, "special3") == 0) |
| { |
| class_bit = _ISwspecial3; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| if (strcmp (now->val.str.startmb, "tosymmetric") == 0) |
| { |
| mapidx = 2; |
| goto read_mapping; |
| } |
| #endif |
| break; |
| |
| case tok_end: |
| /* Next we assume `LC_CTYPE'. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_eof) |
| break; |
| if (now->tok == tok_eol) |
| lr_error (ldfile, _("%s: incomplete `END' line"), |
| "LC_CTYPE"); |
| else if (now->tok != tok_lc_ctype) |
| lr_error (ldfile, _("\ |
| %1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); |
| lr_ignore_rest (ldfile, now->tok == tok_lc_ctype); |
| return; |
| |
| default: |
| err_label: |
| if (now->tok != tok_eof) |
| SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); |
| } |
| |
| /* Prepare for the next round. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| nowtok = now->tok; |
| } |
| |
| /* When we come here we reached the end of the file. */ |
| lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); |
| } |
| |
| |
| static void |
| set_class_defaults (struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| size_t cnt; |
| |
| /* These function defines the default values for the classes and conversions |
| according to POSIX.2 2.5.2.1. |
| It may seem that the order of these if-blocks is arbitrary but it is NOT. |
| Don't move them unless you know what you do! */ |
| |
| auto void set_default (int bitpos, int from, int to); |
| |
| void set_default (int bitpos, int from, int to) |
| { |
| char tmp[2]; |
| int ch; |
| int bit = _ISbit (bitpos); |
| int bitw = _ISwbit (bitpos); |
| /* Define string. */ |
| strcpy (tmp, "?"); |
| |
| for (ch = from; ch <= to; ++ch) |
| { |
| struct charseq *seq; |
| tmp[0] = ch; |
| |
| seq = charmap_find_value (charmap, tmp, 1); |
| if (seq == NULL) |
| { |
| char buf[10]; |
| sprintf (buf, "U%08X", ch); |
| seq = charmap_find_value (charmap, buf, 9); |
| } |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", tmp)); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", tmp)); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= bit; |
| |
| /* No need to search here, the ASCII value is also the Unicode |
| value. */ |
| ELEM (ctype, class_collection, , ch) |= bitw; |
| } |
| } |
| |
| /* Set default values if keyword was not present. */ |
| if ((ctype->class_done & BITw (tok_upper)) == 0) |
| /* "If this keyword [lower] is not specified, the lowercase letters |
| `A' through `Z', ..., shall automatically belong to this class, |
| with implementation defined character values." [P1003.2, 2.5.2.1] */ |
| set_default (BITPOS (tok_upper), 'A', 'Z'); |
| |
| if ((ctype->class_done & BITw (tok_lower)) == 0) |
| /* "If this keyword [lower] is not specified, the lowercase letters |
| `a' through `z', ..., shall automatically belong to this class, |
| with implementation defined character values." [P1003.2, 2.5.2.1] */ |
| set_default (BITPOS (tok_lower), 'a', 'z'); |
| |
| if ((ctype->class_done & BITw (tok_alpha)) == 0) |
| { |
| /* Table 2-6 in P1003.2 says that characters in class `upper' or |
| class `lower' *must* be in class `alpha'. */ |
| unsigned long int mask = BIT (tok_upper) | BIT (tok_lower); |
| unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower); |
| |
| for (cnt = 0; cnt < 256; ++cnt) |
| if ((ctype->class256_collection[cnt] & mask) != 0) |
| ctype->class256_collection[cnt] |= BIT (tok_alpha); |
| |
| for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) |
| if ((ctype->class_collection[cnt] & maskw) != 0) |
| ctype->class_collection[cnt] |= BITw (tok_alpha); |
| } |
| |
| if ((ctype->class_done & BITw (tok_digit)) == 0) |
| /* "If this keyword [digit] is not specified, the digits `0' through |
| `9', ..., shall automatically belong to this class, with |
| implementation-defined character values." [P1003.2, 2.5.2.1] */ |
| set_default (BITPOS (tok_digit), '0', '9'); |
| |
| /* "Only characters specified for the `alpha' and `digit' keyword |
| shall be specified. Characters specified for the keyword `alpha' |
| and `digit' are automatically included in this class. */ |
| { |
| unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit); |
| unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit); |
| |
| for (cnt = 0; cnt < 256; ++cnt) |
| if ((ctype->class256_collection[cnt] & mask) != 0) |
| ctype->class256_collection[cnt] |= BIT (tok_alnum); |
| |
| for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) |
| if ((ctype->class_collection[cnt] & maskw) != 0) |
| ctype->class_collection[cnt] |= BITw (tok_alnum); |
| } |
| |
| if ((ctype->class_done & BITw (tok_space)) == 0) |
| /* "If this keyword [space] is not specified, the characters <space>, |
| <form-feed>, <newline>, <carriage-return>, <tab>, and |
| <vertical-tab>, ..., shall automatically belong to this class, |
| with implementation-defined character values." [P1003.2, 2.5.2.1] */ |
| { |
| struct charseq *seq; |
| |
| seq = charmap_find_value (charmap, "space", 5); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "SP", 2); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U00000020", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<space>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<space>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L' ') |= BITw (tok_space); |
| |
| seq = charmap_find_value (charmap, "form-feed", 9); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U0000000C", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<form-feed>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<form-feed>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space); |
| |
| |
| seq = charmap_find_value (charmap, "newline", 7); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U0000000A", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<newline>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<newline>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space); |
| |
| |
| seq = charmap_find_value (charmap, "carriage-return", 15); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U0000000D", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<carriage-return>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<carriage-return>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space); |
| |
| |
| seq = charmap_find_value (charmap, "tab", 3); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U00000009", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<tab>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<tab>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space); |
| |
| |
| seq = charmap_find_value (charmap, "vertical-tab", 12); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U0000000B", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<vertical-tab>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<vertical-tab>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space); |
| } |
| |
| if ((ctype->class_done & BITw (tok_xdigit)) == 0) |
| /* "If this keyword is not specified, the digits `0' to `9', the |
| uppercase letters `A' through `F', and the lowercase letters `a' |
| through `f', ..., shell automatically belong to this class, with |
| implementation defined character values." [P1003.2, 2.5.2.1] */ |
| { |
| set_default (BITPOS (tok_xdigit), '0', '9'); |
| set_default (BITPOS (tok_xdigit), 'A', 'F'); |
| set_default (BITPOS (tok_xdigit), 'a', 'f'); |
| } |
| |
| if ((ctype->class_done & BITw (tok_blank)) == 0) |
| /* "If this keyword [blank] is unspecified, the characters <space> and |
| <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */ |
| { |
| struct charseq *seq; |
| |
| seq = charmap_find_value (charmap, "space", 5); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "SP", 2); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U00000020", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<space>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<space>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank); |
| |
| |
| seq = charmap_find_value (charmap, "tab", 3); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U00000009", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<tab>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<tab>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank); |
| } |
| |
| if ((ctype->class_done & BITw (tok_graph)) == 0) |
| /* "If this keyword [graph] is not specified, characters specified for |
| the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct', |
| shall belong to this character class." [P1003.2, 2.5.2.1] */ |
| { |
| unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | |
| BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); |
| unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) | |
| BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) | |
| BITw (tok_punct); |
| size_t cnt; |
| |
| for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) |
| if ((ctype->class_collection[cnt] & maskw) != 0) |
| ctype->class_collection[cnt] |= BITw (tok_graph); |
| |
| for (cnt = 0; cnt < 256; ++cnt) |
| if ((ctype->class256_collection[cnt] & mask) != 0) |
| ctype->class256_collection[cnt] |= BIT (tok_graph); |
| } |
| |
| if ((ctype->class_done & BITw (tok_print)) == 0) |
| /* "If this keyword [print] is not provided, characters specified for |
| the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct', |
| and the <space> character shall belong to this character class." |
| [P1003.2, 2.5.2.1] */ |
| { |
| unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | |
| BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); |
| unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) | |
| BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) | |
| BITw (tok_punct); |
| size_t cnt; |
| struct charseq *seq; |
| |
| for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) |
| if ((ctype->class_collection[cnt] & maskw) != 0) |
| ctype->class_collection[cnt] |= BITw (tok_print); |
| |
| for (cnt = 0; cnt < 256; ++cnt) |
| if ((ctype->class256_collection[cnt] & mask) != 0) |
| ctype->class256_collection[cnt] |= BIT (tok_print); |
| |
| |
| seq = charmap_find_value (charmap, "space", 5); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "SP", 2); |
| if (seq == NULL) |
| seq = charmap_find_value (charmap, "U00000020", 9); |
| if (seq == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", "<space>")); |
| } |
| else if (seq->nbytes != 1) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' in charmap not representable with one byte"), |
| "LC_CTYPE", "<space>")); |
| else |
| ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print); |
| |
| /* No need to search. */ |
| ELEM (ctype, class_collection, , L' ') |= BITw (tok_print); |
| } |
| |
| if (ctype->tomap_done[0] == 0) |
| /* "If this keyword [toupper] is not specified, the lowercase letters |
| `a' through `z', and their corresponding uppercase letters `A' to |
| `Z', ..., shall automatically be included, with implementation- |
| defined character values." [P1003.2, 2.5.2.1] */ |
| { |
| char tmp[4]; |
| int ch; |
| |
| strcpy (tmp, "<?>"); |
| |
| for (ch = 'a'; ch <= 'z'; ++ch) |
| { |
| struct charseq *seq_from, *seq_to; |
| |
| tmp[1] = (char) ch; |
| |
| seq_from = charmap_find_value (charmap, &tmp[1], 1); |
| if (seq_from == NULL) |
| { |
| char buf[10]; |
| sprintf (buf, "U%08X", ch); |
| seq_from = charmap_find_value (charmap, buf, 9); |
| } |
| if (seq_from == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", tmp)); |
| } |
| else if (seq_from->nbytes != 1) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' needed as default value not representable with one byte"), |
| "LC_CTYPE", tmp)); |
| } |
| else |
| { |
| /* This conversion is implementation defined. */ |
| tmp[1] = (char) (ch + ('A' - 'a')); |
| seq_to = charmap_find_value (charmap, &tmp[1], 1); |
| if (seq_to == NULL) |
| { |
| char buf[10]; |
| sprintf (buf, "U%08X", ch + ('A' - 'a')); |
| seq_to = charmap_find_value (charmap, buf, 9); |
| } |
| if (seq_to == NULL) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' not defined while needed as default value"), |
| "LC_CTYPE", tmp)); |
| } |
| else if (seq_to->nbytes != 1) |
| { |
| if (!be_quiet) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: character `%s' needed as default value not representable with one byte"), |
| "LC_CTYPE", tmp)); |
| } |
| else |
| /* The index [0] is determined by the order of the |
| `ctype_map_newP' calls in `ctype_startup'. */ |
| ctype->map256_collection[0][seq_from->bytes[0]] |
| = seq_to->bytes[0]; |
| } |
| |
| /* No need to search. */ |
| ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a'); |
| } |
| } |
| |
| if (ctype->tomap_done[1] == 0) |
| /* "If this keyword [tolower] is not specified, the mapping shall be |
| the reverse mapping of the one specified to `toupper'." [P1003.2] */ |
| { |
| for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt) |
| if (ctype->map_collection[0][cnt] != 0) |
| ELEM (ctype, map_collection, [1], |
| ctype->map_collection[0][cnt]) |
| = ctype->charnames[cnt]; |
| |
| for (cnt = 0; cnt < 256; ++cnt) |
| if (ctype->map256_collection[0][cnt] != 0) |
| ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt; |
| } |
| |
| if (ctype->outdigits_act != 10) |
| { |
| if (ctype->outdigits_act != 0) |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit")); |
| |
| for (cnt = ctype->outdigits_act; cnt < 10; ++cnt) |
| { |
| ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, |
| (char *) digits + cnt, |
| 1); |
| |
| if (ctype->mboutdigits[cnt] == NULL) |
| ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, |
| longnames[cnt], |
| strlen (longnames[cnt])); |
| |
| if (ctype->mboutdigits[cnt] == NULL) |
| ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, |
| uninames[cnt], 9); |
| |
| if (ctype->mboutdigits[cnt] == NULL) |
| { |
| /* Provide a replacement. */ |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| no output digits defined and none of the standard names in the charmap"))); |
| |
| ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, |
| sizeof (struct charseq) |
| + 1); |
| |
| /* This is better than nothing. */ |
| ctype->mboutdigits[cnt]->bytes[0] = digits[cnt]; |
| ctype->mboutdigits[cnt]->nbytes = 1; |
| } |
| |
| ctype->wcoutdigits[cnt] = L'0' + cnt; |
| } |
| |
| ctype->outdigits_act = 10; |
| } |
| } |
| |
| |
| /* Construction of sparse 3-level tables. |
| See wchar-lookup.h for their structure and the meaning of p and q. */ |
| |
| struct wctype_table |
| { |
| /* Parameters. */ |
| unsigned int p; |
| unsigned int q; |
| /* Working representation. */ |
| size_t level1_alloc; |
| size_t level1_size; |
| uint32_t *level1; |
| size_t level2_alloc; |
| size_t level2_size; |
| uint32_t *level2; |
| size_t level3_alloc; |
| size_t level3_size; |
| uint32_t *level3; |
| /* Compressed representation. */ |
| size_t result_size; |
| char *result; |
| }; |
| |
| /* Initialize. Assumes t->p and t->q have already been set. */ |
| static inline void |
| wctype_table_init (struct wctype_table *t) |
| { |
| t->level1 = NULL; |
| t->level1_alloc = t->level1_size = 0; |
| t->level2 = NULL; |
| t->level2_alloc = t->level2_size = 0; |
| t->level3 = NULL; |
| t->level3_alloc = t->level3_size = 0; |
| } |
| |
| /* Retrieve an entry. */ |
| static inline int |
| wctype_table_get (struct wctype_table *t, uint32_t wc) |
| { |
| uint32_t index1 = wc >> (t->q + t->p + 5); |
| if (index1 < t->level1_size) |
| { |
| uint32_t lookup1 = t->level1[index1]; |
| if (lookup1 != EMPTY) |
| { |
| uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1)) |
| + (lookup1 << t->q); |
| uint32_t lookup2 = t->level2[index2]; |
| if (lookup2 != EMPTY) |
| { |
| uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1)) |
| + (lookup2 << t->p); |
| uint32_t lookup3 = t->level3[index3]; |
| uint32_t index4 = wc & 0x1f; |
| |
| return (lookup3 >> index4) & 1; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| /* Add one entry. */ |
| static void |
| wctype_table_add (struct wctype_table *t, uint32_t wc) |
| { |
| uint32_t index1 = wc >> (t->q + t->p + 5); |
| uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1); |
| uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1); |
| uint32_t index4 = wc & 0x1f; |
| size_t i, i1, i2; |
| |
| if (index1 >= t->level1_size) |
| { |
| if (index1 >= t->level1_alloc) |
| { |
| size_t alloc = 2 * t->level1_alloc; |
| if (alloc <= index1) |
| alloc = index1 + 1; |
| t->level1 = (uint32_t *) xrealloc ((char *) t->level1, |
| alloc * sizeof (uint32_t)); |
| t->level1_alloc = alloc; |
| } |
| while (index1 >= t->level1_size) |
| t->level1[t->level1_size++] = EMPTY; |
| } |
| |
| if (t->level1[index1] == EMPTY) |
| { |
| if (t->level2_size == t->level2_alloc) |
| { |
| size_t alloc = 2 * t->level2_alloc + 1; |
| t->level2 = (uint32_t *) xrealloc ((char *) t->level2, |
| (alloc << t->q) * sizeof (uint32_t)); |
| t->level2_alloc = alloc; |
| } |
| i1 = t->level2_size << t->q; |
| i2 = (t->level2_size + 1) << t->q; |
| for (i = i1; i < i2; i++) |
| t->level2[i] = EMPTY; |
| t->level1[index1] = t->level2_size++; |
| } |
| |
| index2 += t->level1[index1] << t->q; |
| |
| if (t->level2[index2] == EMPTY) |
| { |
| if (t->level3_size == t->level3_alloc) |
| { |
| size_t alloc = 2 * t->level3_alloc + 1; |
| t->level3 = (uint32_t *) xrealloc ((char *) t->level3, |
| (alloc << t->p) * sizeof (uint32_t)); |
| t->level3_alloc = alloc; |
| } |
| i1 = t->level3_size << t->p; |
| i2 = (t->level3_size + 1) << t->p; |
| for (i = i1; i < i2; i++) |
| t->level3[i] = 0; |
| t->level2[index2] = t->level3_size++; |
| } |
| |
| index3 += t->level2[index2] << t->p; |
| |
| t->level3[index3] |= (uint32_t)1 << index4; |
| } |
| |
| /* Finalize and shrink. */ |
| static void |
| wctype_table_finalize (struct wctype_table *t) |
| { |
| size_t i, j, k; |
| uint32_t reorder3[t->level3_size]; |
| uint32_t reorder2[t->level2_size]; |
| uint32_t level1_offset, level2_offset, level3_offset; |
| |
| /* Uniquify level3 blocks. */ |
| k = 0; |
| for (j = 0; j < t->level3_size; j++) |
| { |
| for (i = 0; i < k; i++) |
| if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], |
| (1 << t->p) * sizeof (uint32_t)) == 0) |
| break; |
| /* Relocate block j to block i. */ |
| reorder3[j] = i; |
| if (i == k) |
| { |
| if (i != j) |
| memcpy (&t->level3[i << t->p], &t->level3[j << t->p], |
| (1 << t->p) * sizeof (uint32_t)); |
| k++; |
| } |
| } |
| t->level3_size = k; |
| |
| for (i = 0; i < (t->level2_size << t->q); i++) |
| if (t->level2[i] != EMPTY) |
| t->level2[i] = reorder3[t->level2[i]]; |
| |
| /* Uniquify level2 blocks. */ |
| k = 0; |
| for (j = 0; j < t->level2_size; j++) |
| { |
| for (i = 0; i < k; i++) |
| if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], |
| (1 << t->q) * sizeof (uint32_t)) == 0) |
| break; |
| /* Relocate block j to block i. */ |
| reorder2[j] = i; |
| if (i == k) |
| { |
| if (i != j) |
| memcpy (&t->level2[i << t->q], &t->level2[j << t->q], |
| (1 << t->q) * sizeof (uint32_t)); |
| k++; |
| } |
| } |
| t->level2_size = k; |
| |
| for (i = 0; i < t->level1_size; i++) |
| if (t->level1[i] != EMPTY) |
| t->level1[i] = reorder2[t->level1[i]]; |
| |
| /* Create and fill the resulting compressed representation. */ |
| t->result_size = |
| 5 * sizeof (uint32_t) |
| + t->level1_size * sizeof (uint32_t) |
| + (t->level2_size << t->q) * sizeof (uint32_t) |
| + (t->level3_size << t->p) * sizeof (uint32_t); |
| t->result = (char *) xmalloc (t->result_size); |
| |
| level1_offset = |
| 5 * sizeof (uint32_t); |
| level2_offset = |
| 5 * sizeof (uint32_t) |
| + t->level1_size * sizeof (uint32_t); |
| level3_offset = |
| 5 * sizeof (uint32_t) |
| + t->level1_size * sizeof (uint32_t) |
| + (t->level2_size << t->q) * sizeof (uint32_t); |
| |
| ((uint32_t *) t->result)[0] = t->q + t->p + 5; |
| ((uint32_t *) t->result)[1] = t->level1_size; |
| ((uint32_t *) t->result)[2] = t->p + 5; |
| ((uint32_t *) t->result)[3] = (1 << t->q) - 1; |
| ((uint32_t *) t->result)[4] = (1 << t->p) - 1; |
| |
| for (i = 0; i < t->level1_size; i++) |
| ((uint32_t *) (t->result + level1_offset))[i] = |
| (t->level1[i] == EMPTY |
| ? 0 |
| : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); |
| |
| for (i = 0; i < (t->level2_size << t->q); i++) |
| ((uint32_t *) (t->result + level2_offset))[i] = |
| (t->level2[i] == EMPTY |
| ? 0 |
| : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset); |
| |
| for (i = 0; i < (t->level3_size << t->p); i++) |
| ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i]; |
| |
| if (t->level1_alloc > 0) |
| free (t->level1); |
| if (t->level2_alloc > 0) |
| free (t->level2); |
| if (t->level3_alloc > 0) |
| free (t->level3); |
| } |
| |
| #define TABLE wcwidth_table |
| #define ELEMENT uint8_t |
| #define DEFAULT 0xff |
| #include "3level.h" |
| |
| #define TABLE wctrans_table |
| #define ELEMENT int32_t |
| #define DEFAULT 0 |
| #define wctrans_table_add wctrans_table_add_internal |
| #include "3level.h" |
| #undef wctrans_table_add |
| /* The wctrans_table must actually store the difference between the |
| desired result and the argument. */ |
| static inline void |
| wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc) |
| { |
| wctrans_table_add_internal (t, wc, mapped_wc - wc); |
| } |
| |
| |
| /* Flattens the included transliterations into a translit list. |
| Inserts them in the list at `cursor', and returns the new cursor. */ |
| static struct translit_t ** |
| translit_flatten (struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct translit_t **cursor) |
| { |
| while (ctype->translit_include != NULL) |
| { |
| const char *copy_locale = ctype->translit_include->copy_locale; |
| const char *copy_repertoire = ctype->translit_include->copy_repertoire; |
| struct localedef_t *other; |
| |
| /* Unchain the include statement. During the depth-first traversal |
| we don't want to visit any locale more than once. */ |
| ctype->translit_include = ctype->translit_include->next; |
| |
| other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap); |
| |
| if (other == NULL || other->categories[LC_CTYPE].ctype == NULL) |
| { |
| WITH_CUR_LOCALE (error (0, 0, _("\ |
| %s: transliteration data from locale `%s' not available"), |
| "LC_CTYPE", copy_locale)); |
| } |
| else |
| { |
| struct locale_ctype_t *other_ctype = |
| other->categories[LC_CTYPE].ctype; |
| |
| cursor = translit_flatten (other_ctype, charmap, cursor); |
| assert (other_ctype->translit_include == NULL); |
| |
| if (other_ctype->translit != NULL) |
| { |
| /* Insert the other_ctype->translit list at *cursor. */ |
| struct translit_t *endp = other_ctype->translit; |
| while (endp->next != NULL) |
| endp = endp->next; |
| |
| endp->next = *cursor; |
| *cursor = other_ctype->translit; |
| |
| /* Avoid any risk of circular lists. */ |
| other_ctype->translit = NULL; |
| |
| cursor = &endp->next; |
| } |
| |
| if (ctype->default_missing == NULL) |
| ctype->default_missing = other_ctype->default_missing; |
| } |
| } |
| |
| return cursor; |
| } |
| |
| static void |
| allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| size_t idx, nr; |
| const void *key; |
| size_t len; |
| void *vdata; |
| void *curs; |
| |
| /* You wonder about this amount of memory? This is only because some |
| users do not manage to address the array with unsigned values or |
| data types with range >= 256. '\200' would result in the array |
| index -128. To help these poor people we duplicate the entries for |
| 128 up to 255 below the entry for \0. */ |
| ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t)); |
| ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t)); |
| ctype->class_b = (uint32_t **) |
| xmalloc (ctype->nr_charclass * sizeof (uint32_t *)); |
| ctype->class_3level = (struct iovec *) |
| xmalloc (ctype->nr_charclass * sizeof (struct iovec)); |
| |
| /* This is the array accessed using the multibyte string elements. */ |
| for (idx = 0; idx < 256; ++idx) |
| ctype->ctype_b[128 + idx] = ctype->class256_collection[idx]; |
| |
| /* Mirror first 127 entries. We must take care that entry -1 is not |
| mirrored because EOF == -1. */ |
| for (idx = 0; idx < 127; ++idx) |
| ctype->ctype_b[idx] = ctype->ctype_b[256 + idx]; |
| |
| /* The 32 bit array contains all characters < 0x100. */ |
| for (idx = 0; idx < ctype->class_collection_act; ++idx) |
| if (ctype->charnames[idx] < 0x100) |
| ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx]; |
| |
| for (nr = 0; nr < ctype->nr_charclass; nr++) |
| { |
| ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t)); |
| |
| /* We only set CLASS_B for the bits in the ISO C classes, not |
| the user defined classes. The number should not change but |
| who knows. */ |
| #define LAST_ISO_C_BIT 11 |
| if (nr <= LAST_ISO_C_BIT) |
| for (idx = 0; idx < 256; ++idx) |
| if (ctype->class256_collection[idx] & _ISbit (nr)) |
| ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f); |
| } |
| |
| for (nr = 0; nr < ctype->nr_charclass; nr++) |
| { |
| struct wctype_table t; |
| |
| t.p = 4; /* or: 5 */ |
| t.q = 7; /* or: 6 */ |
| wctype_table_init (&t); |
| |
| for (idx = 0; idx < ctype->class_collection_act; ++idx) |
| if (ctype->class_collection[idx] & _ISwbit (nr)) |
| wctype_table_add (&t, ctype->charnames[idx]); |
| |
| wctype_table_finalize (&t); |
| |
| if (verbose) |
| WITH_CUR_LOCALE (fprintf (stderr, _("\ |
| %s: table for class \"%s\": %lu bytes\n"), |
| "LC_CTYPE", ctype->classnames[nr], |
| (unsigned long int) t.result_size)); |
| |
| ctype->class_3level[nr].iov_base = t.result; |
| ctype->class_3level[nr].iov_len = t.result_size; |
| } |
| |
| /* Room for table of mappings. */ |
| ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *)); |
| ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr |
| * sizeof (uint32_t *)); |
| ctype->map_3level = (struct iovec *) |
| xmalloc (ctype->map_collection_nr * sizeof (struct iovec)); |
| |
| /* Fill in all mappings. */ |
| for (idx = 0; idx < 2; ++idx) |
| { |
| unsigned int idx2; |
| |
| /* Allocate table. */ |
| ctype->map_b[idx] = (uint32_t *) |
| xmalloc ((256 + 128) * sizeof (uint32_t)); |
| |
| /* Copy values from collection. */ |
| for (idx2 = 0; idx2 < 256; ++idx2) |
| ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2]; |
| |
| /* Mirror first 127 entries. We must take care not to map entry |
| -1 because EOF == -1. */ |
| for (idx2 = 0; idx2 < 127; ++idx2) |
| ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2]; |
| |
| /* EOF must map to EOF. */ |
| ctype->map_b[idx][127] = EOF; |
| } |
| |
| for (idx = 0; idx < ctype->map_collection_nr; ++idx) |
| { |
| unsigned int idx2; |
| |
| /* Allocate table. */ |
| ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t)); |
| |
| /* Copy values from collection. Default is identity mapping. */ |
| for (idx2 = 0; idx2 < 256; ++idx2) |
| ctype->map32_b[idx][idx2] = |
| (ctype->map_collection[idx][idx2] != 0 |
| ? ctype->map_collection[idx][idx2] |
| : idx2); |
| } |
| |
| for (nr = 0; nr < ctype->map_collection_nr; nr++) |
| { |
| struct wctrans_table t; |
| |
| t.p = 7; |
| t.q = 9; |
| wctrans_table_init (&t); |
| |
| for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx) |
| if (ctype->map_collection[nr][idx] != 0) |
| wctrans_table_add (&t, ctype->charnames[idx], |
| ctype->map_collection[nr][idx]); |
| |
| wctrans_table_finalize (&t); |
| |
| if (verbose) |
| WITH_CUR_LOCALE (fprintf (stderr, _("\ |
| %s: table for map \"%s\": %lu bytes\n"), |
| "LC_CTYPE", ctype->mapnames[nr], |
| (unsigned long int) t.result_size)); |
| |
| ctype->map_3level[nr].iov_base = t.result; |
| ctype->map_3level[nr].iov_len = t.result_size; |
| } |
| |
| /* Extra array for class and map names. */ |
| ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass |
| * sizeof (uint32_t)); |
| ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr |
| * sizeof (uint32_t)); |
| |
| ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1); |
| ctype->map_offset = ctype->class_offset + ctype->nr_charclass; |
| |
| /* Array for width information. Because the expected widths are very |
| small (never larger than 2) we use only one single byte. This |
| saves space. |
| We put only printable characters in the table. wcwidth is specified |
| to return -1 for non-printable characters. Doing the check here |
| saves a run-time check. |
| But we put L'\0' in the table. This again saves a run-time check. */ |
| { |
| struct wcwidth_table t; |
| |
| t.p = 7; |
| t.q = 9; |
| wcwidth_table_init (&t); |
| |
| /* First set all the printable characters of the character set to |
| the default width. */ |
| curs = NULL; |
| while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0) |
| { |
| struct charseq *data = (struct charseq *) vdata; |
| |
| if (data->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| data->ucs4 = repertoire_find_value (ctype->repertoire, |
| data->name, len); |
| |
| if (data->ucs4 != ILLEGAL_CHAR_VALUE) |
| { |
| uint32_t *class_bits = |
| find_idx (ctype, &ctype->class_collection, NULL, |
| &ctype->class_collection_act, data->ucs4); |
| |
| if (class_bits != NULL && (*class_bits & BITw (tok_print))) |
| wcwidth_table_add (&t, data->ucs4, charmap->width_default); |
| } |
| } |
| |
| /* Now add the explicitly specified widths. */ |
| if (charmap->width_rules != NULL) |
| { |
| size_t cnt; |
| |
| for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) |
| { |
| unsigned char bytes[charmap->mb_cur_max]; |
| int nbytes = charmap->width_rules[cnt].from->nbytes; |
| |
| /* We have the range of character for which the width is |
| specified described using byte sequences of the multibyte |
| charset. We have to convert this to UCS4 now. And we |
| cannot simply convert the beginning and the end of the |
| sequence, we have to iterate over the byte sequence and |
| convert it for every single character. */ |
| memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); |
| |
| while (nbytes < charmap->width_rules[cnt].to->nbytes |
| || memcmp (bytes, charmap->width_rules[cnt].to->bytes, |
| nbytes) <= 0) |
| { |
| /* Find the UCS value for `bytes'. */ |
| int inner; |
| uint32_t wch; |
| struct charseq *seq = |
| charmap_find_symbol (charmap, (char *) bytes, nbytes); |
| |
| if (seq == NULL) |
| wch = ILLEGAL_CHAR_VALUE; |
| else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) |
| wch = seq->ucs4; |
| else |
| wch = repertoire_find_value (ctype->repertoire, seq->name, |
| strlen (seq->name)); |
| |
| if (wch != ILLEGAL_CHAR_VALUE) |
| { |
| /* Store the value. */ |
| uint32_t *class_bits = |
| find_idx (ctype, &ctype->class_collection, NULL, |
| &ctype->class_collection_act, wch); |
| |
| if (class_bits != NULL && (*class_bits & BITw (tok_print))) |
| wcwidth_table_add (&t, wch, |
| charmap->width_rules[cnt].width); |
| } |
| |
| /* "Increment" the bytes sequence. */ |
| inner = nbytes - 1; |
| while (inner >= 0 && bytes[inner] == 0xff) |
| --inner; |
| |
| if (inner < 0) |
| { |
| /* We have to extend the byte sequence. */ |
| if (nbytes >= charmap->width_rules[cnt].to->nbytes) |
| break; |
| |
| bytes[0] = 1; |
| memset (&bytes[1], 0, nbytes); |
| ++nbytes; |
| } |
| else |
| { |
| ++bytes[inner]; |
| while (++inner < nbytes) |
| bytes[inner] = 0; |
| } |
| } |
| } |
| } |
| |
| /* Set the width of L'\0' to 0. */ |
| wcwidth_table_add (&t, 0, 0); |
| |
| wcwidth_table_finalize (&t); |
| |
| if (verbose) |
| WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"), |
| "LC_CTYPE", (unsigned long int) t.result_size)); |
| |
| ctype->width.iov_base = t.result; |
| ctype->width.iov_len = t.result_size; |
| } |
| |
| /* Set MB_CUR_MAX. */ |
| ctype->mb_cur_max = charmap->mb_cur_max; |
| |
| /* Now determine the table for the transliteration information. |
| |
| XXX It is not yet clear to me whether it is worth implementing a |
| complicated algorithm which uses a hash table to locate the entries. |
| For now I'll use a simple array which can be searching using binary |
| search. */ |
| if (ctype->translit_include != NULL) |
| /* Traverse the locales mentioned in the `include' statements in a |
| depth-first way and fold in their transliteration information. */ |
| translit_flatten (ctype, charmap, &ctype->translit); |
| |
| if (ctype->translit != NULL) |
| { |
| /* First count how many entries we have. This is the upper limit |
| since some entries from the included files might be overwritten. */ |
| size_t number = 0; |
| size_t cnt; |
| struct translit_t *runp = ctype->translit; |
| struct translit_t **sorted; |
| size_t from_len, to_len; |
| |
| while (runp != NULL) |
| { |
| ++number; |
| runp = runp->next; |
| } |
| |
| /* Next we allocate an array large enough and fill in the values. */ |
| sorted = (struct translit_t **) alloca (number |
| * sizeof (struct translit_t **)); |
| runp = ctype->translit; |
| number = 0; |
| do |
| { |
| /* Search for the place where to insert this string. |
| XXX Better use a real sorting algorithm later. */ |
| size_t idx = 0; |
| int replace = 0; |
| |
| while (idx < number) |
| { |
| int res = wcscmp ((const wchar_t *) sorted[idx]->from, |
| (const wchar_t *) runp->from); |
| if (res == 0) |
| { |
| replace = 1; |
| break; |
| } |
| if (res > 0) |
| break; |
| ++idx; |
| } |
| |
| if (replace) |
| sorted[idx] = runp; |
| else |
| { |
| memmove (&sorted[idx + 1], &sorted[idx], |
| (number - idx) * sizeof (struct translit_t *)); |
| sorted[idx] = runp; |
| ++number; |
| } |
| |
| runp = runp->next; |
| } |
| while (runp != NULL); |
| |
| /* The next step is putting all the possible transliteration |
| strings in one memory block so that we can write it out. |
| We need several different blocks: |
| - index to the from-string array |
| - from-string array |
| - index to the to-string array |
| - to-string array. |
| */ |
| from_len = to_len = 0; |
| for (cnt = 0; cnt < number; ++cnt) |
| { |
| struct translit_to_t *srunp; |
| from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1; |
| srunp = sorted[cnt]->to; |
| while (srunp != NULL) |
| { |
| to_len += wcslen ((const wchar_t *) srunp->str) + 1; |
| srunp = srunp->next; |
| } |
| /* Plus one for the extra NUL character marking the end of |
| the list for the current entry. */ |
| ++to_len; |
| } |
| |
| /* We can allocate the arrays for the results. */ |
| ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t)); |
| ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t)); |
| ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t)); |
| ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t)); |
| |
| from_len = 0; |
| to_len = 0; |
| for (cnt = 0; cnt < number; ++cnt) |
| { |
| size_t len; |
| struct translit_to_t *srunp; |
| |
| ctype->translit_from_idx[cnt] = from_len; |
| ctype->translit_to_idx[cnt] = to_len; |
| |
| len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1; |
| wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len], |
| (const wchar_t *) sorted[cnt]->from, len); |
| from_len += len; |
| |
| ctype->translit_to_idx[cnt] = to_len; |
| srunp = sorted[cnt]->to; |
| while (srunp != NULL) |
| { |
| len = wcslen ((const wchar_t *) srunp->str) + 1; |
| wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len], |
| (const wchar_t *) srunp->str, len); |
| to_len += len; |
| srunp = srunp->next; |
| } |
| ctype->translit_to_tbl[to_len++] = L'\0'; |
| } |
| |
| /* Store the information about the length. */ |
| ctype->translit_idx_size = number; |
| ctype->translit_from_tbl_size = from_len * sizeof (uint32_t); |
| ctype->translit_to_tbl_size = to_len * sizeof (uint32_t); |
| } |
| else |
| { |
| /* Provide some dummy pointers since we have nothing to write out. */ |
| static uint32_t no_str = { 0 }; |
| |
| ctype->translit_from_idx = &no_str; |
| ctype->translit_from_tbl = &no_str; |
| ctype->translit_to_tbl = &no_str; |
| ctype->translit_idx_size = 0; |
| ctype->translit_from_tbl_size = 0; |
| ctype->translit_to_tbl_size = 0; |
| } |
| } |