| /* |
| * Copyright (c) 2008-2009 Brent Fulgham <bfulgham@gmail.org>. All rights reserved. |
| * |
| * This source code is a modified version of the CoreFoundation sources released by Apple Inc. under |
| * the terms of the APSL version 2.0 (see below). |
| * |
| * Information about changes from the original Apple source release can be found by reviewing the |
| * source control system for the project at https://sourceforge.net/svn/?group_id=246198. |
| * |
| * The original license information is as follows: |
| * |
| * Copyright (c) 2008 Apple Inc. All rights reserved. |
| * |
| * @APPLE_LICENSE_HEADER_START@ |
| * |
| * This file contains Original Code and/or Modifications of Original Code |
| * as defined in and that are subject to the Apple Public Source License |
| * Version 2.0 (the 'License'). You may not use this file except in |
| * compliance with the License. Please obtain a copy of the License at |
| * http://www.opensource.apple.com/apsl/ and read it before using this |
| * file. |
| * |
| * The Original Code and all software distributed under the License are |
| * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| * Please see the License for the specific language governing rights and |
| * limitations under the License. |
| * |
| * @APPLE_LICENSE_HEADER_END@ |
| */ |
| /* |
| CFLocaleIdentifier.c |
| Copyright (c) 2002-2007, Apple Inc. All rights reserved. |
| Responsibility: Christopher Kane |
| |
| CFLocaleIdentifier.c defines |
| - enum value kLocaleIdentifierCStringMax |
| - structs KeyStringToResultString, SpecialCaseUpdates |
| and provides the following data for the functions |
| CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, |
| CFLocaleCreateCanonicalLocaleIdentifierFromString |
| CFLocaleCreateCanonicalLanguageIdentifierFromString |
| |
| 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString; |
| map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string |
| |
| 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString; |
| map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string |
| |
| 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical; |
| map old Apple string oldAppleLocaleToCanonical[n].key |
| to canonical locale string oldAppleLocaleToCanonical[n].result |
| for n = 0..kNumOldAppleLocaleToCanonical-1 |
| |
| 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical; |
| map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key |
| to updated replacement localeStringPrefixToCanonical[n].result |
| for n = 0..kNumLocaleStringPrefixToCanonical-1 |
| |
| 5. static const SpecialCaseUpdates specialCases[]; |
| various special cases for updating region codes, or for updating language codes based on region codes |
| |
| 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults; |
| map locale string region tag localeStringRegionToDefaults[n].key |
| to default substrings to delete localeStringRegionToDefaults[n].result |
| for n = 0..kNumLocaleStringRegionToDefaults-1 |
| |
| 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults; |
| map locale string initial part localeStringPrefixToDefaults[n].key |
| to default substrings to delete localeStringPrefixToDefaults[n].result |
| for n = 0..kNumLocaleStringPrefixToDefaults-1 |
| |
| 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString; |
| map Apple locale string appleLocaleToLanguageString[n].key |
| to equivalent language string appleLocaleToLanguageString[n].result |
| for n = 0..kNumAppleLocaleToLanguageString-1 |
| |
| */ |
| |
| #include <CoreFoundation/CFString.h> |
| #include <ctype.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <unicode/uloc.h> |
| |
| #if DEPLOYMENT_TARGET_WINDOWS |
| extern size_t strlcpy(char *dst, const char *src, size_t siz); /* declared by hand only when DEPLOYMENT_TARGET_WINDOWS is set; the definition is not in this part of the file. NOTE(review): presumably the Windows C runtime headers do not provide it -- confirm */ |
| extern size_t strlcat(char *dst, const char *src, size_t siz); /* same arrangement as strlcpy on the previous line */ |
| #endif |
| |
| // Max byte length of a locale identifier (ASCII) held as a C string, including the terminating null byte. Computed as the sum of the two ULOC_* capacity macros below. |
| enum { |
| kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // 56 + 100 when this was written -- NOTE(review): re-check against the unicode/uloc.h actually in use |
| }; |
| |
| // KeyStringToResultString: one row of a string-to-string lookup table; used in the data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString |
| struct KeyStringToResultString { |
| const char * key; /* string to look up (first column of each table) */ |
| const char * result; /* what the key maps to: a replacement string, or for the ...ToDefaults tables the default substrings to delete (see the file header) */ |
| }; |
| typedef struct KeyStringToResultString KeyStringToResultString; |
| |
| // SpecialCaseUpdates: one row of the special-case data used by CFLocaleCreateCanonicalLocaleIdentifierFromString (per the file header: updating region codes, or updating language codes based on region codes). NOTE(review): the per-field notes below are inferred from the field names only -- confirm against the specialCases table and its consumer, which are not visible here. |
| struct SpecialCaseUpdates { |
| const char * lang; /* presumably the language code the row applies to */ |
| const char * reg1; /* presumably the first region code to match */ |
| const char * update1; /* presumably the update paired with reg1 */ |
| const char * reg2; /* presumably the second region code to match */ |
| const char * update2; /* presumably the update paired with reg2 */ |
| }; |
| typedef struct SpecialCaseUpdates SpecialCaseUpdates; |
| |
| |
| static const char * const regionCodeToLocaleString[] = { |
| // Maps a RegionCode (used directly as the array index) to its canonical locale string. |
| // Entries are NULL for the unassigned/unused codes 38, 63 and 69, so check an entry before use; the three "_CA" entries (28, 29, 80) carry a region but no language. |
| // canon. string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray |
| // -------- ------------ ------------------ ------------ -------- string, if different ] |
| "en_US", // 0 verUS; 0 langEnglish; |
| "fr_FR", // 1 verFrance; 1 langFrench; |
| "en_GB", // 2 verBritain; 0 langEnglish; |
| "de_DE", // 3 verGermany; 2 langGerman; |
| "it_IT", // 4 verItaly; 3 langItalian; |
| "nl_NL", // 5 verNetherlands; 4 langDutch; |
| "nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch); |
| "sv_SE", // 7 verSweden; 5 langSwedish; |
| "es_ES", // 8 verSpain; 6 langSpanish; |
| "da_DK", // 9 verDenmark; 7 langDanish; |
| "pt_PT", // 10 verPortugal; 8 langPortuguese; |
| "fr_CA", // 11 verFrCanada; 1 langFrench; |
| "nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO" |
| "he_IL", // 13 verIsrael; 10 langHebrew; |
| "ja_JP", // 14 verJapan; 11 langJapanese; |
| "en_AU", // 15 verAustralia; 0 langEnglish; |
| "ar", // 16 verArabic; 12 langArabic; |
| "fi_FI", // 17 verFinland; 13 langFinnish; |
| "fr_CH", // 18 verFrSwiss; 1 langFrench; |
| "de_CH", // 19 verGrSwiss; 2 langGerman; |
| "el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono; |
| "is_IS", // 21 verIceland; 15 langIcelandic; |
| "mt_MT", // 22 verMalta; 16 langMaltese; |
| "el_CY", // 23 verCyprus; 14 langGreek?; el or tr? guess el # "" |
| "tr_TR", // 24 verTurkey; 17 langTurkish; |
| "hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia |
| "nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping |
| "nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping |
| "_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" |
| "_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" |
| "pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping |
| "nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO" |
| "da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping |
| "hi_IN", // 33 verIndiaHindi; 21 langHindi; |
| "ur_PK", // 34 verPakistanUrdu; 20 langUrdu; |
| "tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping |
| "it_CH", // 36 verItalianSwiss; 3 langItalian; |
| "en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en" |
| NULL, // 38 *unassigned; -1 none; * one-way mapping # "" |
| "ro_RO", // 39 verRomania; 37 langRomanian; |
| "grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR" |
| "lt_LT", // 41 verLithuania; 24 langLithuanian; |
| "pl_PL", // 42 verPoland; 25 langPolish; |
| "hu_HU", // 43 verHungary; 26 langHungarian; |
| "et_EE", // 44 verEstonia; 27 langEstonian; |
| "lv_LV", // 45 verLatvia; 28 langLatvian; |
| "se", // 46 verSami; 29 langSami; |
| "fo_FO", // 47 verFaroeIsl; 30 langFaroese; |
| "fa_IR", // 48 verIran; 31 langFarsi/Persian; |
| "ru_RU", // 49 verRussia; 32 langRussian; |
| "ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots); |
| "ko_KR", // 51 verKorea; 23 langKorean; |
| "zh_CN", // 52 verChina; 33 langSimpChinese; |
| "zh_TW", // 53 verTaiwan; 19 langTradChinese; |
| "th_TH", // 54 verThailand; 22 langThai; |
| "und", // 55 verScriptGeneric; -1 none; # "" // <1.9> |
| "cs_CZ", // 56 verCzech; 38 langCzech; |
| "sk_SK", // 57 verSlovak; 39 langSlovak; |
| "und", // 58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9> |
| "hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary |
| "bn", // 60 verBengali; 67 langBengali; _IN or _BD? guess generic |
| "be_BY", // 61 verBelarus; 46 langBelorussian; |
| "uk_UA", // 62 verUkraine; 45 langUkrainian; |
| NULL, // 63 *unused; -1 none; * one-way mapping # "" |
| "el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping |
| "sr_CS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18> |
| "sl_SI", // 66 verSlovenian; 40 langSlovenian; |
| "mk_MK", // 67 verMacedonian; 43 langMacedonian; |
| "hr_HR", // 68 verCroatia; 18 langCroatian; |
| NULL, // 69 *unused; -1 none; * one-way mapping # "" |
| "de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE" |
| "pt_BR", // 71 verBrazil; 8 langPortuguese; |
| "bg_BG", // 72 verBulgaria; 44 langBulgarian; |
| "ca_ES", // 73 verCatalonia; 130 langCatalan; |
| "mul", // 74 verMultilingual; -1 none; # "" |
| "gd", // 75 verScottishGaelic; 144 langScottishGaelic; |
| "gv", // 76 verManxGaelic; 145 langManxGaelic; |
| "br", // 77 verBreton; 142 langBreton; |
| "iu_CA", // 78 verNunavut; 143 langInuktitut -Cans; |
| "cy", // 79 verWelsh; 128 langWelsh; |
| "_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" |
| "ga-Latg_IE", // 81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx> |
| "en_CA", // 82 verEngCanada; 0 langEnglish; |
| "dz_BT", // 83 verBhutan; 137 langDzongkha; |
| "hy_AM", // 84 verArmenian; 51 langArmenian; |
| "ka_GE", // 85 verGeorgian; 52 langGeorgian; |
| "es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es" |
| "es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping |
| "to_TO", // 88 verTonga; 147 langTongan; |
| "pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping |
| "ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping |
| "fr_001", // 91 verFrenchUniversal; 1 langFrench; |
| "de_AT", // 92 verAustria; 2 langGerman; |
| "es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es" |
| "gu_IN", // 94 verGujarati; 69 langGujarati; |
| "pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic |
| "ur_IN", // 96 verIndiaUrdu; 20 langUrdu; |
| "vi_VN", // 97 verVietnam; 80 langVietnamese; |
| "fr_BE", // 98 verFrBelgium; 1 langFrench; |
| "uz_UZ", // 99 verUzbek; 47 langUzbek; |
| "en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # "" |
| "nn_NO", // 101 verNynorsk; 151 langNynorsk; # "" |
| "af_ZA", // 102 verAfrikaans; 141 langAfrikaans; |
| "eo", // 103 verEsperanto; 94 langEsperanto; |
| "mr_IN", // 104 verMarathi; 66 langMarathi; |
| "bo", // 105 verTibetan; 63 langTibetan; |
| "ne_NP", // 106 verNepal; 64 langNepali; |
| "kl", // 107 verGreenland; 149 langGreenlandic; |
| "en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry) |
| }; |
| // Number of entries in regionCodeToLocaleString; valid RegionCode indexes are 0..kNumRegionCodeToLocaleString-1. |
| enum { |
| kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString) / sizeof(regionCodeToLocaleString[0]) |
| }; |
| |
| static const char * const langCodeToLocaleString[] = { |
| // Maps a LangCode (used directly as the array index) to its canonical locale string. |
| // Codes 95..127 are an unassigned gap filled with NULL so that codes 128 and above land on their own indexes; check an entry for NULL before use. |
| // canon. string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray |
| // -------- -------------- ---------- -------- string, if different ] |
| "en", // 0 langEnglish; |
| "fr", // 1 langFrench; |
| "de", // 2 langGerman; |
| "it", // 3 langItalian; |
| "nl", // 4 langDutch; |
| "sv", // 5 langSwedish; |
| "es", // 6 langSpanish; |
| "da", // 7 langDanish; |
| "pt", // 8 langPortuguese; |
| "nb", // 9 langNorwegian (Bokmal); # "no" |
| "he", // 10 langHebrew -Hebr; |
| "ja", // 11 langJapanese -Jpan; |
| "ar", // 12 langArabic -Arab; |
| "fi", // 13 langFinnish; |
| "el", // 14 langGreek (modern)-Grek-mono; |
| "is", // 15 langIcelandic; |
| "mt", // 16 langMaltese -Latn; |
| "tr", // 17 langTurkish -Latn; |
| "hr", // 18 langCroatian; |
| "zh-Hant", // 19 langTradChinese; # "zh" |
| "ur", // 20 langUrdu -Arab; |
| "hi", // 21 langHindi -Deva; |
| "th", // 22 langThai -Thai; |
| "ko", // 23 langKorean -Hang; |
| "lt", // 24 langLithuanian; |
| "pl", // 25 langPolish; |
| "hu", // 26 langHungarian; |
| "et", // 27 langEstonian; |
| "lv", // 28 langLatvian; |
| "se", // 29 langSami; |
| "fo", // 30 langFaroese; |
| "fa", // 31 langFarsi/Persian -Arab; |
| "ru", // 32 langRussian -Cyrl; |
| "zh-Hans", // 33 langSimpChinese; # "zh" |
| "nl-BE", // 34 langFlemish (redundant, =Dutch); # "nl" |
| "ga", // 35 langIrishGaelic (no dots); |
| "sq", // 36 langAlbanian; no region codes |
| "ro", // 37 langRomanian; |
| "cs", // 38 langCzech; |
| "sk", // 39 langSlovak; |
| "sl", // 40 langSlovenian; |
| "yi", // 41 langYiddish -Hebr; no region codes |
| "sr", // 42 langSerbian -Cyrl; |
| "mk", // 43 langMacedonian -Cyrl; |
| "bg", // 44 langBulgarian -Cyrl; |
| "uk", // 45 langUkrainian -Cyrl; |
| "be", // 46 langBelorussian -Cyrl; |
| "uz-Cyrl", // 47 langUzbek -Cyrl; also -Latn, -Arab |
| "kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab |
| "az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az" |
| "az-Arab", // 50 langAzerbaijanAr -Arab; no region codes # "az" |
| "hy", // 51 langArmenian -Armn; |
| "ka", // 52 langGeorgian -Geor; |
| "mo", // 53 langMoldavian -Cyrl; no region codes |
| "ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab |
| "tg-Cyrl", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab |
| "tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab |
| "mn-Mong", // 57 langMongolian -Mong; no region codes # "mn" |
| "mn-Cyrl", // 58 langMongolianCyr -Cyrl; no region codes # "mn" |
| "ps", // 59 langPashto -Arab; no region codes |
| "ku", // 60 langKurdish -Arab; no region codes |
| "ks", // 61 langKashmiri -Arab; no region codes |
| "sd", // 62 langSindhi -Arab; no region codes |
| "bo", // 63 langTibetan -Tibt; |
| "ne", // 64 langNepali -Deva; |
| "sa", // 65 langSanskrit -Deva; no region codes |
| "mr", // 66 langMarathi -Deva; |
| "bn", // 67 langBengali -Beng; |
| "as", // 68 langAssamese -Beng; no region codes |
| "gu", // 69 langGujarati -Gujr; |
| "pa", // 70 langPunjabi -Guru; |
| "or", // 71 langOriya -Orya; no region codes |
| "ml", // 72 langMalayalam -Mlym; no region codes |
| "kn", // 73 langKannada -Knda; no region codes |
| "ta", // 74 langTamil -Taml; no region codes |
| "te", // 75 langTelugu -Telu; no region codes |
| "si", // 76 langSinhalese -Sinh; no region codes |
| "my", // 77 langBurmese -Mymr; no region codes |
| "km", // 78 langKhmer -Khmr; no region codes |
| "lo", // 79 langLao -Laoo; no region codes |
| "vi", // 80 langVietnamese -Latn; |
| "id", // 81 langIndonesian -Latn; no region codes |
| "tl", // 82 langTagalog -Latn; no region codes |
| "ms", // 83 langMalayRoman -Latn; no region codes # "ms" |
| "ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms" |
| "am", // 85 langAmharic -Ethi; no region codes |
| "ti", // 86 langTigrinya -Ethi; no region codes |
| "om", // 87 langOromo -Ethi; no region codes |
| "so", // 88 langSomali -Latn; no region codes |
| "sw", // 89 langSwahili -Latn; no region codes |
| "rw", // 90 langKinyarwanda -Latn; no region codes |
| "rn", // 91 langRundi -Latn; no region codes |
| "ny", // 92 langNyanja/Chewa -Latn; no region codes # "" |
| "mg", // 93 langMalagasy -Latn; no region codes |
| "eo", // 94 langEsperanto -Latn; |
| NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 95 to 105 (gap) |
| NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 106 to 116 (gap) |
| NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 117 to 127 (gap) |
| "cy", // 128 langWelsh -Latn; |
| "eu", // 129 langBasque -Latn; no region codes |
| "ca", // 130 langCatalan -Latn; |
| "la", // 131 langLatin -Latn; no region codes |
| "qu", // 132 langQuechua -Latn; no region codes |
| "gn", // 133 langGuarani -Latn; no region codes |
| "ay", // 134 langAymara -Latn; no region codes |
| "tt-Cyrl", // 135 langTatar -Cyrl; no region codes |
| "ug", // 136 langUighur -Arab; no region codes |
| "dz", // 137 langDzongkha -Tibt; |
| "jv", // 138 langJavaneseRom -Latn; no region codes |
| "su", // 139 langSundaneseRom -Latn; no region codes |
| "gl", // 140 langGalician -Latn; no region codes |
| "af", // 141 langAfrikaans -Latn; |
| "br", // 142 langBreton -Latn; |
| "iu", // 143 langInuktitut -Cans; |
| "gd", // 144 langScottishGaelic; |
| "gv", // 145 langManxGaelic -Latn; |
| "ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx> |
| "to", // 147 langTongan -Latn; |
| "grc", // 148 langGreekAncient -Grek-poly; # "el" |
| "kl", // 149 langGreenlandic -Latn; |
| "az-Latn", // 150 langAzerbaijanRoman -Latn; no region codes # "az" |
| "nn", // 151 langNynorsk -Latn; # (no entry) |
| }; |
| // Number of entries in langCodeToLocaleString; valid LangCode indexes are 0..kNumLangCodeToLocaleString-1. |
| enum { |
| kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString) / sizeof(langCodeToLocaleString[0]) |
| }; |
| |
| static const KeyStringToResultString oldAppleLocaleToCanonical[] = { |
| // Maps obsolete/old-style Apple locale strings (key, first column) to their canonical replacements (result, second column). |
| // Must stay sorted by key in strcmp order -- note that this places the capitalized language names ahead of the lower-case codes. NOTE(review): presumably required by a binary search in the lookup code, which is not visible here -- confirm. |
| // |
| // non-canonical canonical [ comment ] # source/reason for non-canonical string |
| // string string |
| // ------------- --------- |
| { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray |
| { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray |
| { "Amharic", "am" }, // # __CFBundleLanguageNamesArray |
| { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray |
| { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray |
| { "Assamese", "as" }, // # __CFBundleLanguageNamesArray |
| { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray |
| { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn") |
| { "Basque", "eu" }, // # __CFBundleLanguageNamesArray |
| { "Belarusian", "be" }, // # handle other names |
| { "Belorussian", "be" }, // # handle other names |
| { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray |
| { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled |
| { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above |
| { "Breton", "br" }, // # __CFBundleLanguageNamesArray |
| { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray |
| { "Burmese", "my" }, // # __CFBundleLanguageNamesArray |
| { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray |
| { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray |
| { "Chewa", "ny" }, // # handle other names |
| { "Chichewa", "ny" }, // # handle other names |
| { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans") |
| { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key |
| { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below |
| { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled |
| { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray |
| { "Czech", "cs" }, // # __CFBundleLanguageNamesArray |
| { "Danish", "da" }, // # __CFBundleLanguageNamesArray |
| { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE") |
| { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray |
| { "English", "en" }, // # __CFBundleLanguageNamesArray |
| { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray |
| { "Estonian", "et" }, // # __CFBundleLanguageNamesArray |
| { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray |
| { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray |
| { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray |
| { "Flemish", "nl-BE" }, // # handle other names |
| { "French", "fr" }, // # __CFBundleLanguageNamesArray |
| { "Galician", "gl" }, // # __CFBundleLanguageNamesArray |
| { "Gallegan", "gl" }, // # handle other names |
| { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray |
| { "German", "de" }, // # __CFBundleLanguageNamesArray |
| { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc") |
| { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray |
| { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray |
| { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray |
| { "Hawaiian", "haw" }, // # handle new languages |
| { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray |
| { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray |
| { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray |
| { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray |
| { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray |
| { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray |
| { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots") |
| { "Italian", "it" }, // # __CFBundleLanguageNamesArray |
| { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray |
| { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray |
| { "Kalaallisut", "kl" }, // # handle other names |
| { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray |
| { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray |
| { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray |
| { "Khmer", "km" }, // # __CFBundleLanguageNamesArray |
| { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray |
| { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray |
| { "Korean", "ko" }, // # __CFBundleLanguageNamesArray |
| { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray |
| { "Lao", "lo" }, // # __CFBundleLanguageNamesArray |
| { "Latin", "la" }, // # __CFBundleLanguageNamesArray |
| { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray |
| { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray |
| { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray |
| { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray |
| { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab") |
| { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray |
| { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray |
| { "Manx", "gv" }, // # __CFBundleLanguageNamesArray |
| { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray |
| { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray |
| { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl") |
| { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray |
| { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no") |
| { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray |
| { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray) |
| { "Oriya", "or" }, // # __CFBundleLanguageNamesArray |
| { "Oromo", "om" }, // # __CFBundleLanguageNamesArray |
| { "Panjabi", "pa" }, // # handle other names |
| { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray |
| { "Persian", "fa" }, // # handle other names |
| { "Polish", "pl" }, // # __CFBundleLanguageNamesArray |
| { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray |
| { "Portuguese, Brazilian", "pt-BR" }, // # handle other names |
| { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray |
| { "Pushto", "ps" }, // # handle other names |
| { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray |
| { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray |
| { "Ruanda", "rw" }, // # handle other names |
| { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray |
| { "Russian", "ru" }, // # __CFBundleLanguageNamesArray |
| { "Sami", "se" }, // # __CFBundleLanguageNamesArray |
| { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray |
| { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray |
| { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray |
| { "Simplified Chinese", "zh-Hans" }, // # handle other names |
| { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray |
| { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray |
| { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray |
| { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray |
| { "Somali", "so" }, // # __CFBundleLanguageNamesArray |
| { "Spanish", "es" }, // # __CFBundleLanguageNamesArray |
| { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray |
| { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray |
| { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray |
| { "Tagalog", "tl" }, // # __CFBundleLanguageNamesArray |
| { "Tajik", "tg" }, // # handle other names |
| { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray |
| { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray |
| { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray |
| { "Telugu", "te" }, // # __CFBundleLanguageNamesArray |
| { "Thai", "th" }, // # __CFBundleLanguageNamesArray |
| { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray |
| { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray |
| { "Tongan", "to" }, // # __CFBundleLanguageNamesArray |
| { "Traditional Chinese", "zh-Hant" }, // # handle other names |
| { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray |
| { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray |
| { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray |
| { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray |
| { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray |
| { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray |
| { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray |
| { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray |
| { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray |
| { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes |
| { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString |
| { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString |
| { "az.La", "az-Latn" }, // # from old LocaleRefGetPartString |
| { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes |
| { "bn_??", "bn" }, // # from old LocaleRefGetPartString |
| { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes |
| { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes |
| { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes |
| { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> |
| { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> |
| { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes |
| { "el.El-P", "grc" }, // # from old LocaleRefGetPartString |
| { "en-ascii", "en_001" }, // # from earlier version of tables in this file! |
| { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes |
| { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes |
| { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes |
| { "es_XL", "es_419" }, // # from earlier version of tables in this file! |
| { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes |
| { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8> |
| { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8> |
| { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8> |
| { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8> |
| { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes |
| { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes |
| { "jv.La", "jv" }, // # logical extension // <1.9> |
| { "jw.La", "jv" }, // # from old LocaleRefGetPartString |
| { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString |
| { "kl.La", "kl" }, // # from old LocaleRefGetPartString |
| { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9> |
| { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes |
| { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes |
| { "mn.Cy", "mn-Cyrl" }, // # from old LocaleRefGetPartString |
| { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString |
| { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString |
| { "ms.La", "ms" }, // # from old LocaleRefGetPartString |
| { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString |
| { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString |
| // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
| // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
| // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
| { "pa_??", "pa" }, // # from old LocaleRefGetPartString |
| { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString |
| { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes |
| { "sr_??", "sr_CS" }, // # from old MapScriptInfoAndISOCodes // <1.18> |
| { "su.La", "su" }, // # from old LocaleRefGetPartString |
| { "yi.He", "yi" }, // # from old LocaleRefGetPartString |
| { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file! |
| { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file! |
| { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString |
| { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString |
| { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString |
| { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString |
| }; |
| // Number of rows in oldAppleLocaleToCanonical; valid row indexes are 0..kNumOldAppleLocaleToCanonical-1. |
| enum { |
| kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical) / sizeof(oldAppleLocaleToCanonical[0]) |
| }; |
| |
| static const KeyStringToResultString localeStringPrefixToCanonical[] = { |
| // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code. |
| // (special cases for 'sh' handled separately) |
| // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column. |
| // |
| // non-canonical canonical [ comment ] # source/reason for non-canonical string |
| // prefix prefix |
| // ------------- --------- |
| |
| { "afr", "af" }, // Afrikaans |
| { "alb", "sq" }, // Albanian |
| { "amh", "am" }, // Amharic |
| { "ara", "ar" }, // Arabic |
| { "arm", "hy" }, // Armenian |
| { "asm", "as" }, // Assamese |
| { "aym", "ay" }, // Aymara |
| { "aze", "az" }, // Azerbaijani |
| { "baq", "eu" }, // Basque |
| { "bel", "be" }, // Belarusian |
| { "ben", "bn" }, // Bengali |
| { "bih", "bh" }, // Bihari |
| { "bod", "bo" }, // Tibetan |
| { "bos", "bs" }, // Bosnian |
| { "bre", "br" }, // Breton |
| { "bul", "bg" }, // Bulgarian |
| { "bur", "my" }, // Burmese |
| { "cat", "ca" }, // Catalan |
| { "ces", "cs" }, // Czech |
| { "che", "ce" }, // Chechen |
| { "chi", "zh" }, // Chinese |
| { "cor", "kw" }, // Cornish |
| { "cos", "co" }, // Corsican |
| { "cym", "cy" }, // Welsh |
| { "cze", "cs" }, // Czech |
| { "dan", "da" }, // Danish |
| { "deu", "de" }, // German |
| { "dut", "nl" }, // Dutch |
| { "dzo", "dz" }, // Dzongkha |
| { "ell", "el" }, // Greek, Modern (1453-) |
| { "eng", "en" }, // English |
| { "epo", "eo" }, // Esperanto |
| { "est", "et" }, // Estonian |
| { "eus", "eu" }, // Basque |
| { "fao", "fo" }, // Faroese |
| { "fas", "fa" }, // Persian |
| { "fin", "fi" }, // Finnish |
| { "fra", "fr" }, // French |
| { "fre", "fr" }, // French |
| { "geo", "ka" }, // Georgian |
| { "ger", "de" }, // German |
| { "gla", "gd" }, // Gaelic,Scottish |
| { "gle", "ga" }, // Irish |
| { "glg", "gl" }, // Gallegan |
| { "glv", "gv" }, // Manx |
| { "gre", "el" }, // Greek, Modern (1453-) |
| { "grn", "gn" }, // Guarani |
| { "guj", "gu" }, // Gujarati |
| { "heb", "he" }, // Hebrew |
| { "hin", "hi" }, // Hindi |
| { "hrv", "hr" }, // Croatian |
| { "hun", "hu" }, // Hungarian |
| { "hye", "hy" }, // Armenian |
| { "i-hak", "zh-hakka" }, // Hakka # deprecated RFC 3066 |
| { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066 |
| { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066 |
| { "ice", "is" }, // Icelandic |
| { "iku", "iu" }, // Inuktitut |
| { "ile", "ie" }, // Interlingue |
| { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989) |
| { "ina", "ia" }, // Interlingua |
| { "ind", "id" }, // Indonesian |
| { "isl", "is" }, // Icelandic |
| { "ita", "it" }, // Italian |
| { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989) |
| { "jav", "jv" }, // Javanese |
| { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001) |
| { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989) |
| { "jpn", "ja" }, // Japanese |
| { "kal", "kl" }, // Kalaallisut |
| { "kan", "kn" }, // Kannada |
| { "kas", "ks" }, // Kashmiri |
| { "kat", "ka" }, // Georgian |
| { "kaz", "kk" }, // Kazakh |
| { "khm", "km" }, // Khmer |
| { "kin", "rw" }, // Kinyarwanda |
| { "kir", "ky" }, // Kirghiz |
| { "kor", "ko" }, // Korean |
| { "kur", "ku" }, // Kurdish |
| { "lao", "lo" }, // Lao |
| { "lat", "la" }, // Latin |
| { "lav", "lv" }, // Latvian |
| { "lit", "lt" }, // Lithuanian |
| { "ltz", "lb" }, // Letzeburgesch |
| { "mac", "mk" }, // Macedonian |
| { "mal", "ml" }, // Malayalam |
| { "mar", "mr" }, // Marathi |
| { "may", "ms" }, // Malay |
| { "mkd", "mk" }, // Macedonian |
| { "mlg", "mg" }, // Malagasy |
| { "mlt", "mt" }, // Maltese |
| { "mol", "mo" }, // Moldavian |
| { "mon", "mn" }, // Mongolian |
| { "msa", "ms" }, // Malay |
| { "mya", "my" }, // Burmese |
| { "nep", "ne" }, // Nepali |
| { "nld", "nl" }, // Dutch |
| { "nno", "nn" }, // Norwegian Nynorsk |
| { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb |
| { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString |
| { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString |
| { "nob", "nb" }, // Norwegian Bokmal |
| { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb |
| { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString |
| { "oci", "oc" }, // Occitan/Provencal |
| { "ori", "or" }, // Oriya |
| { "orm", "om" }, // Oromo,Galla |
| { "pan", "pa" }, // Panjabi |
| { "per", "fa" }, // Persian |
| { "pol", "pl" }, // Polish |
| { "por", "pt" }, // Portuguese |
| { "pus", "ps" }, // Pushto |
| { "que", "qu" }, // Quechua |
| { "roh", "rm" }, // Raeto-Romance |
| { "ron", "ro" }, // Romanian |
| { "rum", "ro" }, // Romanian |
| { "run", "rn" }, // Rundi |
| { "rus", "ru" }, // Russian |
| { "san", "sa" }, // Sanskrit |
| { "scc", "sr" }, // Serbian |
| { "scr", "hr" }, // Croatian |
| { "sin", "si" }, // Sinhalese |
| { "slk", "sk" }, // Slovak |
| { "slo", "sk" }, // Slovak |
| { "slv", "sl" }, // Slovenian |
| { "sme", "se" }, // Sami,Northern |
| { "snd", "sd" }, // Sindhi |
| { "som", "so" }, // Somali |
| { "spa", "es" }, // Spanish |
| { "sqi", "sq" }, // Albanian |
| { "srp", "sr" }, // Serbian |
| { "sun", "su" }, // Sundanese |
| { "swa", "sw" }, // Swahili |
| { "swe", "sv" }, // Swedish |
| { "tam", "ta" }, // Tamil |
| { "tat", "tt" }, // Tatar |
| { "tel", "te" }, // Telugu |
| { "tgk", "tg" }, // Tajik |
| { "tgl", "tl" }, // Tagalog |
| { "tha", "th" }, // Thai |
| { "tib", "bo" }, // Tibetan |
| { "tir", "ti" }, // Tigrinya |
| { "ton", "to" }, // Tongan |
| { "tuk", "tk" }, // Turkmen |
| { "tur", "tr" }, // Turkish |
| { "uig", "ug" }, // Uighur |
| { "ukr", "uk" }, // Ukrainian |
| { "urd", "ur" }, // Urdu |
| { "uzb", "uz" }, // Uzbek |
| { "vie", "vi" }, // Vietnamese |
| { "wel", "cy" }, // Welsh |
| { "yid", "yi" }, // Yiddish |
| { "zho", "zh" }, // Chinese |
| }; |
| enum { |
| kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString) |
| }; |
| |
| |
| static const SpecialCaseUpdates specialCases[] = { |
| // Data for special cases |
| // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was |
| // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after |
| // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! However, ICU |
| // and RFC 3066bis will continue to use YU for this. So now CS is ambiguous. We guess as follows: If we |
| // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS to YU. |
| // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and |
| // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use |
| // hr; if there is a region tag of (now) YU we use sr; else we do not change it (not enough info). |
| // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the |
| // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB). |
| { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB) |
| { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05) |
| { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic) |
| { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia) |
| { NULL, "-YU", "CS", NULL, NULL }, // then always change YU to CS (map old Yugoslavia code to new 2003-07 ISO code |
| // for Serbia & Montenegro per RFC3066bis & ICU) // <1.18> |
| // Note: do this after fixing CS for cs/sk as above. |
| { "sh", "-HR", "hr", "-CS", "sr" }, // if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) if we find |
| // HR (Croatia) or to 'sr' (Serbian) if we find CS (Serbia & Montenegro, Yugoslavia). // <1.18> |
| // Note: Do this after changing YU to CS as above. |
| { NULL, NULL, NULL, NULL, NULL } // terminator |
| }; |
| |
| |
| static const KeyStringToResultString localeStringRegionToDefaults[] = { |
| // For some region-code suffixes, there are default substrings to strip off for canonical string. |
| // Must be sorted according to how strcmp compares the strings in the first column |
| // |
| // region default writing |
| // suffix system tags, strip comment |
| // -------- ------------- --------- |
| { "_CN", "-Hans" }, // mainland China, default is simplified |
| { "_HK", "-Hant" }, // Hong Kong, default is traditional |
| { "_MO", "-Hant" }, // Macao, default is traditional |
| { "_SG", "-Hans" }, // Singapore, default is simplified |
| { "_TW", "-Hant" }, // Taiwan, default is traditional |
| }; |
| enum { |
| kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString) |
| }; |
| |
| static const KeyStringToResultString localeStringPrefixToDefaults[] = { |
| // For some initial portions of language tag, there are default substrings to strip off for canonical string. |
| // Must be sorted according to how strcmp compares the strings in the first column |
| // |
| // language default writing |
| // tag prefix system tags, strip comment |
| // -------- ------------- --------- |
| { "ab-", "-Cyrl" }, // Abkhazian |
| { "af-", "-Latn" }, // Afrikaans |
| { "am-", "-Ethi" }, // Amharic |
| { "ar-", "-Arab" }, // Arabic |
| { "as-", "-Beng" }, // Assamese |
| { "ay-", "-Latn" }, // Aymara |
| { "be-", "-Cyrl" }, // Belarusian |
| { "bg-", "-Cyrl" }, // Bulgarian |
| { "bn-", "-Beng" }, // Bengali |
| { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script) |
| { "br-", "-Latn" }, // Breton (? not Suppress-Script) |
| { "bs-", "-Latn" }, // Bosnian |
| { "ca-", "-Latn" }, // Catalan |
| { "cs-", "-Latn" }, // Czech |
| { "cy-", "-Latn" }, // Welsh |
| { "da-", "-Latn" }, // Danish |
| { "de-", "-Latn -1901" }, // German, traditional orthography |
| { "dv-", "-Thaa" }, // Divehi/Maldivian |
| { "dz-", "-Tibt" }, // Dzongkha |
| { "el-", "-Grek" }, // Greek (modern, monotonic) |
| { "en-", "-Latn" }, // English |
| { "eo-", "-Latn" }, // Esperanto |
| { "es-", "-Latn" }, // Spanish |
| { "et-", "-Latn" }, // Estonian |
| { "eu-", "-Latn" }, // Basque |
| { "fa-", "-Arab" }, // Farsi |
| { "fi-", "-Latn" }, // Finnish |
| { "fo-", "-Latn" }, // Faroese |
| { "fr-", "-Latn" }, // French |
| { "ga-", "-Latn" }, // Irish |
| { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script) |
| { "gl-", "-Latn" }, // Galician |
| { "gn-", "-Latn" }, // Guarani |
| { "gu-", "-Gujr" }, // Gujarati |
| { "gv-", "-Latn" }, // Manx |
| { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script) |
| { "he-", "-Hebr" }, // Hebrew |
| { "hi-", "-Deva" }, // Hindi |
| { "hr-", "-Latn" }, // Croatian |
| { "hu-", "-Latn" }, // Hungarian |
| { "hy-", "-Armn" }, // Armenian |
| { "id-", "-Latn" }, // Indonesian |
| { "is-", "-Latn" }, // Icelandic |
| { "it-", "-Latn" }, // Italian |
| { "ja-", "-Jpan" }, // Japanese |
| { "ka-", "-Geor" }, // Georgian |
| { "kk-", "-Cyrl" }, // Kazakh |
| { "kl-", "-Latn" }, // Kalaallisut/Greenlandic |
| { "km-", "-Khmr" }, // Central Khmer |
| { "kn-", "-Knda" }, // Kannada |
| { "ko-", "-Hang" }, // Korean (? not Suppress-Script) |
| { "kok-", "-Deva" }, // Konkani |
| { "la-", "-Latn" }, // Latin |
| { "lb-", "-Latn" }, // Luxembourgish |
| { "lo-", "-Laoo" }, // Lao |
| { "lt-", "-Latn" }, // Lithuanian |
| { "lv-", "-Latn" }, // Latvian |
| { "mg-", "-Latn" }, // Malagasy |
| { "mk-", "-Cyrl" }, // Macedonian |
| { "ml-", "-Mlym" }, // Malayalam |
| { "mo-", "-Latn" }, // Moldavian |
| { "mr-", "-Deva" }, // Marathi |
| { "ms-", "-Latn" }, // Malay |
| { "mt-", "-Latn" }, // Maltese |
| { "my-", "-Mymr" }, // Burmese/Myanmar |
| { "nb-", "-Latn" }, // Norwegian Bokmal |
| { "ne-", "-Deva" }, // Nepali |
| { "nl-", "-Latn" }, // Dutch |
| { "nn-", "-Latn" }, // Norwegian Nynorsk |
| { "ny-", "-Latn" }, // Chichewa/Nyanja |
| { "om-", "-Latn" }, // Oromo |
| { "or-", "-Orya" }, // Oriya |
| { "pa-", "-Guru" }, // Punjabi |
| { "pl-", "-Latn" }, // Polish |
| { "ps-", "-Arab" }, // Pushto |
| { "pt-", "-Latn" }, // Portuguese |
| { "qu-", "-Latn" }, // Quechua |
| { "rn-", "-Latn" }, // Rundi |
| { "ro-", "-Latn" }, // Romanian |
| { "ru-", "-Cyrl" }, // Russian |
| { "rw-", "-Latn" }, // Kinyarwanda |
| { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script) |
| { "se-", "-Latn" }, // Sami (? not Suppress-Script) |
| { "si-", "-Sinh" }, // Sinhala |
| { "sk-", "-Latn" }, // Slovak |
| { "sl-", "-Latn" }, // Slovenian |
| { "so-", "-Latn" }, // Somali |
| { "sq-", "-Latn" }, // Albanian |
| { "sv-", "-Latn" }, // Swedish |
| { "sw-", "-Latn" }, // Swahili |
| { "ta-", "-Taml" }, // Tamil |
| { "te-", "-Telu" }, // Telugu |
| { "th-", "-Thai" }, // Thai |
| { "ti-", "-Ethi" }, // Tigrinya |
| { "tl-", "-Latn" }, // Tagalog |
| { "tn-", "-Latn" }, // Tswana |
| { "to-", "-Latn" }, // Tonga of Tonga Islands |
| { "tr-", "-Latn" }, // Turkish |
| { "uk-", "-Cyrl" }, // Ukrainian |
| { "ur-", "-Arab" }, // Urdu |
| { "vi-", "-Latn" }, // Vietnamese |
| { "wo-", "-Latn" }, // Wolof |
| { "xh-", "-Latn" }, // Xhosa |
| { "yi-", "-Hebr" }, // Yiddish |
| { "zh-", "-Hani" }, // Chinese (? not Suppress-Script) |
| { "zu-", "-Latn" }, // Zulu |
| }; |
| enum { |
| kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString) |
| }; |
| |
| static const KeyStringToResultString appleLocaleToLanguageString[] = { |
| // Map locale strings that Apple uses as language IDs to real language strings. |
| // Must be sorted according to how strcmp compares the strings in the first column. |
| // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now |
| // handled in the code. <1.19> |
| // |
| // locale lang [ comment ] |
| // string string |
| // ------- ------- |
| { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] |
| { "zh_CN", "zh-Hans" }, // mainland China => simplified |
| { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used |
| { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used |
| { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used |
| { "zh_TW", "zh-Hant" }, // Taiwan => traditional |
| }; |
| enum { |
| kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString) |
| }; |
| |
| static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = { |
| // Map locale strings that Apple uses as language IDs to real language strings. |
| // Must be sorted according to how strcmp compares the strings in the first column. |
| // |
| // locale lang [ comment ] |
| // string string |
| // ------- ------- |
| { "de_AT", "de-AT" }, // Austrian German |
| { "de_CH", "de-CH" }, // Swiss German |
| // { "de_DE", "de-DE" }, // German for Germany (default), not currently used |
| { "en_AU", "en-AU" }, // Australian English |
| { "en_CA", "en-CA" }, // Canadian English |
| { "en_GB", "en-GB" }, // British English |
| // { "en_IE", "en-IE" }, // Irish English, not currently used |
| { "en_US", "en-US" }, // U.S. English |
| { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] |
| // { "fr_BE", "fr-BE" }, // Belgian French, not currently used |
| { "fr_CA", "fr-CA" }, // Canadian French |
| { "fr_CH", "fr-CH" }, // Swiss French |
| // { "fr_FR", "fr-FR" }, // French for France (default), not currently used |
| { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium |
| // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used |
| { "pt_BR", "pt-BR" }, // Brazilian Portuguese |
| { "pt_PT", "pt-PT" }, // Portuguese for Portugal |
| { "zh_CN", "zh-Hans" }, // mainland China => simplified |
| { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used |
| { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used |
| { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used |
| { "zh_TW", "zh-Hant" }, // Taiwan => traditional |
| }; |
| enum { |
| kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString) |
| }; |
| |
| |
| struct LocaleToLegacyCodes { |
| const char * locale; // reduced to language plus one other component (script, region, variant), separators normalized to'_' |
| RegionCode regCode; |
| LangCode langCode; |
| CFStringEncoding encoding; |
| }; |
| typedef struct LocaleToLegacyCodes LocaleToLegacyCodes; |
| |
| static const LocaleToLegacyCodes localeToLegacyCodes[] = { |
| // locale RegionCode LangCode CFStringEncoding |
| { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn |
| { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi |
| { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab; |
| { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng; |
| { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn; |
| { "az", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // assume "az" defaults to -Cyrl |
| { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab; |
| { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn; |
| { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng; |
| { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt; |
| { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn; |
| { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn; |
| { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn; |
| { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn; |
| { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn; |
| { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany |
| { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, |
| { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt; |
| { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece |
| { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ }, |
| { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic |
| { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten) |
| { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, |
| { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn; |
| { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten) |
| { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag |
| { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, |
| { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, |
| { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, |
| { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ }, |
| { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn; |
| { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab; |
| { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ }, |
| { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ }, |
| { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten) |
| { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, |
| { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after) |
| { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots |
| { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ }, |
| { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn; |
| { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn; |
| { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it) |
| { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr; |
| { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn; |
| { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr; |
| { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva; |
| { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ }, |
| { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ }, |
| { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn; |
| { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn; |
| { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ }, |
| { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly |
| { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ }, |
| { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, |
| { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans; |
| { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan; |
| { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn; |
| { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor; |
| { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab |
| { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn; |
| { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr; |
| { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda; |
| { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang; |
| { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab; |
| { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab; |
| { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn; |
| { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo; |
| { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ }, |
| { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ }, |
| { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn; |
| { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym; |
| { "mn", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // "mn" defaults to -Mong |
| { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong; |
| { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva; |
| { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn; |
| { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab; |
| { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn; |
| { "mul", 74/*verMultilingual*/, -1, 0 }, |
| { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr; |
| { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ }, |
| { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva; |
| { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands |
| { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ }, |
| { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, |
| { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ }, |
| { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn; |
| { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi; |
| { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya; |
| { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru; |
| { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ }, |
| { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab; |
| { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten) |
| { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, |
| { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ }, |
| { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn; |
| { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn; |
| { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ }, |
| { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn; |
| { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva; |
| { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab; |
| { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ }, |
| { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh; |
| { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ }, |
| { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ }, |
| { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn; |
| { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ }, |
| { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn; |
| { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ }, |
| { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn; |
| { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml; |
| { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu |
| { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl" |
| { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai; |
| { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi; |
| { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl |
| { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; |
| { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn; |
| { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn; |
| { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab; |
| { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl; |
| { "und", 55/*verScriptGeneric*/, -1, 0 }, |
| { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu |
| { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab |
| { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab |
| { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
| { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, |
| { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn |
| { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr; |
| { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese |
| { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
| { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
| { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
| { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
| }; |
| enum { |
| kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0]) |
| }; |
| |
| /* |
| For reference here is a list of ICU locales with variants and how some |
| of them are canonicalized with the ICU function uloc_canonicalize: |
| |
| ICU 3.0 has: |
| en_US_POSIX x no change |
| hy_AM_REVISED x no change |
| ja_JP_TRADITIONAL -> ja_JP@calendar=japanese |
| th_TH_TRADITIONAL -> th_TH@calendar=buddhist |
| |
| ICU 2.8 also had the following (now obsolete): |
| ca_ES_PREEURO |
| de__PHONEBOOK -> de@collation=phonebook |
| de_AT_PREEURO |
| de_DE_PREEURO |
| de_LU_PREEURO |
| el_GR_PREEURO |
| en_BE_PREEURO |
| en_GB_EURO -> en_GB@currency=EUR |
| en_IE_PREEURO -> en_IE@currency=IEP |
| es__TRADITIONAL -> es@collation=traditional |
| es_ES_PREEURO |
| eu_ES_PREEURO |
| fi_FI_PREEURO |
| fr_BE_PREEURO |
| fr_FR_PREEURO -> fr_FR@currency=FRF |
| fr_LU_PREEURO |
| ga_IE_PREEURO |
| gl_ES_PREEURO |
| hi__DIRECT -> hi@collation=direct |
| it_IT_PREEURO |
| nl_BE_PREEURO |
| nl_NL_PREEURO |
| pt_PT_PREEURO |
| zh__PINYIN -> zh@collation=pinyin |
| zh_TW_STROKE -> zh_TW@collation=stroke |
| |
| */ |
| |
| // _CompareTestEntryToTableEntryKey |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // comparison function for bsearch |
| static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { |
| return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key ); |
| } |
| |
| // _CompareTestEntryPrefixToTableEntryKey |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'. |
| // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice. |
| static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { |
| const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; |
| const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; |
| |
| while ( *testPtr == *tablePtr && *tablePtr != 0 ) { |
| testPtr++; tablePtr++; |
| } |
| if ( *tablePtr != 0 ) { |
| // strings are different, and the string in the table has not run out; |
| // i.e. the table entry is not a prefix of the text string. |
| return ( *testPtr < *tablePtr )? -1: 1; |
| } |
| return 0; |
| } |
| |
| // _CompareLowerTestEntryPrefixToTableEntryKey |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'. |
| // Lowercases the test string before comparison (the table should already have lowercased entries). |
| static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { |
| const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; |
| const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; |
| char lowerTestChar; |
| |
| while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9> |
| testPtr++; tablePtr++; |
| } |
| if ( *tablePtr != 0 ) { |
| // strings are different, and the string in the table has not run out; |
| // i.e. the table entry is not a prefix of the text string. |
| if (lowerTestChar == '_') // <1.9> |
| return -1; // <1.9> |
| return ( lowerTestChar < *tablePtr )? -1: 1; |
| } |
| // The string in the table has run out. If the test string char is not alnum, |
| // then the string matches, else the test string sorts after. |
| return ( !isalnum(lowerTestChar) )? 0: 1; |
| } |
| |
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Removes _length_ characters from the beginning of the string indicated by _stringPtr_ by
// moving the rest of the string, including its terminating NUL byte, down over them.
// (the caller guarantees that the string has at least _length_ characters in it)
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    const char * readPtr = stringPtr + length;
    char movedChar;

    do {
        movedChar = *readPtr++;
        *stringPtr++ = movedChar;
    } while ( movedChar != 0 );    // stop once the terminator itself has been copied
}
| |
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrites the characters starting at stringPtr with the replacement string, *excluding*
// the replacement's terminating NUL byte, so whatever follows in the destination is kept.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    size_t offset;

    for ( offset = 0; replacementPtr[offset] != 0; offset++ ) {
        stringPtr[offset] = replacementPtr[offset];
    }
}
| |
| // _CheckForTag |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) { |
| return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) ); |
| } |
| |
// _ReplacePrefix
// Replaces the first oldPrefixLen characters of locString with newPrefix, shifting the rest
// of the string (terminator included) left or right as needed. locStringMaxLen is the limit
// used for the room check: when the replacement is longer and the grown string would not be
// shorter than locStringMaxLen, locString is left unchanged.
// (Code moved out of _UpdateFullLocaleString into a separate function)    // <1.10>
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    const int newPrefixLen = (int)strlen(newPrefix);
    const int growth = newPrefixLen - oldPrefixLen;

    if (growth > 0) {
        // replacement is longer...
        const int currentLen = (int)strlen(locString);
        int idx;

        if (currentLen + growth >= locStringMaxLen) {
            // no room, can't do substitution
            return;
        }
        // make room by shifting the tail of the string, working backwards from the
        // terminating NUL so that no character is overwritten before it is moved
        for (idx = currentLen; idx >= oldPrefixLen; idx--) {
            locString[idx + growth] = locString[idx];
        }
    } else if (growth < 0) {
        // replacement is shorter, delete chars by shifting tail of string
        _DeleteCharsAtPointer(locString + newPrefixLen, -growth);
    }

    // do the substitution
    _CopyReplacementAtPointer(locString, newPrefix);
}
| |
| // _UpdateFullLocaleString |
| // Given a locale string that uses standard codes (not a special old-style Apple string), |
| // update all the language codes and region codes to latest versions, map 3-letter |
| // language codes to 2-letter codes if possible, and normalize casing. If requested, return |
| // pointers to a language-region variant subtag (if present) and a region tag (if present). |
| // (add locStringMaxLen parameter) // <1.10> |
| static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen, |
| char **langRegSubtagRef, char **regionTagRef, |
| char varKeyValueString[]) // <1.17> |
| { |
| KeyStringToResultString testEntry; |
| KeyStringToResultString * foundEntry; |
| const SpecialCaseUpdates * specialCasePtr; |
| char * inLocalePtr; |
| char * subtagPtr; |
| char * langRegSubtag = NULL; |
| char * regionTag = NULL; |
| char * variantTag = NULL; |
| Boolean subtagHasDigits, pastPrimarySubtag, hadRegion; |
| |
| // 1. First replace any non-canonical prefix (case insensitive) with canonical |
| // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.) |
| |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical, |
| sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey ); |
| if (foundEntry) { |
| // replace key (at beginning of string) with result |
| _ReplacePrefix(inLocaleString, locStringMaxLen, (int)strlen(foundEntry->key), foundEntry->result); // <1.10> |
| } |
| |
| // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes |
| |
| inLocalePtr = inLocaleString; |
| subtagPtr = inLocaleString; |
| subtagHasDigits = false; |
| pastPrimarySubtag = false; |
| hadRegion = false; |
| |
| while ( true ) { |
| if ( isalpha(*inLocalePtr) ) { |
| // if not past a region tag, then lowercase, else uppercase |
| *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr); |
| } else if ( isdigit(*inLocalePtr) ) { |
| subtagHasDigits = true; |
| } else { |
| |
| if (!pastPrimarySubtag) { |
| // may have a NULL primary subtag |
| if (subtagHasDigits) { |
| break; |
| } |
| pastPrimarySubtag = true; |
| } else if (!hadRegion) { |
| // We are after any primary language subtag, but not past any region tag. |
| // This subtag is preceded by '-' or '_'. |
| int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_' |
| |
| if (subtagLength == 3 && !subtagHasDigits) { |
| // potential ISO 3166 code for region or language variant; if so, needs uppercasing |
| if (*subtagPtr == '_') { |
| regionTag = subtagPtr; |
| hadRegion = true; |
| subtagPtr[1] = toupper(subtagPtr[1]); |
| subtagPtr[2] = toupper(subtagPtr[2]); |
| } else if (langRegSubtag == NULL) { |
| langRegSubtag = subtagPtr; |
| subtagPtr[1] = toupper(subtagPtr[1]); |
| subtagPtr[2] = toupper(subtagPtr[2]); |
| } |
| } else if (subtagLength == 4 && subtagHasDigits) { |
| // potential UN M.49 region code |
| if (*subtagPtr == '_') { |
| regionTag = subtagPtr; |
| hadRegion = true; |
| } else if (langRegSubtag == NULL) { |
| langRegSubtag = subtagPtr; |
| } |
| } else if (subtagLength == 5 && !subtagHasDigits) { |
| // ISO 15924 script code, uppercase just the first letter |
| subtagPtr[1] = toupper(subtagPtr[1]); |
| } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17> |
| hadRegion = true; |
| } |
| |
| if (!hadRegion) { |
| // convert improper '_' to '-' |
| *subtagPtr = '-'; |
| } |
| } else { |
| variantTag = subtagPtr; // <1.17> |
| } |
| |
| if (*inLocalePtr == '-' || *inLocalePtr == '_') { |
| subtagPtr = inLocalePtr; |
| subtagHasDigits = false; |
| } else { |
| break; |
| } |
| } |
| |
| inLocalePtr++; |
| } |
| |
| // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669] |
| // If so, copy the keywords to varKeyValueString and delete the variant tag |
| // from the original string (but don't otherwise use the ICU canonicalization). |
| varKeyValueString[0] = 0; |
| if (variantTag) { |
| UErrorCode icuStatus; |
| int icuCanonStringLen; |
| char * varKeyValueStringPtr = varKeyValueString; |
| |
| icuStatus = U_ZERO_ERROR; |
| icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus ); |
| if ( U_SUCCESS(icuStatus) ) { |
| char * icuCanonStringPtr = varKeyValueString; |
| |
| if (icuCanonStringLen >= locStringMaxLen) |
| icuCanonStringLen = locStringMaxLen - 1; |
| varKeyValueString[icuCanonStringLen] = 0; |
| while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR) |
| ++icuCanonStringPtr; |
| if (*icuCanonStringPtr != 0) { |
| // the canonicalized string has keywords |
| // delete the variant tag in the original string (and other trailing '_' or '-') |
| *variantTag-- = 0; |
| while (*variantTag == '_') |
| *variantTag-- = 0; |
| // delete all of the canonicalized string except the keywords |
| while (*icuCanonStringPtr != 0) |
| *varKeyValueStringPtr++ = *icuCanonStringPtr++; |
| } |
| *varKeyValueStringPtr = 0; |
| } |
| } |
| |
| // 4. Handle special cases of updating region codes, or updating language codes based on |
| // region code. |
| for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) { |
| if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) { |
| // OK, we matched any language specified. Now what needs updating? |
| char * foundTag; |
| |
| if ( isupper(specialCasePtr->update1[0]) ) { |
| // updating a region code |
| if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) { |
| _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1); |
| } |
| if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) { |
| _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1); |
| } |
| |
| } else { |
| // updating the language, there will be two choices based on region |
| if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) || |
| ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) { |
| _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1); |
| } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) || |
| ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) { |
| _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2); |
| } |
| } |
| } |
| } |
| |
| // 5. return pointers if requested. |
| if (langRegSubtagRef != NULL) { |
| *langRegSubtagRef = langRegSubtag; |
| } |
| if (regionTagRef != NULL) { |
| *regionTagRef = regionTag; |
| } |
| } |
| |
| |
| // _RemoveSubstringsIfPresent |
| // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) |
| // substringList is a list of space-separated substrings to strip if found in localeString |
| static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) { |
| while (*substringList != 0) { |
| char currentSubstring[kLocaleIdentifierCStringMax]; |
| int substringLength = 0; |
| char * foundSubstring; |
| |
| // copy current substring & get its length |
| while ( isgraph(*substringList) ) { |
| currentSubstring[substringLength++] = *substringList++; |
| } |
| // move to next substring |
| while ( isspace(*substringList) ) { |
| substringList++; |
| } |
| |
| // search for current substring in locale string |
| if (substringLength == 0) |
| continue; |
| currentSubstring[substringLength] = 0; |
| foundSubstring = strstr(localeString, currentSubstring); |
| |
| // if substring is found, delete it |
| if (foundSubstring) { |
| _DeleteCharsAtPointer(foundSubstring, substringLength); |
| } |
| } |
| } |
| |
| |
| // _GetKeyValueString // <1.10> |
| // Removes any key-value string from inLocaleString, puts canonized version in keyValueString |
| |
| static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) { |
| char * inLocalePtr = inLocaleString; |
| |
| while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) { |
| inLocalePtr++; |
| } |
| if (*inLocalePtr != 0) { // we found a key-value section |
| char * keyValuePtr = keyValueString; |
| |
| *keyValuePtr = *inLocalePtr; |
| *inLocalePtr = 0; |
| do { |
| if ( *(++inLocalePtr) != ' ' ) { |
| *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11> |
| } |
| } while (*inLocalePtr != 0); |
| } else { |
| keyValueString[0] = 0; |
| } |
| } |
| |
| static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) { |
| if (keyValueString[0] != 0) { |
| UErrorCode uerr = U_ZERO_ERROR; |
| UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr); |
| if ( uenum != NULL ) { |
| const char * keyword; |
| int32_t length; |
| char value[ULOC_KEYWORDS_CAPACITY]; // use as max for keyword value |
| while ( U_SUCCESS(uerr) ) { |
| keyword = uenum_next(uenum, &length, &uerr); |
| if ( keyword == NULL ) { |
| break; |
| } |
| length = uloc_getKeywordValue( keyValueString, keyword, value, sizeof(value), &uerr ); |
| length = uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr ); |
| } |
| uenum_close(uenum); |
| } |
| } |
| } |
| |
| __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) { |
| char inLocaleString[kLocaleIdentifierCStringMax]; |
| CFStringRef outStringRef = NULL; |
| |
| if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { |
| KeyStringToResultString testEntry; |
| KeyStringToResultString * foundEntry; |
| char keyValueString[sizeof(inLocaleString)]; // <1.10> |
| char varKeyValueString[sizeof(inLocaleString)]; // <1.17> |
| |
| _GetKeyValueString(inLocaleString, keyValueString); // <1.10> |
| testEntry.result = NULL; |
| |
| // A. First check if input string matches an old-style string that has a replacement |
| // (do this before case normalization) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| // It does match, so replace old string with new |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| varKeyValueString[0] = 0; |
| } else { |
| // B. No match with an old-style string, use input string but update codes, normalize case, etc. |
| _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), NULL, NULL, varKeyValueString); // <1.10><1.17> |
| } |
| |
| // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string |
| |
| // 1. Strip defaults in input string based on initial part of locale string |
| // (mainly to strip default script tag for a language) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, |
| sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); |
| if (foundEntry) { |
| // The input string begins with a character sequence for which |
| // there are default substrings which should be stripped if present |
| _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); |
| } |
| |
| // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageStringForCFBundle, kNumAppleLocaleToLanguageStringForCFBundle, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| // it does match |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| } else { |
| // just delete the region tag and anything after |
| char * inLocalePtr = inLocaleString; |
| while (*inLocalePtr != 0 && *inLocalePtr != '_') { |
| inLocalePtr++; |
| } |
| *inLocalePtr = 0; |
| } |
| |
| // D. Re-append any key-value strings, now canonical // <1.10><1.17> |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); |
| |
| // All done, return what we came up with. |
| outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); |
| } |
| |
| return outStringRef; |
| } |
| |
| CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { |
| char inLocaleString[kLocaleIdentifierCStringMax]; |
| CFStringRef outStringRef = NULL; |
| |
| if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { |
| KeyStringToResultString testEntry; |
| KeyStringToResultString * foundEntry; |
| char keyValueString[sizeof(inLocaleString)]; // <1.10> |
| char varKeyValueString[sizeof(inLocaleString)]; // <1.17> |
| |
| _GetKeyValueString(inLocaleString, keyValueString); // <1.10> |
| testEntry.result = NULL; |
| |
| // A. First check if input string matches an old-style string that has a replacement |
| // (do this before case normalization) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| // It does match, so replace old string with new |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| varKeyValueString[0] = 0; |
| } else { |
| char * langRegSubtag = NULL; |
| char * regionTag = NULL; |
| |
| // B. No match with an old-style string, use input string but update codes, normalize case, etc. |
| _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17><1.19> |
| |
| // if the language part already includes a regional variant, then delete any region tag. <1.19> |
| if (langRegSubtag && regionTag) |
| *regionTag = 0; |
| } |
| |
| // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string |
| |
| // 1. Strip defaults in input string based on initial part of locale string |
| // (mainly to strip default script tag for a language) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, |
| sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); |
| if (foundEntry) { |
| // The input string begins with a character sequence for which |
| // there are default substrings which should be stripped if present |
| _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); |
| } |
| |
| // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| // it does match |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| } else { |
| // skip to any region tag or java-type variant |
| char * inLocalePtr = inLocaleString; |
| while (*inLocalePtr != 0 && *inLocalePtr != '_') { |
| inLocalePtr++; |
| } |
| // if there is still a region tag, turn it into a language variant <1.19> |
| if (*inLocalePtr == '_') { |
| // handle 3-digit regions in addition to 2-letter ones |
| char * regionTag = inLocalePtr++; |
| long expectedLength = 0; |
| if ( isalpha(*inLocalePtr) ) { |
| while ( isalpha(*(++inLocalePtr)) ) |
| ; |
| expectedLength = 3; |
| } else if ( isdigit(*inLocalePtr) ) { |
| while ( isdigit(*(++inLocalePtr)) ) |
| ; |
| expectedLength = 4; |
| } |
| *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0; |
| } |
| // anything else at/after '_' just gets deleted |
| *inLocalePtr = 0; |
| } |
| |
| // D. Re-append any key-value strings, now canonical // <1.10><1.17> |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); |
| |
| // All done, return what we came up with. |
| outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); |
| } |
| |
| return outStringRef; |
| } |
| |
| |
| CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { |
| char inLocaleString[kLocaleIdentifierCStringMax]; |
| CFStringRef outStringRef = NULL; |
| |
| if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { |
| KeyStringToResultString testEntry; |
| KeyStringToResultString * foundEntry; |
| char keyValueString[sizeof(inLocaleString)]; // <1.10> |
| char varKeyValueString[sizeof(inLocaleString)]; // <1.17> |
| |
| _GetKeyValueString(inLocaleString, keyValueString); // <1.10> |
| testEntry.result = NULL; |
| |
| // A. First check if input string matches an old-style Apple string that has a replacement |
| // (do this before case normalization) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| // It does match, so replace old string with new // <1.10> |
| strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); |
| varKeyValueString[0] = 0; |
| } else { |
| char * langRegSubtag = NULL; |
| char * regionTag = NULL; |
| |
| // B. No match with an old-style string, use input string but update codes, normalize case, etc. |
| _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17> |
| |
| |
| // C. Now strip defaults that are implied by other fields. |
| |
| // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter. |
| if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) { |
| _DeleteCharsAtPointer(langRegSubtag, 3); |
| } |
| |
| // 2. Strip defaults in input string based on final region tag in locale string |
| // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO) |
| if ( regionTag ) { |
| testEntry.key = regionTag; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults, |
| sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); |
| if (foundEntry) { |
| _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); |
| } |
| } |
| |
| // 3. Strip defaults in input string based on initial part of locale string |
| // (mainly to strip default script tag for a language) |
| testEntry.key = inLocaleString; |
| foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, |
| sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); |
| if (foundEntry) { |
| // The input string begins with a character sequence for which |
| // there are default substrings which should be stripped if present |
| _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); |
| } |
| } |
| |
| // D. Re-append any key-value strings, now canonical // <1.10><1.17> |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); |
| _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); |
| |
| // Now create the CFString (even if empty!) |
| outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); |
| } |
| |
| return outStringRef; |
| } |
| |
| // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on |
| // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c |
| CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) { |
| CFStringRef result = NULL; |
| if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) { |
| const char *localeString = regionCodeToLocaleString[rcode]; |
| if (localeString != NULL && *localeString != '\0') { |
| result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); |
| } |
| } |
| if (result) return result; |
| if (0 <= lcode && lcode < kNumLangCodeToLocaleString) { |
| const char *localeString = langCodeToLocaleString[lcode]; |
| if (localeString != NULL && *localeString != '\0') { |
| result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); |
| } |
| } |
| return result; |
| } |
| |
| |
| CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) { |
| char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); |
| |
| UErrorCode icuStatus = U_ZERO_ERROR; |
| int32_t length = 0; |
| |
| // Extract the C string locale ID, for ICU |
| CFIndex outBytes = 0; |
| CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes); |
| cLocaleID[outBytes] = '\0'; |
| |
| // Get the components |
| length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleLanguageCode, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleScriptCode, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleCountryCode, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); |
| if (U_SUCCESS(icuStatus) && length > 0) |
| { |
| CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); |
| CFDictionaryAddValue(working, kCFLocaleVariantCode, string); |
| CFRelease(string); |
| } |
| icuStatus = U_ZERO_ERROR; |
| |
| // Now get the keywords; open an enumerator on them |
| UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus); |
| const char *locKey = NULL; |
| int32_t locKeyLen = 0; |
| while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus)) |
| { |
| char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| |
| // Get the value for this keyword |
| if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0 |
| && U_SUCCESS(icuStatus)) |
| { |
| CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, (CFIndex)strlen(locKey), kCFStringEncodingASCII, true); |
| CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, (CFIndex)strlen(locValue), kCFStringEncodingASCII, true); |
| if (key && value) |
| CFDictionaryAddValue(working, key, value); |
| if (key) |
| CFRelease(key); |
| if (value) |
| CFRelease(value); |
| } |
| } |
| uenum_close(iter); |
| |
| // Convert to an immutable dictionary and return |
| CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working); |
| CFRelease(working); |
| return result; |
| } |
| |
| typedef struct __AppendContext |
| { |
| char separator; |
| CFMutableStringRef working; |
| } __AppendContext; |
| |
| static void __AppendKeywords(const void *k, const void *v, void *c) |
| { |
| __AppendContext *context = (__AppendContext *) c; |
| CFStringRef key = (CFStringRef) k; |
| CFStringRef value = (CFStringRef) v; |
| if (CFEqual(key, kCFLocaleLanguageCode) || CFEqual(key, kCFLocaleScriptCode) || CFEqual(key, kCFLocaleCountryCode) || CFEqual(key, kCFLocaleVariantCode)) |
| return; |
| CFStringAppendFormat(context->working, NULL, CFSTR("%c%@%c%@"), context->separator, key, ULOC_KEYWORD_ASSIGN, value); |
| context->separator = ULOC_KEYWORD_ITEM_SEPARATOR; |
| } |
| |
| CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) { |
| CFMutableStringRef working = CFStringCreateMutable(allocator, 0); |
| CFStringRef value = NULL; |
| bool country = false; |
| __AppendContext context = {ULOC_KEYWORD_SEPARATOR, working}; |
| |
| if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleLanguageCode))) |
| { |
| CFStringAppend(working, value); |
| } |
| |
| if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleScriptCode))) |
| { |
| CFStringAppendFormat(working, NULL, CFSTR("_%@"), value); |
| } |
| |
| if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleCountryCode))) |
| { |
| CFStringAppendFormat(working, NULL, CFSTR("_%@"), value); |
| country = true; |
| } |
| |
| if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleVariantCode))) |
| { |
| if (!country) |
| CFStringAppend(working, CFSTR("_")); |
| CFStringAppendFormat(working, NULL, CFSTR("_%@"), value); |
| } |
| |
| // Now iterate through any remaining entries and append as keywords |
| CFDictionaryApplyFunction(dictionary, __AppendKeywords, &context); |
| |
| // Convert to immutable string and return |
| CFStringRef result = (CFStringRef)CFStringCreateCopy(allocator, working); |
| CFRelease(working); |
| return result; |
| } |
| |