blob: b380a89cfeed9bb87846a933ca869fde778c425c [file] [log] [blame]
/*
* Copyright (c) 2008-2009 Brent Fulgham <bfulgham@gmail.org>. All rights reserved.
*
* This source code is a modified version of the CoreFoundation sources released by Apple Inc. under
* the terms of the APSL version 2.0 (see below).
*
* Information about changes from the original Apple source release can be found by reviewing the
* source control system for the project at https://sourceforge.net/svn/?group_id=246198.
*
* The original license information is as follows:
*
* Copyright (c) 2008 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
CFLocaleIdentifier.c
Copyright (c) 2002-2007, Apple Inc. All rights reserved.
Responsibility: Christopher Kane
CFLocaleIdentifier.c defines
- enum value kLocaleIdentifierCStringMax
- structs KeyStringToResultString, SpecialCaseUpdates
and provides the following data for the functions
CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes,
CFLocaleCreateCanonicalLocaleIdentifierFromString
CFLocaleCreateCanonicalLanguageIdentifierFromString
1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString;
map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string
2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString;
map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string
3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical;
map old Apple string oldAppleLocaleToCanonical[n].key
to canonical locale string oldAppleLocaleToCanonical[n].result
for n = 0..kNumOldAppleLocaleToCanonical-1
4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical;
map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key
to updated replacement localeStringPrefixToCanonical[n].result
for n = 0..kNumLocaleStringPrefixToCanonical-1
5. static const SpecialCaseUpdates specialCases[];
various special cases for updating region codes, or for updating language codes based on region codes
6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults;
map locale string region tag localeStringRegionToDefaults[n].key
to default substrings to delete localeStringRegionToDefaults[n].result
for n = 0..kNumLocaleStringRegionToDefaults-1
7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults;
map locale string initial part localeStringPrefixToDefaults[n].key
to default substrings to delete localeStringPrefixToDefaults[n].result
for n = 0..kNumLocaleStringPrefixToDefaults-1
8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString;
map Apple locale string appleLocaleToLanguageString[n].key
to equivalent language string appleLocaleToLanguageString[n].result
for n = 0..kNumAppleLocaleToLanguageString-1
*/
#include <CoreFoundation/CFString.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <unicode/uloc.h>
#if DEPLOYMENT_TARGET_WINDOWS
extern size_t strlcpy(char *dst, const char *src, size_t siz);
extern size_t strlcat(char *dst, const char *src, size_t siz);
#endif
// Buffer capacity, in bytes, for a locale identifier held as an ASCII C string; the
// terminating null byte is included in this count.
// The value is the sum of two ICU capacities from <unicode/uloc.h>. The original note on
// this constant gave the operands as 56 + 100; the real numbers come from whichever ICU
// headers the build uses.
enum {
    kLocaleIdentifierCStringMax =
        ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY
};
// One row of a string-to-string lookup table used by the data tables for
// CFLocaleCreateCanonicalLocaleIdentifierFromString.
//   key    - the string (or string prefix) to look for
//   result - the string associated with that key (replacement, or text to delete,
//            depending on the table the row lives in)
// Both the struct tag and the typedef name are declared, so either spelling can be used.
typedef struct KeyStringToResultString {
    const char *key;
    const char *result;
} KeyStringToResultString;
// One row of the special-case table used by the data tables for
// CFLocaleCreateCanonicalLocaleIdentifierFromString.
// NOTE(review): field meanings below are read off the table rows that use this struct; the
// code that consumes them is not part of this excerpt, so confirm against it.
//   lang    - language the rule applies to, or NULL when the rule applies to any language
//   reg1    - first region tag to look for, written with a leading '-' (e.g. "-UK")
//   update1 - replacement text used when reg1 is found
//   reg2    - optional second region tag to look for, or NULL
//   update2 - replacement text used when reg2 is found, or NULL
// Both the struct tag and the typedef name are declared, so either spelling can be used.
typedef struct SpecialCaseUpdates {
    const char *lang;
    const char *reg1;
    const char *update1;
    const char *reg2;
    const char *update2;
} SpecialCaseUpdates;
// Lookup table from a legacy region code, used directly as the array index, to its canonical
// locale string.
// - Slots for unassigned or unused codes hold NULL, so an entry fetched from this table must
//   be NULL-checked before it is treated as a string.
// - Entries such as "_CA" carry a region but no language (their language code is "-1 none").
// - Rows tagged "* one-way mapping" are flagged that way by the original data; presumably the
//   code maps to the string but the string is not mapped back to that code (the consumer is
//   not shown here - confirm before relying on it).
// Every row's position is significant: append new codes at the end, never insert or delete
// rows in the middle. kNumRegionCodeToLocaleString is computed from the array size.
static const char * const regionCodeToLocaleString[] = {
// map RegionCode (array index) to canonical locale string
//
// canon. string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray
// -------- ------------ ------------------ ------------ -------- string, if different ]
"en_US", // 0 verUS; 0 langEnglish;
"fr_FR", // 1 verFrance; 1 langFrench;
"en_GB", // 2 verBritain; 0 langEnglish;
"de_DE", // 3 verGermany; 2 langGerman;
"it_IT", // 4 verItaly; 3 langItalian;
"nl_NL", // 5 verNetherlands; 4 langDutch;
"nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch);
"sv_SE", // 7 verSweden; 5 langSwedish;
"es_ES", // 8 verSpain; 6 langSpanish;
"da_DK", // 9 verDenmark; 7 langDanish;
"pt_PT", // 10 verPortugal; 8 langPortuguese;
"fr_CA", // 11 verFrCanada; 1 langFrench;
"nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO"
"he_IL", // 13 verIsrael; 10 langHebrew;
"ja_JP", // 14 verJapan; 11 langJapanese;
"en_AU", // 15 verAustralia; 0 langEnglish;
"ar", // 16 verArabic; 12 langArabic;
"fi_FI", // 17 verFinland; 13 langFinnish;
"fr_CH", // 18 verFrSwiss; 1 langFrench;
"de_CH", // 19 verGrSwiss; 2 langGerman;
"el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono;
"is_IS", // 21 verIceland; 15 langIcelandic;
"mt_MT", // 22 verMalta; 16 langMaltese;
"el_CY", // 23 verCyprus; 14 langGreek?; el or tr? guess el # ""
"tr_TR", // 24 verTurkey; 17 langTurkish;
"hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia
"nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping
"nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping
"_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
"_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
"pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping
"nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO"
"da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping
"hi_IN", // 33 verIndiaHindi; 21 langHindi;
"ur_PK", // 34 verPakistanUrdu; 20 langUrdu;
"tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping
"it_CH", // 36 verItalianSwiss; 3 langItalian;
"en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en"
NULL, // 38 *unassigned; -1 none; * one-way mapping # ""
"ro_RO", // 39 verRomania; 37 langRomanian;
"grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR"
"lt_LT", // 41 verLithuania; 24 langLithuanian;
"pl_PL", // 42 verPoland; 25 langPolish;
"hu_HU", // 43 verHungary; 26 langHungarian;
"et_EE", // 44 verEstonia; 27 langEstonian;
"lv_LV", // 45 verLatvia; 28 langLatvian;
"se", // 46 verSami; 29 langSami;
"fo_FO", // 47 verFaroeIsl; 30 langFaroese;
"fa_IR", // 48 verIran; 31 langFarsi/Persian;
"ru_RU", // 49 verRussia; 32 langRussian;
"ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots);
"ko_KR", // 51 verKorea; 23 langKorean;
"zh_CN", // 52 verChina; 33 langSimpChinese;
"zh_TW", // 53 verTaiwan; 19 langTradChinese;
"th_TH", // 54 verThailand; 22 langThai;
"und", // 55 verScriptGeneric; -1 none; # "" // <1.9>
"cs_CZ", // 56 verCzech; 38 langCzech;
"sk_SK", // 57 verSlovak; 39 langSlovak;
"und", // 58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9>
"hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary
"bn", // 60 verBengali; 67 langBengali; _IN or _BD? guess generic
"be_BY", // 61 verBelarus; 46 langBelorussian;
"uk_UA", // 62 verUkraine; 45 langUkrainian;
NULL, // 63 *unused; -1 none; * one-way mapping # ""
"el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping
"sr_CS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18>
"sl_SI", // 66 verSlovenian; 40 langSlovenian;
"mk_MK", // 67 verMacedonian; 43 langMacedonian;
"hr_HR", // 68 verCroatia; 18 langCroatian;
NULL, // 69 *unused; -1 none; * one-way mapping # ""
"de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE"
"pt_BR", // 71 verBrazil; 8 langPortuguese;
"bg_BG", // 72 verBulgaria; 44 langBulgarian;
"ca_ES", // 73 verCatalonia; 130 langCatalan;
"mul", // 74 verMultilingual; -1 none; # ""
"gd", // 75 verScottishGaelic; 144 langScottishGaelic;
"gv", // 76 verManxGaelic; 145 langManxGaelic;
"br", // 77 verBreton; 142 langBreton;
"iu_CA", // 78 verNunavut; 143 langInuktitut -Cans;
"cy", // 79 verWelsh; 128 langWelsh;
"_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
"ga-Latg_IE", // 81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx>
"en_CA", // 82 verEngCanada; 0 langEnglish;
"dz_BT", // 83 verBhutan; 137 langDzongkha;
"hy_AM", // 84 verArmenian; 51 langArmenian;
"ka_GE", // 85 verGeorgian; 52 langGeorgian;
"es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es"
"es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping
"to_TO", // 88 verTonga; 147 langTongan;
"pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping
"ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping
"fr_001", // 91 verFrenchUniversal; 1 langFrench;
"de_AT", // 92 verAustria; 2 langGerman;
"es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es"
"gu_IN", // 94 verGujarati; 69 langGujarati;
"pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic
"ur_IN", // 96 verIndiaUrdu; 20 langUrdu;
"vi_VN", // 97 verVietnam; 80 langVietnamese;
"fr_BE", // 98 verFrBelgium; 1 langFrench;
"uz_UZ", // 99 verUzbek; 47 langUzbek;
"en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # ""
"nn_NO", // 101 verNynorsk; 151 langNynorsk; # ""
"af_ZA", // 102 verAfrikaans; 141 langAfrikaans;
"eo", // 103 verEsperanto; 94 langEsperanto;
"mr_IN", // 104 verMarathi; 66 langMarathi;
"bo", // 105 verTibetan; 63 langTibetan;
"ne_NP", // 106 verNepal; 64 langNepali;
"kl", // 107 verGreenland; 149 langGreenlandic;
"en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry)
};
// Number of entries in regionCodeToLocaleString; valid region-code indices are
// 0..kNumRegionCodeToLocaleString-1. Derived from the array so it follows table edits.
enum {
    kNumRegionCodeToLocaleString =
        sizeof(regionCodeToLocaleString) / sizeof(regionCodeToLocaleString[0])
};
// Lookup table from a legacy language code, used directly as the array index, to its
// canonical language string.
// - Codes 95 through 127 are a gap: they are filled with 33 NULL placeholders (3 rows of 11)
//   so that "cy" lands exactly at index 128. An entry fetched from this table must therefore
//   be NULL-checked before it is treated as a string.
// - The trailing `# "..."` column records the __CFBundleLanguageAbbreviationsArray string
//   where it differs from the canonical one.
// Every row's position is significant: append new codes at the end, never insert or delete
// rows in the middle. kNumLangCodeToLocaleString is computed from the array size.
static const char * const langCodeToLocaleString[] = {
// map LangCode (array index) to canonical locale string
//
// canon. string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray
// -------- -------------- ---------- -------- string, if different ]
"en", // 0 langEnglish;
"fr", // 1 langFrench;
"de", // 2 langGerman;
"it", // 3 langItalian;
"nl", // 4 langDutch;
"sv", // 5 langSwedish;
"es", // 6 langSpanish;
"da", // 7 langDanish;
"pt", // 8 langPortuguese;
"nb", // 9 langNorwegian (Bokmal); # "no"
"he", // 10 langHebrew -Hebr;
"ja", // 11 langJapanese -Jpan;
"ar", // 12 langArabic -Arab;
"fi", // 13 langFinnish;
"el", // 14 langGreek (modern)-Grek-mono;
"is", // 15 langIcelandic;
"mt", // 16 langMaltese -Latn;
"tr", // 17 langTurkish -Latn;
"hr", // 18 langCroatian;
"zh-Hant", // 19 langTradChinese; # "zh"
"ur", // 20 langUrdu -Arab;
"hi", // 21 langHindi -Deva;
"th", // 22 langThai -Thai;
"ko", // 23 langKorean -Hang;
"lt", // 24 langLithuanian;
"pl", // 25 langPolish;
"hu", // 26 langHungarian;
"et", // 27 langEstonian;
"lv", // 28 langLatvian;
"se", // 29 langSami;
"fo", // 30 langFaroese;
"fa", // 31 langFarsi/Persian -Arab;
"ru", // 32 langRussian -Cyrl;
"zh-Hans", // 33 langSimpChinese; # "zh"
"nl-BE", // 34 langFlemish (redundant, =Dutch); # "nl"
"ga", // 35 langIrishGaelic (no dots);
"sq", // 36 langAlbanian; no region codes
"ro", // 37 langRomanian;
"cs", // 38 langCzech;
"sk", // 39 langSlovak;
"sl", // 40 langSlovenian;
"yi", // 41 langYiddish -Hebr; no region codes
"sr", // 42 langSerbian -Cyrl;
"mk", // 43 langMacedonian -Cyrl;
"bg", // 44 langBulgarian -Cyrl;
"uk", // 45 langUkrainian -Cyrl;
"be", // 46 langBelorussian -Cyrl;
"uz-Cyrl", // 47 langUzbek -Cyrl; also -Latn, -Arab
"kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab
"az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az"
"az-Arab", // 50 langAzerbaijanAr -Arab; no region codes # "az"
"hy", // 51 langArmenian -Armn;
"ka", // 52 langGeorgian -Geor;
"mo", // 53 langMoldavian -Cyrl; no region codes
"ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab
"tg-Cyrl", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab
"tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab
"mn-Mong", // 57 langMongolian -Mong; no region codes # "mn"
"mn-Cyrl", // 58 langMongolianCyr -Cyrl; no region codes # "mn"
"ps", // 59 langPashto -Arab; no region codes
"ku", // 60 langKurdish -Arab; no region codes
"ks", // 61 langKashmiri -Arab; no region codes
"sd", // 62 langSindhi -Arab; no region codes
"bo", // 63 langTibetan -Tibt;
"ne", // 64 langNepali -Deva;
"sa", // 65 langSanskrit -Deva; no region codes
"mr", // 66 langMarathi -Deva;
"bn", // 67 langBengali -Beng;
"as", // 68 langAssamese -Beng; no region codes
"gu", // 69 langGujarati -Gujr;
"pa", // 70 langPunjabi -Guru;
"or", // 71 langOriya -Orya; no region codes
"ml", // 72 langMalayalam -Mlym; no region codes
"kn", // 73 langKannada -Knda; no region codes
"ta", // 74 langTamil -Taml; no region codes
"te", // 75 langTelugu -Telu; no region codes
"si", // 76 langSinhalese -Sinh; no region codes
"my", // 77 langBurmese -Mymr; no region codes
"km", // 78 langKhmer -Khmr; no region codes
"lo", // 79 langLao -Laoo; no region codes
"vi", // 80 langVietnamese -Latn;
"id", // 81 langIndonesian -Latn; no region codes
"tl", // 82 langTagalog -Latn; no region codes
"ms", // 83 langMalayRoman -Latn; no region codes # "ms"
"ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms"
"am", // 85 langAmharic -Ethi; no region codes
"ti", // 86 langTigrinya -Ethi; no region codes
"om", // 87 langOromo -Ethi; no region codes
"so", // 88 langSomali -Latn; no region codes
"sw", // 89 langSwahili -Latn; no region codes
"rw", // 90 langKinyarwanda -Latn; no region codes
"rn", // 91 langRundi -Latn; no region codes
"ny", // 92 langNyanja/Chewa -Latn; no region codes # ""
"mg", // 93 langMalagasy -Latn; no region codes
"eo", // 94 langEsperanto -Latn;
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 95 to 105 (gap)
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 106 to 116 (gap)
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 117 to 127 (gap)
"cy", // 128 langWelsh -Latn;
"eu", // 129 langBasque -Latn; no region codes
"ca", // 130 langCatalan -Latn;
"la", // 131 langLatin -Latn; no region codes
"qu", // 132 langQuechua -Latn; no region codes
"gn", // 133 langGuarani -Latn; no region codes
"ay", // 134 langAymara -Latn; no region codes
"tt-Cyrl", // 135 langTatar -Cyrl; no region codes
"ug", // 136 langUighur -Arab; no region codes
"dz", // 137 langDzongkha -Tibt;
"jv", // 138 langJavaneseRom -Latn; no region codes
"su", // 139 langSundaneseRom -Latn; no region codes
"gl", // 140 langGalician -Latn; no region codes
"af", // 141 langAfrikaans -Latn;
"br", // 142 langBreton -Latn;
"iu", // 143 langInuktitut -Cans;
"gd", // 144 langScottishGaelic;
"gv", // 145 langManxGaelic -Latn;
"ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx>
"to", // 147 langTongan -Latn;
"grc", // 148 langGreekAncient -Grek-poly; # "el"
"kl", // 149 langGreenlandic -Latn;
"az-Latn", // 150 langAzerbaijanRoman -Latn; no region codes # "az"
"nn", // 151 langNynorsk -Latn; # (no entry)
};
// Number of entries in langCodeToLocaleString (NULL gap slots included); valid language-code
// indices are 0..kNumLangCodeToLocaleString-1. Derived from the array so it follows table edits.
enum {
    kNumLangCodeToLocaleString =
        sizeof(langCodeToLocaleString) / sizeof(langCodeToLocaleString[0])
};
// Table of exact-match replacements: an obsolete or old-style Apple locale/language string
// (key) and the canonical string that replaces it (result).
// The rows MUST stay in strcmp order of the key column (presumably because the consumer,
// which is not shown here, searches the table by comparison - confirm before changing the
// lookup). When adding a row, remember that strcmp compares raw byte values, so in ASCII:
//   - every uppercase-initial name ("Afrikaans" .. "Yiddish") sorts before every
//     lowercase key ("ar_??" .. "zh.Ha-T_TW");
//   - among punctuation and digits: ' ' < ',' < '-' < '.' < '0'..'9' < '?' < 'A'..'Z' < '_' < 'a'..'z',
//     which is why "de-96" < "de_96" < "de_??" and "ga-dots" < "ga.Lg".
// Some keys are deliberately misspelled because they reproduce strings that shipped that way.
static const KeyStringToResultString oldAppleLocaleToCanonical[] = {
// Map obsolete/old-style Apple strings to canonical
// Must be sorted according to how strcmp compares the strings in the first column
//
// non-canonical canonical [ comment ] # source/reason for non-canonical string
// string string
// ------------- ---------
{ "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray
{ "Albanian", "sq" }, // # __CFBundleLanguageNamesArray
{ "Amharic", "am" }, // # __CFBundleLanguageNamesArray
{ "Arabic", "ar" }, // # __CFBundleLanguageNamesArray
{ "Armenian", "hy" }, // # __CFBundleLanguageNamesArray
{ "Assamese", "as" }, // # __CFBundleLanguageNamesArray
{ "Aymara", "ay" }, // # __CFBundleLanguageNamesArray
{ "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn")
{ "Basque", "eu" }, // # __CFBundleLanguageNamesArray
{ "Belarusian", "be" }, // # handle other names
{ "Belorussian", "be" }, // # handle other names
{ "Bengali", "bn" }, // # __CFBundleLanguageNamesArray
{ "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled
{ "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above
{ "Breton", "br" }, // # __CFBundleLanguageNamesArray
{ "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray
{ "Burmese", "my" }, // # __CFBundleLanguageNamesArray
{ "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray
{ "Catalan", "ca" }, // # __CFBundleLanguageNamesArray
{ "Chewa", "ny" }, // # handle other names
{ "Chichewa", "ny" }, // # handle other names
{ "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans")
{ "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key
{ "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below
{ "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled
{ "Croatian", "hr" }, // # __CFBundleLanguageNamesArray
{ "Czech", "cs" }, // # __CFBundleLanguageNamesArray
{ "Danish", "da" }, // # __CFBundleLanguageNamesArray
{ "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE")
{ "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray
{ "English", "en" }, // # __CFBundleLanguageNamesArray
{ "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray
{ "Estonian", "et" }, // # __CFBundleLanguageNamesArray
{ "Faroese", "fo" }, // # __CFBundleLanguageNamesArray
{ "Farsi", "fa" }, // # __CFBundleLanguageNamesArray
{ "Finnish", "fi" }, // # __CFBundleLanguageNamesArray
{ "Flemish", "nl-BE" }, // # handle other names
{ "French", "fr" }, // # __CFBundleLanguageNamesArray
{ "Galician", "gl" }, // # __CFBundleLanguageNamesArray
{ "Gallegan", "gl" }, // # handle other names
{ "Georgian", "ka" }, // # __CFBundleLanguageNamesArray
{ "German", "de" }, // # __CFBundleLanguageNamesArray
{ "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc")
{ "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray
{ "Guarani", "gn" }, // # __CFBundleLanguageNamesArray
{ "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray
{ "Hawaiian", "haw" }, // # handle new languages
{ "Hebrew", "he" }, // # __CFBundleLanguageNamesArray
{ "Hindi", "hi" }, // # __CFBundleLanguageNamesArray
{ "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray
{ "Icelandic", "is" }, // # __CFBundleLanguageNamesArray
{ "Indonesian", "id" }, // # __CFBundleLanguageNamesArray
{ "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray
{ "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots")
{ "Italian", "it" }, // # __CFBundleLanguageNamesArray
{ "Japanese", "ja" }, // # __CFBundleLanguageNamesArray
{ "Javanese", "jv" }, // # __CFBundleLanguageNamesArray
{ "Kalaallisut", "kl" }, // # handle other names
{ "Kannada", "kn" }, // # __CFBundleLanguageNamesArray
{ "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray
{ "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray
{ "Khmer", "km" }, // # __CFBundleLanguageNamesArray
{ "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray
{ "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray
{ "Korean", "ko" }, // # __CFBundleLanguageNamesArray
{ "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray
{ "Lao", "lo" }, // # __CFBundleLanguageNamesArray
{ "Latin", "la" }, // # __CFBundleLanguageNamesArray
{ "Latvian", "lv" }, // # __CFBundleLanguageNamesArray
{ "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray
{ "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray
{ "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray
{ "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab")
{ "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray
{ "Maltese", "mt" }, // # __CFBundleLanguageNamesArray
{ "Manx", "gv" }, // # __CFBundleLanguageNamesArray
{ "Marathi", "mr" }, // # __CFBundleLanguageNamesArray
{ "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray
{ "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl")
{ "Nepali", "ne" }, // # __CFBundleLanguageNamesArray
{ "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no")
{ "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray
{ "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray)
{ "Oriya", "or" }, // # __CFBundleLanguageNamesArray
{ "Oromo", "om" }, // # __CFBundleLanguageNamesArray
{ "Panjabi", "pa" }, // # handle other names
{ "Pashto", "ps" }, // # __CFBundleLanguageNamesArray
{ "Persian", "fa" }, // # handle other names
{ "Polish", "pl" }, // # __CFBundleLanguageNamesArray
{ "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray
{ "Portuguese, Brazilian", "pt-BR" }, // # handle other names
{ "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray
{ "Pushto", "ps" }, // # handle other names
{ "Quechua", "qu" }, // # __CFBundleLanguageNamesArray
{ "Romanian", "ro" }, // # __CFBundleLanguageNamesArray
{ "Ruanda", "rw" }, // # handle other names
{ "Rundi", "rn" }, // # __CFBundleLanguageNamesArray
{ "Russian", "ru" }, // # __CFBundleLanguageNamesArray
{ "Sami", "se" }, // # __CFBundleLanguageNamesArray
{ "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray
{ "Scottish", "gd" }, // # __CFBundleLanguageNamesArray
{ "Serbian", "sr" }, // # __CFBundleLanguageNamesArray
{ "Simplified Chinese", "zh-Hans" }, // # handle other names
{ "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray
{ "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray
{ "Slovak", "sk" }, // # __CFBundleLanguageNamesArray
{ "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray
{ "Somali", "so" }, // # __CFBundleLanguageNamesArray
{ "Spanish", "es" }, // # __CFBundleLanguageNamesArray
{ "Sundanese", "su" }, // # __CFBundleLanguageNamesArray
{ "Swahili", "sw" }, // # __CFBundleLanguageNamesArray
{ "Swedish", "sv" }, // # __CFBundleLanguageNamesArray
{ "Tagalog", "tl" }, // # __CFBundleLanguageNamesArray
{ "Tajik", "tg" }, // # handle other names
{ "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray
{ "Tamil", "ta" }, // # __CFBundleLanguageNamesArray
{ "Tatar", "tt" }, // # __CFBundleLanguageNamesArray
{ "Telugu", "te" }, // # __CFBundleLanguageNamesArray
{ "Thai", "th" }, // # __CFBundleLanguageNamesArray
{ "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray
{ "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray
{ "Tongan", "to" }, // # __CFBundleLanguageNamesArray
{ "Traditional Chinese", "zh-Hant" }, // # handle other names
{ "Turkish", "tr" }, // # __CFBundleLanguageNamesArray
{ "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray
{ "Uighur", "ug" }, // # __CFBundleLanguageNamesArray
{ "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray
{ "Urdu", "ur" }, // # __CFBundleLanguageNamesArray
{ "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray
{ "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray
{ "Welsh", "cy" }, // # __CFBundleLanguageNamesArray
{ "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray
{ "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes
{ "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString
{ "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString
{ "az.La", "az-Latn" }, // # from old LocaleRefGetPartString
{ "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes
{ "bn_??", "bn" }, // # from old LocaleRefGetPartString
{ "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes
{ "br_??", "br" }, // # from old MapScriptInfoAndISOCodes
{ "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes
{ "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
{ "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
{ "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes
{ "el.El-P", "grc" }, // # from old LocaleRefGetPartString
{ "en-ascii", "en_001" }, // # from earlier version of tables in this file!
{ "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes
{ "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes
{ "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes
{ "es_XL", "es_419" }, // # from earlier version of tables in this file!
{ "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes
{ "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8>
{ "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8>
{ "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8>
{ "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8>
{ "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes
{ "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes
{ "jv.La", "jv" }, // # logical extension // <1.9>
{ "jw.La", "jv" }, // # from old LocaleRefGetPartString
{ "kk.Cy", "kk" }, // # from old LocaleRefGetPartString
{ "kl.La", "kl" }, // # from old LocaleRefGetPartString
{ "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9>
{ "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes
{ "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes
{ "mn.Cy", "mn-Cyrl" }, // # from old LocaleRefGetPartString
{ "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString
{ "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString
{ "ms.La", "ms" }, // # from old LocaleRefGetPartString
{ "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString
{ "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString
// { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
// { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
// { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
{ "pa_??", "pa" }, // # from old LocaleRefGetPartString
{ "sa.Dv", "sa" }, // # from old LocaleRefGetPartString
{ "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes
{ "sr_??", "sr_CS" }, // # from old MapScriptInfoAndISOCodes // <1.18>
{ "su.La", "su" }, // # from old LocaleRefGetPartString
{ "yi.He", "yi" }, // # from old LocaleRefGetPartString
{ "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file!
{ "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file!
{ "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString
{ "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString
{ "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString
{ "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString
};
// Number of rows in oldAppleLocaleToCanonical; valid row indices are
// 0..kNumOldAppleLocaleToCanonical-1. Derived from the array so it follows table edits.
enum {
    kNumOldAppleLocaleToCanonical =
        sizeof(oldAppleLocaleToCanonical) / sizeof(oldAppleLocaleToCanonical[0])
};
// Table of language-prefix replacements: a non-canonical language prefix (key) - a 3-letter
// ISO 639 code, an obsolete 2-letter ISO 639 code, or a deprecated RFC 3066 tag - and the
// canonical prefix that replaces it (result).
// Constraints on the key column, both required by the table's own contract:
//   - all lowercase;
//   - rows in strcmp order (presumably because the consumer, which is not shown here,
//     searches the table by comparison - confirm before changing the lookup).
// strcmp compares raw byte values and a shorter string sorts before any longer string it
// prefixes, hence "i-hak" < "ice" ('-' sorts before letters) and "no" < "no-bok" < "nob".
// Several keys map to the same result because ISO 639-2 has both bibliographic and
// terminology codes for some languages (e.g. "fra"/"fre", "deu"/"ger").
static const KeyStringToResultString localeStringPrefixToCanonical[] = {
// Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code.
// (special cases for 'sh' handled separately)
// First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column.
//
// non-canonical canonical [ comment ] # source/reason for non-canonical string
// prefix prefix
// ------------- ---------
{ "afr", "af" }, // Afrikaans
{ "alb", "sq" }, // Albanian
{ "amh", "am" }, // Amharic
{ "ara", "ar" }, // Arabic
{ "arm", "hy" }, // Armenian
{ "asm", "as" }, // Assamese
{ "aym", "ay" }, // Aymara
{ "aze", "az" }, // Azerbaijani
{ "baq", "eu" }, // Basque
{ "bel", "be" }, // Belarusian
{ "ben", "bn" }, // Bengali
{ "bih", "bh" }, // Bihari
{ "bod", "bo" }, // Tibetan
{ "bos", "bs" }, // Bosnian
{ "bre", "br" }, // Breton
{ "bul", "bg" }, // Bulgarian
{ "bur", "my" }, // Burmese
{ "cat", "ca" }, // Catalan
{ "ces", "cs" }, // Czech
{ "che", "ce" }, // Chechen
{ "chi", "zh" }, // Chinese
{ "cor", "kw" }, // Cornish
{ "cos", "co" }, // Corsican
{ "cym", "cy" }, // Welsh
{ "cze", "cs" }, // Czech
{ "dan", "da" }, // Danish
{ "deu", "de" }, // German
{ "dut", "nl" }, // Dutch
{ "dzo", "dz" }, // Dzongkha
{ "ell", "el" }, // Greek, Modern (1453-)
{ "eng", "en" }, // English
{ "epo", "eo" }, // Esperanto
{ "est", "et" }, // Estonian
{ "eus", "eu" }, // Basque
{ "fao", "fo" }, // Faroese
{ "fas", "fa" }, // Persian
{ "fin", "fi" }, // Finnish
{ "fra", "fr" }, // French
{ "fre", "fr" }, // French
{ "geo", "ka" }, // Georgian
{ "ger", "de" }, // German
{ "gla", "gd" }, // Gaelic,Scottish
{ "gle", "ga" }, // Irish
{ "glg", "gl" }, // Gallegan
{ "glv", "gv" }, // Manx
{ "gre", "el" }, // Greek, Modern (1453-)
{ "grn", "gn" }, // Guarani
{ "guj", "gu" }, // Gujarati
{ "heb", "he" }, // Hebrew
{ "hin", "hi" }, // Hindi
{ "hrv", "hr" }, // Croatian
{ "hun", "hu" }, // Hungarian
{ "hye", "hy" }, // Armenian
{ "i-hak", "zh-hakka" }, // Hakka # deprecated RFC 3066
{ "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066
{ "i-navajo", "nv" }, // Navajo # deprecated RFC 3066
{ "ice", "is" }, // Icelandic
{ "iku", "iu" }, // Inuktitut
{ "ile", "ie" }, // Interlingue
{ "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989)
{ "ina", "ia" }, // Interlingua
{ "ind", "id" }, // Indonesian
{ "isl", "is" }, // Icelandic
{ "ita", "it" }, // Italian
{ "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989)
{ "jav", "jv" }, // Javanese
{ "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001)
{ "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989)
{ "jpn", "ja" }, // Japanese
{ "kal", "kl" }, // Kalaallisut
{ "kan", "kn" }, // Kannada
{ "kas", "ks" }, // Kashmiri
{ "kat", "ka" }, // Georgian
{ "kaz", "kk" }, // Kazakh
{ "khm", "km" }, // Khmer
{ "kin", "rw" }, // Kinyarwanda
{ "kir", "ky" }, // Kirghiz
{ "kor", "ko" }, // Korean
{ "kur", "ku" }, // Kurdish
{ "lao", "lo" }, // Lao
{ "lat", "la" }, // Latin
{ "lav", "lv" }, // Latvian
{ "lit", "lt" }, // Lithuanian
{ "ltz", "lb" }, // Letzeburgesch
{ "mac", "mk" }, // Macedonian
{ "mal", "ml" }, // Malayalam
{ "mar", "mr" }, // Marathi
{ "may", "ms" }, // Malay
{ "mkd", "mk" }, // Macedonian
{ "mlg", "mg" }, // Malagasy
{ "mlt", "mt" }, // Maltese
{ "mol", "mo" }, // Moldavian
{ "mon", "mn" }, // Mongolian
{ "msa", "ms" }, // Malay
{ "mya", "my" }, // Burmese
{ "nep", "ne" }, // Nepali
{ "nld", "nl" }, // Dutch
{ "nno", "nn" }, // Norwegian Nynorsk
{ "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb
{ "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
{ "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
{ "nob", "nb" }, // Norwegian Bokmal
{ "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb
{ "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString
{ "oci", "oc" }, // Occitan/Provencal
{ "ori", "or" }, // Oriya
{ "orm", "om" }, // Oromo,Galla
{ "pan", "pa" }, // Panjabi
{ "per", "fa" }, // Persian
{ "pol", "pl" }, // Polish
{ "por", "pt" }, // Portuguese
{ "pus", "ps" }, // Pushto
{ "que", "qu" }, // Quechua
{ "roh", "rm" }, // Raeto-Romance
{ "ron", "ro" }, // Romanian
{ "rum", "ro" }, // Romanian
{ "run", "rn" }, // Rundi
{ "rus", "ru" }, // Russian
{ "san", "sa" }, // Sanskrit
{ "scc", "sr" }, // Serbian
{ "scr", "hr" }, // Croatian
{ "sin", "si" }, // Sinhalese
{ "slk", "sk" }, // Slovak
{ "slo", "sk" }, // Slovak
{ "slv", "sl" }, // Slovenian
{ "sme", "se" }, // Sami,Northern
{ "snd", "sd" }, // Sindhi
{ "som", "so" }, // Somali
{ "spa", "es" }, // Spanish
{ "sqi", "sq" }, // Albanian
{ "srp", "sr" }, // Serbian
{ "sun", "su" }, // Sundanese
{ "swa", "sw" }, // Swahili
{ "swe", "sv" }, // Swedish
{ "tam", "ta" }, // Tamil
{ "tat", "tt" }, // Tatar
{ "tel", "te" }, // Telugu
{ "tgk", "tg" }, // Tajik
{ "tgl", "tl" }, // Tagalog
{ "tha", "th" }, // Thai
{ "tib", "bo" }, // Tibetan
{ "tir", "ti" }, // Tigrinya
{ "ton", "to" }, // Tongan
{ "tuk", "tk" }, // Turkmen
{ "tur", "tr" }, // Turkish
{ "uig", "ug" }, // Uighur
{ "ukr", "uk" }, // Ukrainian
{ "urd", "ur" }, // Urdu
{ "uzb", "uz" }, // Uzbek
{ "vie", "vi" }, // Vietnamese
{ "wel", "cy" }, // Welsh
{ "yid", "yi" }, // Yiddish
{ "zho", "zh" }, // Chinese
};
// Number of entries in localeStringPrefixToCanonical; used as the element count
// when that table is searched with bsearch.
enum {
    kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical) / sizeof(localeStringPrefixToCanonical[0])
};
// specialCases
// Ordered list of region-code / language-code fix-ups applied by _UpdateFullLocaleString.
// Rows are walked from first to last until a row whose second field is NULL (the terminator),
// so row order matters (see the "do this after" notes below).
// Row layout, as used by _UpdateFullLocaleString: { lang, reg1, update1, reg2, update2 }
// (field order inferred from the initializers - TODO confirm against the SpecialCaseUpdates declaration).
// - lang: if non-NULL, the row only applies when the locale string begins with this language tag.
// - reg1/update1: if update1 starts with an uppercase letter, the region reg1 is replaced by update1;
// otherwise the language is replaced by update1 when region reg1 is present.
// - reg2/update2: second choice for a language update, used when reg1 is absent but reg2 is present.
static const SpecialCaseUpdates specialCases[] = {
// Data for special cases
// a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was
// replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after
// the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! However, ICU
// and RFC 3066bis will continue to use YU for this. So now CS is ambiguous. We guess as follows: If we
// see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS to YU.
// NOTE(review): the last sentence above does not match the table as written: the row below maps
// YU to CS (change <1.18>), not CS to YU. The per-row comments describe the actual data.
// b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and
// deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use
// hr; if there is a region tag of (now) YU we use sr; else we do not change it (not enough info).
// NOTE(review): in the table the second region checked for 'sh' is CS (after the YU -> CS update), not YU.
// c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the
// "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB).
{ NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB)
{ NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05)
{ "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic)
{ "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia)
{ NULL, "-YU", "CS", NULL, NULL }, // then always change YU to CS (map old Yugoslavia code to new 2003-07 ISO code
// for Serbia & Montenegro per RFC3066bis & ICU) // <1.18>
// Note: do this after fixing CS for cs/sk as above.
{ "sh", "-HR", "hr", "-CS", "sr" }, // if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) if we find
// HR (Croatia) or to 'sr' (Serbian) if we find CS (Serbia & Montenegro, Yugoslavia). // <1.18>
// Note: Do this after changing YU to CS as above.
{ NULL, NULL, NULL, NULL, NULL } // terminator
};
// localeStringRegionToDefaults
// Maps a region-code suffix (key) to the writing-system tag(s) that are the default for that
// region and can therefore be stripped from a canonical string (result).
// NOTE(review): the lookup code is not in this part of the file; the sort requirement below
// suggests a binary search on the key - confirm against the caller.
static const KeyStringToResultString localeStringRegionToDefaults[] = {
// For some region-code suffixes, there are default substrings to strip off for canonical string.
// Must be sorted according to how strcmp compares the strings in the first column
//
// Columns: region suffix | default writing system tags, strip | comment
// -------- ------------- ---------
{ "_CN", "-Hans" }, // mainland China, default is simplified
{ "_HK", "-Hant" }, // Hong Kong, default is traditional
{ "_MO", "-Hant" }, // Macao, default is traditional
{ "_SG", "-Hans" }, // Singapore, default is simplified
{ "_TW", "-Hant" }, // Taiwan, default is traditional
};
// Number of entries in localeStringRegionToDefaults.
enum {
    kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults) / sizeof(localeStringRegionToDefaults[0])
};
// localeStringPrefixToDefaults
// Maps a language-tag prefix including its trailing '-' (key) to a space-separated list of
// default tags for that language (result) that can be stripped from a canonical string.
// Entries marked "(? not Suppress-Script)" carry the original author's doubt about that default.
// NOTE(review): the lookup code is not in this part of the file; the sort requirement below
// suggests a binary search on the key - confirm against the caller.
static const KeyStringToResultString localeStringPrefixToDefaults[] = {
// For some initial portions of language tag, there are default substrings to strip off for canonical string.
// Must be sorted according to how strcmp compares the strings in the first column
//
// Columns: language tag prefix | default writing system tags, strip | comment
// -------- ------------- ---------
{ "ab-", "-Cyrl" }, // Abkhazian
{ "af-", "-Latn" }, // Afrikaans
{ "am-", "-Ethi" }, // Amharic
{ "ar-", "-Arab" }, // Arabic
{ "as-", "-Beng" }, // Assamese
{ "ay-", "-Latn" }, // Aymara
{ "be-", "-Cyrl" }, // Belarusian
{ "bg-", "-Cyrl" }, // Bulgarian
{ "bn-", "-Beng" }, // Bengali
{ "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script)
{ "br-", "-Latn" }, // Breton (? not Suppress-Script)
{ "bs-", "-Latn" }, // Bosnian
{ "ca-", "-Latn" }, // Catalan
{ "cs-", "-Latn" }, // Czech
{ "cy-", "-Latn" }, // Welsh
{ "da-", "-Latn" }, // Danish
{ "de-", "-Latn -1901" }, // German, traditional orthography
{ "dv-", "-Thaa" }, // Divehi/Maldivian
{ "dz-", "-Tibt" }, // Dzongkha
{ "el-", "-Grek" }, // Greek (modern, monotonic)
{ "en-", "-Latn" }, // English
{ "eo-", "-Latn" }, // Esperanto
{ "es-", "-Latn" }, // Spanish
{ "et-", "-Latn" }, // Estonian
{ "eu-", "-Latn" }, // Basque
{ "fa-", "-Arab" }, // Farsi
{ "fi-", "-Latn" }, // Finnish
{ "fo-", "-Latn" }, // Faroese
{ "fr-", "-Latn" }, // French
{ "ga-", "-Latn" }, // Irish
{ "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script)
{ "gl-", "-Latn" }, // Galician
{ "gn-", "-Latn" }, // Guarani
{ "gu-", "-Gujr" }, // Gujarati
{ "gv-", "-Latn" }, // Manx
{ "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script)
{ "he-", "-Hebr" }, // Hebrew
{ "hi-", "-Deva" }, // Hindi
{ "hr-", "-Latn" }, // Croatian
{ "hu-", "-Latn" }, // Hungarian
{ "hy-", "-Armn" }, // Armenian
{ "id-", "-Latn" }, // Indonesian
{ "is-", "-Latn" }, // Icelandic
{ "it-", "-Latn" }, // Italian
{ "ja-", "-Jpan" }, // Japanese
{ "ka-", "-Geor" }, // Georgian
{ "kk-", "-Cyrl" }, // Kazakh
{ "kl-", "-Latn" }, // Kalaallisut/Greenlandic
{ "km-", "-Khmr" }, // Central Khmer
{ "kn-", "-Knda" }, // Kannada
{ "ko-", "-Hang" }, // Korean (? not Suppress-Script)
{ "kok-", "-Deva" }, // Konkani
{ "la-", "-Latn" }, // Latin
{ "lb-", "-Latn" }, // Luxembourgish
{ "lo-", "-Laoo" }, // Lao
{ "lt-", "-Latn" }, // Lithuanian
{ "lv-", "-Latn" }, // Latvian
{ "mg-", "-Latn" }, // Malagasy
{ "mk-", "-Cyrl" }, // Macedonian
{ "ml-", "-Mlym" }, // Malayalam
{ "mo-", "-Latn" }, // Moldavian
{ "mr-", "-Deva" }, // Marathi
{ "ms-", "-Latn" }, // Malay
{ "mt-", "-Latn" }, // Maltese
{ "my-", "-Mymr" }, // Burmese/Myanmar
{ "nb-", "-Latn" }, // Norwegian Bokmal
{ "ne-", "-Deva" }, // Nepali
{ "nl-", "-Latn" }, // Dutch
{ "nn-", "-Latn" }, // Norwegian Nynorsk
{ "ny-", "-Latn" }, // Chichewa/Nyanja
{ "om-", "-Latn" }, // Oromo
{ "or-", "-Orya" }, // Oriya
{ "pa-", "-Guru" }, // Punjabi
{ "pl-", "-Latn" }, // Polish
{ "ps-", "-Arab" }, // Pushto
{ "pt-", "-Latn" }, // Portuguese
{ "qu-", "-Latn" }, // Quechua
{ "rn-", "-Latn" }, // Rundi
{ "ro-", "-Latn" }, // Romanian
{ "ru-", "-Cyrl" }, // Russian
{ "rw-", "-Latn" }, // Kinyarwanda
{ "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script)
{ "se-", "-Latn" }, // Sami (? not Suppress-Script)
{ "si-", "-Sinh" }, // Sinhala
{ "sk-", "-Latn" }, // Slovak
{ "sl-", "-Latn" }, // Slovenian
{ "so-", "-Latn" }, // Somali
{ "sq-", "-Latn" }, // Albanian
{ "sv-", "-Latn" }, // Swedish
{ "sw-", "-Latn" }, // Swahili
{ "ta-", "-Taml" }, // Tamil
{ "te-", "-Telu" }, // Telugu
{ "th-", "-Thai" }, // Thai
{ "ti-", "-Ethi" }, // Tigrinya
{ "tl-", "-Latn" }, // Tagalog
{ "tn-", "-Latn" }, // Tswana
{ "to-", "-Latn" }, // Tonga of Tonga Islands
{ "tr-", "-Latn" }, // Turkish
{ "uk-", "-Cyrl" }, // Ukrainian
{ "ur-", "-Arab" }, // Urdu
{ "vi-", "-Latn" }, // Vietnamese
{ "wo-", "-Latn" }, // Wolof
{ "xh-", "-Latn" }, // Xhosa
{ "yi-", "-Hebr" }, // Yiddish
{ "zh-", "-Hani" }, // Chinese (? not Suppress-Script)
{ "zu-", "-Latn" }, // Zulu
};
// Number of entries in localeStringPrefixToDefaults.
enum {
    kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults) / sizeof(localeStringPrefixToDefaults[0])
};
// appleLocaleToLanguageString
// Maps a locale string that Apple uses as a language ID (key) to the corresponding
// language string (result). Only the cases that are not a plain ll_RR -> ll-RR
// rewrite are listed here (see the note below).
static const KeyStringToResultString appleLocaleToLanguageString[] = {
// Map locale strings that Apple uses as language IDs to real language strings.
// Must be sorted according to how strcmp compares the strings in the first column.
// Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now
// handled in the code. <1.19>
//
// Columns: locale string | lang string | [ comment ]
// ------- -------
{ "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
{ "zh_CN", "zh-Hans" }, // mainland China => simplified
{ "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
{ "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
{ "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
{ "zh_TW", "zh-Hant" }, // Taiwan => traditional
};
// Number of entries in appleLocaleToLanguageString.
enum {
    kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString) / sizeof(appleLocaleToLanguageString[0])
};
// appleLocaleToLanguageStringForCFBundle
// Variant of appleLocaleToLanguageString that also lists the explicit ll_RR -> ll-RR
// mappings. Rows that are commented out are kept for reference and are marked
// "not currently used".
// NOTE(review): the name suggests this table serves CFBundle callers; the code that
// reads it is not in this part of the file - confirm against the caller.
static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = {
// Map locale strings that Apple uses as language IDs to real language strings.
// Must be sorted according to how strcmp compares the strings in the first column.
//
// Columns: locale string | lang string | [ comment ]
// ------- -------
{ "de_AT", "de-AT" }, // Austrian German
{ "de_CH", "de-CH" }, // Swiss German
// { "de_DE", "de-DE" }, // German for Germany (default), not currently used
{ "en_AU", "en-AU" }, // Australian English
{ "en_CA", "en-CA" }, // Canadian English
{ "en_GB", "en-GB" }, // British English
// { "en_IE", "en-IE" }, // Irish English, not currently used
{ "en_US", "en-US" }, // U.S. English
{ "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
// { "fr_BE", "fr-BE" }, // Belgian French, not currently used
{ "fr_CA", "fr-CA" }, // Canadian French
{ "fr_CH", "fr-CH" }, // Swiss French
// { "fr_FR", "fr-FR" }, // French for France (default), not currently used
{ "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium
// { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used
{ "pt_BR", "pt-BR" }, // Brazilian Portuguese
{ "pt_PT", "pt-PT" }, // Portuguese for Portugal
{ "zh_CN", "zh-Hans" }, // mainland China => simplified
{ "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
{ "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
{ "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
{ "zh_TW", "zh-Hant" }, // Taiwan => traditional
};
// Number of entries in appleLocaleToLanguageStringForCFBundle.
enum {
    kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle) / sizeof(appleLocaleToLanguageStringForCFBundle[0])
};
// LocaleToLegacyCodes
// One row of the localeToLegacyCodes table: a reduced locale string together with
// the legacy region code, language code and string encoding recorded for it.
typedef struct LocaleToLegacyCodes {
    const char *        locale;     // reduced to language plus one other component (script, region, variant), separators normalized to '_'
    RegionCode          regCode;    // legacy region ("ver...") code; the table uses -1 where none is given
    LangCode            langCode;   // legacy language ("lang...") code; the table uses -1 where none is given
    CFStringEncoding    encoding;   // encoding value recorded for the locale
} LocaleToLegacyCodes;
// localeToLegacyCodes
// Maps a reduced locale string to its legacy region code, language code and CFStringEncoding.
// The inline /*...*/ comments name the legacy constant each number stands for; a /*RR*/ comment
// after a locale string names the region implied by a language-only entry. A trailing
// "// Xxxx" comment gives the ISO 15924 script for the entry. -1 marks "no code".
// NOTE(review): entries appear to be sorted by locale in strcmp order, which suggests the table
// is binary-searched; the lookup code is not in this part of the file - keep the order when
// adding rows and confirm against the caller.
static const LocaleToLegacyCodes localeToLegacyCodes[] = {
// locale RegionCode LangCode CFStringEncoding
{ "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn
{ "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi
{ "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab;
{ "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng;
{ "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn;
{ "az", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // assume "az" defaults to -Cyrl
{ "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab;
{ "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl;
{ "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn;
{ "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl;
{ "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl;
{ "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng;
{ "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt;
{ "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn;
{ "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn;
{ "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn;
{ "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn;
{ "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn;
{ "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany
{ "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ },
{ "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ },
{ "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ },
{ "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ },
{ "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt;
{ "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece
{ "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ },
{ "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic
{ "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten)
{ "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ },
{ "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ },
{ "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ },
{ "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ },
{ "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ },
{ "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ },
{ "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ },
{ "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn;
{ "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten)
{ "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag
{ "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ },
{ "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
{ "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
{ "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ },
{ "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn;
{ "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab;
{ "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ },
{ "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ },
{ "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten)
{ "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ },
{ "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ },
{ "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ },
{ "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ },
{ "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ },
{ "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after)
{ "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots
{ "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ },
{ "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn;
{ "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn;
{ "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it)
{ "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr;
{ "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn;
{ "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr;
{ "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva;
{ "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ },
{ "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ },
{ "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn;
{ "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn;
{ "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ },
{ "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly
{ "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ },
{ "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ },
{ "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans;
{ "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan;
{ "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn;
{ "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor;
{ "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab
{ "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn;
{ "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr;
{ "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda;
{ "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang;
{ "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab;
{ "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab;
{ "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
{ "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn;
{ "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo;
{ "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ },
{ "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ },
{ "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn;
{ "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl;
{ "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym;
{ "mn", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // "mn" defaults to -Mong
{ "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl;
{ "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong;
{ "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl;
{ "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva;
{ "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn;
{ "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab;
{ "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn;
{ "mul", 74/*verMultilingual*/, -1, 0 },
{ "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr;
{ "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ },
{ "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva;
{ "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands
{ "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ },
{ "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ },
{ "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ },
{ "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn;
{ "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi;
{ "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya;
{ "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru;
{ "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ },
{ "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab;
{ "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten)
{ "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ },
{ "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ },
{ "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn;
{ "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn;
{ "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ },
{ "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl;
{ "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn;
{ "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva;
{ "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab;
{ "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ },
{ "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh;
{ "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ },
{ "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ },
{ "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn;
{ "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ },
{ "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl;
{ "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn;
{ "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ },
{ "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn;
{ "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml;
{ "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu
{ "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl"
{ "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
{ "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai;
{ "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi;
{ "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl
{ "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
{ "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn;
{ "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn;
{ "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn;
{ "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
{ "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
{ "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab;
{ "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl;
{ "und", 55/*verScriptGeneric*/, -1, 0 },
{ "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu
{ "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
{ "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
{ "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
{ "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ },
{ "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn
{ "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr;
{ "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese
{ "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
{ "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
{ "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
{ "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
{ "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
{ "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
{ "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
};
// Number of entries in localeToLegacyCodes.
enum {
    kNumLocaleToLegacyCodes = sizeof localeToLegacyCodes / sizeof *localeToLegacyCodes
};
/*
For reference here is a list of ICU locales with variants and how some
of them are canonicalized with the ICU function uloc_canonicalize:
ICU 3.0 has:
en_US_POSIX x no change
hy_AM_REVISED x no change
ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
th_TH_TRADITIONAL -> th_TH@calendar=buddhist
ICU 2.8 also had the following (now obsolete):
ca_ES_PREEURO
de__PHONEBOOK -> de@collation=phonebook
de_AT_PREEURO
de_DE_PREEURO
de_LU_PREEURO
el_GR_PREEURO
en_BE_PREEURO
en_GB_EURO -> en_GB@currency=EUR
en_IE_PREEURO -> en_IE@currency=IEP
es__TRADITIONAL -> es@collation=traditional
es_ES_PREEURO
eu_ES_PREEURO
fi_FI_PREEURO
fr_BE_PREEURO
fr_FR_PREEURO -> fr_FR@currency=FRF
fr_LU_PREEURO
ga_IE_PREEURO
gl_ES_PREEURO
hi__DIRECT -> hi@collation=direct
it_IT_PREEURO
nl_BE_PREEURO
nl_NL_PREEURO
pt_PT_PREEURO
zh__PINYIN -> zh@collation=pinyin
zh_TW_STROKE -> zh_TW@collation=stroke
*/
// _CompareTestEntryToTableEntryKey
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// comparison function for bsearch
static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key );
}
// _CompareTestEntryPrefixToTableEntryKey
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// bsearch comparison callback that treats the table key as a prefix of the test
// string; it assumes the table key IS terminated with '-' or '_'.
// Returns 0 when every character of the table key matches the start of the test
// string, otherwise -1 or 1 according to the first differing character.
// The strings are walked in a single pass (rather than strlen + strncmp) so the
// table key is not traversed twice.
static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
    const char *probe = ((const KeyStringToResultString *)testEntryPtr)->key;
    const char *entryKey = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;

    for (; *entryKey != 0; probe++, entryKey++) {
        if (*probe != *entryKey) {
            // Mismatch before the table key ran out: the key is not a prefix.
            return (*probe < *entryKey) ? -1 : 1;
        }
    }
    // The whole table key was consumed, so it is a prefix of the test string.
    return 0;
}
// _CompareLowerTestEntryPrefixToTableEntryKey
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'.
// Lowercases the test string before comparison (the table should already have lowercased entries).
//
// Returns 0 when the table key matches the start of the (lowercased) test string and
// the next test character is not alphanumeric, i.e. the key matches whole subtags.
// Otherwise returns -1 or 1 to steer the binary search. A '_' in the test string stops
// the scan and makes the test string sort before the table key.
//
// Characters are converted to unsigned char before being passed to tolower/isalnum:
// passing a negative plain char to the <ctype.h> functions is undefined behavior.
static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
    const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
    const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
    char lowerTestChar;

    while ( (lowerTestChar = (char)tolower((unsigned char)*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9>
        testPtr++; tablePtr++;
    }
    if ( *tablePtr != 0 ) {
        // strings are different, and the string in the table has not run out;
        // i.e. the table entry is not a prefix of the text string.
        if (lowerTestChar == '_') { // <1.9>
            return -1; // <1.9>
        }
        return ( lowerTestChar < *tablePtr )? -1: 1;
    }
    // The string in the table has run out. If the test string char is not alnum,
    // then the string matches, else the test string sorts after.
    return ( !isalnum((unsigned char)lowerTestChar) )? 0: 1;
}
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Deletes the first _length_ characters of the NUL-terminated string at _stringPtr_
// by sliding the remainder (including the terminating NUL) down over them.
// The caller guarantees that the string holds at least _length_ characters.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    char *dst = stringPtr;
    const char *src = stringPtr + length;

    for (;;) {
        char moved = *src++;
        *dst++ = moved;
        if (moved == 0) {
            break;      // the terminator has been copied, done
        }
    }
}
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrites the characters starting at stringPtr with the characters of
// replacementPtr. The replacement's terminating NUL is *not* copied, so the
// text following the overwritten span is left in place.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    const char *src;

    for (src = replacementPtr; *src != 0; src++) {
        *stringPtr++ = *src;
    }
}
// _CheckForTag
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Returns true when localeStringPtr begins with the first tagLen characters of tagPtr
// and the character that follows them is not alphanumeric, i.e. the tag is a complete
// subtag rather than the start of a longer one.
// The character is converted to unsigned char before isalnum, because passing a
// negative plain char to the <ctype.h> functions is undefined behavior.
static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) {
    return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum((unsigned char)localeStringPtr[tagLen]) );
}
// _ReplacePrefix
// Move this code from _UpdateFullLocaleString into separate function // <1.10>
//
// Replaces the first oldPrefixLen characters of locString with newPrefix, shifting the
// rest of the string (including its terminating NUL) left or right as needed.
//   locString        NUL-terminated string, modified in place
//   locStringMaxLen  capacity of the locString buffer, including room for the NUL
//   oldPrefixLen     number of leading characters to replace
//   newPrefix        NUL-terminated replacement text
// The string is left unchanged when the longer result would not fit in locStringMaxLen,
// or when oldPrefixLen does not describe a prefix of locString (negative, or longer
// than the string).
// The tail is moved with memmove, which is defined for overlapping regions and avoids
// stepping a pointer below the start of the buffer when oldPrefixLen is 0.
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    int newPrefixLen = (int)strlen(newPrefix);
    int stringLen = (int)strlen(locString);
    int lengthDelta = newPrefixLen - oldPrefixLen;

    if (oldPrefixLen < 0 || oldPrefixLen > stringLen) {
        // not a prefix of this string, nothing sensible to replace
        return;
    }
    if (lengthDelta > 0 && stringLen + lengthDelta >= locStringMaxLen) {
        // replacement is longer and there is no room, can't do substitution
        return;
    }
    if (lengthDelta != 0) {
        // shift the tail (everything after the old prefix, plus the NUL) to its new position
        memmove(locString + newPrefixLen, locString + oldPrefixLen, (size_t)(stringLen - oldPrefixLen) + 1);
    }
    // do the substitution (the replacement's NUL is not copied)
    memcpy(locString, newPrefix, (size_t)newPrefixLen);
}
// _UpdateFullLocaleString
// Given a locale string that uses standard codes (not a special old-style Apple string),
// update all the language codes and region codes to latest versions, map 3-letter
// language codes to 2-letter codes if possible, and normalize casing. If requested, return
// pointers to a language-region variant subtag (if present) and a region tag (if present).
// (add locStringMaxLen parameter) // <1.10>
static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen,
char **langRegSubtagRef, char **regionTagRef,
char varKeyValueString[]) // <1.17>
{
KeyStringToResultString testEntry;
KeyStringToResultString * foundEntry;
const SpecialCaseUpdates * specialCasePtr;
char * inLocalePtr;
char * subtagPtr;
char * langRegSubtag = NULL;
char * regionTag = NULL;
char * variantTag = NULL;
Boolean subtagHasDigits, pastPrimarySubtag, hadRegion;
// 1. First replace any non-canonical prefix (case insensitive) with canonical
// (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.)
testEntry.key = inLocaleString;
foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical,
sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey );
if (foundEntry) {
// replace key (at beginning of string) with result
_ReplacePrefix(inLocaleString, locStringMaxLen, (int)strlen(foundEntry->key), foundEntry->result); // <1.10>
}
// 2. Walk through input string, normalizing case & marking use of ISO 3166 codes
inLocalePtr = inLocaleString;
subtagPtr = inLocaleString;
subtagHasDigits = false;
pastPrimarySubtag = false;
hadRegion = false;
while ( true ) {
if ( isalpha(*inLocalePtr) ) {
// if not past a region tag, then lowercase, else uppercase
*inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr);
} else if ( isdigit(*inLocalePtr) ) {
subtagHasDigits = true;
} else {
if (!pastPrimarySubtag) {
// may have a NULL primary subtag
if (subtagHasDigits) {
break;
}
pastPrimarySubtag = true;
} else if (!hadRegion) {
// We are after any primary language subtag, but not past any region tag.
// This subtag is preceded by '-' or '_'.
int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_'
if (subtagLength == 3 && !subtagHasDigits) {
// potential ISO 3166 code for region or language variant; if so, needs uppercasing
if (*subtagPtr == '_') {
regionTag = subtagPtr;
hadRegion = true;
subtagPtr[1] = toupper(subtagPtr[1]);
subtagPtr[2] = toupper(subtagPtr[2]);
} else if (langRegSubtag == NULL) {
langRegSubtag = subtagPtr;
subtagPtr[1] = toupper(subtagPtr[1]);
subtagPtr[2] = toupper(subtagPtr[2]);
}
} else if (subtagLength == 4 && subtagHasDigits) {
// potential UN M.49 region code
if (*subtagPtr == '_') {
regionTag = subtagPtr;
hadRegion = true;
} else if (langRegSubtag == NULL) {
langRegSubtag = subtagPtr;
}
} else if (subtagLength == 5 && !subtagHasDigits) {
// ISO 15924 script code, uppercase just the first letter
subtagPtr[1] = toupper(subtagPtr[1]);
} else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17>
hadRegion = true;
}
if (!hadRegion) {
// convert improper '_' to '-'
*subtagPtr = '-';
}
} else {
variantTag = subtagPtr; // <1.17>
}
if (*inLocalePtr == '-' || *inLocalePtr == '_') {
subtagPtr = inLocalePtr;
subtagHasDigits = false;
} else {
break;
}
}
inLocalePtr++;
}
// 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669]
// If so, copy the keywords to varKeyValueString and delete the variant tag
// from the original string (but don't otherwise use the ICU canonicalization).
varKeyValueString[0] = 0;
if (variantTag) {
UErrorCode icuStatus;
int icuCanonStringLen;
char * varKeyValueStringPtr = varKeyValueString;
icuStatus = U_ZERO_ERROR;
icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus );
if ( U_SUCCESS(icuStatus) ) {
char * icuCanonStringPtr = varKeyValueString;
if (icuCanonStringLen >= locStringMaxLen)
icuCanonStringLen = locStringMaxLen - 1;
varKeyValueString[icuCanonStringLen] = 0;
while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR)
++icuCanonStringPtr;
if (*icuCanonStringPtr != 0) {
// the canonicalized string has keywords
// delete the variant tag in the original string (and other trailing '_' or '-')
*variantTag-- = 0;
while (*variantTag == '_')
*variantTag-- = 0;
// delete all of the canonicalized string except the keywords
while (*icuCanonStringPtr != 0)
*varKeyValueStringPtr++ = *icuCanonStringPtr++;
}
*varKeyValueStringPtr = 0;
}
}
// 4. Handle special cases of updating region codes, or updating language codes based on
// region code.
for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) {
if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) {
// OK, we matched any language specified. Now what needs updating?
char * foundTag;
if ( isupper(specialCasePtr->update1[0]) ) {
// updating a region code
if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) {
_CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1);
}
if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) {
_CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1);
}
} else {
// updating the language, there will be two choices based on region
if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) ||
( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) {
_CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1);
} else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) ||
( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) {
_CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2);
}
}
}
}
// 5. return pointers if requested.
if (langRegSubtagRef != NULL) {
*langRegSubtagRef = langRegSubtag;
}
if (regionTagRef != NULL) {
*regionTagRef = regionTag;
}
}
// _RemoveSubstringsIfPresent
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// substringList is a list of substrings, separated by whitespace, to strip if found in
// localeString. For each listed substring only its first occurrence in localeString is
// deleted; the deletion is done in place by _DeleteCharsAtPointer.
static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) {
    while (*substringList != 0) {
        char currentSubstring[kLocaleIdentifierCStringMax];
        size_t substringLength = 0;
        char * foundSubstring;

        // copy current substring & get its length; the cast keeps isgraph() defined
        // for bytes that would be negative as plain char
        while ( isgraph((unsigned char)*substringList) ) {
            // never write past the scratch buffer; keep counting so an oversized
            // substring can be recognized and skipped below
            if (substringLength < sizeof(currentSubstring) - 1) {
                currentSubstring[substringLength] = *substringList;
            }
            substringLength++;
            substringList++;
        }

        if (substringLength == 0) {
            // not positioned on a substring: step over one separator byte. This covers
            // whitespace and also any other non-printable byte, so the outer loop
            // always makes progress.
            substringList++;
            continue;
        }
        if (substringLength >= sizeof(currentSubstring)) {
            // does not fit in the scratch buffer, so it was not copied completely; skip it
            continue;
        }

        // search for current substring in locale string
        currentSubstring[substringLength] = 0;
        foundSubstring = strstr(localeString, currentSubstring);

        // if substring is found, delete it
        if (foundSubstring) {
            _DeleteCharsAtPointer(foundSubstring, (int)substringLength);
        }
    }
}
// _GetKeyValueString                                                                   // <1.10>
// Splits inLocaleString at the first ULOC_KEYWORD_SEPARATOR. The separator and everything
// after it are moved to keyValueString with all space characters dropped, and
// inLocaleString is terminated where the separator was. If there is no separator,
// keyValueString is set to the empty string and inLocaleString is left untouched.
static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) {
    char * src = inLocaleString;
    char * dst = keyValueString;

    // look for the start of a key-value section
    for (; *src != 0; src++) {
        if (*src == ULOC_KEYWORD_SEPARATOR) {
            break;
        }
    }
    if (*src == 0) {
        // no key-value section
        keyValueString[0] = 0;
        return;
    }

    // the separator itself leads the key-value string; cut the locale string there
    *dst++ = *src;
    *src++ = 0;

    // copy the remainder without spaces; the terminating NUL is copied as well
    for (;;) {
        char current = *src++;
        if (current != ' ') {
            *dst++ = current;       // case is deliberately left as is                // <1.11>
        }
        if (current == 0) {
            break;
        }
    }
}
// _AppendKeyValueString
// Enumerates the keywords in keyValueString with uloc_openKeywords and sets each
// keyword/value pair on inLocaleString (capacity locStringMaxLen) with uloc_setKeywordValue.
// Does nothing if keyValueString is empty or the keyword enumeration cannot be opened.
// Processing stops at the first ICU failure.
static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) {
    UErrorCode uerr = U_ZERO_ERROR;
    UEnumeration * uenum;

    if (keyValueString[0] == 0) {
        return;
    }
    uenum = uloc_openKeywords(keyValueString, &uerr);
    if ( uenum == NULL ) {
        return;
    }
    while ( U_SUCCESS(uerr) ) {
        char value[ULOC_KEYWORDS_CAPACITY]; // use as max for keyword value
        int32_t keywordLength;
        const char * keyword = uenum_next(uenum, &keywordLength, &uerr);
        if ( keyword == NULL ) {
            break;
        }
        // Offer ICU one byte less than the buffer and terminate explicitly: a value that
        // exactly fills the offered capacity is returned without a terminating NUL and
        // only flagged with U_STRING_NOT_TERMINATED_WARNING, which still counts as
        // success, yet the value is consumed as a C string below.
        uloc_getKeywordValue( keyValueString, keyword, value, (int32_t)sizeof(value) - 1, &uerr );
        if ( !U_SUCCESS(uerr) ) {
            break;  // value is not usable (e.g. too long for the buffer)
        }
        value[sizeof(value) - 1] = 0;
        uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr );
    }
    uenum_close(uenum);
}
// _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle
// Variant of the canonical-language-identifier conversion that consults
// appleLocaleToLanguageStringForCFBundle in the final table lookup and, when that lookup
// misses, simply cuts the identifier at the first '_'.
// Returns NULL if localeIdentifier is NULL or CFStringGetCString cannot convert it;
// otherwise returns the string made by CFStringCreateWithCString from the result.
__private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
    char localeBuf[kLocaleIdentifierCStringMax];
    char userKeywords[sizeof(localeBuf)];       // key-value section split off the input       // <1.10>
    char variantKeywords[sizeof(localeBuf)];    // filled in by _UpdateFullLocaleString        // <1.17>
    KeyStringToResultString probe;
    KeyStringToResultString * hit;
    char * scan;

    if ( !localeIdentifier || !CFStringGetCString(localeIdentifier, localeBuf, sizeof(localeBuf), kCFStringEncodingASCII) ) {
        return NULL;
    }

    // Set the key-value section aside so the lookups below see only the bare identifier.
    _GetKeyValueString(localeBuf, userKeywords);                                               // <1.10>

    // The probe's key points at localeBuf for every lookup in this function.
    probe.result = NULL;
    probe.key = localeBuf;

    // A. Does the input match an old-style string that has a replacement?
    //    (checked before any case normalization)
    hit = (KeyStringToResultString *)bsearch( &probe, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
                                              sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
    if (hit == NULL) {
        // B. Not an old-style string: keep the input but update codes, normalize case, etc.
        _UpdateFullLocaleString(localeBuf, sizeof(localeBuf), NULL, NULL, variantKeywords);     // <1.10><1.17>
    } else {
        // Old-style string: substitute its replacement.
        strlcpy(localeBuf, hit->result, sizeof(localeBuf));
        variantKeywords[0] = 0;
    }

    // C. The locale string is now up to date; strip defaults and reduce it to a language string.
    // C.1 Strip the defaults listed for the prefix of the locale string
    //     (mainly the default script tag for a language).
    hit = (KeyStringToResultString *)bsearch( &probe, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
                                              sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
    if (hit != NULL) {
        _RemoveSubstringsIfPresent(localeBuf, hit->result);
    }

    // C.2 A locale string that Apple uses as a language string is replaced by that language string.
    hit = (KeyStringToResultString *)bsearch( &probe, appleLocaleToLanguageStringForCFBundle, kNumAppleLocaleToLanguageStringForCFBundle,
                                              sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
    if (hit != NULL) {
        strlcpy(localeBuf, hit->result, sizeof(localeBuf));
    } else {
        // Otherwise drop the region tag and everything after it.
        for (scan = localeBuf; *scan != 0 && *scan != '_'; scan++) {
            // advance to the first '_' or the end of the string
        }
        *scan = 0;
    }

    // D. Put the keywords back: the variant-derived ones first, then the caller's.            // <1.10><1.17>
    _AppendKeyValueString( localeBuf, sizeof(localeBuf), variantKeywords );
    _AppendKeyValueString( localeBuf, sizeof(localeBuf), userKeywords );

    return CFStringCreateWithCString(allocator, localeBuf, kCFStringEncodingASCII);
}
// CFLocaleCreateCanonicalLanguageIdentifierFromString
// Converts a locale identifier into a language identifier: old-style names are replaced,
// codes are updated and normalized, default substrings are stripped, and a remaining
// region tag is either turned into a '-' language variant or removed.
// Returns NULL if localeIdentifier is NULL or CFStringGetCString cannot convert it;
// otherwise returns the string made by CFStringCreateWithCString from the result.
CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
    char localeBuf[kLocaleIdentifierCStringMax];
    char userKeywords[sizeof(localeBuf)];       // key-value section split off the input       // <1.10>
    char variantKeywords[sizeof(localeBuf)];    // filled in by _UpdateFullLocaleString        // <1.17>
    KeyStringToResultString probe;
    KeyStringToResultString * hit;

    if ( !localeIdentifier || !CFStringGetCString(localeIdentifier, localeBuf, sizeof(localeBuf), kCFStringEncodingASCII) ) {
        return NULL;
    }

    // Set the key-value section aside so the lookups below see only the bare identifier.
    _GetKeyValueString(localeBuf, userKeywords);                                               // <1.10>

    // The probe's key points at localeBuf for every lookup in this function.
    probe.result = NULL;
    probe.key = localeBuf;

    // A. Does the input match an old-style string that has a replacement?
    //    (checked before any case normalization)
    hit = (KeyStringToResultString *)bsearch( &probe, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
                                              sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
    if (hit == NULL) {
        // B. Not an old-style string: keep the input but update codes, normalize case, etc.
        char * langRegSubtag = NULL;
        char * regionTag = NULL;
        _UpdateFullLocaleString(localeBuf, sizeof(localeBuf), &langRegSubtag, &regionTag, variantKeywords); // <1.10><1.17><1.19>
        // When the language part already carries a regional variant, cut the string at the region tag. <1.19>
        if (langRegSubtag != NULL && regionTag != NULL) {
            *regionTag = 0;
        }
    } else {
        // Old-style string: substitute its replacement.
        strlcpy(localeBuf, hit->result, sizeof(localeBuf));
        variantKeywords[0] = 0;
    }

    // C. The locale string is now up to date; strip defaults and reduce it to a language string.
    // C.1 Strip the defaults listed for the prefix of the locale string
    //     (mainly the default script tag for a language).
    hit = (KeyStringToResultString *)bsearch( &probe, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
                                              sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
    if (hit != NULL) {
        _RemoveSubstringsIfPresent(localeBuf, hit->result);
    }

    // C.2 A locale string that Apple uses as a language string is replaced by that language string.
    hit = (KeyStringToResultString *)bsearch( &probe, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString,
                                              sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
    if (hit != NULL) {
        strlcpy(localeBuf, hit->result, sizeof(localeBuf));
    } else {
        // Find the first '_' (region tag or java-type variant), if there is one.
        char * cursor = localeBuf;
        while (*cursor != 0 && *cursor != '_') {
            cursor++;
        }
        // A region tag that is still present becomes a language variant. <1.19>
        if (*cursor == '_') {
            char * separator = cursor;
            long wantedSpan = 0;    // separator plus region: 3 for two letters, 4 for three digits
            cursor++;
            if ( isalpha(*cursor) ) {
                do {
                    cursor++;
                } while ( isalpha(*cursor) );
                wantedSpan = 3;
            } else if ( isdigit(*cursor) ) {
                do {
                    cursor++;
                } while ( isdigit(*cursor) );
                wantedSpan = 4;
            }
            // Keep the region (joined with '-') only if it has exactly the expected length;
            // otherwise the string ends where the '_' was.
            if (cursor - separator == wantedSpan) {
                *separator = '-';
            } else {
                *separator = 0;
            }
        }
        // Whatever follows the scanned region is discarded.
        *cursor = 0;
    }

    // D. Put the keywords back: the variant-derived ones first, then the caller's.            // <1.10><1.17>
    _AppendKeyValueString( localeBuf, sizeof(localeBuf), variantKeywords );
    _AppendKeyValueString( localeBuf, sizeof(localeBuf), userKeywords );

    return CFStringCreateWithCString(allocator, localeBuf, kCFStringEncodingASCII);
}
// CFLocaleCreateCanonicalLocaleIdentifierFromString
// Produces the canonical form of a locale identifier: old-style Apple names are replaced
// from oldAppleLocaleToCanonical; any other input has its codes updated and case
// normalized by _UpdateFullLocaleString and then has implied defaults stripped. Keywords
// are removed up front and re-appended at the end.
// Returns NULL if localeIdentifier is NULL or CFStringGetCString cannot convert it;
// otherwise returns the string made by CFStringCreateWithCString (possibly empty).
CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
    char inLocaleString[kLocaleIdentifierCStringMax];
    CFStringRef outStringRef = NULL;
    if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) {
        KeyStringToResultString testEntry;
        KeyStringToResultString * foundEntry;
        char keyValueString[sizeof(inLocaleString)];       // <1.10>
        char varKeyValueString[sizeof(inLocaleString)];    // <1.17>

        _GetKeyValueString(inLocaleString, keyValueString);    // <1.10>
        testEntry.result = NULL;

        // A. First check if input string matches an old-style Apple string that has a replacement
        //    (do this before case normalization)
        testEntry.key = inLocaleString;
        foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
                                                         sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
        if (foundEntry) {
            // It does match, so replace old string with new                                  // <1.10>
            strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
            varKeyValueString[0] = 0;
        } else {
            char * langRegSubtag = NULL;
            char * regionTag = NULL;

            // B. No match with an old-style string, use input string but update codes, normalize case, etc.
            _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString);   // <1.10><1.17>

            // C. Now strip defaults that are implied by other fields.

            // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter.
            if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) {
                _DeleteCharsAtPointer(langRegSubtag, 3);
                // Deleting the 3-character subtag moves the text behind it 3 characters
                // toward the start of the string, so a region tag located after the subtag
                // has moved too. Re-aim regionTag; otherwise the lookup in step 2 would be
                // keyed on whatever follows the region tag and never match.
                // NOTE(review): relies on _DeleteCharsAtPointer closing the gap in place,
                // as its use in _RemoveSubstringsIfPresent implies - confirm.
                if (regionTag > langRegSubtag) {
                    regionTag -= 3;
                }
            }

            // 2. Strip defaults in input string based on final region tag in locale string
            //    (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO)
            if ( regionTag ) {
                testEntry.key = regionTag;
                foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults,
                                                                 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
                if (foundEntry) {
                    _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
                }
            }

            // 3. Strip defaults in input string based on initial part of locale string
            //    (mainly to strip default script tag for a language)
            testEntry.key = inLocaleString;
            foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
                                                             sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
            if (foundEntry) {
                // The input string begins with a character sequence for which
                // there are default substrings which should be stripped if present
                _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
            }
        }

        // D. Re-append any key-value strings, now canonical                                  // <1.10><1.17>
        _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
        _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );

        // Now create the CFString (even if empty!)
        outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
    }
    return outStringRef;
}
// CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on
// the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c
// The region code is tried first (regionCodeToLocaleString); the language code
// (langCodeToLocaleString) is consulted only if the region code yields no string.
// A code outside its table, or one whose table entry is NULL or empty, yields nothing;
// the result is NULL when neither code produces a string. The returned string wraps the
// table's C string without copying (kCFAllocatorNull is passed as contents deallocator).
CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) {
    const char * tableEntry;

    // region code first
    if (rcode >= 0 && rcode < kNumRegionCodeToLocaleString) {
        tableEntry = regionCodeToLocaleString[rcode];
        if (tableEntry != NULL && tableEntry[0] != '\0') {
            CFStringRef fromRegion = CFStringCreateWithCStringNoCopy(allocator, tableEntry, kCFStringEncodingASCII, kCFAllocatorNull);
            if (fromRegion) {
                return fromRegion;
            }
        }
    }

    // fall back to the language code
    if (lcode >= 0 && lcode < kNumLangCodeToLocaleString) {
        tableEntry = langCodeToLocaleString[lcode];
        if (tableEntry != NULL && tableEntry[0] != '\0') {
            return CFStringCreateWithCStringNoCopy(allocator, tableEntry, kCFStringEncodingASCII, kCFAllocatorNull);
        }
    }

    return NULL;
}
CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) {
char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
UErrorCode icuStatus = U_ZERO_ERROR;
int32_t length = 0;
// Extract the C string locale ID, for ICU
CFIndex outBytes = 0;
CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes);
cLocaleID[outBytes] = '\0';
// Get the components
length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
if (U_SUCCESS(icuStatus) && length > 0)
{
CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
CFDictionaryAddValue(working, kCFLocaleLanguageCode, string);
CFRelease(string);
}
icuStatus = U_ZERO_ERROR;
length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
if (U_SUCCESS(icuStatus) && length > 0)
{
CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
CFDictionaryAddValue(working, kCFLocaleScriptCode, string);
CFRelease(string);
}
icuStatus = U_ZERO_ERROR;
length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
if (U_SUCCESS(icuStatus) && length > 0)
{
CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
CFDictionaryAddValue(working, kCFLocaleCountryCode, string);
CFRelease(string);
}
icuStatus = U_ZERO_ERROR;
length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
if (U_SUCCESS(icuStatus) && length > 0)
{
CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
CFDictionaryAddValue(working, kCFLocaleVariantCode, string);
CFRelease(string);
}
icuStatus = U_ZERO_ERROR;
// Now get the keywords; open an enumerator on them
UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus);
const char *locKey = NULL;
int32_t locKeyLen = 0;
while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus))
{
char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY];
// Get the value for this keyword
if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0
&& U_SUCCESS(icuStatus))
{
CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, (CFIndex)strlen(locKey), kCFStringEncodingASCII, true);
CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, (CFIndex)strlen(locValue), kCFStringEncodingASCII, true);
if (key && value)
CFDictionaryAddValue(working, key, value);
if (key)
CFRelease(key);
if (value)
CFRelease(value);
}
}
uenum_close(iter);
// Convert to an immutable dictionary and return
CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working);
CFRelease(working);
return result;
}
// __AppendContext
// State passed to __AppendKeywords through the context argument of
// CFDictionaryApplyFunction while CFLocaleCreateLocaleIdentifierFromComponents
// appends the keyword entries of a components dictionary.
typedef struct __AppendContext
{
// Character written in front of the next keyword: initialized to ULOC_KEYWORD_SEPARATOR
// and switched to ULOC_KEYWORD_ITEM_SEPARATOR by __AppendKeywords after each keyword.
char separator;
// The identifier string being built; keywords are appended to it.
CFMutableStringRef working;
} __AppendContext;
static void __AppendKeywords(const void *k, const void *v, void *c)
{
__AppendContext *context = (__AppendContext *) c;
CFStringRef key = (CFStringRef) k;
CFStringRef value = (CFStringRef) v;
if (CFEqual(key, kCFLocaleLanguageCode) || CFEqual(key, kCFLocaleScriptCode) || CFEqual(key, kCFLocaleCountryCode) || CFEqual(key, kCFLocaleVariantCode))
return;
CFStringAppendFormat(context->working, NULL, CFSTR("%c%@%c%@"), context->separator, key, ULOC_KEYWORD_ASSIGN, value);
context->separator = ULOC_KEYWORD_ITEM_SEPARATOR;
}
// CFLocaleCreateLocaleIdentifierFromComponents
// Assembles a locale identifier from a components dictionary: the language code, then
// "_script", "_country" and "_variant" for whichever of those entries are present, then
// every remaining entry as a keyword (see __AppendKeywords). When a variant is present
// without a country, an extra "_" is written first, so two underscores precede the variant.
// Returns an immutable copy of the assembled string.
CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) {
    CFMutableStringRef identifier = CFStringCreateMutable(allocator, 0);
    CFStringRef languageCode = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleLanguageCode);
    CFStringRef scriptCode = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleScriptCode);
    CFStringRef countryCode = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleCountryCode);
    CFStringRef variantCode = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleVariantCode);
    CFStringRef result;
    __AppendContext keywordState;

    // The first keyword is introduced by the keyword separator.
    keywordState.separator = ULOC_KEYWORD_SEPARATOR;
    keywordState.working = identifier;

    if (languageCode != NULL) {
        CFStringAppend(identifier, languageCode);
    }
    if (scriptCode != NULL) {
        CFStringAppendFormat(identifier, NULL, CFSTR("_%@"), scriptCode);
    }
    if (countryCode != NULL) {
        CFStringAppendFormat(identifier, NULL, CFSTR("_%@"), countryCode);
    }
    if (variantCode != NULL) {
        // no country: leave its slot empty
        if (countryCode == NULL) {
            CFStringAppend(identifier, CFSTR("_"));
        }
        CFStringAppendFormat(identifier, NULL, CFSTR("_%@"), variantCode);
    }

    // Now iterate through any remaining entries and append as keywords
    CFDictionaryApplyFunction(dictionary, __AppendKeywords, &keywordState);

    // Convert to immutable string and return
    result = (CFStringRef)CFStringCreateCopy(allocator, identifier);
    CFRelease(identifier);
    return result;
}