// (Source-viewer artifact, not part of the original file) blob: dd1e4cff9c2fb24b2944d3c1f28c55010324466e
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/wtf/text/case_map.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/renderer/platform/wtf/text/text_offset_map.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
using testing::ElementsAreArray;
namespace WTF {
namespace {
// Returns an 8-bit version of |source| for exercising the 8-bit code paths.
// A null or already 8-bit string is returned unchanged. A 16-bit string is
// narrowed when it contains only Latin-1 characters (or is empty); otherwise a
// default-constructed String is returned so the caller can skip the case.
String To8BitOrNull(const String& source) {
  const bool usable_as_is = source.IsNull() || source.Is8Bit();
  if (usable_as_is)
    return source;
  if (source.ContainsOnlyLatin1OrEmpty()) {
    return String::Make8BitFrom16BitSource(source.Characters16(),
                                           source.length());
  }
  return String();
}
} // namespace
// One parameterized case shared by the CaseMapTest TEST_P tests below.
struct CaseMapTestData {
  // Text to be case-mapped; may be nullptr.
  const char16_t* source;
  // Locale string passed to the CaseMap constructor.
  const char* locale;
  // Expected results of ToLower() and ToUpper() respectively.
  const char16_t* lower_expected;
  const char16_t* upper_expected;
  // Expected TextOffsetMap entries reported by ToLower() / ToUpper(). Empty
  // when the mapping does not change the length of the string.
  // NOTE(review): each entry appears to pair an offset in the source with the
  // matching offset in the mapped string -- confirm against text_offset_map.h.
  std::vector<TextOffsetMap::Entry> lower_map = {};
  std::vector<TextOffsetMap::Entry> upper_map = {};
};

static CaseMapTestData case_map_test_data[] = {
    // Empty string.
    {nullptr, "", nullptr, nullptr},
    {u"", "", u"", u""},

    // Non-letters
    {u"123", "", u"123", u"123"},

    // ASCII lower/uppercases.
    {u"xyz", "", u"xyz", u"XYZ"},
    {u"XYZ", "", u"xyz", u"XYZ"},
    {u"Xyz", "", u"xyz", u"XYZ"},
    {u"xYz", "", u"xyz", u"XYZ"},

    // German eszett. Uppercasing makes the string longer.
    {u"\u00DF", "", u"\u00DF", u"SS", {}, {{1, 2}}},
    {u"\u00DFz", "", u"\u00DFz", u"SSZ", {}, {{1, 2}}},
    {u"x\u00DF", "", u"x\u00DF", u"XSS", {}, {{2, 3}}},
    {u"x\u00DFz", "", u"x\u00DFz", u"XSSZ", {}, {{2, 3}}},

    // Turkish/Azeri.
    {u"\u0130", "tr", u"\u0069", u"\u0130"},

    // Turkish/Azeri. Lowercasing can make the string shorter.
    {u"I\u0307", "tr", u"i", u"I\u0307", {{2, 1}}},

    // Lithuanian. Uppercasing can make the string shorter.
    {u"i\u0307", "lt", u"i\u0307", u"I", {}, {{2, 1}}},
    {u"i\u0307z", "lt", u"i\u0307z", u"IZ", {}, {{2, 1}}},
    {u"xi\u0307", "lt", u"xi\u0307", u"XI", {}, {{3, 2}}},
    {u"xi\u0307z", "lt", u"xi\u0307z", u"XIZ", {}, {{3, 2}}},

    // Lithuanian. Lowercasing can make the string longer.
    {u"\u00CC", "lt", u"\u0069\u0307\u0300", u"\u00CC", {{1, 3}}},

    // Mix of longer ones and shorter ones.
    {u"\u00DFi\u0307", "lt", u"\u00DFi\u0307", u"SSI", {}, {{1, 2}, {3, 3}}},
    {u"\u00DFyi\u0307z", "lt", u"\u00DFyi\u0307z", u"SSYIZ", {},
     {{1, 2}, {4, 4}}},
    {u"i\u0307\u00DF", "lt", u"i\u0307\u00DF", u"ISS", {}, {{2, 1}, {3, 3}}},
};
// Prints the source text and locale of a test case, so that a failing
// parameterized test is identifiable in the GoogleTest output.
std::ostream& operator<<(std::ostream& os, const CaseMapTestData& data) {
  os << String(data.source);
  os << " locale=";
  os << data.locale;
  return os;
}
class CaseMapTest : public testing::Test,
public testing::WithParamInterface<CaseMapTestData> {};
INSTANTIATE_TEST_SUITE_P(CaseMapTest,
CaseMapTest,
testing::ValuesIn(case_map_test_data));
// ToLower() called without an offset map must produce the expected string.
TEST_P(CaseMapTest, ToLowerWithoutOffset) {
  const CaseMapTestData& test_case = GetParam();
  CaseMap mapper(test_case.locale);
  String input(test_case.source);
  String result = mapper.ToLower(input);
  EXPECT_EQ(result, String(test_case.lower_expected));
}
// ToUpper() called without an offset map must produce the expected string.
TEST_P(CaseMapTest, ToUpperWithoutOffset) {
  const CaseMapTestData& test_case = GetParam();
  CaseMap mapper(test_case.locale);
  String input(test_case.source);
  String result = mapper.ToUpper(input);
  EXPECT_EQ(result, String(test_case.upper_expected));
}
// ToLower() with an offset map must produce both the expected string and the
// expected list of offset-map entries.
TEST_P(CaseMapTest, ToLower) {
  const CaseMapTestData& test_case = GetParam();
  CaseMap mapper(test_case.locale);
  String input(test_case.source);
  TextOffsetMap offsets;
  String result = mapper.ToLower(input, &offsets);
  EXPECT_EQ(result, String(test_case.lower_expected));
  EXPECT_THAT(offsets.Entries(), ElementsAreArray(test_case.lower_map));
}
// ToUpper() with an offset map must produce both the expected string and the
// expected list of offset-map entries.
TEST_P(CaseMapTest, ToUpper) {
  const CaseMapTestData& test_case = GetParam();
  CaseMap mapper(test_case.locale);
  String input(test_case.source);
  TextOffsetMap offsets;
  String result = mapper.ToUpper(input, &offsets);
  EXPECT_EQ(result, String(test_case.upper_expected));
  EXPECT_THAT(offsets.Entries(), ElementsAreArray(test_case.upper_map));
}
// Same checks as ToLower, but the input is first converted by To8BitOrNull().
// Cases for which that conversion yields a null string are skipped.
TEST_P(CaseMapTest, ToLower8Bit) {
  const CaseMapTestData& test_case = GetParam();
  String narrow_input = To8BitOrNull(String(test_case.source));
  if (!narrow_input)
    return;

  CaseMap mapper(test_case.locale);
  TextOffsetMap offsets;
  String result = mapper.ToLower(narrow_input, &offsets);
  EXPECT_EQ(result, String(test_case.lower_expected));
  EXPECT_THAT(offsets.Entries(), ElementsAreArray(test_case.lower_map));
}
// Same checks as ToUpper, but the input is first converted by To8BitOrNull().
// Cases for which that conversion yields a null string are skipped.
TEST_P(CaseMapTest, ToUpper8Bit) {
  const CaseMapTestData& test_case = GetParam();
  String narrow_input = To8BitOrNull(String(test_case.source));
  if (!narrow_input)
    return;

  CaseMap mapper(test_case.locale);
  TextOffsetMap offsets;
  String result = mapper.ToUpper(narrow_input, &offsets);
  EXPECT_EQ(result, String(test_case.upper_expected));
  EXPECT_THAT(offsets.Entries(), ElementsAreArray(test_case.upper_map));
}
// One row of the locale-specific ToUpper()/ToLower() tests below: a UTF-8
// input, a list of locales, and the single result expected for every locale
// in that list. Rows are written with positional aggregate initialization, so
// the field order here must match the initializers.
struct CaseFoldingTestData {
// Human-readable label that is streamed into failure messages.
const char* source_description;
// Input text, encoded as UTF-8 (decoded with String::FromUTF8 by the tests).
const char* source;
// Array of locale identifiers to run the conversion with.
const char** locale_list;
// Number of entries in |locale_list|.
size_t locale_list_length;
// Expected result, as UTF-8, compared against the Utf8() form of the output.
const char* expected;
};
// Shared inputs and locale lists for the ToUpperLocale / ToLowerLocale tests.
// All input strings are UTF-8 byte sequences written with \x escapes.
//
// \xC4\xB0 = U+0130 (capital dotted I)
// \xC4\xB1 = U+0131 (lowercase dotless I)
const char* g_turkic_input = "Isi\xC4\xB0 \xC4\xB0s\xC4\xB1I";
// Greek words mixing upper and lower case, accented letters and sigma in
// various positions, plus one Latin "o" directly before a capital sigma.
const char* g_greek_input =
"\xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\x9F\xCE\xB4\xCF\x8C\xCF\x82 "
"\xCE\xA3\xCE\xBF \xCE\xA3\xCE\x9F o\xCE\xA3 \xCE\x9F\xCE\xA3 \xCF\x83 "
"\xE1\xBC\x95\xCE\xBE";
// I, J and I-with-ogonek (U+012E) variants, with and without combining
// accents; \xCC\x87 is U+0307 (combining dot above).
const char* g_lithuanian_input =
"I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D "
"\xC4\xA8 xi\xCC\x87\xCC\x88 xj\xCC\x87\xCC\x88 x\xC4\xAF\xCC\x87\xCC\x88 "
"xi\xCC\x87\xCC\x80 xi\xCC\x87\xCC\x81 xi\xCC\x87\xCC\x83 XI X\xC3\x8F XJ "
"XJ\xCC\x88 X\xC4\xAE X\xC4\xAE\xCC\x88";
// Locale spellings expected to select Turkish/Azeri rules, covering different
// separators, an "@" keyword suffix, other regions and mixed letter case.
const char* g_turkic_locales[] = {
"tr", "tr-TR", "tr_TR", "tr@foo=bar", "tr-US", "TR", "tr-tr", "tR",
"az", "az-AZ", "az_AZ", "az@foo=bar", "az-US", "Az", "AZ-AZ",
};
// Locales expected NOT to apply Turkish/Azeri rules.
const char* g_non_turkic_locales[] = {
"en", "en-US", "en_US", "en@foo=bar", "EN", "En",
"ja", "el", "fil", "fi", "lt",
};
// Locale spellings expected to select Greek rules.
const char* g_greek_locales[] = {
"el", "el-GR", "el_GR", "el@foo=bar", "el-US", "EL", "el-gr", "eL",
};
// Locales expected NOT to apply Greek rules.
const char* g_non_greek_locales[] = {
"en", "en-US", "en_US", "en@foo=bar", "EN", "En",
"ja", "tr", "az", "fil", "fi", "lt",
};
// Locale spellings expected to select Lithuanian rules.
const char* g_lithuanian_locales[] = {
"lt", "lt-LT", "lt_LT", "lt@foo=bar", "lt-US", "LT", "lt-lt", "lT",
};
// Locales expected NOT to apply Lithuanian rules.
// Should not have "tr" or "az" because "lt" and 'tr/az' rules conflict with
// each other.
const char* g_non_lithuanian_locales[] = {
"en", "en-US", "en_US", "en@foo=bar", "EN", "En", "ja", "fil", "fi", "el",
};
// Uppercases each shared input under every locale of a list and checks that
// all locales in the list give the same expected UTF-8 result. Each input is
// paired once with the locales that should apply the language-specific rules
// and once with locales that should not.
TEST(CaseMapTest, ToUpperLocale) {
  const CaseFoldingTestData kCases[] = {
      // Turkic locales: "i" uppercases to U+0130 (capital dotted I).
      {
          "Turkic input",
          g_turkic_input,
          g_turkic_locales,
          sizeof(g_turkic_locales) / sizeof(g_turkic_locales[0]),
          "IS\xC4\xB0\xC4\xB0 \xC4\xB0SII",
      },
      // Other locales: "i" uppercases to plain "I".
      {
          "Turkic input",
          g_turkic_input,
          g_non_turkic_locales,
          sizeof(g_non_turkic_locales) / sizeof(g_non_turkic_locales[0]),
          "ISI\xC4\xB0 \xC4\xB0SII",
      },
      // Greek locales: accents are dropped from the uppercased letters.
      {
          "Greek input",
          g_greek_input,
          g_greek_locales,
          sizeof(g_greek_locales) / sizeof(g_greek_locales[0]),
          "\xCE\x9F\xCE\x94\xCE\x9F\xCE\xA3 \xCE\x9F\xCE\x94\xCE\x9F\xCE\xA3 "
          "\xCE\xA3\xCE\x9F \xCE\xA3\xCE\x9F \x4F\xCE\xA3 \xCE\x9F\xCE\xA3 "
          "\xCE\xA3 \xCE\x95\xCE\x9E",
      },
      // Other locales: accents are kept.
      {
          "Greek input",
          g_greek_input,
          g_non_greek_locales,
          sizeof(g_non_greek_locales) / sizeof(g_non_greek_locales[0]),
          "\xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 "
          "\xCE\xA3\xCE\x9F \xCE\xA3\xCE\x9F \x4F\xCE\xA3 \xCE\x9F\xCE\xA3 "
          "\xCE\xA3 \xE1\xBC\x9D\xCE\x9E",
      },
      // Lithuanian locales: U+0307 after i/j/i-ogonek is removed.
      {
          "Lithuanian input",
          g_lithuanian_input,
          g_lithuanian_locales,
          sizeof(g_lithuanian_locales) / sizeof(g_lithuanian_locales[0]),
          "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D "
          "\xC4\xA8 XI\xCC\x88 XJ\xCC\x88 X\xC4\xAE\xCC\x88 XI\xCC\x80 "
          "XI\xCC\x81 XI\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE "
          "X\xC4\xAE\xCC\x88",
      },
      // Other locales: U+0307 is preserved.
      {
          "Lithuanian input",
          g_lithuanian_input,
          g_non_lithuanian_locales,
          sizeof(g_non_lithuanian_locales) /
              sizeof(g_non_lithuanian_locales[0]),
          "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D "
          "\xC4\xA8 XI\xCC\x87\xCC\x88 XJ\xCC\x87\xCC\x88 "
          "X\xC4\xAE\xCC\x87\xCC\x88 XI\xCC\x87\xCC\x80 XI\xCC\x87\xCC\x81 "
          "XI\xCC\x87\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE "
          "X\xC4\xAE\xCC\x88",
      },
  };

  for (const CaseFoldingTestData& test_case : kCases) {
    const char* expected = test_case.expected;
    String source = String::FromUTF8(test_case.source);
    for (size_t index = 0; index < test_case.locale_list_length; ++index) {
      const char* locale = test_case.locale_list[index];
      CaseMap case_map(locale);
      EXPECT_EQ(expected, case_map.ToUpper(source).Utf8())
          << test_case.source_description << "; locale=" << locale;
    }
  }
}
// Lowercases each shared input under every locale of a list and checks that
// all locales in the list give the same expected UTF-8 result. Each input is
// paired once with the locales that should apply the language-specific rules
// and once with locales that should not.
TEST(CaseMapTest, ToLowerLocale) {
  // Each length is derived from the very array it accompanies, so that every
  // locale in the list is exercised.
  CaseFoldingTestData test_data_list[] = {
      // Turkic locales: "I" lowercases to U+0131 (dotless i) and U+0130 to "i".
      {
          "Turkic input",
          g_turkic_input,
          g_turkic_locales,
          sizeof(g_turkic_locales) / sizeof(g_turkic_locales[0]),
          "\xC4\xB1sii is\xC4\xB1\xC4\xB1",
      },
      {
          "Turkic input",
          g_turkic_input,
          g_non_turkic_locales,
          sizeof(g_non_turkic_locales) / sizeof(g_non_turkic_locales[0]),
          // U+0130 is lowercased to U+0069 followed by U+0307
          "isii\xCC\x87 i\xCC\x87s\xC4\xB1i",
      },
      // Greek lowercasing expects the same output for Greek and non-Greek
      // locales.
      {
          "Greek input",
          g_greek_input,
          g_greek_locales,
          sizeof(g_greek_locales) / sizeof(g_greek_locales[0]),
          "\xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 "
          "\xCF\x83\xCE\xBF \xCF\x83\xCE\xBF \x6F\xCF\x82 \xCE\xBF\xCF\x82 "
          "\xCF\x83 \xE1\xBC\x95\xCE\xBE",
      },
      {
          "Greek input",
          g_greek_input,
          g_non_greek_locales,
          // Must be the size of |g_non_greek_locales|. Using the (shorter)
          // |g_greek_locales| here would silently skip the trailing locales.
          sizeof(g_non_greek_locales) / sizeof(g_non_greek_locales[0]),
          "\xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 "
          "\xCF\x83\xCE\xBF \xCF\x83\xCE\xBF \x6F\xCF\x82 \xCE\xBF\xCF\x82 "
          "\xCF\x83 \xE1\xBC\x95\xCE\xBE",
      },
      // Lithuanian locales: U+0307 is inserted after i/j/i-ogonek when another
      // accent follows, and the precomposed accented capital I letters are
      // decomposed.
      {
          "Lithuanian input",
          g_lithuanian_input,
          g_lithuanian_locales,
          sizeof(g_lithuanian_locales) / sizeof(g_lithuanian_locales[0]),
          "i \xC3\xAF j j\xCC\x87\xCC\x88 \xC4\xAF \xC4\xAF\xCC\x87\xCC\x88 "
          "i\xCC\x87\xCC\x80 i\xCC\x87\xCC\x81 i\xCC\x87\xCC\x83 "
          "xi\xCC\x87\xCC\x88 xj\xCC\x87\xCC\x88 x\xC4\xAF\xCC\x87\xCC\x88 "
          "xi\xCC\x87\xCC\x80 xi\xCC\x87\xCC\x81 xi\xCC\x87\xCC\x83 xi "
          "x\xC3\xAF xj xj\xCC\x87\xCC\x88 x\xC4\xAF x\xC4\xAF\xCC\x87\xCC\x88",
      },
      // Other locales: no U+0307 is inserted.
      {
          "Lithuanian input",
          g_lithuanian_input,
          g_non_lithuanian_locales,
          sizeof(g_non_lithuanian_locales) /
              sizeof(g_non_lithuanian_locales[0]),
          "\x69 \xC3\xAF \x6A \x6A\xCC\x88 \xC4\xAF \xC4\xAF\xCC\x88 \xC3\xAC "
          "\xC3\xAD \xC4\xA9 \x78\x69\xCC\x87\xCC\x88 \x78\x6A\xCC\x87\xCC\x88 "
          "\x78\xC4\xAF\xCC\x87\xCC\x88 \x78\x69\xCC\x87\xCC\x80 "
          "\x78\x69\xCC\x87\xCC\x81 \x78\x69\xCC\x87\xCC\x83 \x78\x69 "
          "\x78\xC3\xAF \x78\x6A \x78\x6A\xCC\x88 \x78\xC4\xAF "
          "\x78\xC4\xAF\xCC\x88",
      },
  };

  for (const CaseFoldingTestData& test_data : test_data_list) {
    const char* expected = test_data.expected;
    String source = String::FromUTF8(test_data.source);
    for (size_t j = 0; j < test_data.locale_list_length; ++j) {
      const char* locale = test_data.locale_list[j];
      CaseMap case_map(locale);
      EXPECT_EQ(expected, case_map.ToLower(source).Utf8())
          << test_data.source_description << "; locale=" << locale;
    }
  }
}
} // namespace WTF