// blob: c5f2ee5d249e89cbe7cb815b8a6b69a30cef7f6b
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/text/text_break_iterator.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
namespace blink {
class TextBreakIteratorTest : public testing::Test {
protected:
void SetTestString(const char* test_string) {
test_string_ = String::FromUTF8(test_string);
}
void SetTestString16(Vector<UChar> input) {
test_string_ = String(input.data(), static_cast<unsigned>(input.size()));
}
// The expected break positions must be specified UTF-16 character boundaries.
void MatchLineBreaks(
LineBreakType line_break_type,
const Vector<int> expected_break_positions,
BreakSpaceType break_space = BreakSpaceType::kBeforeEverySpace) {
if (test_string_.Is8Bit()) {
test_string_ = String::Make16BitFrom8BitSource(test_string_.Characters8(),
test_string_.length());
}
LazyLineBreakIterator lazy_break_iterator(test_string_);
lazy_break_iterator.SetBreakType(line_break_type);
lazy_break_iterator.SetBreakSpace(break_space);
TestIsBreakable(expected_break_positions, lazy_break_iterator);
TestNextBreakOpportunity(expected_break_positions, lazy_break_iterator);
}
// Test IsBreakable() by iterating all positions. BreakingContext uses this
// interface.
void TestIsBreakable(const Vector<int> expected_break_positions,
const LazyLineBreakIterator& break_iterator) {
Vector<int> break_positions;
int next_breakable = -1;
for (unsigned i = 0; i <= test_string_.length(); i++) {
if (break_iterator.IsBreakable(i, next_breakable))
break_positions.push_back(i);
}
EXPECT_THAT(break_positions,
testing::ElementsAreArray(expected_break_positions))
<< test_string_ << " " << break_iterator.BreakType() << " "
<< break_iterator.BreakSpace();
}
// Test NextBreakOpportunity() by iterating break opportunities.
// ShapingLineBreaker uses this interface.
void TestNextBreakOpportunity(const Vector<int> expected_break_positions,
const LazyLineBreakIterator& break_iterator) {
Vector<int> break_positions;
for (unsigned i = 0; i <= test_string_.length(); i++) {
i = break_iterator.NextBreakOpportunity(i);
break_positions.push_back(i);
}
EXPECT_THAT(break_positions,
testing::ElementsAreArray(expected_break_positions))
<< test_string_ << " " << break_iterator.BreakType() << " "
<< break_iterator.BreakSpace();
}
unsigned TestLengthOfGraphemeCluster() {
return LengthOfGraphemeCluster(test_string_);
}
Vector<unsigned> GraphemesClusterList(String input,
unsigned start,
unsigned length) {
Vector<unsigned> result;
::blink::GraphemesClusterList(StringView(input, start, length), &result);
return result;
}
private:
String test_string_;
};
// The LineBreakType values used to instantiate the value-parameterized
// BreakTypeTest suite.
static const LineBreakType all_break_types[] = {
    LineBreakType::kNormal, LineBreakType::kBreakAll,
    LineBreakType::kBreakCharacter, LineBreakType::kKeepAll};
// Value-parameterized variant of TextBreakIteratorTest: each TEST_P below is
// run once per LineBreakType, available through GetParam().
class BreakTypeTest : public TextBreakIteratorTest,
                      public testing::WithParamInterface<LineBreakType> {};

// Instantiates BreakTypeTest with every entry of |all_break_types|.
INSTANTIATE_TEST_SUITE_P(TextBreakIteratorTest,
                         BreakTypeTest,
                         testing::ValuesIn(all_break_types));
// An iterator over g_empty_string reports offset 0 as breakable for the
// parameterized break type.
TEST_P(BreakTypeTest, EmptyString) {
  const LineBreakType break_type = GetParam();
  LazyLineBreakIterator break_iterator(g_empty_string);
  break_iterator.SetBreakType(break_type);
  EXPECT_TRUE(break_iterator.IsBreakable(0));
}
// An iterator over a default-constructed String reports offset 0 as breakable
// for the parameterized break type.
TEST_P(BreakTypeTest, EmptyNullString) {
  const LineBreakType break_type = GetParam();
  const String default_constructed_string;
  LazyLineBreakIterator break_iterator(default_constructed_string);
  break_iterator.SetBreakType(break_type);
  EXPECT_TRUE(break_iterator.IsBreakable(0));
}
// A default-constructed iterator (no string supplied) reports offset 0 as
// breakable for the parameterized break type.
TEST_P(BreakTypeTest, EmptyDefaultConstructor) {
  const LineBreakType break_type = GetParam();
  LazyLineBreakIterator break_iterator;
  break_iterator.SetBreakType(break_type);
  EXPECT_TRUE(break_iterator.IsBreakable(0));
}
// Space-separated letters, checked with the default break-space mode and with
// kBeforeSpaceRun.
TEST_F(TextBreakIteratorTest, Basic) {
  const Vector<int> default_space_breaks = {1, 3, 4, 6};
  const Vector<int> space_run_breaks = {1, 3, 6};

  SetTestString("a b c");
  MatchLineBreaks(LineBreakType::kNormal, default_space_breaks);
  MatchLineBreaks(LineBreakType::kNormal, space_run_breaks,
                  BreakSpaceType::kBeforeSpaceRun);
}
// Letters separated by newlines (single, doubled, and followed by a space),
// checked with the default break-space mode and with kBeforeSpaceRun.
TEST_F(TextBreakIteratorTest, Newline) {
  const Vector<int> default_space_breaks = {1, 3, 4, 6, 7, 9};
  const Vector<int> space_run_breaks = {1, 3, 6, 9};

  SetTestString("a\nb\n\nc\n d");
  MatchLineBreaks(LineBreakType::kNormal, default_space_breaks);
  MatchLineBreaks(LineBreakType::kNormal, space_run_breaks,
                  BreakSpaceType::kBeforeSpaceRun);
}
// Letters separated by single and doubled tabs, checked with the default
// break-space mode and with kBeforeSpaceRun.
TEST_F(TextBreakIteratorTest, Tab) {
  const Vector<int> default_space_breaks = {1, 3, 4, 6};
  const Vector<int> space_run_breaks = {1, 3, 6};

  SetTestString("a\tb\t\tc");
  MatchLineBreaks(LineBreakType::kNormal, default_space_breaks);
  MatchLineBreaks(LineBreakType::kNormal, space_run_breaks,
                  BreakSpaceType::kBeforeSpaceRun);
}
// Latin text with parentheses and a trailing period, checked against every
// break type.
TEST_F(TextBreakIteratorTest, LatinPunctuation) {
  const Vector<int> normal_breaks = {4, 8};
  const Vector<int> break_all_breaks = {2, 4, 6, 8};
  const Vector<int> break_character_breaks = {1, 2, 3, 4, 5, 6, 7, 8};
  const Vector<int> keep_all_breaks = {4, 8};

  SetTestString("(ab) cd.");
  MatchLineBreaks(LineBreakType::kNormal, normal_breaks);
  MatchLineBreaks(LineBreakType::kBreakAll, break_all_breaks);
  MatchLineBreaks(LineBreakType::kBreakCharacter, break_character_breaks);
  MatchLineBreaks(LineBreakType::kKeepAll, keep_all_breaks);
}
// Five ideographs: every offset is expected to be a break opportunity except
// under kKeepAll, where only the end of the string is.
TEST_F(TextBreakIteratorTest, Chinese) {
  const Vector<int> after_every_character = {1, 2, 3, 4, 5};
  const Vector<int> only_at_end = {5};

  SetTestString("標準萬國碼");
  MatchLineBreaks(LineBreakType::kNormal, after_every_character);
  MatchLineBreaks(LineBreakType::kBreakAll, after_every_character);
  MatchLineBreaks(LineBreakType::kBreakCharacter, after_every_character);
  MatchLineBreaks(LineBreakType::kKeepAll, only_at_end);
}
// Ideographs mixed with ASCII parentheses, letters and a period, checked
// against every break type.
TEST_F(TextBreakIteratorTest, ChineseMixed) {
  const Vector<int> normal_breaks = {1, 4, 5, 7, 9, 10};
  const Vector<int> break_all_breaks = {1, 4, 5, 6, 7, 9, 10};
  const Vector<int> break_character_breaks = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  const Vector<int> keep_all_breaks = {1, 4, 9, 10};

  SetTestString("標(準)萬ab國.碼");
  MatchLineBreaks(LineBreakType::kNormal, normal_breaks);
  MatchLineBreaks(LineBreakType::kBreakAll, break_all_breaks);
  MatchLineBreaks(LineBreakType::kBreakCharacter, break_character_breaks);
  MatchLineBreaks(LineBreakType::kKeepAll, keep_all_breaks);
}
// Ideographs and a Latin letter separated by runs of spaces. kNormal,
// kBreakAll and kKeepAll share one expectation in the default break-space
// mode; kNormal is additionally checked with kBeforeSpaceRun.
TEST_F(TextBreakIteratorTest, ChineseSpaces) {
  const Vector<int> around_spaces = {1, 2, 4, 5, 7, 8, 10};
  const Vector<int> break_character_breaks = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  const Vector<int> space_run_breaks = {1, 4, 7, 10};

  SetTestString("標  萬  a  國");
  MatchLineBreaks(LineBreakType::kNormal, around_spaces);
  MatchLineBreaks(LineBreakType::kBreakAll, around_spaces);
  MatchLineBreaks(LineBreakType::kBreakCharacter, break_character_breaks);
  MatchLineBreaks(LineBreakType::kKeepAll, around_spaces);
  MatchLineBreaks(LineBreakType::kNormal, space_run_breaks,
                  BreakSpaceType::kBeforeSpaceRun);
}
// Four emoji joined by U+200D (11 UTF-16 code units in total): for every break
// type the only expected break opportunity is the end of the string.
TEST_F(TextBreakIteratorTest, KeepEmojiZWJFamilyIsolate) {
  SetTestString(u8"\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466");

  const LineBreakType break_types[] = {
      LineBreakType::kNormal, LineBreakType::kBreakAll,
      LineBreakType::kBreakCharacter, LineBreakType::kKeepAll};
  for (const LineBreakType break_type : break_types)
    MatchLineBreaks(break_type, {11});
}
// U+261D followed by U+1F3FB (3 UTF-16 code units in total): for every break
// type the only expected break opportunity is the end of the string.
TEST_F(TextBreakIteratorTest, KeepEmojiModifierSequenceIsolate) {
  SetTestString(u8"\u261D\U0001F3FB");

  const LineBreakType break_types[] = {
      LineBreakType::kNormal, LineBreakType::kBreakAll,
      LineBreakType::kBreakCharacter, LineBreakType::kKeepAll};
  for (const LineBreakType break_type : break_types)
    MatchLineBreaks(break_type, {3});
}
// A U+200D-joined emoji sequence (offsets 4..15) embedded between two Latin
// words: no break type is expected to break inside the sequence.
TEST_F(TextBreakIteratorTest, KeepEmojiZWJSequence) {
  const Vector<int> word_breaks = {3, 15, 19};
  const Vector<int> break_all_breaks = {1, 2, 3, 15, 17, 18, 19};
  const Vector<int> break_character_breaks = {1, 2, 3, 4, 15, 16, 17, 18, 19};

  SetTestString(
      u8"abc \U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F467 def");
  MatchLineBreaks(LineBreakType::kNormal, word_breaks);
  MatchLineBreaks(LineBreakType::kBreakAll, break_all_breaks);
  MatchLineBreaks(LineBreakType::kBreakCharacter, break_character_breaks);
  MatchLineBreaks(LineBreakType::kKeepAll, word_breaks);
}
// U+261D U+1F3FB (offsets 4..7) embedded between two Latin words: no break
// type is expected to break inside the pair.
TEST_F(TextBreakIteratorTest, KeepEmojiModifierSequence) {
  const Vector<int> word_breaks = {3, 7, 11};
  const Vector<int> break_all_breaks = {1, 2, 3, 7, 9, 10, 11};
  const Vector<int> break_character_breaks = {1, 2, 3, 4, 7, 8, 9, 10, 11};

  SetTestString(u8"abc \u261D\U0001F3FB def");
  MatchLineBreaks(LineBreakType::kNormal, word_breaks);
  MatchLineBreaks(LineBreakType::kBreakAll, break_all_breaks);
  MatchLineBreaks(LineBreakType::kBreakCharacter, break_character_breaks);
  MatchLineBreaks(LineBreakType::kKeepAll, word_breaks);
}
// Asking for the next break opportunity at the end offset of a one-character
// string must return that same offset, whatever the break type.
TEST_F(TextBreakIteratorTest, NextBreakOpportunityAtEnd) {
  const LineBreakType types_to_check[] = {
      LineBreakType::kNormal, LineBreakType::kBreakAll,
      LineBreakType::kBreakCharacter, LineBreakType::kKeepAll};

  for (const LineBreakType type : types_to_check) {
    // A fresh iterator is built for each break type.
    LazyLineBreakIterator iterator_at_end(String("1"));
    iterator_at_end.SetBreakType(type);
    EXPECT_EQ(1u, iterator_at_end.NextBreakOpportunity(1));
  }
}
// Checks the value LengthOfGraphemeCluster() returns for a range of strings,
// supplied both as UTF-8 literals and as explicit UTF-16 code units.
TEST_F(TextBreakIteratorTest, LengthOfGraphemeCluster) {
  // Installs a UTF-8 literal as the string under test and measures it.
  auto length_for_utf8 = [this](const char* utf8) {
    SetTestString(utf8);
    return TestLengthOfGraphemeCluster();
  };
  // Installs UTF-16 code units as the string under test and measures it.
  auto length_for_utf16 = [this](Vector<UChar> code_units) {
    SetTestString16(code_units);
    return TestLengthOfGraphemeCluster();
  };

  // Empty input.
  EXPECT_EQ(0u, length_for_utf8(""));
  EXPECT_EQ(0u, length_for_utf16({}));

  // Single code unit.
  EXPECT_EQ(1u, length_for_utf8("a"));
  EXPECT_EQ(1u, length_for_utf8("\n"));
  EXPECT_EQ(1u, length_for_utf8("\r"));
  EXPECT_EQ(1u, length_for_utf16({'a'}));
  EXPECT_EQ(1u, length_for_utf16({'\n'}));
  EXPECT_EQ(1u, length_for_utf16({'\r'}));

  // Several letters: only the first one is measured.
  EXPECT_EQ(1u, length_for_utf8("abc"));
  EXPECT_EQ(1u, length_for_utf16({'a', 'b', 'c'}));

  // "\r\n" is expected to count as one cluster of length 2; "\n\r" is not.
  EXPECT_EQ(2u, length_for_utf8("\r\n"));
  EXPECT_EQ(2u, length_for_utf16({'\r', '\n'}));
  EXPECT_EQ(1u, length_for_utf8("\n\r"));
  EXPECT_EQ(1u, length_for_utf16({'\n', '\r'}));
  EXPECT_EQ(2u, length_for_utf8("\r\n\r"));
  EXPECT_EQ(2u, length_for_utf16({'\r', '\n', '\r'}));

  // Multi-code-unit sequences expected to form a single cluster.
  EXPECT_EQ(2u, length_for_utf16({'g', 0x308}));
  EXPECT_EQ(3u, length_for_utf16({0x1100, 0x1161, 0x11A8}));
  EXPECT_EQ(2u, length_for_utf16({0x0BA8, 0x0BBF}));

  // With U+0308 first, the leading cluster is expected to have length 1.
  EXPECT_EQ(1u, length_for_utf16({0x308, 'g'}));

  // A leading multi-unit cluster followed by further text.
  EXPECT_EQ(2u, length_for_utf8("\r\nbc"));
  EXPECT_EQ(2u, length_for_utf16({'g', 0x308, 'b', 'c'}));
}
// Checks the per-code-unit list returned by the GraphemesClusterList() fixture
// helper: in every expectation, code units of one cluster share one value.
TEST_F(TextBreakIteratorTest, GraphemesClusterListTest) {
  // Plain ASCII, over the whole string and over a sub-range.
  const String hello(u"hello");
  EXPECT_EQ(GraphemesClusterList(hello, 0, 5),
            Vector<unsigned>({0, 1, 2, 3, 4}));
  EXPECT_EQ(GraphemesClusterList(hello, 2, 2), Vector<unsigned>({0, 1}));

  // A letter followed by U+0300 shares the letter's value.
  const String voila(u"voila\u0300!");
  EXPECT_EQ(GraphemesClusterList(voila, 0, 7),
            Vector<unsigned>({0, 1, 2, 3, 4, 4, 5}));

  // A letter followed by U+0303, U+031C and U+0337.
  const String stacked_marks(u"di\u0303\u031c\u0337!");
  EXPECT_EQ(GraphemesClusterList(stacked_marks, 0, 6),
            Vector<unsigned>({0, 1, 1, 1, 1, 2}));

  // Emoji sequences occupying 4 and 6 UTF-16 code units respectively.
  EXPECT_EQ(GraphemesClusterList(u"🇨🇦", 0, 4), Vector<unsigned>({0, 0, 0, 0}));
  EXPECT_EQ(GraphemesClusterList(u"🏳️‍🌈", 0, 6),
            Vector<unsigned>({0, 0, 0, 0, 0, 0}));

  // NO ZWJ on this sequence.
  EXPECT_EQ(GraphemesClusterList(u"🏳🌈", 0, 4),
            Vector<unsigned>({0, 0, 1, 1}));

  // ARABIC LETTER MEEM + ARABIC FATHA
  const String meem_fatha(u"\u0645\u064E");
  EXPECT_EQ(GraphemesClusterList(meem_fatha, 0, 2), Vector<unsigned>({0, 0}));
}
} // namespace blink