blob: 16a1e90ede4a7f091aeeb0cf54360131f5774e9d [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/text/mathml_operator_dictionary.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
namespace blink {
namespace {
// Operators made of two ASCII characters, see
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary-compact-special-tables
// FindCategory() binary-searches this array, comparing the first and then the
// second character, so the entries must stay in ascending order. The position
// of an entry matters as well: FindCategory() derives the dictionary key of
// the operator as kCombiningMinusSignBelow + index.
const char* operators_2_ascii_chars[] = {
    "!!", "!=",        // indices 0-1
    "&&",              // index 2
    "**", "*=",        // indices 3-4
    "++", "+=",        // indices 5-6
    "--", "-=", "->",  // indices 7-9
    "..",              // index 10
    "//", "/=",        // indices 11-12
    ":=",              // index 13
    "<=", "<>",        // indices 14-15
    "==",              // index 16
    ">=",              // index 17
    "||",              // index 18
};
// One row of the compact operator dictionary, see
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary-categories-hexa-table
struct EntryRange {
  // Packed 16-bit value: ExtractKey() reads its low 14 bits and
  // ExtractCategory() reads its top 4 bits.
  uint16_t entry;
  // Number of additional consecutive keys, following the key stored in
  // |entry|, that this row also covers. The 4-bit width limits it to 0-15.
  unsigned int range_bounds_delta : 4;
};
// Returns the lookup key of |range|: the low 14 bits of |range.entry|. The two
// highest bits, which only ExtractCategory() reads, are masked off.
static inline uint16_t ExtractKey(const EntryRange& range) {
  constexpr uint16_t kKeyMask = 0x3FFF;
  return static_cast<uint16_t>(range.entry & kKeyMask);
}
// Returns the 4-bit category code of |range|: bits 12-15 of |range.entry|.
// FindCategory() translates this code into a MathMLOperatorDictionaryCategory.
static inline uint16_t ExtractCategory(const EntryRange& range) {
  constexpr unsigned kCategoryShift = 12;
  return static_cast<uint16_t>(range.entry >> kCategoryShift);
}
// The following representation is taken from the spec, and reduces storage
// requirements by mapping codepoints and category to better make use of the
// available bytes. For details see
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary.
//
// Layout of the first value of each row (EntryRange::entry), as it is read by
// ExtractKey(), ExtractCategory() and FindCategory():
//   bits 0-11   code point of the operator; FindCategory() shifts code points
//               in U+2000-U+2BFF down by 0x1C00 so that they fit in 12 bits.
//   bits 12-13  form bits (infix=00, prefix=01, postfix=10), matching the bits
//               FindCategory() ORs into its search key.
//   bits 12-15  category code returned by ExtractCategory() and decoded by the
//               switch statement in FindCategory().
// The second value (EntryRange::range_bounds_delta) is the number of further
// consecutive keys covered by the row, i.e. a row matches every key in
// [ExtractKey(row), ExtractKey(row) + range_bounds_delta].
//
// FindCategory() searches this array with std::upper_bound on ExtractKey(), so
// rows must stay sorted by ascending ExtractKey() value.
static const EntryRange compact_dictionary[] = {
{0x8025, 0}, {0x802A, 0}, {0x402B, 0}, {0x402D, 0}, {0x802E, 0},
{0x402F, 0}, {0x803F, 1}, {0xC05C, 0}, {0x805E, 1}, {0x807C, 0},
{0x40B1, 0}, {0x80B7, 0}, {0x80D7, 0}, {0x40F7, 0}, {0x4322, 0},
{0x8323, 0}, {0x832B, 0}, {0x832F, 0}, {0x8332, 0}, {0x8422, 0},
{0x8443, 0}, {0x4444, 0}, {0xC461, 3}, {0x0590, 9}, {0x059C, 15},
{0x05AC, 1}, {0x05AF, 6}, {0x05B9, 0}, {0x05BC, 15}, {0x05CC, 0},
{0x05D0, 13}, {0x05E0, 15}, {0x05F0, 0}, {0x05F3, 0}, {0x05F5, 1},
{0x05FD, 2}, {0x8606, 0}, {0x860E, 0}, {0x4612, 4}, {0x8617, 0},
{0x4618, 0}, {0x4624, 0}, {0x4627, 3}, {0x4636, 0}, {0x4638, 0},
{0x863F, 1}, {0x468C, 3}, {0x4693, 3}, {0x8697, 0}, {0x4698, 0},
{0x8699, 0}, {0x469D, 2}, {0x86A0, 1}, {0x46BB, 2}, {0x46C4, 0},
{0x86C5, 0}, {0x46C6, 0}, {0x86C7, 0}, {0x86C9, 3}, {0x46CE, 1},
{0x46D2, 1}, {0x8705, 1}, {0x89A0, 1}, {0x89AA, 1}, {0x89AD, 4},
{0x4B95, 2}, {0x8BCB, 0}, {0x8BCD, 0}, {0x0BF0, 1}, {0x4BF4, 0},
{0x0BF5, 10}, {0x0D0A, 6}, {0x0D12, 1}, {0x0D21, 1}, {0x0D4E, 15},
{0x0D5E, 3}, {0x0D6E, 1}, {0x8D81, 1}, {0x8D99, 1}, {0x8DB5, 0},
{0x4DBC, 0}, {0x8DC2, 1}, {0x8DC9, 4}, {0x8DD8, 1}, {0x8DDB, 0},
{0x8DDF, 1}, {0x8DE2, 0}, {0x8DE7, 6}, {0x4DF6, 0}, {0x8DF8, 3},
{0x8E1D, 4}, {0x4E22, 12}, {0x8E2F, 8}, {0x4E38, 2}, {0x8E3B, 2},
{0x8E3F, 0}, {0x4E40, 15}, {0x8E50, 0}, {0x4E51, 15}, {0x4E61, 2},
{0x4EDA, 1}, {0x8EDC, 1}, {0x4EFB, 0}, {0x4EFD, 0}, {0x8EFE, 0},
{0x4F32, 0}, {0x0F45, 1}, {0x1021, 0}, {0x5028, 0}, {0x102B, 0},
{0x102D, 0}, {0x505B, 0}, {0x507B, 1}, {0x10AC, 0}, {0x10B1, 0},
{0x1332, 0}, {0x5416, 0}, {0x1418, 0}, {0x141C, 0}, {0x1600, 1},
{0x1603, 1}, {0x1607, 0}, {0xD60F, 2}, {0x1612, 1}, {0x161F, 3},
{0x962B, 8}, {0x163C, 0}, {0x16BE, 1}, {0xD6C0, 3}, {0x5708, 0},
{0x570A, 0}, {0x1710, 0}, {0x1719, 0}, {0x5729, 0}, {0x5B72, 0},
{0x1B95, 1}, {0x1BC0, 0}, {0x5BE6, 0}, {0x5BE8, 0}, {0x5BEA, 0},
{0x5BEC, 0}, {0x5BEE, 0}, {0x5D80, 0}, {0x5D83, 0}, {0x5D85, 0},
{0x5D87, 0}, {0x5D89, 0}, {0x5D8B, 0}, {0x5D8D, 0}, {0x5D8F, 0},
{0x5D91, 0}, {0x5D93, 0}, {0x5D95, 0}, {0x5D97, 0}, {0x1D9B, 15},
{0x1DAB, 4}, {0x5DFC, 0}, {0xDE00, 10}, {0x9E0B, 15}, {0x9E1B, 1},
{0x1EEC, 1}, {0xDEFC, 0}, {0xDEFF, 0}, {0x2021, 1}, {0x2026, 1},
{0x6029, 0}, {0x605D, 0}, {0xA05E, 1}, {0x2060, 0}, {0x607C, 1},
{0xA07E, 0}, {0x20A8, 0}, {0xA0AF, 0}, {0x20B0, 0}, {0x20B2, 2},
{0x20B8, 1}, {0xA2C6, 1}, {0xA2C9, 0}, {0x22CA, 1}, {0xA2CD, 0},
{0x22D8, 2}, {0xA2DC, 0}, {0x22DD, 0}, {0xA2F7, 0}, {0xA302, 0},
{0x2311, 0}, {0x2320, 0}, {0x2325, 0}, {0x2327, 0}, {0x232A, 0},
{0x2332, 0}, {0x6416, 0}, {0x2419, 2}, {0x241D, 2}, {0x2432, 5},
{0xA43E, 0}, {0x2457, 0}, {0x24DB, 1}, {0x6709, 0}, {0x670B, 0},
{0xA722, 1}, {0x672A, 0}, {0xA7B4, 1}, {0x27CD, 0}, {0xA7DC, 5},
{0x6B73, 0}, {0x6BE7, 0}, {0x6BE9, 0}, {0x6BEB, 0}, {0x6BED, 0},
{0x6BEF, 0}, {0x6D80, 0}, {0x6D84, 0}, {0x6D86, 0}, {0x6D88, 0},
{0x6D8A, 0}, {0x6D8C, 0}, {0x6D8E, 0}, {0x6D90, 0}, {0x6D92, 0},
{0x6D94, 0}, {0x6D96, 0}, {0x6D98, 0}, {0x6DFD, 0}};
} // namespace
MathMLOperatorDictionaryCategory FindCategory(
const String& content,
MathMLOperatorDictionaryForm form) {
DCHECK(!content.Is8Bit());
// Handle special cases and calculate a BMP code point used for the key.
uint16_t key{0};
if (content.length() == 1) {
UChar32 character = content[0];
if (character < kCombiningMinusSignBelow ||
character > kGreekCapitalReversedDottedLunateSigmaSymbol) {
// Accept BMP characters that are not in the ranges where 2-ASCII-chars
// operators are mapped below.
key = character;
}
} else if (content.length() == 2) {
UChar32 character = content.CharacterStartingAt(0);
if (character == kArabicMathematicalOperatorMeemWithHahWithTatweel ||
character == kArabicMathematicalOperatorHahWithDal) {
// Special handling of non-BMP Arabic operators.
if (form == MathMLOperatorDictionaryForm::kPostfix)
return MathMLOperatorDictionaryCategory::kI;
return MathMLOperatorDictionaryCategory::kNone;
} else if (content[1] == kCombiningLongSolidusOverlay ||
content[1] == kCombiningLongVerticalLineOverlay) {
// If the second character is COMBINING LONG SOLIDUS OVERLAY or
// COMBINING LONG VERTICAL LINE OVERLAY, then use the property of the
// first character.
key = content[0];
} else {
// Perform a binary search for 2-ASCII-chars operators.
const char** last =
operators_2_ascii_chars + base::size(operators_2_ascii_chars);
const char** entry = std::lower_bound(
operators_2_ascii_chars, last, content,
[](const char* lhs, const String& rhs) -> bool {
return lhs[0] < rhs[0] || (lhs[0] == rhs[0] && lhs[1] < rhs[1]);
});
if (entry != last && content == *entry)
key = kCombiningMinusSignBelow + (entry - operators_2_ascii_chars);
}
}
if (!key)
return MathMLOperatorDictionaryCategory::kNone;
// Handle special categories that are not encoded in the compact dictionary.
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary-categories-values
if (form == MathMLOperatorDictionaryForm::kPrefix &&
((kDoubleStruckItalicCapitalDCharacter <= key &&
key <= kDoubleStruckItalicSmallDCharacter) ||
key == kPartialDifferential ||
(kSquareRootCharacter <= key && key <= kFourthRootCharacter))) {
return MathMLOperatorDictionaryCategory::kK;
}
if (form == MathMLOperatorDictionaryForm::kInfix &&
(key == kComma || key == kColon || key == kSemiColon)) {
return MathMLOperatorDictionaryCategory::kM;
}
// Calculate the key for the compact dictionary.
if (kEnQuadCharacter <= key && key <= kHellschreiberPauseSymbol) {
// Map above range (U+2000–U+2BFF) to (U+0400-0x0FFF) to fit into
// 12 bits by decrementing with (U+2000 - U+0400) == 0x1C00.
key -= 0x1C00;
} else if (key > kGreekCapitalReversedDottedLunateSigmaSymbol) {
return MathMLOperatorDictionaryCategory::kNone;
}
// Bitmasks used to set form 2-bits (infix=00, prefix=01, postfix=10).
if (form == MathMLOperatorDictionaryForm::kPrefix)
key |= 0x1000;
else if (form == MathMLOperatorDictionaryForm::kPostfix)
key |= 0x2000;
DCHECK_LE(key, 0x2FFF);
// Perform a binary search on the compact dictionary.
const EntryRange* entry_range = std::upper_bound(
compact_dictionary, compact_dictionary + base::size(compact_dictionary),
key, [](uint16_t lhs, EntryRange rhs) -> bool {
return lhs < ExtractKey(rhs);
});
if (entry_range == compact_dictionary)
return MathMLOperatorDictionaryCategory::kNone;
entry_range--;
DCHECK_LE(ExtractKey(*entry_range), key);
if (key > (ExtractKey(*entry_range) + entry_range->range_bounds_delta))
return MathMLOperatorDictionaryCategory::kNone;
// An entry is found: set the properties according the category.
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary-categories-values
switch (ExtractCategory(*entry_range)) {
case 0x0:
return MathMLOperatorDictionaryCategory::kA;
case 0x4:
return MathMLOperatorDictionaryCategory::kB;
case 0x8:
return MathMLOperatorDictionaryCategory::kC;
case 0x1:
case 0x2:
case 0xC:
return MathMLOperatorDictionaryCategory::kDorEorL;
case 0x5:
case 0x6:
return MathMLOperatorDictionaryCategory::kForG;
case 0x9:
return MathMLOperatorDictionaryCategory::kH;
case 0xA:
return MathMLOperatorDictionaryCategory::kI;
case 0xD:
return MathMLOperatorDictionaryCategory::kJ;
}
NOTREACHED();
return MathMLOperatorDictionaryCategory::kNone;
}
} // namespace blink