blob: 1d76435dbbb1bbb4a613ad048eb3d4eea752b5db [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/wtf/text/math_transform.h"
#include "base/check.h"
#include "third_party/blink/renderer/platform/wtf/text/ascii_ctype.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
namespace WTF {
namespace unicode {
// Maps an offset within a mathematical Greek sequence to a code point in the
// sequence that starts at kMathItalicUpperAlpha.
//
// |base_char| is the position of the character relative to the first
// character (capital alpha) of a mathematical Greek sequence, as computed by
// the caller. |code_point| is accepted for symmetry with the caller but is
// not read here.
static UChar32 mathVariantGreek(UChar32 code_point, UChar32 base_char) {
  // Distance from the start of the bold Greek sequence to the start of the
  // italic Greek sequence. Adding it to a bold code point yields the italic
  // code point at the same position.
  const auto bold_to_italic_shift = kMathItalicUpperAlpha - kMathBoldUpperAlpha;
  return base_char + kMathBoldUpperAlpha + bold_to_italic_shift;
}
// Maps an offset within a mathematical Latin sequence to a code point in the
// sequence that starts at kMathItalicUpperA.
//
// |base_char| is the position of the character relative to the first
// character (capital A) of a mathematical Latin sequence, as computed by the
// caller. |code_point| is accepted for symmetry with the caller but is not
// read here.
static UChar32 mathVariantLatin(UChar32 code_point, UChar32 base_char) {
  // The slot where italic small h would fall in the contiguous sequence, and
  // the code point that must be returned instead. See
  // https://mathml-refresh.github.io/mathml-core/#italic-mappings
  const UChar32 italic_small_h_slot = 0x1D455;
  const UChar32 italic_small_h_replacement = 0x210E;

  // Distance from the start of the bold Latin sequence to the start of the
  // italic Latin sequence.
  const auto bold_to_italic_shift = kMathItalicUpperA - kMathBoldUpperA;
  const UChar32 candidate = base_char + kMathBoldUpperA + bold_to_italic_shift;

  return candidate == italic_small_h_slot ? italic_small_h_replacement
                                          : candidate;
}
// Returns the italic mathematical counterpart of |code_point| for ASCII
// letters, the Greek alphabet ranges and a handful of Greek-like symbols.
// Any other code point is returned unchanged.
//
// TODO(https://crbug.com/1076420): this needs to handle all text-transform
// values.
UChar32 MathVariant(UChar32 code_point) {
  // Exceptional characters, part 1: nothing at these code points is
  // transformed.
  if (code_point == kHoleGreekUpperTheta ||
      code_point == kGreekLetterDigamma ||
      code_point == kGreekSmallLetterDigamma) {
    return code_point;
  }

  // Exceptional characters, part 2: exactly one possible transformation.
  if (code_point == kLatinSmallLetterDotlessI)
    return kMathItalicSmallDotlessI;
  if (code_point == kLatinSmallLetterDotlessJ)
    return kMathItalicSmallDotlessJ;

  // For every remaining supported character we compute its position within
  // the corresponding mathematical sequence, normalised to the first
  // character of that sequence (capital A for Latin, capital alpha for
  // Greek), and hand it to the matching helper.
  const UChar32 first_ascii_upper = 'A';
  const UChar32 first_ascii_lower = 'a';

  if (IsASCIIUpper(code_point))
    return mathVariantLatin(code_point, code_point - first_ascii_upper);

  if (IsASCIILower(code_point)) {
    // Lowercase letters follow the uppercase ones in the mathematical block,
    // so the position starts at the distance between capital A and small a
    // in that block.
    const UChar32 latin_offset =
        kMathBoldSmallA - kMathBoldUpperA + code_point - first_ascii_lower;
    return mathVariantLatin(code_point, latin_offset);
  }

  if (code_point >= kGreekUpperAlpha && code_point <= kGreekUpperOmega)
    return mathVariantGreek(code_point, code_point - kGreekUpperAlpha);

  if (code_point >= kGreekLowerAlpha && code_point <= kGreekLowerOmega) {
    // Lowercase Greek comes after uppercase Greek in the mathematical block,
    // with an extra character (Nabla) sitting between the two runs; using
    // the distance between the bold alphas accounts for it.
    const UChar32 greek_offset = kMathBoldSmallAlpha - kMathBoldUpperAlpha +
                                 code_point - kGreekLowerAlpha;
    return mathVariantGreek(code_point, greek_offset);
  }

  // Isolated Greek-like symbols: look up the bold counterpart, then express
  // it as a position relative to bold capital alpha.
  UChar32 bold_counterpart = 0;
  switch (code_point) {
    case kGreekUpperTheta:
      bold_counterpart = kMathBoldUpperTheta;
      break;
    case kNabla:
      bold_counterpart = kMathBoldNabla;
      break;
    case kPartialDifferential:
      bold_counterpart = kMathBoldPartialDifferential;
      break;
    case kGreekLunateEpsilonSymbol:
      bold_counterpart = kMathBoldEpsilonSymbol;
      break;
    case kGreekThetaSymbol:
      bold_counterpart = kMathBoldThetaSymbol;
      break;
    case kGreekKappaSymbol:
      bold_counterpart = kMathBoldKappaSymbol;
      break;
    case kGreekPhiSymbol:
      bold_counterpart = kMathBoldPhiSymbol;
      break;
    case kGreekRhoSymbol:
      bold_counterpart = kMathBoldRhoSymbol;
      break;
    case kGreekPiSymbol:
      bold_counterpart = kMathBoldPiSymbol;
      break;
    default:
      // Not a character this function knows how to transform.
      return code_point;
  }
  return mathVariantGreek(code_point, bold_counterpart - kMathBoldUpperAlpha);
}
} // namespace unicode
} // namespace WTF