// blob: a02dd222938609ce23c1363168b3dbd269887d32 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/core/editing/state_machines/forward_grapheme_boundary_state_machine.h"
#include <ostream> // NOLINT
#include "third_party/blink/renderer/core/editing/state_machines/state_machine_util.h"
#include "third_party/blink/renderer/core/editing/state_machines/text_segmentation_machine_state.h"
#include "third_party/blink/renderer/platform/text/character.h"
#include "third_party/blink/renderer/platform/wtf/text/unicode.h"
namespace blink {
namespace {
// Sentinel held by prev_code_point_ until the first following code point has
// been read (the constructor and Reset() both store it). It is one greater
// than WTF::unicode::kMaxCodepoint, so it is presumably outside the range of
// real code points -- assumes kMaxCodepoint is the largest valid code point;
// confirm in unicode.h.
const UChar32 kUnsetCodePoint = WTF::unicode::kMaxCodepoint + 1;
} // namespace
// X-macro listing every internal state of ForwardGraphemeBoundaryStateMachine.
// It is expanded twice in this file: once to declare the InternalState
// enumerators and once to build the matching table of state names used by
// operator<<. Both expansions follow the order given here, so the enumerator
// value doubles as the index into the name table.
// NOTE: "kStartForwardWaitTrailSurrgate" is misspelled ("Surrgate"); the name
// is referenced by the switch statements in this file, so it has to be renamed
// everywhere at once.
#define FOR_EACH_FORWARD_GRAPHEME_BOUNDARY_STATE(V) \
/* Counting preceding regional indicators. This is initial state. */ \
V(kCountRIS) \
/* Waiting lead surrogate during counting regional indicators. */ \
V(kCountRISWaitLeadSurrogate) \
/* Waiting first following code unit. */ \
V(kStartForward) \
/* Waiting trail surrogate for the first following code point. */ \
V(kStartForwardWaitTrailSurrgate) \
/* Searching grapheme boundary. */ \
V(kSearch) \
/* Waiting trail surrogate during searching grapheme boundary. */ \
V(kSearchWaitTrailSurrogate) \
/* The state machine has stopped. */ \
V(kFinished)
// Internal states of the machine. The enumerators are generated from
// FOR_EACH_FORWARD_GRAPHEME_BOUNDARY_STATE, one per V(...) entry and in the
// same order, starting from zero.
enum class ForwardGraphemeBoundaryStateMachine::InternalState {
#define V(name) name,
FOR_EACH_FORWARD_GRAPHEME_BOUNDARY_STATE(V)
#undef V
};
// Streams the symbolic name of |state| (for example "kSearch") so that it can
// be embedded in DCHECK / NOTREACHED diagnostics. The name table is generated
// from the same X-macro as the enum, so the enumerator value is the index of
// its name.
std::ostream& operator<<(
    std::ostream& os,
    ForwardGraphemeBoundaryStateMachine::InternalState state) {
  static const char* const kTexts[] = {
#define V(name) #name,
      FOR_EACH_FORWARD_GRAPHEME_BOUNDARY_STATE(V)
#undef V
  };
  // Validate the index before touching the table. Forming a pointer beyond the
  // end of |kTexts| is undefined behavior in itself, and a lower-bound check on
  // an unsigned offset can never fail, so a single upper-bound check on the
  // index is both sufficient and well defined.
  const size_t index = static_cast<size_t>(state);
  DCHECK_LT(index, static_cast<size_t>(std::end(kTexts) - std::begin(kTexts)))
      << "Unknown state value";
  return os << kTexts[index];
}
ForwardGraphemeBoundaryStateMachine::ForwardGraphemeBoundaryStateMachine()
: prev_code_point_(kUnsetCodePoint),
internal_state_(InternalState::kCountRIS) {}
// Consumes one code unit of the text that precedes the start offset. The
// preceding text is only inspected to count regional indicators; the caller
// feeds it one code unit at a time, and a trail surrogate is expected before
// its lead surrogate.
//
// Returns kNeedMoreCodeUnit while counting continues, and
// kNeedFollowingCodeUnit once the machine has switched to kStartForward.
// Calling this in any other state is a programming error.
TextSegmentationMachineState
ForwardGraphemeBoundaryStateMachine::FeedPrecedingCodeUnit(UChar code_unit) {
  // Nothing in the forward direction may have been recorded yet.
  DCHECK_EQ(prev_code_point_, kUnsetCodePoint);
  DCHECK_EQ(boundary_offset_, 0);

  switch (internal_state_) {
    case InternalState::kCountRIS: {
      DCHECK_EQ(pending_code_unit_, 0);
      // Anything other than a trail surrogate ends the counting phase.
      if (!U16_IS_TRAIL(code_unit))
        return MoveToNextState(InternalState::kStartForward);
      // Remember the trail surrogate until its lead surrogate arrives.
      pending_code_unit_ = code_unit;
      return MoveToNextState(InternalState::kCountRISWaitLeadSurrogate);
    }

    case InternalState::kCountRISWaitLeadSurrogate: {
      DCHECK_NE(pending_code_unit_, 0);
      // The pending trail surrogate is consumed on every path out of here.
      const auto trail_unit = pending_code_unit_;
      pending_code_unit_ = 0;
      const bool completes_regional_indicator =
          U16_IS_LEAD(code_unit) &&
          Character::IsRegionalIndicator(
              U16_GET_SUPPLEMENTARY(code_unit, trail_unit));
      if (!completes_regional_indicator)
        return MoveToNextState(InternalState::kStartForward);
      ++preceding_ris_count_;
      return MoveToNextState(InternalState::kCountRIS);
    }

    // The machine has already asked for following text.
    case InternalState::kStartForward:
    case InternalState::kStartForwardWaitTrailSurrgate:
    case InternalState::kSearch:
    case InternalState::kSearchWaitTrailSurrogate:
      NOTREACHED() << "Do not call feedPrecedingCodeUnit() once "
                   << TextSegmentationMachineState::kNeedFollowingCodeUnit
                   << " is returned. InternalState: " << internal_state_;
      return Finish();

    case InternalState::kFinished:
      NOTREACHED() << "Do not call feedPrecedingCodeUnit() once it finishes.";
      return Finish();
  }

  NOTREACHED() << "Unhandled state: " << internal_state_;
  return Finish();
}
// Consumes one code unit of the text that follows the start offset and
// advances the search for the next grapheme boundary.
//
// boundary_offset_ accumulates the number of code units (1 for a single code
// unit, 2 for a surrogate pair) that belong to the grapheme found so far.
//
// Returns kNeedMoreCodeUnit while the search continues and kFinished once the
// boundary is known. Must only be called after kNeedFollowingCodeUnit has been
// returned and before the machine finishes.
TextSegmentationMachineState
ForwardGraphemeBoundaryStateMachine::FeedFollowingCodeUnit(UChar code_unit) {
  switch (internal_state_) {
    case InternalState::kCountRIS:  // Fallthrough
    case InternalState::kCountRISWaitLeadSurrogate:
      NOTREACHED() << "Do not call feedFollowingCodeUnit() until "
                   << TextSegmentationMachineState::kNeedFollowingCodeUnit
                   << " is returned. InternalState: " << internal_state_;
      return Finish();
    case InternalState::kStartForward:
      // First following code unit: nothing has been recorded yet.
      DCHECK_EQ(prev_code_point_, kUnsetCodePoint);
      DCHECK_EQ(boundary_offset_, 0);
      DCHECK_EQ(pending_code_unit_, 0);
      if (U16_IS_TRAIL(code_unit)) {
        // Lonely trail surrogate.
        boundary_offset_ = 1;
        return Finish();
      }
      if (U16_IS_LEAD(code_unit)) {
        // Hold the lead surrogate until the next code unit is seen.
        pending_code_unit_ = code_unit;
        return MoveToNextState(InternalState::kStartForwardWaitTrailSurrgate);
      }
      prev_code_point_ = code_unit;
      boundary_offset_ = 1;
      return MoveToNextState(InternalState::kSearch);
    case InternalState::kStartForwardWaitTrailSurrgate:
      DCHECK_EQ(prev_code_point_, kUnsetCodePoint);
      DCHECK_EQ(boundary_offset_, 0);
      DCHECK_NE(pending_code_unit_, 0);
      if (U16_IS_TRAIL(code_unit)) {
        // The surrogate pair forms the first code point; it spans two units.
        prev_code_point_ = U16_GET_SUPPLEMENTARY(pending_code_unit_, code_unit);
        boundary_offset_ = 2;
        pending_code_unit_ = 0;
        return MoveToNextState(InternalState::kSearch);
      }
      // Lonely lead surrogate.
      boundary_offset_ = 1;
      return Finish();
    case InternalState::kSearch:
      DCHECK_NE(prev_code_point_, kUnsetCodePoint);
      DCHECK_NE(boundary_offset_, 0);
      DCHECK_EQ(pending_code_unit_, 0);
      if (U16_IS_LEAD(code_unit)) {
        pending_code_unit_ = code_unit;
        return MoveToNextState(InternalState::kSearchWaitTrailSurrogate);
      }
      if (U16_IS_TRAIL(code_unit))
        return Finish();  // Lonely trail surrogate.
      if (IsGraphemeBreak(prev_code_point_, code_unit))
        return Finish();
      // No break before this code unit: it extends the current grapheme.
      prev_code_point_ = code_unit;
      boundary_offset_ += 1;
      return StaySameState();
    case InternalState::kSearchWaitTrailSurrogate:
      DCHECK_NE(prev_code_point_, kUnsetCodePoint);
      DCHECK_NE(boundary_offset_, 0);
      DCHECK_NE(pending_code_unit_, 0);
      if (!U16_IS_TRAIL(code_unit))
        return Finish();  // Lonely lead surrogate.

      {
        const UChar32 code_point =
            U16_GET_SUPPLEMENTARY(pending_code_unit_, code_unit);
        pending_code_unit_ = 0;
        if (Character::IsRegionalIndicator(prev_code_point_) &&
            Character::IsRegionalIndicator(code_point)) {
          // Two adjacent regional indicators. Whether they pair up depends on
          // how many regional indicators were counted in the preceding text.
          if (preceding_ris_count_ % 2 == 0) {
            // Odd numbered RI case, note that prev_code_point_ is also RI.
            boundary_offset_ += 2;
          }
          return Finish();
        }
        if (IsGraphemeBreak(prev_code_point_, code_point))
          return Finish();
        // The supplementary code point extends the current grapheme.
        prev_code_point_ = code_point;
        boundary_offset_ += 2;
        return MoveToNextState(InternalState::kSearch);
      }
    case InternalState::kFinished:
      NOTREACHED() << "Do not call feedFollowingCodeUnit() once it finishes.";
      return Finish();
  }
  NOTREACHED() << "Unhandled state: " << internal_state_;
  return Finish();
}
// Signals that no more preceding code units are available. Only valid while
// the machine is still counting preceding regional indicators; it then moves
// to kStartForward, which yields kNeedFollowingCodeUnit.
TextSegmentationMachineState
ForwardGraphemeBoundaryStateMachine::TellEndOfPrecedingText() {
  DCHECK(internal_state_ == InternalState::kCountRISWaitLeadSurrogate ||
         internal_state_ == InternalState::kCountRIS)
      << "Do not call tellEndOfPrecedingText() once "
      << TextSegmentationMachineState::kNeedFollowingCodeUnit
      << " is returned. InternalState: " << internal_state_;

  // The preceding buffer may have ended on a trail surrogate whose lead
  // surrogate never arrived. That leftover is simply dropped: the preceding
  // text is consulted only to count regional indicators.
  pending_code_unit_ = 0;

  return MoveToNextState(InternalState::kStartForward);
}
// Returns the boundary offset found so far. If the machine has not reached
// kFinished, the end-of-text handling is applied first.
int ForwardGraphemeBoundaryStateMachine::FinalizeAndGetBoundaryOffset() {
  const bool still_running = internal_state_ != InternalState::kFinished;
  if (still_running) {
    FinishWithEndOfText();
  }
  DCHECK_GE(boundary_offset_, 0);
  return boundary_offset_;
}
// Puts the machine back into its initial configuration so it can be reused:
// kCountRIS state, no recorded code point, and all counters cleared.
void ForwardGraphemeBoundaryStateMachine::Reset() {
  // State and last-seen code point.
  internal_state_ = InternalState::kCountRIS;
  prev_code_point_ = kUnsetCodePoint;

  // Counters and the buffered surrogate.
  preceding_ris_count_ = 0;
  boundary_offset_ = 0;
  pending_code_unit_ = 0;
}
// Marks the machine as stopped and reports kFinished to the caller. Must not
// be called when the machine is already in kFinished.
TextSegmentationMachineState ForwardGraphemeBoundaryStateMachine::Finish() {
  DCHECK_NE(internal_state_, InternalState::kFinished);

  internal_state_ = InternalState::kFinished;

  return TextSegmentationMachineState::kFinished;
}
// Switches to |next_state| and returns what the caller should supply next:
// kNeedFollowingCodeUnit when entering kStartForward, kNeedMoreCodeUnit for
// every other state. |next_state| must differ from the current state and must
// not be kFinished; those transitions have dedicated helpers.
TextSegmentationMachineState
ForwardGraphemeBoundaryStateMachine::MoveToNextState(InternalState next_state) {
  DCHECK_NE(next_state, InternalState::kFinished) << "Use finish() instead";
  DCHECK_NE(next_state, internal_state_) << "Use staySameState() instead";
  internal_state_ = next_state;
  // kStartForward is the point where input switches from preceding text to
  // following text, so it is the only state announced with a distinct value.
  if (next_state == InternalState::kStartForward)
    return TextSegmentationMachineState::kNeedFollowingCodeUnit;
  return TextSegmentationMachineState::kNeedMoreCodeUnit;
}
// Keeps the current state and asks the caller for another code unit. Only the
// kSearch state is allowed to loop on itself.
TextSegmentationMachineState
ForwardGraphemeBoundaryStateMachine::StaySameState() {
  DCHECK_EQ(internal_state_, InternalState::kSearch)
      << "Only Search can stay the same state.";

  return TextSegmentationMachineState::kNeedMoreCodeUnit;
}
// Applies end-of-text handling when the input ran out before the machine
// reached kFinished. Only boundary_offset_ is adjusted; internal_state_ is
// left as it is. Must not be called once the machine has finished.
void ForwardGraphemeBoundaryStateMachine::FinishWithEndOfText() {
  switch (internal_state_) {
    case InternalState::kCountRIS:                   // Fallthrough
    case InternalState::kCountRISWaitLeadSurrogate:  // Fallthrough
    case InternalState::kStartForward:
      return;  // Haven't searched anything forward. Just finish.
    case InternalState::kStartForwardWaitTrailSurrgate:
      // Lonely lead surrogate.
      boundary_offset_ = 1;
      return;
    case InternalState::kSearch:  // Fallthrough
    case InternalState::kSearchWaitTrailSurrogate:
      // Keep the offset accumulated so far; a lead surrogate still waiting
      // for its trail surrogate is not counted.
      return;
    case InternalState::kFinished:
      NOTREACHED() << "Do not call finishWithEndOfText() once it finishes.";
      // Return here so this misuse is not also reported as an unhandled
      // state by the check below.
      return;
  }
  NOTREACHED() << "Unhandled state: " << internal_state_;
}
} // namespace blink