blob: bcb138e4437232fd6b2e25c77efd644ade070a21 [file] [log] [blame]
/*
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/core/html/parser/html_document_parser.h"
#include <memory>
#include <utility>
#include "base/auto_reset.h"
#include "base/numerics/safe_conversions.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/common/loader/loading_behavior_flag.h"
#include "third_party/blink/public/mojom/appcache/appcache.mojom-blink.h"
#include "third_party/blink/public/platform/platform.h"
#include "third_party/blink/public/platform/task_type.h"
#include "third_party/blink/renderer/core/css/media_values_cached.h"
#include "third_party/blink/renderer/core/css/style_engine.h"
#include "third_party/blink/renderer/core/dom/document_fragment.h"
#include "third_party/blink/renderer/core/dom/element.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/html/html_document.h"
#include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
#include "third_party/blink/renderer/core/html/parser/background_html_parser.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_scheduler.h"
#include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h"
#include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
#include "third_party/blink/renderer/core/html/parser/pump_session.h"
#include "third_party/blink/renderer/core/html_names.h"
#include "third_party/blink/renderer/core/inspector/inspector_trace_events.h"
#include "third_party/blink/renderer/core/loader/document_loader.h"
#include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h"
#include "third_party/blink/renderer/core/loader/preload_helper.h"
#include "third_party/blink/renderer/core/probe/core_probes.h"
#include "third_party/blink/renderer/core/script/html_parser_script_runner.h"
#include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
#include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
#include "third_party/blink/renderer/platform/heap/handle.h"
#include "third_party/blink/renderer/platform/heap/heap.h"
#include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
#include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h"
#include "third_party/blink/renderer/platform/runtime_enabled_features.h"
#include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
#include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
#include "third_party/blink/renderer/platform/wtf/shared_buffer.h"
namespace blink {
// Running total of tokens dropped when queued speculation chunks are
// discarded. Kept file-local and exposed only through the two test helpers
// below.
static size_t g_discarded_token_count_for_testing = 0;

// Test helper: puts the discarded-token counter back to zero.
void ResetDiscardedTokenCountForTesting() {
  g_discarded_token_count_for_testing = size_t{0};
}

// Test helper: reports how many tokens have been discarded since the last
// reset.
size_t GetDiscardedTokenCountForTesting() {
  const size_t discarded_so_far = g_discarded_token_count_for_testing;
  return discarded_so_far;
}
// Default upper bound on the number of tokens the foreground HTML parser
// should try to process in one go. Lower values generally mean faster first
// paints; larger values delay first paint but bring it closer to the final
// page. This value is only the fallback: GetMaxTokenizationBudget() passes it
// as the default when looking up the "MaxTokenizationBudget" field trial
// parameter (i.e. when no Finch-provided value exists).
constexpr int kDefaultMaxTokenizationBudget = 250;
// Forward declarations of the scope helpers defined further down, so that
// HTMLDocumentParserState can name them as friends before they are defined.
class EndIfDelayedForbiddenScope;
class ShouldCompleteScope;
class AttemptToEndForbiddenScope;
// This class encapsulates the internal state needed for synchronous foreground
// HTML parsing (e.g. if HTMLDocumentParser::PumpTokenizer yields, this class
// tracks what should be done after the pump completes.)
//
// The three counters that gate ending/completing are only adjustable through
// the friend scope classes (EndIfDelayedForbiddenScope, ShouldCompleteScope,
// AttemptToEndForbiddenScope); everything else is exposed via the public
// accessors below.
class HTMLDocumentParserState
    : public GarbageCollected<HTMLDocumentParserState> {
  friend EndIfDelayedForbiddenScope;
  friend ShouldCompleteScope;
  friend AttemptToEndForbiddenScope;

 public:
  // Keeps track of whether the parser needs to complete tokenization work,
  // optionally followed by EndIfDelayed.
  enum class DeferredParserState {
    // Indicates that a tokenizer pump has either completed or hasn't been
    // scheduled.
    kNotScheduled = 0,  // Enforce ordering in this enum.
    // Indicates that a tokenizer pump is scheduled and hasn't completed yet.
    kScheduled = 1,
    // Indicates that a tokenizer pump, followed by EndIfDelayed, is scheduled.
    kScheduledWithEndIfDelayed = 2
  };

  enum class MetaCSPTokenState {
    // If we've seen a meta CSP token in an upcoming HTML chunk, then we need to
    // defer any preloads until we've added the CSP token to the document and
    // applied the Content Security Policy.
    kSeen = 0,
    // Indicates that there is no meta CSP token in the upcoming chunk.
    kNotSeen = 1,
    // Indicates that we've added the CSP token to the document and we can now
    // fetch preloads.
    kProcessed = 2,
    // Indicates that it's too late to apply a Content-Security policy (because
    // we've exited the header section.)
    kUnenforceable = 3,
  };

  // Starts out with nothing scheduled, no meta CSP token seen, all counters at
  // zero, link-header preloads still pending and no bytes seen. |mode| is
  // stored and later consulted by ShouldComplete() and IsSynchronous().
  explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode)
      : state_(DeferredParserState::kNotScheduled),
        meta_csp_state_(MetaCSPTokenState::kNotSeen),
        mode_(mode),
        end_if_delayed_forbidden_(0),
        should_complete_(0),
        should_attempt_to_end_on_eof_(0),
        needs_link_header_dispatch_(true),
        have_seen_first_byte_(false) {}

  // Nothing to trace: this class holds no traced members.
  void Trace(Visitor* v) const {}

  // Updates the scheduling state. Moving to kScheduled while ShouldComplete()
  // holds is treated as a programming error.
  void SetState(DeferredParserState state) {
    DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete()));
    state_ = state;
  }
  DeferredParserState GetState() const { return state_; }

  // True for both kScheduled and kScheduledWithEndIfDelayed; relies on the
  // explicit enumerator ordering above.
  bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; }

  // Returns a static, human-readable name for the current scheduling state.
  const char* GetStateAsString() const {
    switch (state_) {
      case DeferredParserState::kNotScheduled:
        return "not_scheduled";
      case DeferredParserState::kScheduled:
        return "scheduled";
      case DeferredParserState::kScheduledWithEndIfDelayed:
        return "scheduled_with_end_if_delayed";
    }
    // Every enumerator is handled above, so this is only reachable if |state_|
    // somehow holds an out-of-range value. Returning a placeholder keeps
    // control from flowing off the end of a non-void function, which would be
    // undefined behaviour.
    return "unknown";
  }

  bool NeedsLinkHeaderPreloadsDispatch() const {
    return needs_link_header_dispatch_;
  }
  void DispatchedLinkHeaderPreloads() { needs_link_header_dispatch_ = false; }

  bool HaveSeenFirstByte() const { return have_seen_first_byte_; }
  void SetHaveSeenFirstByte() { have_seen_first_byte_ = true; }

  // Keeps track of whether Document::Finish has been called whilst parsing
  // asynchronously. ShouldAttemptToEndOnEOF() means that the parser should
  // close when there's no more input.
  bool ShouldAttemptToEndOnEOF() const {
    return should_attempt_to_end_on_eof_ > 0;
  }
  void SetAttemptToEndOnEOF() {
    // This method should only be called from ::Finish.
    should_attempt_to_end_on_eof_++;
    // Should only ever call ::Finish once.
    DCHECK_LT(should_attempt_to_end_on_eof_, 2);
  }

  // True unless at least one EndIfDelayedForbiddenScope is currently active.
  bool ShouldEndIfDelayed() const { return end_if_delayed_forbidden_ == 0; }

  // True while a ShouldCompleteScope is active, and always true for any mode
  // other than kAllowDeferredParsing.
  bool ShouldComplete() const {
    return should_complete_ || GetMode() != kAllowDeferredParsing;
  }
  bool IsSynchronous() const {
    return mode_ == ParserSynchronizationPolicy::kForceSynchronousParsing;
  }
  ParserSynchronizationPolicy GetMode() const { return mode_; }

  // Records whether a meta CSP token was seen. Has no effect once the header
  // has been exited (kUnenforceable is sticky).
  void SetSeenCSPMetaTag(const bool seen) {
    if (meta_csp_state_ == MetaCSPTokenState::kUnenforceable)
      return;
    if (seen)
      meta_csp_state_ = MetaCSPTokenState::kSeen;
    else
      meta_csp_state_ = MetaCSPTokenState::kNotSeen;
  }
  void SetExitedHeader() {
    meta_csp_state_ = MetaCSPTokenState::kUnenforceable;
  }
  bool HaveExitedHeader() const {
    return meta_csp_state_ == MetaCSPTokenState::kUnenforceable;
  }

 private:
  // Counter manipulation, reserved for the friend scope classes.
  void EnterEndIfDelayedForbidden() { end_if_delayed_forbidden_++; }
  void ExitEndIfDelayedForbidden() {
    end_if_delayed_forbidden_--;
    DCHECK_GE(end_if_delayed_forbidden_, 0);
  }
  // Clears the "attempt to end on EOF" request; it must currently be set.
  void EnterAttemptToEndForbidden() {
    DCHECK_GT(should_attempt_to_end_on_eof_, 0);
    should_attempt_to_end_on_eof_ = 0;
  }
  void EnterShouldComplete() { should_complete_++; }
  void ExitShouldComplete() {
    should_complete_--;
    DCHECK_GE(should_complete_, 0);
  }

  DeferredParserState state_;
  MetaCSPTokenState meta_csp_state_;
  ParserSynchronizationPolicy mode_;
  // Nesting depth of active EndIfDelayedForbiddenScope objects.
  int end_if_delayed_forbidden_;
  // Nesting depth of active ShouldCompleteScope objects.
  int should_complete_;
  // Set to non-zero if Document::Finish has been called and we're operating
  // asynchronously.
  int should_attempt_to_end_on_eof_;
  bool needs_link_header_dispatch_;
  bool have_seen_first_byte_;
};
// Stack-only guard. For as long as an instance is alive, the parser state's
// "end-if-delayed forbidden" counter is one higher, which makes
// HTMLDocumentParserState::ShouldEndIfDelayed() report false. The counter is
// decremented again on destruction, so guards may nest.
class EndIfDelayedForbiddenScope {
  STACK_ALLOCATED();

 public:
  explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state)
      : state_(state) {
    state->EnterEndIfDelayedForbidden();
  }

  ~EndIfDelayedForbiddenScope() {
    state_->ExitEndIfDelayedForbidden();
  }

 private:
  // State object whose counter this guard adjusts.
  HTMLDocumentParserState* state_;
};
// Stack-only helper that clears the parser state's "attempt to end on EOF"
// request when constructed. Note that, unlike the other scope helpers in this
// file, it defines no destructor: the request is not restored when the object
// goes out of scope.
class AttemptToEndForbiddenScope {
  STACK_ALLOCATED();

 public:
  explicit AttemptToEndForbiddenScope(HTMLDocumentParserState* state)
      : state_(state) {
    state->EnterAttemptToEndForbidden();
  }

 private:
  // State object that was modified on construction.
  HTMLDocumentParserState* state_;
};
// Stack-only guard. While an instance is alive, the parser state's
// "should complete" counter is one higher, which makes
// HTMLDocumentParserState::ShouldComplete() report true. The counter is
// decremented again on destruction, so guards may nest.
class ShouldCompleteScope {
  STACK_ALLOCATED();

 public:
  explicit ShouldCompleteScope(HTMLDocumentParserState* state)
      : state_(state) {
    state->EnterShouldComplete();
  }

  ~ShouldCompleteScope() {
    state_->ExitShouldComplete();
  }

 private:
  // State object whose counter this guard adjusts.
  HTMLDocumentParserState* state_;
};
// This is a direct transcription of step 4 from:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
//
// Picks the initial tokenizer state for fragment parsing from the context
// element's tag:
//   - no context element                      -> Data
//   - <title>, <textarea>                     -> RCDATA
//   - <style>, <xmp>, <iframe>, <noembed>,
//     <noframes>, and <noscript> when
//     options.scripting_flag is set           -> RAWTEXT
//   - <script>                                -> ScriptData
//   - <plaintext>                             -> PLAINTEXT
//   - anything else                           -> Data
// When |report_errors| is false, the RAWTEXT and ScriptData cases yield
// PLAINTEXT instead.
static HTMLTokenizer::State TokenizerStateForContextElement(
    Element* context_element,
    bool report_errors,
    const HTMLParserOptions& options) {
  if (!context_element)
    return HTMLTokenizer::kDataState;

  const QualifiedName& tag = context_element->TagQName();

  const bool is_rcdata_context = tag.Matches(html_names::kTitleTag) ||
                                 tag.Matches(html_names::kTextareaTag);
  if (is_rcdata_context)
    return HTMLTokenizer::kRCDATAState;

  const bool is_raw_text_context =
      tag.Matches(html_names::kStyleTag) ||
      tag.Matches(html_names::kXmpTag) ||
      tag.Matches(html_names::kIFrameTag) ||
      tag.Matches(html_names::kNoembedTag) ||
      (tag.Matches(html_names::kNoscriptTag) && options.scripting_flag) ||
      tag.Matches(html_names::kNoframesTag);
  if (is_raw_text_context) {
    if (report_errors)
      return HTMLTokenizer::kRAWTEXTState;
    return HTMLTokenizer::kPLAINTEXTState;
  }

  if (tag.Matches(html_names::kScriptTag)) {
    if (report_errors)
      return HTMLTokenizer::kScriptDataState;
    return HTMLTokenizer::kPLAINTEXTState;
  }

  if (tag.Matches(html_names::kPlaintextTag))
    return HTMLTokenizer::kPLAINTEXTState;

  return HTMLTokenizer::kDataState;
}
// Brackets one block of parsing for yield-interval metrics. Does nothing at
// all when no metrics reporter is supplied.
class ScopedYieldTimer {
 public:
  // Created at the start of a block of parsing. If a timer left behind by the
  // previous block exists, its elapsed time is reported as a yield interval
  // and the timer is destroyed.
  ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer>* timer,
                   HTMLParserMetrics* metrics_reporter)
      : timer_(timer), reporting_metrics_(metrics_reporter != nullptr) {
    const bool has_previous_timer = reporting_metrics_ && *timer_;
    if (has_previous_timer) {
      metrics_reporter->AddYieldInterval((*timer_)->Elapsed());
      timer_->reset();
    }
  }

  // At the end of the block, installs a fresh timer so the time until the next
  // block starts can be measured.
  ~ScopedYieldTimer() {
    if (!reporting_metrics_)
      return;
    *timer_ = std::make_unique<base::ElapsedTimer>();
  }

 private:
  // Slot owned by the caller; holds the timer that spans successive blocks.
  std::unique_ptr<base::ElapsedTimer>* timer_;
  // Whether a metrics reporter was supplied at construction.
  bool reporting_metrics_;
};
// Document-parser constructor. Delegates to the common constructor with
// scripting allowed, then creates the script runner and a tree builder bound
// to |document|.
HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document,
                                       ParserSynchronizationPolicy sync_policy,
                                       ParserPrefetchPolicy prefetch_policy)
    : HTMLDocumentParser(document,
                         kAllowScriptingContent,
                         sync_policy,
                         prefetch_policy) {
  script_runner_ =
      HTMLParserScriptRunner::Create(ReentryPermit(), &document, this);

  // Declarative shadow DOM is allowed for the document parser unless it has
  // been explicitly denied.
  const auto shadow_root_allow_state =
      document.GetDeclarativeShadowRootAllowState();
  const bool include_shadow_roots =
      shadow_root_allow_state !=
      Document::DeclarativeShadowRootAllowState::kDeny;

  tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
      this, document, kAllowScriptingContent, options_, include_shadow_roots);
}
// Fragment-parser constructor. Always parses synchronously, creates no script
// runner, and starts the tokenizer in the state implied by |context_element|.
HTMLDocumentParser::HTMLDocumentParser(
    DocumentFragment* fragment,
    Element* context_element,
    ParserContentPolicy parser_content_policy,
    ParserPrefetchPolicy parser_prefetch_policy)
    : HTMLDocumentParser(fragment->GetDocument(),
                         parser_content_policy,
                         kForceSynchronousParsing,
                         parser_prefetch_policy) {
  // Declarative shadow DOM is allowed for the fragment parser only when it has
  // been explicitly enabled.
  const auto shadow_root_allow_state =
      fragment->GetDocument().GetDeclarativeShadowRootAllowState();
  const bool include_shadow_roots =
      shadow_root_allow_state ==
      Document::DeclarativeShadowRootAllowState::kAllow;

  // No script_runner_ in fragment parser.
  tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
      this, fragment, context_element, parser_content_policy, options_,
      include_shadow_roots);

  // For now document fragment parsing never reports errors.
  const bool report_errors = false;
  const HTMLTokenizer::State initial_state = TokenizerStateForContextElement(
      context_element, report_errors, options_);
  tokenizer_->SetState(initial_state);
}
namespace {

// Returns the tokenization budget to use. The "MaxTokenizationBudget" field
// trial parameter of features::kForceSynchronousHTMLParsing is looked up once,
// on first call, with kDefaultMaxTokenizationBudget as the fallback; the
// result is cached for all subsequent calls.
int GetMaxTokenizationBudget() {
  static const int budget = base::GetFieldTrialParamByFeatureAsInt(
      features::kForceSynchronousHTMLParsing, "MaxTokenizationBudget",
      kDefaultMaxTokenizationBudget);
  return budget;
}

}  // namespace
// Common constructor that the public document and fragment constructors
// delegate to. Which helpers get created depends on |sync_policy|:
//  - token_ / tokenizer_: created for every policy except
//    kAllowAsynchronousParsing.
//  - loading_task_runner_: null only for kForceSynchronousParsing.
//  - parser_scheduler_: created only for kAllowAsynchronousParsing.
//  - scheduler_: set only for kAllowDeferredParsing.
// A preloader is created at the end only if |content_policy|, the document's
// frame/import/prefetch status and |prefetch_policy| all permit it.
HTMLDocumentParser::HTMLDocumentParser(Document& document,
ParserContentPolicy content_policy,
ParserSynchronizationPolicy sync_policy,
ParserPrefetchPolicy prefetch_policy)
: ScriptableDocumentParser(document, content_policy),
options_(&document),
reentry_permit_(HTMLParserReentryPermit::Create()),
token_(sync_policy != kAllowAsynchronousParsing
? std::make_unique<HTMLToken>()
: nullptr),
tokenizer_(sync_policy != kAllowAsynchronousParsing
? std::make_unique<HTMLTokenizer>(options_)
: nullptr),
loading_task_runner_(sync_policy == kForceSynchronousParsing
? nullptr
: document.GetTaskRunner(TaskType::kNetworking)),
parser_scheduler_(sync_policy == kAllowAsynchronousParsing
? MakeGarbageCollected<HTMLParserScheduler>(
this,
loading_task_runner_.get())
: nullptr),
task_runner_state_(
MakeGarbageCollected<HTMLDocumentParserState>(sync_policy)),
pending_csp_meta_token_(nullptr),
can_parse_asynchronously_(sync_policy == kAllowAsynchronousParsing),
end_was_delayed_(false),
have_background_parser_(false),
pump_session_nesting_level_(0),
pump_speculations_session_nesting_level_(0),
is_parsing_at_line_number_(false),
tried_loading_link_headers_(false),
added_pending_parser_blocking_stylesheet_(false),
is_waiting_for_stylesheets_(false),
scheduler_(sync_policy == kAllowDeferredParsing
? Thread::Current()->Scheduler()
: nullptr) {
// A parser that cannot parse asynchronously must own both a token and a
// tokenizer.
DCHECK(CanParseAsynchronously() || (token_ && tokenizer_));
// Asynchronous parsing is not allowed in prefetch mode.
DCHECK(!document.IsPrefetchOnly() || !CanParseAsynchronously());
// It is permissible to request the background HTML parser whilst also using
// --enable-blink-features=ForceSynchronousHTMLParsing, but it's usually
// unintentional. To help flush out these cases, trigger a DCHECK.
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled() ||
!CanParseAsynchronously());
// Report metrics for async document parsing only. The document
// must be main frame to meet UKM requirements, and must have a high
// resolution clock for high quality data.
if (sync_policy == kAllowAsynchronousParsing && document.GetFrame() &&
document.GetFrame()->IsMainFrame() &&
base::TimeTicks::IsHighResolution()) {
metrics_reporter_ = std::make_unique<HTMLParserMetrics>(
document.UkmSourceID(), document.UkmRecorder());
}
// Field-trial value if present, otherwise kDefaultMaxTokenizationBudget.
max_tokenization_budget_ = GetMaxTokenizationBudget();
// Don't create preloader for parsing clipboard content.
if (content_policy == kDisallowScriptingAndPluginContent)
return;
// Create preloader only when the document is:
// - attached to a frame (likely the prefetched resources will be loaded
// soon),
// - a HTML import document (blocks rendering and also resources will be
// loaded soon), or
// - is for no-state prefetch (made specifically for running preloader).
if (!document.GetFrame() && !document.IsHTMLImport() &&
!document.IsPrefetchOnly())
return;
if (prefetch_policy == kAllowPrefetching)
preloader_ = MakeGarbageCollected<HTMLResourcePreloader>(document);
}
// Defaulted out of line. Stopping the background parser is handled by
// Dispose() and Detach() rather than by the destructor.
HTMLDocumentParser::~HTMLDocumentParser() = default;
// Stops the background parser, if there is one.
void HTMLDocumentParser::Dispose() {
  // In Oilpan, HTMLDocumentParser can die together with Document, and detach()
  // is not called in this case.
  if (!have_background_parser_)
    return;
  StopBackgroundParser();
}
// Reports the garbage-collected members held by this parser to |visitor|, then
// forwards to both base classes so they can report theirs.
void HTMLDocumentParser::Trace(Visitor* visitor) const {
visitor->Trace(tree_builder_);
visitor->Trace(parser_scheduler_);
visitor->Trace(script_runner_);
visitor->Trace(preloader_);
visitor->Trace(task_runner_state_);
ScriptableDocumentParser::Trace(visitor);
HTMLParserScriptRunnerHost::Trace(visitor);
}
// Test hook: true while a deferred tokenizer pump (with or without a trailing
// EndIfDelayed) is scheduled.
bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const {
  const bool pump_is_scheduled = task_runner_state_->IsScheduled();
  return pump_is_scheduled;
}
// Tears the parser down: stops the background parser, cancels any scheduled
// tokenizer pump, detaches the base class, script runner, tree builder and
// scheduler, and releases the preload scanners, tokenizer and token.
void HTMLDocumentParser::Detach() {
if (have_background_parser_)
StopBackgroundParser();
// Deschedule any pending tokenizer pumps.
task_runner_state_->SetState(
HTMLDocumentParserState::DeferredParserState::kNotScheduled);
DocumentParser::Detach();
// script_runner_ is only set by the document-parser constructor, so it may
// be null here.
if (script_runner_)
script_runner_->Detach();
tree_builder_->Detach();
// FIXME: It seems wrong that we would have a preload scanner here. Yet during
// fast/dom/HTMLScriptElement/script-load-events.html we do.
preload_scanner_.reset();
insertion_preload_scanner_.reset();
if (parser_scheduler_) {
parser_scheduler_->Detach();
parser_scheduler_.Clear();
}
// Oilpan: It is important to clear token_ to deallocate backing memory of
// HTMLToken::data_ and let the allocator reuse the memory for
// HTMLToken::data_ of a next HTMLDocumentParser. We need to clear
// tokenizer_ first because tokenizer_ has a raw pointer to token_.
tokenizer_.reset();
token_.reset();
}
void HTMLDocumentParser::StopParsing() {
DocumentParser::StopParsing();
if (parser_scheduler_) {
parser_scheduler_->Detach();
parser_scheduler_.Clear();
}
task_runner_state_->SetState(
HTMLDocumentParserState::DeferredParserState::kNotScheduled);
if (have_background_parser_)
StopBackgroundParser();
}
// This kicks off "Once the user agent stops parsing" as described by:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
//
// Performs one final tokenizer pump (when this parser owns a tokenizer and the
// document is not prefetch-only), then moves the document to the interactive
// ready state and attempts to run deferred scripts and end. Bails out early if
// the parser ends up stopped or detached along the way.
void HTMLDocumentParser::PrepareToStopParsing() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser",
               static_cast<void*>(this));

  // FIXME: It may not be correct to disable this for the background parser.
  // That means hasInsertionPoint() may not be correct in some cases.
  DCHECK(!HasInsertionPoint() || have_background_parser_);

  // NOTE: This pump should only ever emit buffered character tokens.
  const bool needs_final_pump =
      tokenizer_ && !GetDocument()->IsPrefetchOnly();
  if (needs_final_pump) {
    DCHECK(!have_background_parser_);
    // Force the pump to complete, but keep it from ending the parser itself.
    ShouldCompleteScope should_complete(task_runner_state_);
    EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
    PumpTokenizerIfPossible();
  }

  if (IsStopped())
    return;

  DocumentParser::PrepareToStopParsing();

  // We will not have a scriptRunner when parsing a DocumentFragment.
  if (script_runner_)
    GetDocument()->SetReadyState(Document::kInteractive);

  // Setting the ready state above can fire mutation event and detach us from
  // underneath. In that case, just bail out.
  if (IsDetached())
    return;

  if (script_runner_)
    script_runner_->RecordMetricsAtParseEnd();

  AttemptToRunDeferredScriptsAndEnd();
}
// Whether this parser is parsing a fragment; the answer comes from the tree
// builder.
bool HTMLDocumentParser::IsParsingFragment() const {
  const bool parsing_fragment = tree_builder_->IsParsingFragment();
  return parsing_fragment;
}
void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() {
// This method is called asynchronously, continues building the HTML document.
// This function should only be called when
// --enable-blink-features=ForceSynchronousHTMLParsing is available.
DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
// If we're scheduled for a tokenizer pump, then document should be attached
// and the parser should not be stopped, but sometimes a script completes
// loading (so we schedule a pump) but the Document is stopped in the meantime
// (e.g. fast/parser/iframe-onload-document-close-with-external-script.html).
DCHECK(task_runner_state_->GetState() ==
HTMLDocumentParserState::DeferredParserState::kNotScheduled ||
!IsDetached());
TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible",
"parser", (void*)this, "state",
task_runner_state_->GetStateAsString());
bool should_call_delay_end =
task_runner_state_->GetState() ==
HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed;
if (task_runner_state_->IsScheduled()) {
task_runner_state_->SetState(
HTMLDocumentParserState::DeferredParserState::kNotScheduled);
if (should_call_delay_end) {
EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
PumpTokenizerIfPossible();
EndIfDelayed();
} else {
PumpTokenizerIfPossible();
}
}
}
// This method is called synchronously, builds the HTML document up to
// the current budget, and optionally completes.
//
// After the pump, exactly one follow-up is chosen:
//  - the pump yielded                  -> schedule another pump;
//  - ::Finish was requested earlier    -> AttemptToEnd();
//  - ending is currently permitted     -> EndIfDelayed() now, or schedule it;
//  - otherwise                         -> nothing.
void HTMLDocumentParser::PumpTokenizerIfPossible() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser",
               static_cast<void*>(this));

  CheckIfBlockingStylesheetAdded();

  const bool may_pump =
      !IsStopped() &&
      (!IsPaused() || task_runner_state_->ShouldEndIfDelayed());
  const bool yielded = may_pump && PumpTokenizer();

  if (yielded) {
    DCHECK(!task_runner_state_->ShouldComplete());
    SchedulePumpTokenizer();
    return;
  }

  if (task_runner_state_->ShouldAttemptToEndOnEOF()) {
    // Fall into this branch if ::Finish has been previously called and we've
    // just finished asynchronously parsing everything.
    AttemptToEnd();
    return;
  }

  if (!task_runner_state_->ShouldEndIfDelayed())
    return;

  // If we did not exceed the budget or parsed everything there was to
  // parse, check if we should complete the document.
  const bool end_now =
      task_runner_state_->ShouldComplete() || IsStopped() || IsStopping();
  if (end_now)
    EndIfDelayed();
  else
    ScheduleEndIfDelayed();
}
// True only when a parser scheduler exists and reports that it is scheduled
// for unpause.
bool HTMLDocumentParser::IsScheduledForUnpause() const {
  if (!parser_scheduler_)
    return false;
  return parser_scheduler_->IsScheduledForUnpause();
}
// Used by HTMLParserScheduler
//
// Continues processing queued speculations after a yield, unless the parser
// has been stopped or paused in the meantime. The yield interval is recorded
// through ScopedYieldTimer in either case.
void HTMLDocumentParser::ResumeParsingAfterYield() {
  DCHECK(CanParseAsynchronously());
  DCHECK(have_background_parser_);
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  ScopedYieldTimer timer(&yield_timer_, metrics_reporter_.get());

  CheckIfBlockingStylesheetAdded();
  const bool can_resume = !IsStopped() && !IsPaused();
  if (can_resume)
    PumpPendingSpeculations();
}
void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() {
TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder",
"parser", (void*)this);
DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy()));
TextPosition script_start_position = TextPosition::BelowRangePosition();
Element* script_element =
tree_builder_->TakeScriptToProcess(script_start_position);
// We will not have a scriptRunner when parsing a DocumentFragment.
if (script_runner_)
script_runner_->ProcessScriptElement(script_element, script_start_position);
CheckIfBlockingStylesheetAdded();
}
// Decides whether the caller may take another token.
// Returns NoTokens when the parser is stopped, or is stopped/paused after any
// pending parser-blocking script has been run; HaveTokensAfterScript when a
// parser-blocking script was run first; HaveTokens otherwise.
HTMLDocumentParser::NextTokenStatus HTMLDocumentParser::CanTakeNextToken() {
  if (IsStopped())
    return NoTokens;

  // If we're paused waiting for a script, we try to execute scripts before
  // continuing.
  const bool had_blocking_script = tree_builder_->HasParserBlockingScript();
  if (had_blocking_script)
    RunScriptsForPausedTreeBuilder();

  if (IsStopped() || IsPaused())
    return NoTokens;

  return had_blocking_script ? HaveTokensAfterScript : HaveTokens;
}
// Accepts a tokenized chunk and queues it on speculations_. Before queueing it:
//  1. dispatches media link-header preloads once, on the first chunk for
//     which the document has a loader;
//  2. remembers the chunk's pending <meta> CSP token, if it has one;
//  3. either issues the chunk's preloads immediately or queues them, depending
//     on the pending CSP token and the AppCache state.
// Finally schedules the parser for unpause unless it is paused or already
// scheduled. The chunk is ignored entirely when the parser is no longer
// parsing.
void HTMLDocumentParser::EnqueueTokenizedChunk(
std::unique_ptr<TokenizedChunk> chunk) {
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
TRACE_EVENT0("blink", "HTMLDocumentParser::EnqueueTokenizedChunk");
DCHECK(chunk);
DCHECK(GetDocument());
if (!IsParsing())
return;
// ApplicationCache needs to be initialized before issuing preloads. We
// suspend preload until HTMLHTMLElement is inserted and ApplicationCache is
// initialized. Note: link rel preloads don't follow this policy per the spec.
// These directives should initiate a fetch as fast as possible.
if (!tried_loading_link_headers_ && GetDocument()->Loader()) {
// Note that on commit, the loader dispatched preloads for all the non-media
// links.
GetDocument()->Loader()->DispatchLinkHeaderPreloads(
base::OptionalOrNullptr(chunk->viewport),
PreloadHelper::kOnlyLoadMedia);
tried_loading_link_headers_ = true;
if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
// Link header preloads for prefetched signed exchanges won't be started
// until StartPrefetchedLinkHeaderPreloads() is called. See the header
// comment of PrefetchedSignedExchangeManager.
GetDocument()
->Loader()
->GetPrefetchedSignedExchangeManager()
->StartPrefetchedLinkHeaderPreloads();
}
}
// Defer preloads if any of the chunks contains a <meta> csp tag.
// The stored pointer refers to a token inside |chunk|; it is compared by
// address in ProcessTokenizedChunkFromBackgroundParser().
if (chunk->pending_csp_meta_token_index != TokenizedChunk::kNoPendingToken) {
pending_csp_meta_token_ =
&chunk->tokens.at(chunk->pending_csp_meta_token_index);
}
if (preloader_) {
bool appcache_fetched = false;
if (GetDocument()->Loader()) {
appcache_fetched = (GetDocument()->Loader()->GetResponse().AppCacheID() !=
mojom::blink::kAppCacheNoCacheId);
}
// The presence of a document element is used as the "AppCache initialized"
// signal.
bool appcache_initialized = GetDocument()->documentElement();
// Delay sending some requests if meta tag based CSP is present or
// if AppCache was used to fetch the HTML but was not yet initialized for
// this document.
if (pending_csp_meta_token_ ||
((!base::FeatureList::IsEnabled(
blink::features::kVerifyHTMLFetchedFromAppCacheBeforeDelay) ||
appcache_fetched) &&
!appcache_initialized)) {
PreloadRequestStream link_rel_preloads;
for (auto& request : chunk->preloads) {
// Link rel preloads don't need to wait for AppCache but they
// should probably wait for CSP.
if (!pending_csp_meta_token_ && request->IsLinkRelPreload())
link_rel_preloads.push_back(std::move(request));
else
queued_preloads_.push_back(std::move(request));
}
preloader_->TakeAndPreload(link_rel_preloads);
} else {
// We can safely assume that there are no queued preloads request after
// the document element is available, as we empty the queue immediately
// after the document element is created in documentElementAvailable().
DCHECK(queued_preloads_.IsEmpty());
preloader_->TakeAndPreload(chunk->preloads);
}
}
// Ownership of the chunk moves to the speculation queue.
speculations_.push_back(std::move(chunk));
if (!IsPaused() && !IsScheduledForUnpause())
parser_scheduler_->ScheduleForUnpause();
}
void HTMLDocumentParser::DidReceiveEncodingDataFromBackgroundParser(
const DocumentEncodingData& data) {
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
GetDocument()->SetEncodingData(data);
}
// Decides whether the queued speculations are still usable after |chunk| has
// been processed on the main thread.
//  - While paused, |chunk| is parked in last_chunk_before_pause_ and nothing
//    else happens.
//  - If no main-thread tokenizer exists, the speculations are kept.
//  - If the tokenizer and the chunk both ended in the Data state, no input is
//    left over and the tree builder state matches, the speculations are kept.
//  - Otherwise they are discarded and parsing resumes from |chunk|'s
//    checkpoint.
// In every non-paused case tokenizer_ and token_ are moved out of the parser,
// so both are null afterwards.
void HTMLDocumentParser::ValidateSpeculations(
std::unique_ptr<TokenizedChunk> chunk) {
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
DCHECK(chunk);
// TODO(kouhei): We should simplify codepath here by disallowing
// ValidateSpeculations
// while IsPaused, and last_chunk_before_pause_ can simply be
// pushed to speculations_.
if (IsPaused()) {
// We're waiting on a network script or stylesheet, just save the chunk,
// we'll get a second ValidateSpeculations call after the script or
// stylesheet completes. This call should have been made immediately after
// RunScriptsForPausedTreeBuilder in the script case which may have started
// a network load and left us waiting.
DCHECK(!last_chunk_before_pause_);
last_chunk_before_pause_ = std::move(chunk);
return;
}
DCHECK(!last_chunk_before_pause_);
// Take ownership of the main-thread tokenizer and token; they are either
// dropped at the end of this function or handed on to
// DiscardSpeculationsAndResumeFrom().
std::unique_ptr<HTMLTokenizer> tokenizer = std::move(tokenizer_);
std::unique_ptr<HTMLToken> token = std::move(token_);
if (!tokenizer) {
// There must not have been any changes to the HTMLTokenizer state on the
// main thread, which means the speculation buffer is correct.
return;
}
// Currently we're only smart enough to reuse the speculation buffer if the
// tokenizer both starts and ends in the DataState. That state is simplest
// because the HTMLToken is always in the Uninitialized state. We should
// consider whether we can reuse the speculation buffer in other states, but
// we'd likely need to do something more sophisticated with the HTMLToken.
if (chunk->tokenizer_state == HTMLTokenizer::kDataState &&
tokenizer->GetState() == HTMLTokenizer::kDataState &&
input_.Current().IsEmpty() &&
chunk->tree_builder_state ==
HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get())) {
DCHECK(token->IsUninitialized());
return;
}
DiscardSpeculationsAndResumeFrom(std::move(chunk), std::move(token),
std::move(tokenizer));
}
void HTMLDocumentParser::DiscardSpeculationsAndResumeFrom(
std::unique_ptr<TokenizedChunk> last_chunk_before_script,
std::unique_ptr<HTMLToken> token,
std::unique_ptr<HTMLTokenizer> tokenizer) {
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
// Clear back ref.
background_parser_->ClearParser();
size_t discarded_token_count = 0;
for (const auto& speculation : speculations_) {
discarded_token_count += speculation->tokens.size();
}
g_discarded_token_count_for_testing += discarded_token_count;
speculations_.clear();
pending_csp_meta_token_ = nullptr;
queued_preloads_.clear();
std::unique_ptr<BackgroundHTMLParser::Checkpoint> checkpoint =
std::make_unique<BackgroundHTMLParser::Checkpoint>();
checkpoint->parser = this;
checkpoint->token = std::move(token);
checkpoint->tokenizer = std::move(tokenizer);
checkpoint->tree_builder_state =
HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get());
checkpoint->input_checkpoint = last_chunk_before_script->input_checkpoint;
checkpoint->preload_scanner_checkpoint =
last_chunk_before_script->preload_scanner_checkpoint;
checkpoint->unparsed_input = input_.Current().ToString().IsolatedCopy();
// FIXME: This should be passed in instead of cleared.
input_.Current().Clear();
DCHECK(checkpoint->unparsed_input.IsSafeToSendToAnotherThread());
loading_task_runner_->PostTask(
FROM_HERE, WTF::Bind(&BackgroundHTMLParser::ResumeFrom,
background_parser_, std::move(checkpoint)));
}
// Feeds the tokens of |pop_chunk| to the tree builder one at a time.
// Processing stops early when the parser is stopped, when it becomes paused
// (the chunk is then handed to ValidateSpeculations()), or when the
// end-of-file token has been handled.
//
// |reached_end_of_file| is set to true when the end-of-file token was
// processed; it is never written otherwise, so the caller must initialise it.
//
// Returns the number of start-tag and end-tag tokens processed, counted only
// when the chunk's starting_script flag is not set.
size_t HTMLDocumentParser::ProcessTokenizedChunkFromBackgroundParser(
std::unique_ptr<TokenizedChunk> pop_chunk,
bool* reached_end_of_file) {
TRACE_EVENT_WITH_FLOW0(
"blink,loading",
"HTMLDocumentParser::processTokenizedChunkFromBackgroundParser",
pop_chunk.get(), TRACE_EVENT_FLAG_FLOW_IN);
// Restores is_parsing_at_line_number_ to its previous value on scope exit.
base::AutoReset<bool> has_line_number(&is_parsing_at_line_number_, true);
SECURITY_DCHECK(pump_speculations_session_nesting_level_ == 1);
SECURITY_DCHECK(!InPumpSession());
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
DCHECK(!IsParsingFragment());
DCHECK(!IsPaused());
DCHECK(!IsStopped());
DCHECK(CanParseAsynchronously());
DCHECK(!tokenizer_);
DCHECK(!token_);
DCHECK(!last_chunk_before_pause_);
std::unique_ptr<TokenizedChunk> chunk(std::move(pop_chunk));
// NOTE: |tokens| refers into |chunk|; it must not be used once |chunk| has
// been moved away (see the IsPaused() branch below, which breaks right after).
const CompactHTMLTokenStream& tokens = chunk->tokens;
size_t element_token_count = 0;
// Tell the background parser which input checkpoint this chunk started from.
loading_task_runner_->PostTask(
FROM_HERE, WTF::Bind(&BackgroundHTMLParser::StartedChunkWithCheckpoint,
background_parser_, chunk->input_checkpoint));
for (const auto& token : tokens) {
DCHECK(!IsWaitingForScripts());
if (!chunk->starting_script && (token.GetType() == HTMLToken::kStartTag ||
token.GetType() == HTMLToken::kEndTag))
element_token_count++;
text_position_ = token.GetTextPosition();
ConstructTreeFromCompactHTMLToken(token);
if (IsStopped())
break;
// Preloads were queued if there was a <meta> csp token in a tokenized
// chunk.
// The token is identified by address, matching the pointer stored by
// EnqueueTokenizedChunk().
if (pending_csp_meta_token_ && &token == pending_csp_meta_token_) {
pending_csp_meta_token_ = nullptr;
FetchQueuedPreloads();
}
if (IsPaused()) {
// The script or stylesheet should be the last token of this bunch.
DCHECK_EQ(&token, &tokens.back());
if (IsWaitingForScripts())
RunScriptsForPausedTreeBuilder();
// Ownership of |chunk| is given away here; the loop must exit immediately.
ValidateSpeculations(std::move(chunk));
break;
}
if (token.GetType() == HTMLToken::kEndOfFile) {
// The EOF is assumed to be the last token of this bunch.
DCHECK_EQ(&token, &tokens.back());
// There should never be any chunks after the EOF.
DCHECK(speculations_.IsEmpty());
PrepareToStopParsing();
*reached_end_of_file = true;
break;
}
DCHECK(!tokenizer_);
DCHECK(!token_);
}
// Make sure all required pending text nodes are emitted before returning.
// This leaves "script", "style" and "svg" nodes text nodes intact.
if (!IsStopped())
tree_builder_->Flush(kFlushIfAtTextLimit);
is_parsing_at_line_number_ = false;
return element_token_count;
}
// Drains |speculations_| (chunks queued from the background parser), handing
// each chunk to ProcessTokenizedChunkFromBackgroundParser. The loop ends when
// the queue is empty, when the parser is no longer parsing, is paused or is
// scheduled for unpause, or when the scheduler asks to yield. If the parser is
// waiting for scripts, or a pump session is already active, nothing is
// processed and an unpause is scheduled instead.
void HTMLDocumentParser::PumpPendingSpeculations() {
// If this assert fails, you need to call ValidateSpeculations to make sure
// tokenizer_ and token_ don't have state that invalidates speculations_.
DCHECK(!tokenizer_);
DCHECK(!token_);
DCHECK(!last_chunk_before_pause_);
DCHECK(!IsPaused());
DCHECK(!IsStopped());
DCHECK(!IsScheduledForUnpause());
DCHECK(!InPumpSession());
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
// FIXME: Here should never be reached when there is a blocking script,
// but it happens in unknown scenarios. See https://crbug.com/440901
if (IsWaitingForScripts()) {
parser_scheduler_->ScheduleForUnpause();
return;
}
// Do not allow pumping speculations in nested event loops.
if (pump_speculations_session_nesting_level_) {
parser_scheduler_->ScheduleForUnpause();
return;
}
probe::ParseHTML probe(GetDocument(), this);
// |session| is what the element-token counts and elapsed time reported below
// are read from.
SpeculationsPumpSession session(pump_speculations_session_nesting_level_);
bool reached_end_of_file = false;
while (!speculations_.IsEmpty()) {
DCHECK(!IsScheduledForUnpause());
size_t element_token_count = ProcessTokenizedChunkFromBackgroundParser(
speculations_.TakeFirst(), &reached_end_of_file);
session.AddedElementTokens(element_token_count);
// Always check IsParsing first as document_ may be null. Surprisingly,
// IsScheduledForUnpause() may be set here as a result of
// ProcessTokenizedChunkFromBackgroundParser running arbitrary javascript
// which invokes nested event loops. (e.g. inspector breakpoints)
CheckIfBlockingStylesheetAdded();
if (!IsParsing() || IsPaused() || IsScheduledForUnpause())
break;
// The emptiness check must come first: front() is only evaluated when at
// least one more chunk is queued.
if (speculations_.IsEmpty() ||
parser_scheduler_->YieldIfNeeded(
session, speculations_.front()->starting_script))
break;
}
// Report what this pump session processed; the end-of-parse report is only
// sent when an EOF token was processed during this call.
if (metrics_reporter_) {
metrics_reporter_->AddChunk(session.ElapsedTime(),
session.ProcessedElementTokens());
if (reached_end_of_file)
metrics_reporter_->ReportMetricsAtParseEnd();
}
}
// Forces PLAINTEXT tokenization, either through the background parser or
// directly on the main-thread tokenizer.
void HTMLDocumentParser::ForcePlaintextForTextDocument() {
  if (!CanParseAsynchronously()) {
    tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState);
    return;
  }

  // No data has been appended yet when this is called, so the background
  // parser has to be started here if it does not exist.
  if (!have_background_parser_) {
    StartBackgroundParser();
  }

  // Deliberately a direct (synchronous) call rather than a posted task:
  // otherwise tokenizing could happen before plaintext is forced.
  background_parser_->ForcePlaintextForTextDocument();
}
// Runs the main-thread tokenizer over input_.Current() and builds the tree one
// token at a time until there are no tokens left, the tokenizer cannot produce
// another token, or the pump decides to yield.
//
// Returns true when the loop ended because it decided to yield. Returns false
// when it ended for any other reason, including when the parser was stopped.
bool HTMLDocumentParser::PumpTokenizer() {
DCHECK(!GetDocument()->IsPrefetchOnly());
DCHECK(!IsStopped());
DCHECK(tokenizer_);
DCHECK(token_);
// NOTE(review): presumably adjusts pump_session_nesting_level_ for the
// lifetime of |session|; that level is read just below - confirm.
PumpSession session(pump_session_nesting_level_);
// If we're in kForceSynchronousParsing, always run until all available input
// is consumed.
bool should_run_until_completion = task_runner_state_->ShouldComplete() ||
task_runner_state_->IsSynchronous() ||
pump_session_nesting_level_ > 1;
TRACE_EVENT2("blink", "HTMLDocumentParser::PumpTokenizer", "should_complete",
should_run_until_completion, "parser", (void*)this);
// We tell the InspectorInstrumentation about every pump, even if we end up
// pumping nothing. It can filter out empty pumps itself.
// FIXME: input_.Current().length() is only accurate if we end up parsing the
// whole buffer in this pump. We should pass how much we parsed as part of
// DidWriteHTML instead of WillWriteHTML.
probe::ParseHTML probe(GetDocument(), this);
bool should_yield = false;
// Number of tokens this pump may still tokenize before it considers yielding.
int budget = max_tokenization_budget_;
while (!should_yield) {
const auto next_token_status = CanTakeNextToken();
if (next_token_status == NoTokens) {
// No tokens left to process in this pump, so break
break;
} else if (next_token_status == HaveTokensAfterScript &&
task_runner_state_->HaveExitedHeader()) {
// Just executed a parser-blocking script in the body (which is usually
// very expensive), so expire the budget, yield, and permit paint if
// needed.
budget = 0;
if (!should_run_until_completion) {
should_yield = true;
break;
}
}
{
RUNTIME_CALL_TIMER_SCOPE(
V8PerIsolateData::MainThreadIsolate(),
RuntimeCallStats::CounterId::kHTMLTokenizerNextToken);
// NextToken() returning false ends the pump without yielding.
if (!tokenizer_->NextToken(input_.Current(), Token()))
break;
budget--;
}
ConstructTreeFromHTMLToken();
// Yield only when the budget is spent or the scheduler reports higher
// priority work, and never before the header has been exited. A pump that
// must run to completion, or a paused parser, never yields here.
if (!should_run_until_completion && !IsPaused()) {
DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing);
should_yield = budget <= 0;
should_yield |= scheduler_->ShouldYieldForHighPriorityWork();
should_yield &= task_runner_state_->HaveExitedHeader();
} else {
should_yield = false;
}
DCHECK(IsStopped() || Token().IsUninitialized());
}
if (IsStopped())
return false;
// There should only be PendingText left since the tree-builder always flushes
// the task queue before returning. In case that ever changes, crash.
tree_builder_->Flush(kFlushAlways);
CHECK(!IsStopped());
// While paused, run the preload scanner over the remaining input; the scanner
// is created here if it does not exist yet.
if (IsPaused()) {
DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState);
if (preloader_) {
if (!preload_scanner_) {
preload_scanner_ = CreatePreloadScanner(
TokenPreloadScanner::ScannerType::kMainDocument);
preload_scanner_->AppendToEnd(input_.Current());
}
ScanAndPreload(preload_scanner_.get());
}
}
// should_run_until_completion implies that we should not yield
CHECK(!should_run_until_completion || !should_yield);
return should_yield;
}
// Posts a deferred tokenizer pump to the loading task runner unless one is
// already scheduled, and records the scheduled state.
void HTMLDocumentParser::SchedulePumpTokenizer() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer");
  DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(!InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());

  // Nothing to do when a pump is already scheduled.
  if (!task_runner_state_->IsScheduled()) {
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                  WrapPersistent(this)));
    task_runner_state_->SetState(
        HTMLDocumentParserState::DeferredParserState::kScheduled);
  }
}
// Ensures a deferred pump is scheduled and marks it as one that calls
// EndIfDelayed afterwards.
void HTMLDocumentParser::ScheduleEndIfDelayed() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed");
  DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(!InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());

  // Only post a new pump callback when none is pending.
  const bool pump_already_pending = task_runner_state_->IsScheduled();
  if (!pump_already_pending) {
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                  WrapPersistent(this)));
  }

  // Whether the pump was just posted or was already pending, upgrade it to
  // the variant that calls EndIfDelayed once it has run.
  task_runner_state_->SetState(
      HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed);
}
// Converts the current HTMLToken into an AtomicHTMLToken, hands it to the tree
// builder, and leaves the HTMLToken cleared (unless the tokenizer state was
// torn down during tree construction).
void HTMLDocumentParser::ConstructTreeFromHTMLToken() {
DCHECK(!GetDocument()->IsPrefetchOnly());
// Must be built before the token is cleared below.
AtomicHTMLToken atomic_token(Token());
// Check whether we've exited the header.
if (!task_runner_state_->HaveExitedHeader()) {
if (GetDocument()->body()) {
task_runner_state_->SetExitedHeader();
}
}
// We clear the token_ in case ConstructTreeFromAtomicToken
// synchronously re-enters the parser. We don't clear the token immediately
// for kCharacter tokens because the AtomicHTMLToken avoids copying the
// characters by keeping a pointer to the underlying buffer in the
// HTMLToken. Fortunately, kCharacter tokens can't cause us to re-enter
// the parser.
//
// FIXME: Stop clearing the token_ once we start running the parser off
// the main thread or once we stop allowing synchronous JavaScript
// execution from ParseAttribute.
if (Token().GetType() != HTMLToken::kCharacter)
Token().Clear();
tree_builder_->ConstructTree(&atomic_token);
CheckIfBlockingStylesheetAdded();
// FIXME: ConstructTree may synchronously cause Document to be detached.
// token_ is gone in that case, so there is nothing left to clear.
if (!token_)
return;
// Only a kCharacter token can still be initialized here, because every other
// token type was cleared before tree construction.
if (!Token().IsUninitialized()) {
DCHECK_EQ(Token().GetType(), HTMLToken::kCharacter);
Token().Clear();
}
}
void HTMLDocumentParser::ConstructTreeFromCompactHTMLToken(
const CompactHTMLToken& compact_token) {
DCHECK(!GetDocument()->IsPrefetchOnly());
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
AtomicHTMLToken token(compact_token);
tree_builder_->ConstructTree(&token);
CheckIfBlockingStylesheetAdded();
}
// Returns true when the input stream has an insertion point, or when this
// parser was created by script and has not yet seen end-of-file.
//
// FIXME: The WasCreatedByScript() case might not be fully correct. Our model
// of the EOF character differs slightly from the one in the spec because our
// treatment is uniform between network-sourced and script-sourced input
// streams, whereas the spec treats them differently.
bool HTMLDocumentParser::HasInsertionPoint() {
  if (input_.HasInsertionPoint()) {
    return true;
  }
  if (!WasCreatedByScript()) {
    return false;
  }
  return !input_.HaveSeenEndOfFile();
}
void HTMLDocumentParser::insert(const String& source) {
if (IsStopped())
return;
TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length",
source.length(), "parser", (void*)this);
if (!tokenizer_) {
DCHECK(!InPumpSession());
DCHECK(have_background_parser_ || WasCreatedByScript());
token_ = std::make_unique<HTMLToken>();
tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
}
SegmentedString excluded_line_number_source(source);
excluded_line_number_source.SetExcludeLineNumbers();
input_.InsertAtCurrentInsertionPoint(excluded_line_number_source);
// Pump the the tokenizer to build the document from the given insert point.
// Should process everything available and not defer anything.
ShouldCompleteScope should_complete(task_runner_state_);
EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
// Call EndIfDelayed manually at the end to maintain preload behaviour.
PumpTokenizerIfPossible();
if (IsPaused()) {
// Check the document.write() output with a separate preload scanner as
// the main scanner can't deal with insertions.
if (!insertion_preload_scanner_) {
insertion_preload_scanner_ =
CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion);
}
insertion_preload_scanner_->AppendToEnd(source);
if (preloader_) {
ScanAndPreload(insertion_preload_scanner_.get());
}
}
EndIfDelayed();
}
void HTMLDocumentParser::StartBackgroundParser() {
TRACE_EVENT0("blink,loading", "HTMLDocumentParser::StartBackgroundParser");
DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
DCHECK(!IsStopped());
DCHECK(CanParseAsynchronously());
DCHECK(!have_background_parser_);
DCHECK(GetDocument());
have_background_parser_ = true;
// Make sure that the viewport is up-to-date, so that the correct viewport
// dimensions will be fed to the background parser and preload scanner.
if (GetDocument()->Loader())
GetDocument()->GetStyleEngine().UpdateViewport();
std::unique_ptr<BackgroundHTMLParser::Configuration> config =
std::make_unique<BackgroundHTMLParser::Configuration>();
config->options = options_;
config->parser = this;
config->decoder = TakeDecoder();
// The background parser is created on the main thread, but may otherwise
// only be used from the parser thread.
background_parser_ =
BackgroundHTMLParser::Create(std::move(config), loading_task_runner_);
// TODO(csharrison): This is a hack to initialize MediaValuesCached on the
// correct thread. We should get rid of it.
// TODO(domfarolino): Remove this once Priority Hints is no longer in Origin
// Trial. This currently exists because the TokenPreloadScanner needs to know
// the status of the Priority Hints Origin Trial, and has no way of figuring
// this out on its own. See https://crbug.com/821464.
bool priority_hints_origin_trial_enabled =
RuntimeEnabledFeatures::PriorityHintsEnabled(
GetDocument()->GetExecutionContext());
background_parser_->Init(
GetDocument()->Url(),
std::make_unique<CachedDocumentParameters>(GetDocument()),
MediaValuesCached::MediaValuesCachedData(*GetDocument()),
priority_hints_origin_trial_enabled);
}
// Marks the background parser as gone and stops it.
void HTMLDocumentParser::StopBackgroundParser() {
  DCHECK(CanParseAsynchronously());
  DCHECK(have_background_parser_);
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  have_background_parser_ = false;

  // Stop() is called directly instead of being posted to the task runner,
  // because lsan triggers on some unittests if the task runner is used.
  background_parser_->Stop();
}
// Appends decoded text to the main-thread input stream and either pumps the
// tokenizer or schedules a pump for it.
//
// |input_source| is the decoded text to append. Nothing happens if the parser
// has already stopped. For prefetch-only documents the text is only fed to the
// preload scanner and is never parsed.
void HTMLDocumentParser::Append(const String& input_source) {
  TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size",
               input_source.length(), "parser", (void*)this);

  if (IsStopped())
    return;

  // We should never reach this point if we're using a parser thread, as
  // appendBytes() will directly ship the data to the thread.
  DCHECK(!CanParseAsynchronously());

  const SegmentedString source(input_source);

  if (!preload_scanner_ && GetDocument()->Url().IsValid() &&
      (!task_runner_state_->IsSynchronous() ||
       GetDocument()->IsPrefetchOnly() || IsPaused())) {
    // If we're operating with synchronous, budgeted foreground HTML parsing
    // or using the background parser, need to create a preload scanner to
    // make sure that parser-blocking Javascript requests are dispatched in
    // plenty of time, which prevents unnecessary delays.
    // When parsing without a budget (e.g. for HTML fragment parsing), it's
    // additional overhead to scan the string unless the parser's already
    // paused whilst executing a script.
    preload_scanner_ =
        CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument);
  }

  if (GetDocument()->IsPrefetchOnly()) {
    // Do not prefetch if there is an appcache.
    if (GetDocument()->Loader()->GetResponse().AppCacheID() != 0)
      return;

    // The scanner is only created above when the document URL is valid, so it
    // can still be null here. Skip scanning in that case instead of
    // dereferencing a null pointer.
    if (preload_scanner_) {
      preload_scanner_->AppendToEnd(source);
      if (preloader_) {
        // TODO(Richard.Townsend@arm.com): add test coverage of this branch.
        // The crash in crbug.com/1166786 indicates that text documents are
        // being speculatively prefetched.
        ScanAndPreload(preload_scanner_.get());
      }
    }

    // Return after the preload scanner, do not actually parse the document.
    return;
  }

  if (preload_scanner_ && preloader_) {
    preload_scanner_->AppendToEnd(source);
    if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
        (IsPaused() || !task_runner_state_->HaveSeenFirstByte())) {
      // Should scan and preload if the parser's paused waiting for a resource,
      // or if we're starting a document for the first time (we want to at least
      // prefetch anything that's in the <head> section).
      ScanAndPreload(preload_scanner_.get());
    }
  }

  input_.AppendToEnd(source);
  task_runner_state_->SetHaveSeenFirstByte();

  if (InPumpSession()) {
    // We've gotten data off the network in a nested write. We don't want to
    // consume any more of the input stream now. Do not worry. We'll consume
    // this data in a less-nested write().
    return;
  }

  // Process the new data: deferred parsing that is not required to complete
  // schedules a pump, everything else pumps right away.
  if (task_runner_state_->GetMode() ==
          ParserSynchronizationPolicy::kAllowDeferredParsing &&
      !task_runner_state_->ShouldComplete()) {
    SchedulePumpTokenizer();
  } else {
    PumpTokenizerIfPossible();
  }
}
// Finishes parsing: stops the background parser if one is running, tells the
// tree builder it is finished, drops the preloader and stops the parser.
void HTMLDocumentParser::end() {
  DCHECK(!IsDetached());
  DCHECK(!IsScheduledForUnpause());

  if (have_background_parser_) {
    StopBackgroundParser();
  }

  // Informs the rest of the engine that parsing is really finished.
  tree_builder_->Finished();

  // All preloads should be done by now.
  preloader_ = nullptr;

  DocumentParser::StopParsing();
}
// Runs the scripts that wait for parsing to finish and, unless the script
// runner reports that it could not complete them, ends the parser.
void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() {
  DCHECK(IsStopping());
  // FIXME: It may not be correct to disable this for the background parser.
  // That means hasInsertionPoint() may not be correct in some cases.
  DCHECK(!HasInsertionPoint() || have_background_parser_);

  const bool scripts_still_pending =
      script_runner_ && !script_runner_->ExecuteScriptsWaitingForParsing();
  if (!scripts_still_pending) {
    end();
  }
}
// Returns true while ending the parser has to wait: during a pump session,
// while paused, while a script is executing, or while a pump is scheduled.
bool HTMLDocumentParser::ShouldDelayEnd() const {
  if (InPumpSession() || IsPaused()) {
    return true;
  }
  if (IsExecutingScript()) {
    return true;
  }
  return task_runner_state_->IsScheduled();
}
// Called once finish() has signalled that no more data will arrive. Prepares
// to stop parsing right away, or records that the end was delayed (e.g. while
// waiting on an external script) so that EndIfDelayed can complete it later.
void HTMLDocumentParser::AttemptToEnd() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser",
               static_cast<void*>(this));
  DCHECK(task_runner_state_->ShouldAttemptToEndOnEOF());
  AttemptToEndForbiddenScope no_nested_attempt(task_runner_state_);

  // We should only be in this state once after calling Finish. If there are
  // pending scripts, future control flow should pass to EndIfDelayed.
  if (!ShouldDelayEnd()) {
    PrepareToStopParsing();
    return;
  }
  end_was_delayed_ = true;
}
// Completes an end that AttemptToEnd had to delay, once nothing delays it any
// more. Does nothing when detached, when no end was delayed, or when the end
// still has to wait.
void HTMLDocumentParser::EndIfDelayed() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser",
               static_cast<void*>(this));
  ShouldCompleteScope run_to_completion(task_runner_state_);
  EndIfDelayedForbiddenScope no_nested_end_if_delayed(task_runner_state_);

  // A detached parser is never ended; otherwise there must be a delayed end
  // that is now free to proceed.
  if (IsDetached() || !end_was_delayed_ || ShouldDelayEnd()) {
    return;
  }

  end_was_delayed_ = false;
  PrepareToStopParsing();
}
// Signals that no more data will be received and tries to bring parsing to an
// end. With a background parser the finish is forwarded to it; otherwise the
// input stream is marked as ended and the parser attempts to end, unless a
// pump is already scheduled. May be called more than once if an earlier call
// did not reach end().
void HTMLDocumentParser::Finish() {
// FIXME: We should DCHECK(!parser_stopped_) here, since it does not makes
// sense to call any methods on DocumentParser once it's been stopped.
// However, FrameLoader::Stop calls DocumentParser::Finish unconditionally.
ShouldCompleteScope should_complete(task_runner_state_);
EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
Flush();
// Detachment is checked after Flush() and before any further state is
// touched.
if (IsDetached())
return;
// Empty documents never got an append() call, and thus have never started a
// background parser. In those cases, we ignore CanParseAsynchronously() and
// fall through to the synchronous case.
if (have_background_parser_) {
// Close the main-thread input stream without marking EOF, then post the
// finish to the background parser on the loading task runner.
if (!input_.HaveSeenEndOfFile())
input_.CloseWithoutMarkingEndOfFile();
loading_task_runner_->PostTask(
FROM_HERE,
WTF::Bind(&BackgroundHTMLParser::Finish, background_parser_));
return;
}
if (!tokenizer_) {
DCHECK(!token_);
// We're finishing before receiving any data. Rather than booting up the
// background parser just to spin it down, we finish parsing synchronously.
token_ = std::make_unique<HTMLToken>();
tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
}
// We're not going to get any more data off the network, so we tell the input
// stream we've reached the end of file. finish() can be called more than
// once, if the first time does not call end().
if (!input_.HaveSeenEndOfFile())
input_.MarkEndOfFile();
// If there's any deferred work remaining, signal that we
// want to end the document once all work's complete.
task_runner_state_->SetAttemptToEndOnEOF();
// A pump is already scheduled and this is not a prefetch-only document, so
// do not attempt to end here.
// NOTE(review): presumably the scheduled pump performs the end via the flag
// set above - confirm.
if (task_runner_state_->IsScheduled() && !GetDocument()->IsPrefetchOnly()) {
return;
}
AttemptToEnd();
}
// Returns true when a script runner exists and reports that it is executing a
// script.
bool HTMLDocumentParser::IsExecutingScript() const {
  return script_runner_ && script_runner_->IsExecutingScript();
}
// Defers to ScriptableDocumentParser::IsParsingAtLineNumber(), except that an
// asynchronously parsing parser additionally requires
// is_parsing_at_line_number_ to be set.
bool HTMLDocumentParser::IsParsingAtLineNumber() const {
  if (CanParseAsynchronously() && !is_parsing_at_line_number_) {
    return false;
  }
  return ScriptableDocumentParser::IsParsingAtLineNumber();
}
// Returns the current line: taken from text_position_ while a background
// parser is in use, otherwise from the main-thread input stream.
OrdinalNumber HTMLDocumentParser::LineNumber() const {
  if (!have_background_parser_) {
    return input_.Current().CurrentLine();
  }
  return text_position_.line_;
}
// Returns the current line/column position: text_position_ while a background
// parser is in use, otherwise the position of the main-thread input stream.
TextPosition HTMLDocumentParser::GetTextPosition() const {
  if (!have_background_parser_) {
    const SegmentedString& pending_input = input_.Current();
    const OrdinalNumber current_line = pending_input.CurrentLine();
    const OrdinalNumber current_column = pending_input.CurrentColumn();
    return TextPosition(current_line, current_column);
  }
  return text_position_;
}
// Returns true while the parser counts as "waiting for a script": the tree
// builder or the script runner holds a parser-blocking script, or the reentry
// permit's parser pause flag is set.
//
// When the tree builder encounters a </script> tag it returns to the
// HTMLDocumentParser, where the script is transferred from the tree builder to
// the script runner. The script runner holds the script until it is loaded and
// run. During all of that time the preload scanner should run and completion
// of parsing should be delayed.
bool HTMLDocumentParser::IsWaitingForScripts() const {
  const bool builder_is_blocked = tree_builder_->HasParserBlockingScript();
  const bool runner_is_blocked =
      script_runner_ && script_runner_->HasParserBlockingScript();

  // The parser is paused while the script runner has a blocking script, so
  // both objects should never hold a blocking script at the same time.
  DCHECK(!builder_is_blocked || !runner_is_blocked);

  // If either object has a blocking script, the parser should be paused.
  if (builder_is_blocked || runner_is_blocked) {
    return true;
  }
  return reentry_permit_->ParserPauseFlag();
}
// Restarts parsing once the parser is no longer paused. Depending on the
// parser's configuration this pumps pending speculations, pumps or schedules
// the main-thread tokenizer, or calls EndIfDelayed().
void HTMLDocumentParser::ResumeParsingAfterPause() {
// This function runs after a parser-blocking script has completed. There are
// four possible cases:
// 1) Parsing with kForceSynchronousParsing, where there is no background
// parser and a tokenizer_'s defined.
// 2) Parsing with kAllowAsynchronousParsing, without a background parser. In
// this case, the document is usually being completed or parsing has
// otherwise stopped.
// 3) Parsing with kAllowAsynchronousParsing with a background parser. In this
// case, need to add any pending speculations to the document.
// 4) Parsing with kAllowDeferredParsing, with a tokenizer_.
TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser",
(void*)this);
DCHECK(!IsExecutingScript());
DCHECK(!IsPaused());
// NOTE(review): this can set is_waiting_for_stylesheets_, which presumably
// makes IsPaused() true again; that would explain the re-check on the next
// line - confirm.
CheckIfBlockingStylesheetAdded();
if (IsStopped() || IsPaused())
return;
if (have_background_parser_) { // Case 3)
// If we paused in the middle of processing a token chunk,
// deal with that before starting to pump.
if (last_chunk_before_pause_) {
ValidateSpeculations(std::move(last_chunk_before_pause_));
DCHECK(!last_chunk_before_pause_);
PumpPendingSpeculations();
} else if (!IsScheduledForUnpause()) {
// Otherwise, start pumping if we're not already scheduled to unpause
// already.
PumpPendingSpeculations();
}
return;
}
// From here on there is no background parser; drop the scanner that insert()
// creates for document.write() output.
insertion_preload_scanner_.reset();
if (tokenizer_) {
// Case 1) or 4): kForceSynchronousParsing, kAllowDeferredParsing.
// kForceSynchronousParsing must pump the tokenizer synchronously,
// otherwise it can be deferred.
if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
!task_runner_state_->ShouldComplete() && !InPumpSession()) {
SchedulePumpTokenizer();
} else {
ShouldCompleteScope should_complete(task_runner_state_);
PumpTokenizerIfPossible();
}
} else {
// Case 2): kAllowAsynchronousParsing, no background parser available
// (indicating possible Document shutdown).
EndIfDelayed();
}
}
void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() {
TRACE_EVENT1(
"blink",
"HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan",
"parser", (void*)this);
DCHECK(preload_scanner_);
DCHECK(preloader_);
preload_scanner_->AppendToEnd(input_.Current());
ScanAndPreload(preload_scanner_.get());
}
// Runs scripts that were waiting for a load and resumes parsing if the parser
// is not paused afterwards. A stopping parser instead tries to run deferred
// scripts and end; a stopped parser does nothing.
void HTMLDocumentParser::NotifyScriptLoaded() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser",
               static_cast<void*>(this));
  DCHECK(script_runner_);
  DCHECK(!IsExecutingScript());

  scheduler::CooperativeSchedulingManager::AllowedStackScope cooperative_scope(
      scheduler::CooperativeSchedulingManager::Instance());

  if (IsStopped())
    return;

  if (IsStopping()) {
    AttemptToRunDeferredScriptsAndEnd();
    return;
  }

  script_runner_->ExecuteScriptsWaitingForLoad();
  if (IsPaused())
    return;
  ResumeParsingAfterPause();
}
// Clears the waiting-for-stylesheets flag, runs the scripts that were waiting
// for resources and resumes parsing if the parser is not paused afterwards.
void HTMLDocumentParser::ExecuteScriptsWaitingForResources() {
  TRACE_EVENT0("blink",
               "HTMLDocumentParser::ExecuteScriptsWaitingForResources");
  if (IsStopped()) {
    return;
  }
  DCHECK(GetDocument()->IsScriptExecutionReady());

  is_waiting_for_stylesheets_ = false;

  // Document only calls this when the Document owns the DocumentParser, so
  // this is not called in the DocumentFragment case.
  DCHECK(script_runner_);
  script_runner_->ExecuteScriptsWaitingForResources();

  if (IsPaused()) {
    return;
  }
  ResumeParsingAfterPause();
}
// Records that a pending parser-blocking stylesheet was added. This only sets
// added_pending_parser_blocking_stylesheet_; CheckIfBlockingStylesheetAdded()
// is what later turns that flag into is_waiting_for_stylesheets_.
void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() {
// In-body CSS doesn't block painting. The parser needs to pause so that
// the DOM doesn't include any elements that may depend on the CSS for style.
// The stylesheet can be added and removed during the parsing of a single
// token so don't actually set the bit to block parsing here, just track
// the state of the added sheet in case it does persist beyond a single
// token.
added_pending_parser_blocking_stylesheet_ = true;
}
// Clears added_pending_parser_blocking_stylesheet_ once all pending
// parser-blocking stylesheets have loaded. Does not resume parsing itself.
void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() {
// Just toggle the stylesheet flag here (mostly for synchronous sheets).
// The document will also call into executeScriptsWaitingForResources
// which is when the parser will re-start, otherwise it will attempt to
// resume twice which could cause state machine issues.
added_pending_parser_blocking_stylesheet_ = false;
}
// Converts a recorded "blocking stylesheet added" notification into the
// waiting-for-stylesheets state, consuming the notification.
void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() {
  if (!added_pending_parser_blocking_stylesheet_) {
    return;
  }
  added_pending_parser_blocking_stylesheet_ = false;
  is_waiting_for_stylesheets_ = true;
}
void HTMLDocumentParser::ParseDocumentFragment(
const String& source,
DocumentFragment* fragment,
Element* context_element,
ParserContentPolicy parser_content_policy) {
auto* parser = MakeGarbageCollected<HTMLDocumentParser>(
fragment, context_element, parser_content_policy);
parser->Append(source);
parser->Finish();
// Allows ~DocumentParser to assert it was detached before destruction.
parser->Detach();
}
// Receives |length| raw bytes at |data|. When parsing asynchronously the bytes
// are copied and posted to the background parser (which is started on first
// use); otherwise they go through DecodedDataDocumentParser::AppendBytes.
// Empty input and a stopped parser are ignored.
void HTMLDocumentParser::AppendBytes(const char* data, size_t length) {
  TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size",
               static_cast<unsigned>(length), "parser",
               static_cast<void*>(this));
  DCHECK(Thread::MainThread()->IsCurrentThread());

  if (!length || IsStopped()) {
    return;
  }

  if (!CanParseAsynchronously()) {
    DecodedDataDocumentParser::AppendBytes(data, length);
    return;
  }

  if (!have_background_parser_) {
    StartBackgroundParser();
  }

  // The posted task gets its own copy of the bytes.
  auto raw_bytes = std::make_unique<Vector<char>>(length);
  memcpy(raw_bytes->data(), data, length);
  loading_task_runner_->PostTask(
      FROM_HERE,
      WTF::Bind(&BackgroundHTMLParser::AppendRawBytesFromMainThread,
                background_parser_, std::move(raw_bytes)));
}
// Flushes the decoder. With a running background parser the flush is posted to
// it; otherwise it happens synchronously through
// DecodedDataDocumentParser::Flush().
void HTMLDocumentParser::Flush() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser",
               static_cast<void*>(this));

  // Without a decoder no data was ever received, so there is nothing to flush.
  if (IsDetached() || NeedsDecoder()) {
    return;
  }

  if (!CanParseAsynchronously()) {
    DecodedDataDocumentParser::Flush();
    return;
  }

  if (have_background_parser_) {
    loading_task_runner_->PostTask(
        FROM_HERE, WTF::Bind(&BackgroundHTMLParser::Flush, background_parser_));
    return;
  }

  // flush() can be called without any earlier appendBytes(). Fall back to
  // synchronous parsing in that case.
  can_parse_asynchronously_ = false;
  token_ = std::make_unique<HTMLToken>();
  tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
  DecodedDataDocumentParser::Flush();
}
// Installs |decoder| on this parser and, when a background parser is running,
// takes it back and posts it over to the background parser.
void HTMLDocumentParser::SetDecoder(
    std::unique_ptr<TextResourceDecoder> decoder) {
  DCHECK(decoder);
  DecodedDataDocumentParser::SetDecoder(std::move(decoder));

  if (!have_background_parser_) {
    return;
  }
  loading_task_runner_->PostTask(
      FROM_HERE, WTF::Bind(&BackgroundHTMLParser::SetDecoder,
                           background_parser_, TakeDecoder()));
}
void HTMLDocumentParser::DocumentElementAvailable() {
TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable");
Document* document = GetDocument();
DCHECK(document);
DCHECK(document->documentElement());
Element* documentElement = GetDocument()->documentElement();
if (documentElement->hasAttribute(u"\u26A1") ||
documentElement->hasAttribute("amp") ||
documentElement->hasAttribute("i-amphtml-layout")) {
// The DocumentLoader fetches a main resource and handles the result.
// But it may not be available if JavaScript appends HTML to the page later
// in the page's lifetime. This can happen both from in-page JavaScript and
// from extensions. See example callstacks linked from crbug.com/931330.
if (document->Loader()) {
document->Loader()->DidObserveLoadingBehavior(
kLoadingBehaviorAmpDocumentLoaded);
}
}
if (preloader_)
FetchQueuedPreloads();
}
// Builds a preload scanner of the given |scanner_type| for the current
// document, using the parser options, the document URL and values cached from
// the document.
std::unique_ptr<HTMLPreloadScanner> HTMLDocumentParser::CreatePreloadScanner(
    TokenPreloadScanner::ScannerType scanner_type) {
  Document* document = GetDocument();
  auto document_parameters =
      std::make_unique<CachedDocumentParameters>(document);
  return std::make_unique<HTMLPreloadScanner>(
      options_, document->Url(), std::move(document_parameters),
      MediaValuesCached::MediaValuesCachedData(*document), scanner_type);
}
// Runs |scanner| and queues the preload requests it returns, then fetches the
// queued preloads. In kAllowDeferredParsing mode with a document loader it also
// updates the viewport when the scan produced a viewport description, and
// dispatches Link header preloads while
// task_runner_state_->NeedsLinkHeaderPreloadsDispatch() reports they are still
// needed. Requires |preloader_| to be set.
void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) {
TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload");
DCHECK(preloader_);
// NOTE(review): presumably written by Scan() through a reference parameter;
// its value is stored in task_runner_state_ further down - confirm.
bool seen_csp_meta_tag = false;
base::Optional<ViewportDescription> viewport_description;
PreloadRequestStream requests =
scanner->Scan(GetDocument()->ValidBaseElementURL(), &viewport_description,
seen_csp_meta_tag);
// Make sure that the viewport is up-to-date, so that the correct viewport
// dimensions will be fed to the background parser and preload scanner.
if (GetDocument()->Loader() &&
task_runner_state_->GetMode() == kAllowDeferredParsing) {
if (viewport_description.has_value()) {
GetDocument()->GetStyleEngine().UpdateViewport();
}
if (task_runner_state_->NeedsLinkHeaderPreloadsDispatch()) {
if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
TRACE_EVENT0("blink",
"HTMLDocumentParser::DispatchSignedExchangeManager");
// Link header preloads for prefetched signed exchanges won't be started
// until StartPrefetchedLinkHeaderPreloads() is called. See the header
// comment of PrefetchedSignedExchangeManager.
GetDocument()
->Loader()
->GetPrefetchedSignedExchangeManager()
->StartPrefetchedLinkHeaderPreloads();
} else {
TRACE_EVENT0("blink", "HTMLDocumentParser::DispatchLinkHeaderPreloads");
GetDocument()->Loader()->DispatchLinkHeaderPreloads(
base::OptionalOrNullptr(viewport_description),
PreloadHelper::kOnlyLoadMedia);
}
// Record the dispatch on task_runner_state_ for either branch above.
task_runner_state_->DispatchedLinkHeaderPreloads();
}
}
task_runner_state_->SetSeenCSPMetaTag(seen_csp_meta_tag);
// Move the scanned requests onto the parser's queue; FetchQueuedPreloads()
// decides whether they can be issued now.
for (auto& request : requests) {
queued_preloads_.push_back(std::move(request));
}
FetchQueuedPreloads();
}
// Hands all queued preload requests to the preloader. When parsing
// asynchronously, nothing is fetched while a CSP <meta> token is still pending
// or before the document element exists.
void HTMLDocumentParser::FetchQueuedPreloads() {
  DCHECK(preloader_);
  TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads");

  if (CanParseAsynchronously() &&
      (pending_csp_meta_token_ || !GetDocument()->documentElement())) {
    return;
  }

  if (queued_preloads_.IsEmpty()) {
    return;
  }
  preloader_->TakeAndPreload(queued_preloads_);
}
} // namespace blink