// chromium/src/third_party/blink/renderer/core/html/parser/html_document_parser.cc - manifest_repos/chromium_src - Git at Google

 /*
  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "third_party/blink/renderer/core/html/parser/html_document_parser.h"

 #include <memory>
 #include <utility>

 #include "base/auto_reset.h"
 #include "base/numerics/safe_conversions.h"
 #include "third_party/blink/public/common/features.h"
 #include "third_party/blink/public/common/loader/loading_behavior_flag.h"
 #include "third_party/blink/public/mojom/appcache/appcache.mojom-blink.h"
 #include "third_party/blink/public/platform/platform.h"
 #include "third_party/blink/public/platform/task_type.h"
 #include "third_party/blink/renderer/core/css/media_values_cached.h"
 #include "third_party/blink/renderer/core/css/style_engine.h"
 #include "third_party/blink/renderer/core/dom/document_fragment.h"
 #include "third_party/blink/renderer/core/dom/element.h"
 #include "third_party/blink/renderer/core/frame/local_frame.h"
 #include "third_party/blink/renderer/core/html/html_document.h"
 #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
 #include "third_party/blink/renderer/core/html/parser/background_html_parser.h"
 #include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h"
 #include "third_party/blink/renderer/core/html/parser/html_parser_scheduler.h"
 #include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h"
 #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
 #include "third_party/blink/renderer/core/html/parser/pump_session.h"
 #include "third_party/blink/renderer/core/html_names.h"
 #include "third_party/blink/renderer/core/inspector/inspector_trace_events.h"
 #include "third_party/blink/renderer/core/loader/document_loader.h"
 #include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h"
 #include "third_party/blink/renderer/core/loader/preload_helper.h"
 #include "third_party/blink/renderer/core/probe/core_probes.h"
 #include "third_party/blink/renderer/core/script/html_parser_script_runner.h"
 #include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
 #include "third_party/blink/renderer/platform/heap/handle.h"
 #include "third_party/blink/renderer/platform/heap/heap.h"
 #include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
 #include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h"
 #include "third_party/blink/renderer/platform/runtime_enabled_features.h"
 #include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h"
 #include "third_party/blink/renderer/platform/scheduler/public/thread.h"
 #include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
 #include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
 #include "third_party/blink/renderer/platform/wtf/shared_buffer.h"

 namespace blink {

 // Running total of tokens that were thrown away together with discarded
 // speculations (HTMLDocumentParser::DiscardSpeculationsAndResumeFrom adds to
 // it). Read and reset only through the *ForTesting accessors below.
 static size_t g_discarded_token_count_for_testing{0};

 // Zeroes the discarded-token counter.
 void ResetDiscardedTokenCountForTesting() {
   g_discarded_token_count_for_testing = size_t{0};
 }

 // Reports how many tokens have been counted as discarded since the last
 // reset.
 size_t GetDiscardedTokenCountForTesting() {
   const size_t discarded = g_discarded_token_count_for_testing;
   return discarded;
 }

 // This sets the (default) maximum number of tokens which the foreground HTML
 // parser should try to process in one go. Lower values generally mean faster
 // first paints, larger values delay first paint, but make sure it's closer to
 // the final page. This is the default value to use, if no Finch-provided
 // value exists; GetMaxTokenizationBudget() passes it as the fallback when
 // reading the "MaxTokenizationBudget" field trial parameter.
 constexpr int kDefaultMaxTokenizationBudget = 250;

 // Forward declarations of the scope helper classes so that
 // HTMLDocumentParserState can name them as friends before they are defined.
 class EndIfDelayedForbiddenScope;
 class ShouldCompleteScope;
 class AttemptToEndForbiddenScope;

 // This class encapsulates the internal state needed for synchronous foreground
 // HTML parsing (e.g. if HTMLDocumentParser::PumpTokenizer yields, this class
 // tracks what should be done after the pump completes.)
 //
 // The private Enter*/Exit* mutators are reachable only through the friend
 // scope helpers (EndIfDelayedForbiddenScope, ShouldCompleteScope and
 // AttemptToEndForbiddenScope).
 class HTMLDocumentParserState
     : public GarbageCollected<HTMLDocumentParserState> {
   friend EndIfDelayedForbiddenScope;
   friend ShouldCompleteScope;
   friend AttemptToEndForbiddenScope;

  public:
   // Keeps track of whether the parser needs to complete tokenization work,
   // optionally followed by EndIfDelayed.
   enum class DeferredParserState {
     // Indicates that a tokenizer pump has either completed or hasn't been
     // scheduled.
     kNotScheduled = 0,  // Enforce ordering in this enum.
     // Indicates that a tokenizer pump is scheduled and hasn't completed yet.
     kScheduled = 1,
     // Indicates that a tokenizer pump, followed by EndIfDelayed, is scheduled.
     kScheduledWithEndIfDelayed = 2
   };

   enum class MetaCSPTokenState {
     // If we've seen a meta CSP token in an upcoming HTML chunk, then we need to
     // defer any preloads until we've added the CSP token to the document and
     // applied the Content Security Policy.
     kSeen = 0,
     // Indicates that there is no meta CSP token in the upcoming chunk.
     kNotSeen = 1,
     // Indicates that we've added the CSP token to the document and we can now
     // fetch preloads.
     kProcessed = 2,
     // Indicates that it's too late to apply a Content-Security policy (because
     // we've exited the header section.)
     kUnenforceable = 3,
   };

   // Starts out with nothing scheduled, no meta CSP token seen, every counter
   // at zero, link header preloads still pending dispatch and no byte seen.
   // |mode| is the synchronization policy reported by GetMode().
   explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode)
       : state_(DeferredParserState::kNotScheduled),
         meta_csp_state_(MetaCSPTokenState::kNotSeen),
         mode_(mode),
         end_if_delayed_forbidden_(0),
         should_complete_(0),
         should_attempt_to_end_on_eof_(0),
         needs_link_header_dispatch_(true),
         have_seen_first_byte_(false) {}

   // Nothing to trace: every member is an enum, an int or a bool.
   void Trace(Visitor* v) const {}

   // Records the new scheduling state. A plain kScheduled pump is not expected
   // while ShouldComplete() holds.
   void SetState(DeferredParserState state) {
     DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete()));
     state_ = state;
   }
   DeferredParserState GetState() const { return state_; }

   // True for kScheduled and kScheduledWithEndIfDelayed (relies on the explicit
   // ordering of DeferredParserState).
   bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; }

   // Returns a string literal naming the current scheduling state.
   const char* GetStateAsString() const {
     switch (state_) {
       case DeferredParserState::kNotScheduled:
         return "not_scheduled";
       case DeferredParserState::kScheduled:
         return "scheduled";
       case DeferredParserState::kScheduledWithEndIfDelayed:
         return "scheduled_with_end_if_delayed";
     }
     // Only reachable if |state_| holds a value outside the enum. Return a
     // valid string instead of flowing off the end of a non-void function.
     return "unknown";
   }

   // Whether link header preloads still have to be dispatched; cleared by
   // DispatchedLinkHeaderPreloads().
   bool NeedsLinkHeaderPreloadsDispatch() const {
     return needs_link_header_dispatch_;
   }
   void DispatchedLinkHeaderPreloads() { needs_link_header_dispatch_ = false; }

   bool HaveSeenFirstByte() const { return have_seen_first_byte_; }
   void SetHaveSeenFirstByte() { have_seen_first_byte_ = true; }

   // Keeps track of whether Document::Finish has been called whilst parsing
   // asynchronously. ShouldAttemptToEndOnEOF() means that the parser should
   // close when there's no more input.
   bool ShouldAttemptToEndOnEOF() const {
     return should_attempt_to_end_on_eof_ > 0;
   }
   void SetAttemptToEndOnEOF() {
     // This method should only be called from ::Finish.
     should_attempt_to_end_on_eof_++;
     // Should only ever call ::Finish once.
     DCHECK_LT(should_attempt_to_end_on_eof_, 2);
   }

   // True while no EndIfDelayedForbiddenScope is active.
   bool ShouldEndIfDelayed() const { return end_if_delayed_forbidden_ == 0; }
   // True while a ShouldCompleteScope is active, or whenever the mode is
   // anything other than kAllowDeferredParsing.
   bool ShouldComplete() const {
     return should_complete_ || GetMode() != kAllowDeferredParsing;
   }
   bool IsSynchronous() const {
     return mode_ == ParserSynchronizationPolicy::kForceSynchronousParsing;
   }
   ParserSynchronizationPolicy GetMode() const { return mode_; }

   // Records whether a meta CSP token has been seen. Has no effect once the
   // state is kUnenforceable.
   void SetSeenCSPMetaTag(const bool seen) {
     if (meta_csp_state_ == MetaCSPTokenState::kUnenforceable)
       return;
     if (seen)
       meta_csp_state_ = MetaCSPTokenState::kSeen;
     else
       meta_csp_state_ = MetaCSPTokenState::kNotSeen;
   }

   // Marks the meta CSP state as kUnenforceable.
   void SetExitedHeader() {
     meta_csp_state_ = MetaCSPTokenState::kUnenforceable;
   }
   bool HaveExitedHeader() const {
     return meta_csp_state_ == MetaCSPTokenState::kUnenforceable;
   }

  private:
   // Nesting counter behind ShouldEndIfDelayed(); Enter/Exit must be balanced.
   void EnterEndIfDelayedForbidden() { end_if_delayed_forbidden_++; }
   void ExitEndIfDelayedForbidden() {
     end_if_delayed_forbidden_--;
     DCHECK_GE(end_if_delayed_forbidden_, 0);
   }

   // Clears the "attempt to end on EOF" request. Expects the request to be set
   // when called; there is no matching Exit.
   void EnterAttemptToEndForbidden() {
     DCHECK_GT(should_attempt_to_end_on_eof_, 0);
     should_attempt_to_end_on_eof_ = 0;
   }

   // Nesting counter behind ShouldComplete(); Enter/Exit must be balanced.
   void EnterShouldComplete() { should_complete_++; }
   void ExitShouldComplete() {
     should_complete_--;
     DCHECK_GE(should_complete_, 0);
   }

   DeferredParserState state_;
   MetaCSPTokenState meta_csp_state_;
   ParserSynchronizationPolicy mode_;
   int end_if_delayed_forbidden_;
   int should_complete_;
   // Set to non-zero if Document::Finish has been called and we're operating
   // asynchronously.
   int should_attempt_to_end_on_eof_;
   bool needs_link_header_dispatch_;
   bool have_seen_first_byte_;
 };

 // Scoped guard: while an instance is alive, |state|->ShouldEndIfDelayed()
 // reports false. The constructor increments the state's forbid counter and the
 // destructor decrements it again, so instances may nest.
 class EndIfDelayedForbiddenScope {
   STACK_ALLOCATED();

  public:
   explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state)
       : state_(state) {
     state_->EnterEndIfDelayedForbidden();
   }
   // Not copyable: a copy would run the destructor a second time and decrement
   // the counter without a matching increment.
   EndIfDelayedForbiddenScope(const EndIfDelayedForbiddenScope&) = delete;
   EndIfDelayedForbiddenScope& operator=(const EndIfDelayedForbiddenScope&) =
       delete;
   ~EndIfDelayedForbiddenScope() { state_->ExitEndIfDelayedForbidden(); }

  private:
   HTMLDocumentParserState* state_;
 };

 // Constructing an instance clears the "attempt to end on EOF" request on
 // |state| (via EnterAttemptToEndForbidden()). There is no destructor, so the
 // request is not restored when the instance goes out of scope.
 class AttemptToEndForbiddenScope {
   STACK_ALLOCATED();

  public:
   explicit AttemptToEndForbiddenScope(HTMLDocumentParserState* state)
       : state_(state) {
     state_->EnterAttemptToEndForbidden();
   }

  private:
   HTMLDocumentParserState* state_;
 };

 // Scoped guard: while an instance is alive, |state|->ShouldComplete() reports
 // true regardless of the parser's synchronization mode. The constructor
 // increments the state's counter and the destructor decrements it again, so
 // instances may nest.
 class ShouldCompleteScope {
   STACK_ALLOCATED();

  public:
   explicit ShouldCompleteScope(HTMLDocumentParserState* state) : state_(state) {
     state_->EnterShouldComplete();
   }
   // Not copyable: a copy would run the destructor a second time and decrement
   // the counter without a matching increment.
   ShouldCompleteScope(const ShouldCompleteScope&) = delete;
   ShouldCompleteScope& operator=(const ShouldCompleteScope&) = delete;
   ~ShouldCompleteScope() { state_->ExitShouldComplete(); }

  private:
   HTMLDocumentParserState* state_;
 };

 // Picks the tokenizer state a fragment parser starts in, keyed on the
 // fragment's context element. This follows the tokenizer-state step of the
 // HTML fragment parsing algorithm:
 // https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
 //
 //  - no context element, or a tag not listed below: data state
 //  - <title>, <textarea>: RCDATA
 //  - <style>, <xmp>, <iframe>, <noembed>, <noframes>, and <noscript> when
 //    |options.scripting_flag| is set: RAWTEXT
 //  - <script>: script data
 //  - <plaintext>: PLAINTEXT
 //
 // When |report_errors| is false, the RAWTEXT and script data cases yield
 // PLAINTEXT instead.
 static HTMLTokenizer::State TokenizerStateForContextElement(
     Element* context_element,
     bool report_errors,
     const HTMLParserOptions& options) {
   if (!context_element)
     return HTMLTokenizer::kDataState;

   const QualifiedName& tag = context_element->TagQName();

   const bool is_rcdata_element = tag.Matches(html_names::kTitleTag) ||
                                  tag.Matches(html_names::kTextareaTag);
   if (is_rcdata_element)
     return HTMLTokenizer::kRCDATAState;

   const bool is_rawtext_element =
       tag.Matches(html_names::kStyleTag) ||
       tag.Matches(html_names::kXmpTag) ||
       tag.Matches(html_names::kIFrameTag) ||
       tag.Matches(html_names::kNoembedTag) ||
       (tag.Matches(html_names::kNoscriptTag) && options.scripting_flag) ||
       tag.Matches(html_names::kNoframesTag);
   if (is_rawtext_element) {
     if (report_errors)
       return HTMLTokenizer::kRAWTEXTState;
     return HTMLTokenizer::kPLAINTEXTState;
   }

   if (tag.Matches(html_names::kScriptTag)) {
     if (report_errors)
       return HTMLTokenizer::kScriptDataState;
     return HTMLTokenizer::kPLAINTEXTState;
   }

   if (tag.Matches(html_names::kPlaintextTag))
     return HTMLTokenizer::kPLAINTEXTState;

   return HTMLTokenizer::kDataState;
 }

 // Brackets one block of parsing work in order to measure how long the parser
 // sat idle between blocks. Does nothing when no metrics reporter is supplied.
 class ScopedYieldTimer {
  public:
   // Called at the start of a parsing block. If |*timer| was left running by a
   // previous block, its elapsed time is reported to |metrics_reporter| as a
   // yield interval and the timer is released.
   ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer>* timer,
                    HTMLParserMetrics* metrics_reporter)
       : timer_(timer), reporting_metrics_(metrics_reporter != nullptr) {
     const bool has_running_timer = reporting_metrics_ && *timer_;
     if (has_running_timer) {
       metrics_reporter->AddYieldInterval((*timer_)->Elapsed());
       timer_->reset();
     }
   }

   // Called at the end of a parsing block. Starts a fresh timer that runs until
   // the next block begins.
   ~ScopedYieldTimer() {
     if (!reporting_metrics_)
       return;
     *timer_ = std::make_unique<base::ElapsedTimer>();
   }

  private:
   std::unique_ptr<base::ElapsedTimer>* timer_;
   bool reporting_metrics_;
 };

 // Builds the parser for a full HTMLDocument. Delegates to the common
 // constructor with scripting content allowed, then installs the script runner
 // and the tree builder.
 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document,
                                        ParserSynchronizationPolicy sync_policy,
                                        ParserPrefetchPolicy prefetch_policy)
     : HTMLDocumentParser(document,
                          kAllowScriptingContent,
                          sync_policy,
                          prefetch_policy) {
   script_runner_ =
       HTMLParserScriptRunner::Create(ReentryPermit(), &document, this);

   // The document parser honours declarative shadow DOM unless the document
   // explicitly denies it.
   const auto shadow_root_policy = document.GetDeclarativeShadowRootAllowState();
   bool allow_shadow_roots =
       shadow_root_policy != Document::DeclarativeShadowRootAllowState::kDeny;
   tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
       this, document, kAllowScriptingContent, options_, allow_shadow_roots);
 }

 // Builds the parser for a DocumentFragment. Fragment parsing is always
 // synchronous and has no script runner; the tokenizer's initial state is
 // derived from |context_element|.
 HTMLDocumentParser::HTMLDocumentParser(
     DocumentFragment* fragment,
     Element* context_element,
     ParserContentPolicy parser_content_policy,
     ParserPrefetchPolicy parser_prefetch_policy)
     : HTMLDocumentParser(fragment->GetDocument(),
                          parser_content_policy,
                          kForceSynchronousParsing,
                          parser_prefetch_policy) {
   // The fragment parser honours declarative shadow DOM only when the owning
   // document explicitly allows it.
   const auto shadow_root_policy =
       fragment->GetDocument().GetDeclarativeShadowRootAllowState();
   bool allow_shadow_roots =
       shadow_root_policy == Document::DeclarativeShadowRootAllowState::kAllow;

   // Note: script_runner_ is intentionally left unset here.
   tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
       this, fragment, context_element, parser_content_policy, options_,
       allow_shadow_roots);

   // Document fragment parsing never reports errors, for now.
   constexpr bool kReportErrors = false;
   const HTMLTokenizer::State initial_state = TokenizerStateForContextElement(
       context_element, kReportErrors, options_);
   tokenizer_->SetState(initial_state);
 }

 namespace {
 // Returns the per-pump token budget: the "MaxTokenizationBudget" field trial
 // parameter of features::kForceSynchronousHTMLParsing, with
 // kDefaultMaxTokenizationBudget passed as the default. The lookup happens on
 // the first call only; later calls return the cached value.
 int GetMaxTokenizationBudget() {
   static const int budget = base::GetFieldTrialParamByFeatureAsInt(
       features::kForceSynchronousHTMLParsing, "MaxTokenizationBudget",
       kDefaultMaxTokenizationBudget);
   return budget;
 }
 }  // namespace

 // Common constructor shared by the document and fragment constructors.
 //
 // |sync_policy| decides which pieces get created:
 //  - token_ / tokenizer_ exist unless parsing is kAllowAsynchronousParsing;
 //  - loading_task_runner_ is null only for kForceSynchronousParsing;
 //  - parser_scheduler_ exists only for kAllowAsynchronousParsing;
 //  - scheduler_ is set only for kAllowDeferredParsing.
 // The body then optionally sets up metrics reporting and the resource
 // preloader.
 HTMLDocumentParser::HTMLDocumentParser(Document& document,
                                        ParserContentPolicy content_policy,
                                        ParserSynchronizationPolicy sync_policy,
                                        ParserPrefetchPolicy prefetch_policy)
     : ScriptableDocumentParser(document, content_policy),
       options_(&document),
       reentry_permit_(HTMLParserReentryPermit::Create()),
       token_(sync_policy != kAllowAsynchronousParsing
                  ? std::make_unique<HTMLToken>()
                  : nullptr),
       tokenizer_(sync_policy != kAllowAsynchronousParsing
                      ? std::make_unique<HTMLTokenizer>(options_)
                      : nullptr),
       loading_task_runner_(sync_policy == kForceSynchronousParsing
                                ? nullptr
                                : document.GetTaskRunner(TaskType::kNetworking)),
       parser_scheduler_(sync_policy == kAllowAsynchronousParsing
                             ? MakeGarbageCollected<HTMLParserScheduler>(
                                   this,
                                   loading_task_runner_.get())
                             : nullptr),
       task_runner_state_(
           MakeGarbageCollected<HTMLDocumentParserState>(sync_policy)),
       pending_csp_meta_token_(nullptr),
       can_parse_asynchronously_(sync_policy == kAllowAsynchronousParsing),
       end_was_delayed_(false),
       have_background_parser_(false),
       pump_session_nesting_level_(0),
       pump_speculations_session_nesting_level_(0),
       is_parsing_at_line_number_(false),
       tried_loading_link_headers_(false),
       added_pending_parser_blocking_stylesheet_(false),
       is_waiting_for_stylesheets_(false),
       scheduler_(sync_policy == kAllowDeferredParsing
                      ? Thread::Current()->Scheduler()
                      : nullptr) {
   // A parser that does not parse asynchronously must own its token and
   // tokenizer.
   DCHECK(CanParseAsynchronously() || (token_ && tokenizer_));
   // Prefetch-only documents are never parsed asynchronously.
   DCHECK(!document.IsPrefetchOnly() || !CanParseAsynchronously());

   // Requesting the background HTML parser together with
   // --enable-blink-features=ForceSynchronousHTMLParsing is permissible but
   // usually unintentional, so flag it with a DCHECK.
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled() ||
          !CanParseAsynchronously());

   // Metrics are reported for async document parsing only. The document must be
   // in the main frame to meet UKM requirements, and a high resolution clock is
   // needed for high quality data.
   if (sync_policy == kAllowAsynchronousParsing) {
     auto* frame = document.GetFrame();
     if (frame && frame->IsMainFrame() && base::TimeTicks::IsHighResolution()) {
       metrics_reporter_ = std::make_unique<HTMLParserMetrics>(
           document.UkmSourceID(), document.UkmRecorder());
     }
   }

   max_tokenization_budget_ = GetMaxTokenizationBudget();

   // Clipboard content is parsed without a preloader.
   if (content_policy == kDisallowScriptingAndPluginContent)
     return;

   // A preloader is only worth creating when the document is:
   // - attached to a frame (likely the prefetched resources will be loaded
   //   soon),
   // - a HTML import document (blocks rendering and also resources will be
   //   loaded soon), or
   // - for no-state prefetch (made specifically for running preloader).
   const bool preloads_are_useful = document.GetFrame() ||
                                    document.IsHTMLImport() ||
                                    document.IsPrefetchOnly();
   if (preloads_are_useful && prefetch_policy == kAllowPrefetching)
     preloader_ = MakeGarbageCollected<HTMLResourcePreloader>(document);
 }

 // Defaulted: no explicit teardown happens here. Stopping the background parser
 // is handled by Dispose() and Detach().
 HTMLDocumentParser::~HTMLDocumentParser() = default;

 // Stops the background parser, if there is one. Needed because, in Oilpan,
 // HTMLDocumentParser can die together with Document, in which case detach()
 // is never called.
 void HTMLDocumentParser::Dispose() {
   if (!have_background_parser_)
     return;
   StopBackgroundParser();
 }

 // Traces the members handed to |visitor| below, then forwards to both base
 // classes' Trace().
 void HTMLDocumentParser::Trace(Visitor* visitor) const {
   visitor->Trace(tree_builder_);
   visitor->Trace(parser_scheduler_);
   visitor->Trace(script_runner_);
   visitor->Trace(preloader_);
   visitor->Trace(task_runner_state_);
   ScriptableDocumentParser::Trace(visitor);
   HTMLParserScriptRunnerHost::Trace(visitor);
 }

 // Test-only: reports whether a deferred tokenizer pump (with or without a
 // trailing EndIfDelayed) is currently scheduled.
 bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const {
   return task_runner_state_->IsScheduled();
 }

 void HTMLDocumentParser::Detach() {
   if (have_background_parser_)
     StopBackgroundParser();
   // Deschedule any pending tokenizer pumps.
   task_runner_state_->SetState(
       HTMLDocumentParserState::DeferredParserState::kNotScheduled);
   DocumentParser::Detach();
   if (script_runner_)
     script_runner_->Detach();
   tree_builder_->Detach();
   // FIXME: It seems wrong that we would have a preload scanner here. Yet during
   // fast/dom/HTMLScriptElement/script-load-events.html we do.
   preload_scanner_.reset();
   insertion_preload_scanner_.reset();
   if (parser_scheduler_) {
     parser_scheduler_->Detach();
     parser_scheduler_.Clear();
   }
   // Oilpan: It is important to clear token_ to deallocate backing memory of
   // HTMLToken::data_ and let the allocator reuse the memory for
   // HTMLToken::data_ of a next HTMLDocumentParser. We need to clear
   // tokenizer_ first because tokenizer_ has a raw pointer to token_.
   tokenizer_.reset();
   token_.reset();
 }

 void HTMLDocumentParser::StopParsing() {
   DocumentParser::StopParsing();
   if (parser_scheduler_) {
     parser_scheduler_->Detach();
     parser_scheduler_.Clear();
   }
   task_runner_state_->SetState(
       HTMLDocumentParserState::DeferredParserState::kNotScheduled);
   if (have_background_parser_)
     StopBackgroundParser();
 }

 // This kicks off "Once the user agent stops parsing" as described by:
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
 //
 // Flushes the tokenizer one last time, moves the document to the interactive
 // ready state (when there is a script runner), and then tries to run deferred
 // scripts and end. Bails out early if the parser gets stopped or detached
 // along the way.
 void HTMLDocumentParser::PrepareToStopParsing() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser",
                static_cast<void*>(this));
   // FIXME: It may not be correct to disable this for the background parser.
   // That means hasInsertionPoint() may not be correct in some cases.
   DCHECK(!HasInsertionPoint() || have_background_parser_);

   // NOTE: This pump should only ever emit buffered character tokens.
   const bool should_flush_tokenizer =
       tokenizer_ && !GetDocument()->IsPrefetchOnly();
   if (should_flush_tokenizer) {
     DCHECK(!have_background_parser_);
     // Force the pump to complete, and keep it from ending the document itself.
     ShouldCompleteScope force_completion(task_runner_state_);
     EndIfDelayedForbiddenScope forbid_end_if_delayed(task_runner_state_);
     PumpTokenizerIfPossible();
   }

   if (IsStopped())
     return;

   DocumentParser::PrepareToStopParsing();

   // There is no script runner when parsing a DocumentFragment.
   if (script_runner_)
     GetDocument()->SetReadyState(Document::kInteractive);

   // Changing the ready state can fire a mutation event, which may detach this
   // parser. Nothing more to do in that case.
   if (IsDetached())
     return;

   if (script_runner_)
     script_runner_->RecordMetricsAtParseEnd();

   AttemptToRunDeferredScriptsAndEnd();
 }

 // Reports whether this parser is parsing a fragment, as answered by the tree
 // builder.
 bool HTMLDocumentParser::IsParsingFragment() const {
   return tree_builder_->IsParsingFragment();
 }

 void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() {
   // This method is called asynchronously, continues building the HTML document.
   // This function should only be called when
   // --enable-blink-features=ForceSynchronousHTMLParsing is available.
   DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   // If we're scheduled for a tokenizer pump, then document should be attached
   // and the parser should not be stopped, but sometimes a script completes
   // loading (so we schedule a pump) but the Document is stopped in the meantime
   // (e.g. fast/parser/iframe-onload-document-close-with-external-script.html).
   DCHECK(task_runner_state_->GetState() ==
              HTMLDocumentParserState::DeferredParserState::kNotScheduled ||
          !IsDetached());
   TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible",
                "parser", (void*)this, "state",
                task_runner_state_->GetStateAsString());
   bool should_call_delay_end =
       task_runner_state_->GetState() ==
       HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed;
   if (task_runner_state_->IsScheduled()) {
     task_runner_state_->SetState(
         HTMLDocumentParserState::DeferredParserState::kNotScheduled);
     if (should_call_delay_end) {
       EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
       PumpTokenizerIfPossible();
       EndIfDelayed();
     } else {
       PumpTokenizerIfPossible();
     }
   }
 }

 // Synchronous entry point: builds the HTML document up to the current budget
 // and then decides what happens next. Depending on state it reschedules a
 // pump, attempts to end, calls EndIfDelayed(), schedules EndIfDelayed, or does
 // nothing.
 void HTMLDocumentParser::PumpTokenizerIfPossible() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser",
                static_cast<void*>(this));

   CheckIfBlockingStylesheetAdded();

   const bool may_pump =
       !IsStopped() &&
       (!IsPaused() || task_runner_state_->ShouldEndIfDelayed());
   const bool yielded = may_pump && PumpTokenizer();

   if (yielded) {
     // The pump ran out of budget; continue later.
     DCHECK(!task_runner_state_->ShouldComplete());
     SchedulePumpTokenizer();
     return;
   }

   if (task_runner_state_->ShouldAttemptToEndOnEOF()) {
     // ::Finish was called earlier and we've just finished asynchronously
     // parsing everything.
     AttemptToEnd();
     return;
   }

   if (!task_runner_state_->ShouldEndIfDelayed())
     return;

   // We did not exceed the budget, or parsed everything there was to parse:
   // check whether the document should be completed now or later.
   const bool end_now =
       task_runner_state_->ShouldComplete() || IsStopped() || IsStopping();
   if (end_now)
     EndIfDelayed();
   else
     ScheduleEndIfDelayed();
 }

 // True when a parser scheduler exists and reports that it is scheduled for
 // unpause; false when there is no scheduler.
 bool HTMLDocumentParser::IsScheduledForUnpause() const {
   if (!parser_scheduler_)
     return false;
   return parser_scheduler_->IsScheduledForUnpause();
 }

 // Used by HTMLParserScheduler.
 // Resumes consuming pending speculations after a yield, unless the parser has
 // been stopped or is paused. The time spent yielded is fed to the metrics
 // reporter, when there is one.
 void HTMLDocumentParser::ResumeParsingAfterYield() {
   DCHECK(CanParseAsynchronously());
   DCHECK(have_background_parser_);
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

   ScopedYieldTimer yield_interval_timer(&yield_timer_, metrics_reporter_.get());

   CheckIfBlockingStylesheetAdded();
   if (!IsStopped() && !IsPaused())
     PumpPendingSpeculations();
 }

 void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder",
                "parser", (void*)this);
   DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy()));

   TextPosition script_start_position = TextPosition::BelowRangePosition();
   Element* script_element =
       tree_builder_->TakeScriptToProcess(script_start_position);
   // We will not have a scriptRunner when parsing a DocumentFragment.
   if (script_runner_)
     script_runner_->ProcessScriptElement(script_element, script_start_position);
   CheckIfBlockingStylesheetAdded();
 }

 // Decides whether the parser may consume another token. Returns NoTokens when
 // the parser is stopped, or is stopped or paused after running pending
 // scripts. Otherwise returns HaveTokensAfterScript if a parser-blocking script
 // had to be run first, and HaveTokens if not.
 HTMLDocumentParser::NextTokenStatus HTMLDocumentParser::CanTakeNextToken() {
   if (IsStopped())
     return NoTokens;

   // If the tree builder is paused waiting for a script, try to execute scripts
   // before continuing.
   const bool ran_script = tree_builder_->HasParserBlockingScript();
   if (ran_script)
     RunScriptsForPausedTreeBuilder();

   if (IsStopped() || IsPaused())
     return NoTokens;

   return ran_script ? HaveTokensAfterScript : HaveTokens;
 }

 // Accepts a chunk of tokens and queues it as a speculation. Not expected to
 // run when ForceSynchronousHTMLParsing is enabled. Does nothing if the parser
 // is no longer parsing. Otherwise, in order:
 //  1. dispatches link header preloads, the first time a loader is available;
 //  2. remembers a pending <meta> CSP token carried by the chunk, if any;
 //  3. hands the chunk's preload requests to the preloader, or queues them
 //     when they have to wait for CSP or AppCache;
 //  4. appends the chunk to speculations_ and schedules the parser scheduler
 //     for unpause, unless the parser is paused or already scheduled.
 void HTMLDocumentParser::EnqueueTokenizedChunk(
     std::unique_ptr<TokenizedChunk> chunk) {
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   TRACE_EVENT0("blink", "HTMLDocumentParser::EnqueueTokenizedChunk");

   DCHECK(chunk);
   DCHECK(GetDocument());

   if (!IsParsing())
     return;

   // ApplicationCache needs to be initialized before issuing preloads. We
   // suspend preload until HTMLHTMLElement is inserted and ApplicationCache is
   // initialized. Note: link rel preloads don't follow this policy per the spec.
   // These directives should initiate a fetch as fast as possible.
   if (!tried_loading_link_headers_ && GetDocument()->Loader()) {
     // Note that on commit, the loader dispatched preloads for all the non-media
     // links.
     GetDocument()->Loader()->DispatchLinkHeaderPreloads(
         base::OptionalOrNullptr(chunk->viewport),
         PreloadHelper::kOnlyLoadMedia);
     // Set unconditionally so this branch runs at most once per parser.
     tried_loading_link_headers_ = true;
     if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
       // Link header preloads for prefetched signed exchanges won't be started
       // until StartPrefetchedLinkHeaderPreloads() is called. See the header
       // comment of PrefetchedSignedExchangeManager.
       GetDocument()
           ->Loader()
           ->GetPrefetchedSignedExchangeManager()
           ->StartPrefetchedLinkHeaderPreloads();
     }
   }

   // Defer preloads if any of the chunks contains a <meta> csp tag.
   // The stored pointer refers to a token inside |chunk|, which is moved into
   // speculations_ at the end of this function.
   if (chunk->pending_csp_meta_token_index != TokenizedChunk::kNoPendingToken) {
     pending_csp_meta_token_ =
         &chunk->tokens.at(chunk->pending_csp_meta_token_index);
   }

   if (preloader_) {
     bool appcache_fetched = false;
     if (GetDocument()->Loader()) {
       appcache_fetched = (GetDocument()->Loader()->GetResponse().AppCacheID() !=
                           mojom::blink::kAppCacheNoCacheId);
     }
     // The presence of a document element is used as the signal that AppCache
     // has been initialized.
     bool appcache_initialized = GetDocument()->documentElement();
     // Delay sending some requests if meta tag based CSP is present or
     // if AppCache was used to fetch the HTML but was not yet initialized for
     // this document.
     if (pending_csp_meta_token_ ||
         ((!base::FeatureList::IsEnabled(
               blink::features::kVerifyHTMLFetchedFromAppCacheBeforeDelay) ||
           appcache_fetched) &&
          !appcache_initialized)) {
       PreloadRequestStream link_rel_preloads;
       for (auto& request : chunk->preloads) {
         // Link rel preloads don't need to wait for AppCache but they
         // should probably wait for CSP.
         if (!pending_csp_meta_token_ && request->IsLinkRelPreload())
           link_rel_preloads.push_back(std::move(request));
         else
           queued_preloads_.push_back(std::move(request));
       }
       preloader_->TakeAndPreload(link_rel_preloads);
     } else {
       // We can safely assume that there are no queued preloads request after
       // the document element is available, as we empty the queue immediately
       // after the document element is created in documentElementAvailable().
       DCHECK(queued_preloads_.IsEmpty());
       preloader_->TakeAndPreload(chunk->preloads);
     }
   }

   speculations_.push_back(std::move(chunk));

   if (!IsPaused() && !IsScheduledForUnpause())
     parser_scheduler_->ScheduleForUnpause();
 }

 // Forwards encoding data reported by the background parser to the document.
 // Not expected to run when ForceSynchronousHTMLParsing is enabled.
 void HTMLDocumentParser::DidReceiveEncodingDataFromBackgroundParser(
     const DocumentEncodingData& data) {
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   GetDocument()->SetEncodingData(data);
 }

 // Checks whether the queued speculations are still usable given the state of
 // the main-thread tokenizer/token. Takes ownership of |chunk|.
 //  - While paused, |chunk| is stashed in last_chunk_before_pause_ and the
 //    check is postponed.
 //  - Otherwise tokenizer_ and token_ are taken (leaving both members null).
 //    If they cannot be reconciled with |chunk|, all speculations are
 //    discarded and the background parser is resumed from a checkpoint.
 void HTMLDocumentParser::ValidateSpeculations(
     std::unique_ptr<TokenizedChunk> chunk) {
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   DCHECK(chunk);
   // TODO(kouhei): We should simplify codepath here by disallowing
   // ValidateSpeculations while IsPaused, and last_chunk_before_pause_ can
   // simply be pushed to speculations_.
   if (IsPaused()) {
     // A network script or stylesheet is still outstanding. Keep the chunk; a
     // second ValidateSpeculations call follows once the script or stylesheet
     // completes. In the script case this call should have come right after
     // RunScriptsForPausedTreeBuilder, which may have kicked off a network
     // load and left us waiting.
     DCHECK(!last_chunk_before_pause_);
     last_chunk_before_pause_ = std::move(chunk);
     return;
   }

   DCHECK(!last_chunk_before_pause_);
   std::unique_ptr<HTMLTokenizer> tokenizer = std::move(tokenizer_);
   std::unique_ptr<HTMLToken> token = std::move(token_);

   // No main-thread tokenizer means the HTMLTokenizer state was never changed
   // on the main thread, so the speculation buffer is correct as-is.
   if (!tokenizer) {
     return;
   }

   // For now the speculation buffer is only reused when the tokenizer both
   // starts and ends in the DataState. That state is the simplest because the
   // HTMLToken is always Uninitialized there. Reusing the buffer in other
   // states would likely need something more sophisticated for the HTMLToken.
   const bool can_reuse_speculations =
       chunk->tokenizer_state == HTMLTokenizer::kDataState &&
       tokenizer->GetState() == HTMLTokenizer::kDataState &&
       input_.Current().IsEmpty() &&
       chunk->tree_builder_state ==
           HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get());

   if (!can_reuse_speculations) {
     DiscardSpeculationsAndResumeFrom(std::move(chunk), std::move(token),
                                      std::move(tokenizer));
     return;
   }

   DCHECK(token->IsUninitialized());
 }

 void HTMLDocumentParser::DiscardSpeculationsAndResumeFrom(
     std::unique_ptr<TokenizedChunk> last_chunk_before_script,
     std::unique_ptr<HTMLToken> token,
     std::unique_ptr<HTMLTokenizer> tokenizer) {
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   // Clear back ref.
   background_parser_->ClearParser();

   size_t discarded_token_count = 0;
   for (const auto& speculation : speculations_) {
     discarded_token_count += speculation->tokens.size();
   }
   g_discarded_token_count_for_testing += discarded_token_count;

   speculations_.clear();
   pending_csp_meta_token_ = nullptr;
   queued_preloads_.clear();

   std::unique_ptr<BackgroundHTMLParser::Checkpoint> checkpoint =
       std::make_unique<BackgroundHTMLParser::Checkpoint>();
   checkpoint->parser = this;
   checkpoint->token = std::move(token);
   checkpoint->tokenizer = std::move(tokenizer);
   checkpoint->tree_builder_state =
       HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get());
   checkpoint->input_checkpoint = last_chunk_before_script->input_checkpoint;
   checkpoint->preload_scanner_checkpoint =
       last_chunk_before_script->preload_scanner_checkpoint;
   checkpoint->unparsed_input = input_.Current().ToString().IsolatedCopy();
   // FIXME: This should be passed in instead of cleared.
   input_.Current().Clear();

   DCHECK(checkpoint->unparsed_input.IsSafeToSendToAnotherThread());
   loading_task_runner_->PostTask(
       FROM_HERE, WTF::Bind(&BackgroundHTMLParser::ResumeFrom,
                            background_parser_, std::move(checkpoint)));
 }

 // Feeds every token of |pop_chunk| to the tree builder, stopping early if the
 // parser becomes stopped or paused, or when an end-of-file token is seen.
 //
 // |pop_chunk|: the chunk to process; ownership is taken. If the parser pauses
 //     mid-chunk, the chunk is handed on to ValidateSpeculations().
 // |reached_end_of_file|: set to true when an EOF token was processed; it is
 //     never written otherwise, so the caller must initialize it.
 //
 // Returns the number of start/end tag tokens that were visited, counted only
 // when the chunk is not marked as starting_script.
 size_t HTMLDocumentParser::ProcessTokenizedChunkFromBackgroundParser(
     std::unique_ptr<TokenizedChunk> pop_chunk,
     bool* reached_end_of_file) {
   TRACE_EVENT_WITH_FLOW0(
       "blink,loading",
       "HTMLDocumentParser::processTokenizedChunkFromBackgroundParser",
       pop_chunk.get(), TRACE_EVENT_FLAG_FLOW_IN);
   // Marks that text_position_ is meaningful while tokens are being processed.
   base::AutoReset<bool> has_line_number(&is_parsing_at_line_number_, true);

   SECURITY_DCHECK(pump_speculations_session_nesting_level_ == 1);
   SECURITY_DCHECK(!InPumpSession());
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   DCHECK(!IsParsingFragment());
   DCHECK(!IsPaused());
   DCHECK(!IsStopped());
   DCHECK(CanParseAsynchronously());
   DCHECK(!tokenizer_);
   DCHECK(!token_);
   DCHECK(!last_chunk_before_pause_);

   std::unique_ptr<TokenizedChunk> chunk(std::move(pop_chunk));
   // NOTE: |tokens| refers into |chunk|; it must not be touched after |chunk|
   // has been moved away (see the IsPaused() branch below, which breaks out of
   // the loop right after the move).
   const CompactHTMLTokenStream& tokens = chunk->tokens;
   size_t element_token_count = 0;

   // Tell the background parser which input checkpoint this chunk belongs to.
   loading_task_runner_->PostTask(
       FROM_HERE, WTF::Bind(&BackgroundHTMLParser::StartedChunkWithCheckpoint,
                            background_parser_, chunk->input_checkpoint));

   for (const auto& token : tokens) {
     DCHECK(!IsWaitingForScripts());

     if (!chunk->starting_script && (token.GetType() == HTMLToken::kStartTag ||
                                     token.GetType() == HTMLToken::kEndTag))
       element_token_count++;

     // Record the source position before building, so that LineNumber() and
     // GetTextPosition() report this token's position.
     text_position_ = token.GetTextPosition();

     ConstructTreeFromCompactHTMLToken(token);

     if (IsStopped())
       break;

     // Preloads were queued if there was a <meta> csp token in a tokenized
     // chunk.
     if (pending_csp_meta_token_ && &token == pending_csp_meta_token_) {
       pending_csp_meta_token_ = nullptr;
       FetchQueuedPreloads();
     }

     if (IsPaused()) {
       // The script or stylesheet should be the last token of this bunch.
       DCHECK_EQ(&token, &tokens.back());
       if (IsWaitingForScripts())
         RunScriptsForPausedTreeBuilder();
       // Ownership of the chunk is given up here; nothing below may use
       // |chunk|, |tokens| or |token| any more.
       ValidateSpeculations(std::move(chunk));
       break;
     }

     if (token.GetType() == HTMLToken::kEndOfFile) {
       // The EOF is assumed to be the last token of this bunch.
       DCHECK_EQ(&token, &tokens.back());
       // There should never be any chunks after the EOF.
       DCHECK(speculations_.IsEmpty());
       PrepareToStopParsing();
       *reached_end_of_file = true;
       break;
     }

     DCHECK(!tokenizer_);
     DCHECK(!token_);
   }

   // Make sure all required pending text nodes are emitted before returning.
   // This leaves "script", "style" and "svg" nodes text nodes intact.
   if (!IsStopped())
     tree_builder_->Flush(kFlushIfAtTextLimit);

   // Explicitly cleared here; the AutoReset declared at the top additionally
   // restores the value it observed on entry when this scope is left.
   is_parsing_at_line_number_ = false;

   return element_token_count;
 }

 // Drains speculations_ by processing queued chunks one at a time until the
 // queue is empty, the parser stops parsing, pauses, gets scheduled for
 // unpause, or the scheduler asks to yield. If called while waiting for
 // scripts or from a nested pump session, it only re-schedules itself via
 // parser_scheduler_ and returns. Reports per-session metrics when a
 // metrics_reporter_ is present.
 void HTMLDocumentParser::PumpPendingSpeculations() {
   // If this assert fails, you need to call ValidateSpeculations to make sure
   // tokenizer_ and token_ don't have state that invalidates speculations_.
   DCHECK(!tokenizer_);
   DCHECK(!token_);
   DCHECK(!last_chunk_before_pause_);
   DCHECK(!IsPaused());
   DCHECK(!IsStopped());
   DCHECK(!IsScheduledForUnpause());
   DCHECK(!InPumpSession());
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

   // FIXME: Here should never be reached when there is a blocking script,
   // but it happens in unknown scenarios. See https://crbug.com/440901
   if (IsWaitingForScripts()) {
     parser_scheduler_->ScheduleForUnpause();
     return;
   }

   // Do not allow pumping speculations in nested event loops.
   if (pump_speculations_session_nesting_level_) {
     parser_scheduler_->ScheduleForUnpause();
     return;
   }

   probe::ParseHTML probe(GetDocument(), this);

   // |session| is constructed from the nesting level counter checked above and
   // accumulates the element-token count and elapsed time for this pump.
   SpeculationsPumpSession session(pump_speculations_session_nesting_level_);
   bool reached_end_of_file = false;
   while (!speculations_.IsEmpty()) {
     DCHECK(!IsScheduledForUnpause());
     size_t element_token_count = ProcessTokenizedChunkFromBackgroundParser(
         speculations_.TakeFirst(), &reached_end_of_file);
     session.AddedElementTokens(element_token_count);

     // Always check IsParsing first as document_ may be null. Surprisingly,
     // IsScheduledForUnpause() may be set here as a result of
     // ProcessTokenizedChunkFromBackgroundParser running arbitrary javascript
     // which invokes nested event loops. (e.g. inspector breakpoints)
     CheckIfBlockingStylesheetAdded();
     if (!IsParsing() || IsPaused() || IsScheduledForUnpause())
       break;

     // The emptiness check comes first so that front() is only evaluated on a
     // non-empty queue.
     if (speculations_.IsEmpty() ||
         parser_scheduler_->YieldIfNeeded(
             session, speculations_.front()->starting_script))
       break;
   }

   if (metrics_reporter_) {
     metrics_reporter_->AddChunk(session.ElapsedTime(),
                                 session.ProcessedElementTokens());
     if (reached_end_of_file)
       metrics_reporter_->ReportMetricsAtParseEnd();
   }
 }

 // Switches tokenization to PLAINTEXT. Without asynchronous parsing the
 // main-thread tokenizer is switched directly; otherwise the request goes to
 // the background parser, which is started first if necessary.
 void HTMLDocumentParser::ForcePlaintextForTextDocument() {
   if (!CanParseAsynchronously()) {
     tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState);
     return;
   }

   // This method is called before any data is appended, so the background
   // parser has to be started here.
   if (!have_background_parser_) {
     StartBackgroundParser();
   }

   // This task should be synchronous, because otherwise synchronous
   // tokenizing can happen before plaintext is forced.
   background_parser_->ForcePlaintextForTextDocument();
 }

 // Runs the main-thread tokenizer over input_.Current(), feeding each token to
 // the tree builder, until input runs out, the parser stops, or the pump
 // decides to yield.
 //
 // Returns true only when the loop ended because it decided to yield (token
 // budget spent, high-priority work pending, or a parser-blocking script just
 // ran in the body). Returns false if the parser was stopped or the loop ended
 // for any other reason. Never returns true when running to completion is
 // required (enforced by the CHECK at the end).
 bool HTMLDocumentParser::PumpTokenizer() {
   DCHECK(!GetDocument()->IsPrefetchOnly());
   DCHECK(!IsStopped());
   DCHECK(tokenizer_);
   DCHECK(token_);

   PumpSession session(pump_session_nesting_level_);

   // If we're in kForceSynchronousParsing, always run until all available input
   // is consumed.
   // A nesting level above 1 also forces running to completion.
   bool should_run_until_completion = task_runner_state_->ShouldComplete() ||
                                      task_runner_state_->IsSynchronous() ||
                                      pump_session_nesting_level_ > 1;
   TRACE_EVENT2("blink", "HTMLDocumentParser::PumpTokenizer", "should_complete",
                should_run_until_completion, "parser", (void*)this);

   // We tell the InspectorInstrumentation about every pump, even if we end up
   // pumping nothing.  It can filter out empty pumps itself.
   // FIXME: input_.Current().length() is only accurate if we end up parsing the
   // whole buffer in this pump.  We should pass how much we parsed as part of
   // DidWriteHTML instead of WillWriteHTML.
   probe::ParseHTML probe(GetDocument(), this);

   bool should_yield = false;
   // Number of tokens this pump may still process before it considers
   // yielding; decremented once per successfully tokenized token.
   int budget = max_tokenization_budget_;

   while (!should_yield) {
     const auto next_token_status = CanTakeNextToken();
     if (next_token_status == NoTokens) {
       // No tokens left to process in this pump, so break
       break;
     } else if (next_token_status == HaveTokensAfterScript &&
                task_runner_state_->HaveExitedHeader()) {
       // Just executed a parser-blocking script in the body (which is usually
       // very expensive), so expire the budget, yield, and permit paint if
       // needed.
       budget = 0;
       if (!should_run_until_completion) {
         should_yield = true;
         break;
       }
     }
     {
       RUNTIME_CALL_TIMER_SCOPE(
           V8PerIsolateData::MainThreadIsolate(),
           RuntimeCallStats::CounterId::kHTMLTokenizerNextToken);
       // NextToken() returning false ends the pump: no complete token could be
       // produced from the currently available input.
       if (!tokenizer_->NextToken(input_.Current(), Token()))
         break;
       budget--;
     }
     ConstructTreeFromHTMLToken();
     if (!should_run_until_completion && !IsPaused()) {
       DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing);
       // Yield when the budget is exhausted or higher-priority work is
       // waiting, but never before the header has been exited.
       should_yield = budget <= 0;
       should_yield |= scheduler_->ShouldYieldForHighPriorityWork();
       should_yield &= task_runner_state_->HaveExitedHeader();
     } else {
       should_yield = false;
     }
     DCHECK(IsStopped() || Token().IsUninitialized());
   }

   if (IsStopped())
     return false;

   // There should only be PendingText left since the tree-builder always flushes
   // the task queue before returning. In case that ever changes, crash.
   tree_builder_->Flush(kFlushAlways);
   CHECK(!IsStopped());

   if (IsPaused()) {
     DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState);

     // While paused, run the preload scanner over the input. The scanner is
     // created lazily and, only on creation, primed with the current input.
     if (preloader_) {
       if (!preload_scanner_) {
         preload_scanner_ = CreatePreloadScanner(
             TokenPreloadScanner::ScannerType::kMainDocument);
         preload_scanner_->AppendToEnd(input_.Current());
       }
       ScanAndPreload(preload_scanner_.get());
     }
   }

   // should_run_until_completion implies that we should not yield
   CHECK(!should_run_until_completion || !should_yield);
   return should_yield;
 }

 // Posts a DeferredPumpTokenizerIfPossible task on loading_task_runner_ and
 // marks the parser state as kScheduled, unless a pump is already scheduled.
 void HTMLDocumentParser::SchedulePumpTokenizer() {
   TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer");
   DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   DCHECK(!IsStopped());
   DCHECK(!InPumpSession());
   DCHECK(!task_runner_state_->ShouldComplete());

   // Nothing to do when the parser is already scheduled.
   if (task_runner_state_->IsScheduled())
     return;

   auto deferred_pump =
       WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                 WrapPersistent(this));
   loading_task_runner_->PostTask(FROM_HERE, std::move(deferred_pump));
   task_runner_state_->SetState(
       HTMLDocumentParserState::DeferredParserState::kScheduled);
 }

 // Ensures a deferred pump is posted and marks the state as
 // kScheduledWithEndIfDelayed. An already-scheduled pump is reused rather
 // than posting a second task.
 void HTMLDocumentParser::ScheduleEndIfDelayed() {
   TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed");
   DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   DCHECK(!IsStopped());
   DCHECK(!InPumpSession());
   DCHECK(!task_runner_state_->ShouldComplete());

   // Post a pump callback only when none is pending.
   const bool pump_already_scheduled = task_runner_state_->IsScheduled();
   if (!pump_already_scheduled) {
     auto deferred_pump =
         WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                   WrapPersistent(this));
     loading_task_runner_->PostTask(FROM_HERE, std::move(deferred_pump));
   }

   // If a pump was already scheduled, it's OK to just upgrade it to one which
   // calls EndIfDelayed afterwards.
   task_runner_state_->SetState(
       HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed);
 }

 // Wraps the current HTMLToken in an AtomicHTMLToken and passes it to the tree
 // builder, then makes sure the HTMLToken ends up cleared (provided token_
 // still exists afterwards).
 void HTMLDocumentParser::ConstructTreeFromHTMLToken() {
   DCHECK(!GetDocument()->IsPrefetchOnly());

   AtomicHTMLToken atomic_token(Token());

   // Record the first time the document is seen to have a body, i.e. the
   // header has been exited.
   if (!task_runner_state_->HaveExitedHeader() && GetDocument()->body()) {
     task_runner_state_->SetExitedHeader();
   }

   // token_ is cleared up front in case ConstructTree synchronously re-enters
   // the parser. kCharacter tokens are the exception: the AtomicHTMLToken
   // avoids copying the characters by pointing into the HTMLToken's buffer, so
   // clearing immediately is not possible. Fortunately, kCharacter tokens
   // can't cause us to re-enter the parser.
   //
   // FIXME: Stop clearing the token_ once we start running the parser off
   // the main thread or once we stop allowing synchronous JavaScript
   // execution from ParseAttribute.
   const bool is_character_token = Token().GetType() == HTMLToken::kCharacter;
   if (!is_character_token) {
     Token().Clear();
   }

   tree_builder_->ConstructTree(&atomic_token);
   CheckIfBlockingStylesheetAdded();

   // FIXME: ConstructTree may synchronously cause Document to be detached, in
   // which case token_ is gone and there is nothing left to clear.
   if (token_ && !Token().IsUninitialized()) {
     DCHECK_EQ(Token().GetType(), HTMLToken::kCharacter);
     Token().Clear();
   }
 }

 void HTMLDocumentParser::ConstructTreeFromCompactHTMLToken(
     const CompactHTMLToken& compact_token) {
   DCHECK(!GetDocument()->IsPrefetchOnly());
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   AtomicHTMLToken token(compact_token);
   tree_builder_->ConstructTree(&token);
   CheckIfBlockingStylesheetAdded();
 }

 // Returns true if the input stream has an insertion point, or if this parser
 // was created by script and has not yet seen the end of file.
 bool HTMLDocumentParser::HasInsertionPoint() {
   if (input_.HasInsertionPoint())
     return true;

   // FIXME: The wasCreatedByScript() branch here might not be fully correct. Our
   // model of the EOF character differs slightly from the one in the spec
   // because our treatment is uniform between network-sourced and script-sourced
   // input streams whereas the spec treats them differently.
   return WasCreatedByScript() && !input_.HaveSeenEndOfFile();
 }

 void HTMLDocumentParser::insert(const String& source) {
   if (IsStopped())
     return;

   TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length",
                source.length(), "parser", (void*)this);

   if (!tokenizer_) {
     DCHECK(!InPumpSession());
     DCHECK(have_background_parser_ || WasCreatedByScript());
     token_ = std::make_unique<HTMLToken>();
     tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
   }

   SegmentedString excluded_line_number_source(source);
   excluded_line_number_source.SetExcludeLineNumbers();
   input_.InsertAtCurrentInsertionPoint(excluded_line_number_source);

   // Pump the the tokenizer to build the document from the given insert point.
   // Should process everything available and not defer anything.
   ShouldCompleteScope should_complete(task_runner_state_);
   EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
   // Call EndIfDelayed manually at the end to maintain preload behaviour.
   PumpTokenizerIfPossible();

   if (IsPaused()) {
     // Check the document.write() output with a separate preload scanner as
     // the main scanner can't deal with insertions.
     if (!insertion_preload_scanner_) {
       insertion_preload_scanner_ =
           CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion);
     }
     insertion_preload_scanner_->AppendToEnd(source);
     if (preloader_) {
       ScanAndPreload(insertion_preload_scanner_.get());
     }
   }
   EndIfDelayed();
 }

 void HTMLDocumentParser::StartBackgroundParser() {
   TRACE_EVENT0("blink,loading", "HTMLDocumentParser::StartBackgroundParser");
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
   DCHECK(!IsStopped());
   DCHECK(CanParseAsynchronously());
   DCHECK(!have_background_parser_);
   DCHECK(GetDocument());
   have_background_parser_ = true;

   // Make sure that the viewport is up-to-date, so that the correct viewport
   // dimensions will be fed to the background parser and preload scanner.
   if (GetDocument()->Loader())
     GetDocument()->GetStyleEngine().UpdateViewport();

   std::unique_ptr<BackgroundHTMLParser::Configuration> config =
       std::make_unique<BackgroundHTMLParser::Configuration>();
   config->options = options_;
   config->parser = this;
   config->decoder = TakeDecoder();

   // The background parser is created on the main thread, but may otherwise
   // only be used from the parser thread.
   background_parser_ =
       BackgroundHTMLParser::Create(std::move(config), loading_task_runner_);
   // TODO(csharrison): This is a hack to initialize MediaValuesCached on the
   // correct thread. We should get rid of it.

   // TODO(domfarolino): Remove this once Priority Hints is no longer in Origin
   // Trial. This currently exists because the TokenPreloadScanner needs to know
   // the status of the Priority Hints Origin Trial, and has no way of figuring
   // this out on its own. See https://crbug.com/821464.
   bool priority_hints_origin_trial_enabled =
       RuntimeEnabledFeatures::PriorityHintsEnabled(
           GetDocument()->GetExecutionContext());

   background_parser_->Init(
       GetDocument()->Url(),
       std::make_unique<CachedDocumentParameters>(GetDocument()),
       MediaValuesCached::MediaValuesCachedData(*GetDocument()),
       priority_hints_origin_trial_enabled);
 }

 // Marks the background parser as gone and synchronously stops it. Requires
 // that a background parser currently exists.
 void HTMLDocumentParser::StopBackgroundParser() {
   DCHECK(CanParseAsynchronously());
   DCHECK(have_background_parser_);
   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

   have_background_parser_ = false;

   // Stop() is called directly rather than posted: lsan triggers on some
   // unittests if the task runner is used.
   background_parser_->Stop();
 }

 // Appends decoded |input_source| to the main-thread input stream and either
 // pumps the tokenizer or schedules a pump. Only used when not parsing
 // asynchronously. Does nothing if the parser is stopped.
 //
 // For prefetch-only documents the input is only run through the preload
 // scanner (when one exists) and is never parsed.
 void HTMLDocumentParser::Append(const String& input_source) {
   TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size",
                input_source.length(), "parser", (void*)this);

   if (IsStopped())
     return;

   // We should never reach this point if we're using a parser thread, as
   // appendBytes() will directly ship the data to the thread.
   DCHECK(!CanParseAsynchronously());

   const SegmentedString source(input_source);

   if (!preload_scanner_ && GetDocument()->Url().IsValid() &&
       (!task_runner_state_->IsSynchronous() ||
        GetDocument()->IsPrefetchOnly() || IsPaused())) {
     // If we're operating with synchronous, budgeted foreground HTML parsing
     // or using the background parser, need to create a preload scanner to
     // make sure that parser-blocking Javascript requests are dispatched in
     // plenty of time, which prevents unnecessary delays.
     // When parsing without a budget (e.g. for HTML fragment parsing), it's
     // additional overhead to scan the string unless the parser's already
     // paused whilst executing a script.
     preload_scanner_ =
         CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument);
   }

   if (GetDocument()->IsPrefetchOnly()) {
     // Do not prefetch if there is an appcache.
     if (GetDocument()->Loader()->GetResponse().AppCacheID() != 0)
       return;

     // The scanner is only created above when the document URL is valid, so it
     // can be absent here. Skip scanning in that case instead of dereferencing
     // a null scanner.
     if (preload_scanner_) {
       preload_scanner_->AppendToEnd(source);
       if (preloader_) {
         // TODO(Richard.Townsend@arm.com): add test coverage of this branch.
         // The crash in crbug.com/1166786 indicates that text documents are
         // being speculatively prefetched.
         ScanAndPreload(preload_scanner_.get());
       }
     }

     // Return after the preload scanner, do not actually parse the document.
     return;
   }
   if (preload_scanner_ && preloader_) {
     preload_scanner_->AppendToEnd(source);
     if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
         (IsPaused() || !task_runner_state_->HaveSeenFirstByte())) {
       // Should scan and preload if the parser's paused waiting for a resource,
       // or if we're starting a document for the first time (we want to at least
       // prefetch anything that's in the <head> section).
       ScanAndPreload(preload_scanner_.get());
     }
   }

   input_.AppendToEnd(source);
   task_runner_state_->SetHaveSeenFirstByte();

   if (InPumpSession()) {
     // We've gotten data off the network in a nested write. We don't want to
     // consume any more of the input stream now.  Do not worry.  We'll consume
     // this data in a less-nested write().
     return;
   }

   // Process the new data: deferred mode schedules a tokenizer pump (unless the
   // parser must complete now); every other mode pumps synchronously.
   if (task_runner_state_->GetMode() ==
           ParserSynchronizationPolicy::kAllowDeferredParsing &&
       !task_runner_state_->ShouldComplete()) {
     SchedulePumpTokenizer();
   } else {
     PumpTokenizerIfPossible();
   }
 }

 // Final teardown of parsing: stops the background parser if there is one,
 // tells the tree builder it is finished, drops the preloader and stops
 // parsing.
 void HTMLDocumentParser::end() {
   DCHECK(!IsDetached());
   DCHECK(!IsScheduledForUnpause());

   if (have_background_parser_) {
     StopBackgroundParser();
   }

   // Informs the rest of WebCore that parsing is really finished (and
   // deletes this).
   tree_builder_->Finished();

   // All preloads should be done by now.
   preloader_ = nullptr;

   DocumentParser::StopParsing();
 }

 // Runs the scripts that were waiting for parsing to finish and, unless the
 // script runner reports that this did not complete, ends parsing.
 void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() {
   DCHECK(IsStopping());
   // FIXME: It may not be correct to disable this for the background parser.
   // That means hasInsertionPoint() may not be correct in some cases.
   DCHECK(!HasInsertionPoint() || have_background_parser_);

   const bool scripts_still_pending =
       script_runner_ && !script_runner_->ExecuteScriptsWaitingForParsing();
   if (!scripts_still_pending)
     end();
 }

 // Returns true while ending must be postponed: inside a pump session, while
 // paused, while a script is executing, or while a pump is scheduled.
 bool HTMLDocumentParser::ShouldDelayEnd() const {
   if (InPumpSession() || IsPaused())
     return true;
   if (IsExecutingScript())
     return true;
   return task_runner_state_->IsScheduled();
 }

 // Called once no more data will arrive (finish()). Prepares to stop parsing
 // unless ending has to be delayed, in which case the delay is recorded in
 // end_was_delayed_ so that EndIfDelayed() can complete the job later.
 void HTMLDocumentParser::AttemptToEnd() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser",
                static_cast<void*>(this));
   DCHECK(task_runner_state_->ShouldAttemptToEndOnEOF());
   AttemptToEndForbiddenScope should_not_attempt_to_end(task_runner_state_);

   // We should only be in this state once after calling Finish. If we are
   // still waiting on something (e.g. an external script), future control
   // flow passes to EndIfDelayed.
   if (!ShouldDelayEnd()) {
     PrepareToStopParsing();
     return;
   }
   end_was_delayed_ = true;
 }

 // Completes an end that AttemptToEnd() had to postpone, provided the parser
 // is still attached and nothing delays the end any longer.
 void HTMLDocumentParser::EndIfDelayed() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser",
                static_cast<void*>(this));
   ShouldCompleteScope should_complete(task_runner_state_);
   EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);

   // If we've already been detached, don't bother ending.
   if (IsDetached())
     return;

   // Nothing to do unless an end is actually pending...
   if (!end_was_delayed_)
     return;
   // ...and the reasons for delaying it are gone.
   if (ShouldDelayEnd())
     return;

   end_was_delayed_ = false;
   PrepareToStopParsing();
 }

 // Signals that no more input will arrive. With a background parser the
 // finish request is posted to it; otherwise the input stream is marked as
 // ended and, unless a scheduled pump is left to handle it, an attempt is
 // made to end parsing right away.
 void HTMLDocumentParser::Finish() {
   // FIXME: We should DCHECK(!parser_stopped_) here, since it does not makes
   // sense to call any methods on DocumentParser once it's been stopped.
   // However, FrameLoader::Stop calls DocumentParser::Finish unconditionally.

   ShouldCompleteScope should_complete(task_runner_state_);
   EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
   Flush();
   if (IsDetached())
     return;

   // Empty documents never got an append() call, and thus have never started a
   // background parser. In those cases, we ignore CanParseAsynchronously() and
   // fall through to the synchronous case.
   if (have_background_parser_) {
     if (!input_.HaveSeenEndOfFile()) {
       input_.CloseWithoutMarkingEndOfFile();
     }
     loading_task_runner_->PostTask(
         FROM_HERE,
         WTF::Bind(&BackgroundHTMLParser::Finish, background_parser_));
     return;
   }

   if (!tokenizer_) {
     DCHECK(!token_);
     // We're finishing before receiving any data. Rather than booting up the
     // background parser just to spin it down, we finish parsing synchronously.
     token_ = std::make_unique<HTMLToken>();
     tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
   }

   // No more data will come off the network, so tell the input stream that the
   // end of file has been reached. finish() can be called more than once, if
   // the first time does not call end().
   if (!input_.HaveSeenEndOfFile()) {
     input_.MarkEndOfFile();
   }

   // If there's any deferred work remaining, signal that we want to end the
   // document once all work's complete.
   task_runner_state_->SetAttemptToEndOnEOF();
   const bool leave_to_scheduled_pump =
       task_runner_state_->IsScheduled() && !GetDocument()->IsPrefetchOnly();
   if (!leave_to_scheduled_pump)
     AttemptToEnd();
 }

 // Returns whether the script runner, if there is one, is currently executing
 // a script.
 bool HTMLDocumentParser::IsExecutingScript() const {
   return script_runner_ && script_runner_->IsExecutingScript();
 }

 // Defers to ScriptableDocumentParser, except that with asynchronous parsing
 // the answer is false whenever is_parsing_at_line_number_ is not set.
 bool HTMLDocumentParser::IsParsingAtLineNumber() const {
   if (CanParseAsynchronously() && !is_parsing_at_line_number_)
     return false;
   return ScriptableDocumentParser::IsParsingAtLineNumber();
 }

 // Returns the current line: taken from text_position_ when a background
 // parser exists, otherwise from the main-thread input stream.
 OrdinalNumber HTMLDocumentParser::LineNumber() const {
   if (!have_background_parser_)
     return input_.Current().CurrentLine();

   return text_position_.line_;
 }

 // Returns the current line/column: text_position_ when a background parser
 // exists, otherwise the position of the main-thread input stream.
 TextPosition HTMLDocumentParser::GetTextPosition() const {
   if (have_background_parser_) {
     return text_position_;
   }

   const SegmentedString& stream = input_.Current();
   const OrdinalNumber current_line = stream.CurrentLine();
   const OrdinalNumber current_column = stream.CurrentColumn();
   return TextPosition(current_line, current_column);
 }

 // Returns true if the tree builder or the script runner holds a
 // parser-blocking script, or if the reentry permit's parser pause flag is set.
 bool HTMLDocumentParser::IsWaitingForScripts() const {
   // When the TreeBuilder encounters a </script> tag, it returns to the
   // HTMLDocumentParser, where the script is transferred from the tree builder
   // to the script runner. The script runner holds the script until it is
   // loaded and run. For all of that time we count as "waiting for a script",
   // and thus run the preload scanner as well as delay completion of parsing.
   const bool tree_builder_has_blocking_script =
       tree_builder_->HasParserBlockingScript();
   const bool script_runner_has_blocking_script =
       script_runner_ && script_runner_->HasParserBlockingScript();

   // Since the parser is paused while a script runner has a blocking script, it
   // should never be possible to end up with both objects holding a blocking
   // script.
   DCHECK(
       !(tree_builder_has_blocking_script && script_runner_has_blocking_script));

   // If either object has a blocking script, the parser should be paused.
   if (tree_builder_has_blocking_script || script_runner_has_blocking_script)
     return true;
   return reentry_permit_->ParserPauseFlag();
 }

 // Restarts parsing once whatever paused the parser has completed.
 //
 // This function runs after a parser-blocking script has completed. There are
 // four possible cases:
 // 1) Parsing with kForceSynchronousParsing, where there is no background
 //    parser and a tokenizer_'s defined.
 // 2) Parsing with kAllowAsynchronousParsing, without a background parser. In
 //    this case, the document is usually being completed or parsing has
 //    otherwise stopped.
 // 3) Parsing with kAllowAsynchronousParsing with a background parser. In this
 //    case, need to add any pending speculations to the document.
 // 4) Parsing with kAllowDeferredParsing, with a tokenizer_.
 void HTMLDocumentParser::ResumeParsingAfterPause() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser",
                static_cast<void*>(this));
   DCHECK(!IsExecutingScript());
   DCHECK(!IsPaused());

   // A blocking stylesheet may have been added in the meantime, which would
   // pause the parser again.
   CheckIfBlockingStylesheetAdded();
   if (IsStopped() || IsPaused())
     return;

   // Case 3): background parser present.
   if (have_background_parser_) {
     if (last_chunk_before_pause_) {
       // We paused in the middle of processing a token chunk; deal with that
       // before starting to pump.
       ValidateSpeculations(std::move(last_chunk_before_pause_));
       DCHECK(!last_chunk_before_pause_);
       PumpPendingSpeculations();
     } else if (!IsScheduledForUnpause()) {
       // Otherwise start pumping, unless an unpause is already scheduled.
       PumpPendingSpeculations();
     }
     return;
   }

   insertion_preload_scanner_.reset();

   // Case 2): kAllowAsynchronousParsing, no background parser available
   // (indicating possible Document shutdown).
   if (!tokenizer_) {
     EndIfDelayed();
     return;
   }

   // Case 1) or 4): kForceSynchronousParsing, kAllowDeferredParsing.
   // kForceSynchronousParsing must pump the tokenizer synchronously, otherwise
   // it can be deferred.
   const bool can_defer_pump =
       task_runner_state_->GetMode() == kAllowDeferredParsing &&
       !task_runner_state_->ShouldComplete() && !InPumpSession();
   if (can_defer_pump) {
     SchedulePumpTokenizer();
     return;
   }

   ShouldCompleteScope should_complete(task_runner_state_);
   PumpTokenizerIfPossible();
 }

 // Feeds the current main-thread input to the preload scanner and runs a scan.
 // Both the scanner and the preloader must already exist.
 void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() {
   TRACE_EVENT1(
       "blink",
       "HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan",
       "parser", static_cast<void*>(this));
   DCHECK(preload_scanner_);
   DCHECK(preloader_);

   preload_scanner_->AppendToEnd(input_.Current());
   ScanAndPreload(preload_scanner_.get());
 }

 // Reacts to a script having finished loading. A stopped parser ignores the
 // notification; a stopping parser tries to run deferred scripts and end;
 // otherwise the scripts waiting for load are executed and parsing resumes
 // unless the parser is paused afterwards.
 void HTMLDocumentParser::NotifyScriptLoaded() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser",
                static_cast<void*>(this));
   DCHECK(script_runner_);
   DCHECK(!IsExecutingScript());

   scheduler::CooperativeSchedulingManager::AllowedStackScope
       allowed_stack_scope(scheduler::CooperativeSchedulingManager::Instance());

   if (IsStopped())
     return;

   if (IsStopping()) {
     AttemptToRunDeferredScriptsAndEnd();
     return;
   }

   script_runner_->ExecuteScriptsWaitingForLoad();
   if (IsPaused())
     return;
   ResumeParsingAfterPause();
 }

 // Clears the waiting-for-stylesheets state, runs the scripts that were
 // waiting for resources and resumes parsing unless the parser is paused
 // afterwards. Ignored when the parser is stopped.
 void HTMLDocumentParser::ExecuteScriptsWaitingForResources() {
   TRACE_EVENT0("blink",
                "HTMLDocumentParser::ExecuteScriptsWaitingForResources");
   if (IsStopped())
     return;

   DCHECK(GetDocument()->IsScriptExecutionReady());

   // No longer waiting on stylesheets at this point.
   is_waiting_for_stylesheets_ = false;

   // Document only calls this when the Document owns the DocumentParser so this
   // will not be called in the DocumentFragment case.
   DCHECK(script_runner_);
   script_runner_->ExecuteScriptsWaitingForResources();
   if (IsPaused())
     return;
   ResumeParsingAfterPause();
 }

 // Records that a pending parser-blocking stylesheet was added.
 void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() {
   // In-body CSS doesn't block painting, but the parser has to pause so that
   // the DOM doesn't include elements that may depend on the CSS for style.
   // A stylesheet can be added and removed again while a single token is being
   // parsed, so parsing is not blocked here. Only the fact that a sheet was
   // added is tracked, in case it persists beyond that token.
   added_pending_parser_blocking_stylesheet_ = true;
 }

 // Clears the record of a pending parser-blocking stylesheet.
 void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() {
   // Only the stylesheet flag is toggled here (mostly for synchronous sheets).
   // The document also calls into executeScriptsWaitingForResources, and that
   // is where the parser re-starts. Resuming here as well would mean two
   // resume attempts, which could cause state machine issues.
   added_pending_parser_blocking_stylesheet_ = false;
 }

 // Converts a recorded "blocking stylesheet was added" notification into the
 // |is_waiting_for_stylesheets_| state, consuming the notification. Does
 // nothing when no such stylesheet addition is currently recorded.
 void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() {
   if (!added_pending_parser_blocking_stylesheet_)
     return;

   is_waiting_for_stylesheets_ = true;
   added_pending_parser_blocking_stylesheet_ = false;
 }

 void HTMLDocumentParser::ParseDocumentFragment(
     const String& source,
     DocumentFragment* fragment,
     Element* context_element,
     ParserContentPolicy parser_content_policy) {
   auto* parser = MakeGarbageCollected<HTMLDocumentParser>(
       fragment, context_element, parser_content_policy);
   parser->Append(source);
   parser->Finish();
   // Allows ~DocumentParser to assert it was detached before destruction.
   parser->Detach();
 }

 // Feeds |length| raw bytes starting at |data| to the parser. When parsing
 // asynchronously, the bytes are copied and posted to the background parser
 // (which is started on first use); otherwise they go through the regular
 // DecodedDataDocumentParser path. Empty input and a stopped parser are no-ops.
 void HTMLDocumentParser::AppendBytes(const char* data, size_t length) {
   TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size",
                static_cast<unsigned>(length), "parser",
                static_cast<void*>(this));

   DCHECK(Thread::MainThread()->IsCurrentThread());

   if (!length)
     return;
   if (IsStopped())
     return;

   if (!CanParseAsynchronously()) {
     DecodedDataDocumentParser::AppendBytes(data, length);
     return;
   }

   if (!have_background_parser_)
     StartBackgroundParser();

   // Copy the bytes into a buffer whose ownership is handed to the posted
   // task; the task receives this copy rather than |data| itself.
   auto bytes_copy = std::make_unique<Vector<char>>(length);
   memcpy(bytes_copy->data(), data, length);

   loading_task_runner_->PostTask(
       FROM_HERE,
       WTF::Bind(&BackgroundHTMLParser::AppendRawBytesFromMainThread,
                 background_parser_, std::move(bytes_copy)));
 }

 // Flushes pending data. In synchronous mode this defers to
 // DecodedDataDocumentParser::Flush(). In asynchronous mode the flush is posted
 // to the background parser, unless no background parser was ever started, in
 // which case the parser permanently falls back to synchronous parsing.
 void HTMLDocumentParser::Flush() {
   TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser",
                static_cast<void*>(this));
   // Without a decoder, no data was ever received, so there is nothing to do.
   if (IsDetached() || NeedsDecoder())
     return;

   if (!CanParseAsynchronously()) {
     DecodedDataDocumentParser::Flush();
     return;
   }

   if (have_background_parser_) {
     loading_task_runner_->PostTask(
         FROM_HERE, WTF::Bind(&BackgroundHTMLParser::Flush, background_parser_));
     return;
   }

   // Flush() can be called without any preceding AppendBytes() call, so the
   // background parser was never started. Switch to synchronous parsing: create
   // the token and tokenizer, then flush on this thread.
   can_parse_asynchronously_ = false;
   token_ = std::make_unique<HTMLToken>();
   tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
   DecodedDataDocumentParser::Flush();
 }

 // Installs |decoder| (must be non-null) via the base class. If a background
 // parser is already running, the decoder is then taken back out and handed
 // over to it through a posted task.
 void HTMLDocumentParser::SetDecoder(
     std::unique_ptr<TextResourceDecoder> decoder) {
   DCHECK(decoder);
   DecodedDataDocumentParser::SetDecoder(std::move(decoder));

   if (!have_background_parser_)
     return;

   loading_task_runner_->PostTask(
       FROM_HERE, WTF::Bind(&BackgroundHTMLParser::SetDecoder,
                            background_parser_, TakeDecoder()));
 }

 void HTMLDocumentParser::DocumentElementAvailable() {
   TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable");
   Document* document = GetDocument();
   DCHECK(document);
   DCHECK(document->documentElement());
   Element* documentElement = GetDocument()->documentElement();
   if (documentElement->hasAttribute(u"\u26A1") ||
       documentElement->hasAttribute("amp") ||
       documentElement->hasAttribute("i-amphtml-layout")) {
     // The DocumentLoader fetches a main resource and handles the result.
     // But it may not be available if JavaScript appends HTML to the page later
     // in the page's lifetime. This can happen both from in-page JavaScript and
     // from extensions. See example callstacks linked from crbug.com/931330.
     if (document->Loader()) {
       document->Loader()->DidObserveLoadingBehavior(
           kLoadingBehaviorAmpDocumentLoaded);
     }
   }
   if (preloader_)
     FetchQueuedPreloads();
 }

 // Builds a preload scanner of kind |scanner_type| from the parser options and
 // the current document's URL, cached document parameters and media values.
 std::unique_ptr<HTMLPreloadScanner> HTMLDocumentParser::CreatePreloadScanner(
     TokenPreloadScanner::ScannerType scanner_type) {
   Document* document = GetDocument();
   return std::make_unique<HTMLPreloadScanner>(
       options_, document->Url(),
       std::make_unique<CachedDocumentParameters>(document),
       MediaValuesCached::MediaValuesCachedData(*document), scanner_type);
 }

 // Runs |scanner| over its pending input, queues the preload requests it
 // produced and fetches them. When the document has a loader and the parser
 // mode is kAllowDeferredParsing, this also refreshes the viewport (if the
 // scan yielded a viewport description) and dispatches link header preloads
 // if that dispatch is still needed.
 void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) {
   TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload");
   DCHECK(preloader_);
   base::Optional<ViewportDescription> viewport_description;
   bool seen_csp_meta_tag = false;
   PreloadRequestStream scanned_requests =
       scanner->Scan(GetDocument()->ValidBaseElementURL(), &viewport_description,
                     seen_csp_meta_tag);

   const bool can_dispatch_header_preloads =
       GetDocument()->Loader() &&
       task_runner_state_->GetMode() == kAllowDeferredParsing;
   if (can_dispatch_header_preloads) {
     // Bring the viewport up to date first so that the correct viewport
     // dimensions are fed to the background parser and the preload scanner.
     if (viewport_description.has_value())
       GetDocument()->GetStyleEngine().UpdateViewport();

     if (task_runner_state_->NeedsLinkHeaderPreloadsDispatch()) {
       if (auto* signed_exchange_manager =
               GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
         TRACE_EVENT0("blink",
                      "HTMLDocumentParser::DispatchSignedExchangeManager");
         // Link header preloads for prefetched signed exchanges are not started
         // until StartPrefetchedLinkHeaderPreloads() is called. See the header
         // comment of PrefetchedSignedExchangeManager.
         signed_exchange_manager->StartPrefetchedLinkHeaderPreloads();
       } else {
         TRACE_EVENT0("blink", "HTMLDocumentParser::DispatchLinkHeaderPreloads");
         GetDocument()->Loader()->DispatchLinkHeaderPreloads(
             base::OptionalOrNullptr(viewport_description),
             PreloadHelper::kOnlyLoadMedia);
       }
       task_runner_state_->DispatchedLinkHeaderPreloads();
     }
   }

   task_runner_state_->SetSeenCSPMetaTag(seen_csp_meta_tag);
   for (auto& scanned_request : scanned_requests)
     queued_preloads_.push_back(std::move(scanned_request));
   FetchQueuedPreloads();
 }

 // Hands all queued preload requests to the preloader. In asynchronous mode
 // the requests stay queued while a CSP meta token is pending or while the
 // document element does not exist yet.
 void HTMLDocumentParser::FetchQueuedPreloads() {
   DCHECK(preloader_);
   TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads");

   if (CanParseAsynchronously() &&
       (pending_csp_meta_token_ || !GetDocument()->documentElement())) {
     return;
   }

   if (queued_preloads_.IsEmpty())
     return;
   preloader_->TakeAndPreload(queued_preloads_);
 }

 }  // namespace blink