// blob: 97ebb884b7fd9ea362524fa7714722020058f951 [file] [log] [blame]
/*
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_DOCUMENT_PARSER_H_
#define THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_DOCUMENT_PARSER_H_
#include <memory>
#include "base/memory/scoped_refptr.h"
#include "base/memory/weak_ptr.h"
#include "base/single_thread_task_runner.h"
#include "third_party/blink/renderer/core/core_export.h"
#include "third_party/blink/renderer/core/dom/parser_content_policy.h"
#include "third_party/blink/renderer/core/dom/scriptable_document_parser.h"
#include "third_party/blink/renderer/core/html/parser/background_html_input_stream.h"
#include "third_party/blink/renderer/core/html/parser/html_input_stream.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_options.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_reentry_permit.h"
#include "third_party/blink/renderer/core/html/parser/html_preload_scanner.h"
#include "third_party/blink/renderer/core/html/parser/html_token.h"
#include "third_party/blink/renderer/core/html/parser/html_tokenizer.h"
#include "third_party/blink/renderer/core/html/parser/html_tree_builder_simulator.h"
#include "third_party/blink/renderer/core/html/parser/parser_synchronization_policy.h"
#include "third_party/blink/renderer/core/html/parser/preload_request.h"
#include "third_party/blink/renderer/core/html/parser/text_resource_decoder.h"
#include "third_party/blink/renderer/core/page/viewport_description.h"
#include "third_party/blink/renderer/core/script/html_parser_script_runner_host.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
#include "third_party/blink/renderer/platform/wtf/deque.h"
#include "third_party/blink/renderer/platform/wtf/text/text_position.h"
namespace blink {
// Forward declarations for types this header only names through pointers,
// references, or smart-pointer template arguments. Kept in alphabetical order.
class BackgroundHTMLParser;
class CompactHTMLToken;
class Document;
class DocumentEncodingData;
class DocumentFragment;
class Element;
class HTMLDocument;
class HTMLDocumentParserState;
class HTMLParserMetrics;
class HTMLParserScheduler;
class HTMLParserScriptRunner;
class HTMLPreloadScanner;
class HTMLResourcePreloader;
class HTMLTreeBuilder;
// Selects whether prefetches/preloads are performed for the document type
// being parsed.
enum ParserPrefetchPolicy {
  // Prefetches/preloads should be performed for this document type.
  kAllowPrefetching,
  // Prefetches must not be performed for this document type.
  kDisallowPrefetching
};
// TODO(https://crbug.com/1049898): These are only exposed to make it possible
// to delete an expired histogram. The test should be rewritten to test at a
// different level, so it won't have to make assertions about internal state.
//
// Test-only free functions. Judging by their names, the first resets and the
// second returns a count of discarded tokens; the counter itself is defined
// outside this header (NOTE(review): confirm against the .cc file).
void CORE_EXPORT ResetDiscardedTokenCountForTesting();
size_t CORE_EXPORT GetDiscardedTokenCountForTesting();
// Parser for HTML documents and document fragments. Extends
// ScriptableDocumentParser and privately implements
// HTMLParserScriptRunnerHost.
//
// NOTE(review): apart from TokenizedChunk, the scalar and raw-pointer data
// members at the bottom of this class have no default member initializers;
// they are presumably set by the constructors, which are defined outside this
// header -- confirm.
class CORE_EXPORT HTMLDocumentParser : public ScriptableDocumentParser,
                                       private HTMLParserScriptRunnerHost {
  USING_PRE_FINALIZER(HTMLDocumentParser, Dispose);

 public:
  // Constructs a parser for a whole HTMLDocument.
  HTMLDocumentParser(HTMLDocument&,
                     ParserSynchronizationPolicy,
                     ParserPrefetchPolicy prefetch_policy = kAllowPrefetching);
  // Constructs a parser for a DocumentFragment with the given context element.
  HTMLDocumentParser(DocumentFragment*,
                     Element* context_element,
                     ParserContentPolicy,
                     ParserPrefetchPolicy prefetch_policy = kAllowPrefetching);
  ~HTMLDocumentParser() override;
  void Trace(Visitor*) const override;

  // Registered as this class's pre-finalizer (see USING_PRE_FINALIZER above).
  // TODO(alexclarke): Remove when background parser goes away.
  void Dispose();

  // Exposed for HTMLParserScheduler
  void ResumeParsingAfterYield();

  static void ParseDocumentFragment(
      const String&,
      DocumentFragment*,
      Element* context_element,
      ParserContentPolicy = kAllowScriptingContent);

  // Exposed for testing. Gives tests access to the privately inherited
  // HTMLParserScriptRunnerHost interface.
  HTMLParserScriptRunnerHost* AsHTMLParserScriptRunnerHostForTesting() {
    return this;
  }

  // Returns true if any tokenizer pumps / end if delayed / asynchronous work is
  // scheduled. Exposed so that tests can check that the parser's exited in a
  // good state.
  bool HasPendingWorkScheduledForTesting() const;

  // Non-owning access to the tokenizer; null when |tokenizer_| is unset.
  HTMLTokenizer* Tokenizer() const { return tokenizer_.get(); }

  TextPosition GetTextPosition() const final;
  bool IsParsingAtLineNumber() const final;
  OrdinalNumber LineNumber() const final;

  // Non-owning access to the reentry permit; null when |reentry_permit_| is
  // unset.
  HTMLParserReentryPermit* ReentryPermit() { return reentry_permit_.get(); }

  // Bundle of tokens, preload requests and parser-state snapshots. Instances
  // are passed by unique_ptr to EnqueueTokenizedChunk() and are held in
  // |speculations_| and |last_chunk_before_pause_|.
  struct TokenizedChunk {
    USING_FAST_MALLOC(TokenizedChunk);

   public:
    // Sentinel value of |pending_csp_meta_token_index| meaning that |tokens|
    // contains no <meta> csp tag.
    static constexpr int kNoPendingToken = -1;

    CompactHTMLTokenStream tokens;
    PreloadRequestStream preloads;
    base::Optional<ViewportDescription> viewport;
    HTMLTokenizer::State tokenizer_state;
    HTMLTreeBuilderSimulator::State tree_builder_state;
    HTMLInputCheckpoint input_checkpoint;
    TokenPreloadScannerCheckpoint preload_scanner_checkpoint;
    // Explicitly defaulted so a freshly constructed chunk never carries an
    // indeterminate flag.
    bool starting_script = false;
    // Index into |tokens| of the last <meta> csp tag in |tokens|. Preloads will
    // be deferred until this token is parsed. Will be kNoPendingToken if there
    // are no csp tokens. Defaults to the sentinel so that an unfilled chunk is
    // never mistaken for one whose csp token sits at index 0.
    int pending_csp_meta_token_index = kNoPendingToken;
  };

  // Takes ownership of a TokenizedChunk.
  void EnqueueTokenizedChunk(std::unique_ptr<TokenizedChunk>);
  void DidReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&);

  void AppendBytes(const char* bytes, size_t length) override;
  void Flush() final;
  void SetDecoder(std::unique_ptr<TextResourceDecoder>) final;

  // Test hook: overwrites |max_tokenization_budget_| with |budget|.
  void SetMaxTokenizationBudgetForTesting(int budget) {
    max_tokenization_budget_ = budget;
  }

 protected:
  void insert(const String&) final;
  void Append(const String&) override;
  void Finish() final;

  // Raw pointer to the tree builder held in |tree_builder_|.
  HTMLTreeBuilder* TreeBuilder() const { return tree_builder_.Get(); }

  void ForcePlaintextForTextDocument();

 private:
  // Private constructor taking every policy explicitly.
  // NOTE(review): presumably the common delegate of the two public
  // constructors -- confirm in the .cc file.
  HTMLDocumentParser(Document&,
                     ParserContentPolicy,
                     ParserSynchronizationPolicy,
                     ParserPrefetchPolicy);

  // Result type of CanTakeNextToken().
  enum NextTokenStatus { NoTokens, HaveTokens, HaveTokensAfterScript };

  // DocumentParser
  void Detach() final;
  bool HasInsertionPoint() final;
  void PrepareToStopParsing() final;
  void StopParsing() final;

  // True while the parser is waiting for scripts or for stylesheets.
  bool IsPaused() const {
    return IsWaitingForScripts() || is_waiting_for_stylesheets_;
  }
  bool IsWaitingForScripts() const final;
  bool IsExecutingScript() const final;
  void ExecuteScriptsWaitingForResources() final;
  void DidAddPendingParserBlockingStylesheet() final;
  void DidLoadAllPendingParserBlockingStylesheets() final;
  void CheckIfBlockingStylesheetAdded();
  void DocumentElementAvailable() override;

  // HTMLParserScriptRunnerHost
  void NotifyScriptLoaded() final;
  HTMLInputStream& InputStream() final { return input_; }
  // True only when a preload scanner exists and the parser is not allowed to
  // parse asynchronously.
  bool HasPreloadScanner() const final {
    return preload_scanner_.get() && !CanParseAsynchronously();
  }
  void AppendCurrentInputStreamToPreloadScannerAndScan() final;

  void StartBackgroundParser();
  void StopBackgroundParser();
  void ValidateSpeculations(std::unique_ptr<TokenizedChunk> last_chunk);
  void DiscardSpeculationsAndResumeFrom(
      std::unique_ptr<TokenizedChunk> last_chunk,
      std::unique_ptr<HTMLToken>,
      std::unique_ptr<HTMLTokenizer>);
  size_t ProcessTokenizedChunkFromBackgroundParser(
      std::unique_ptr<TokenizedChunk>,
      bool*);
  void PumpPendingSpeculations();

  NextTokenStatus CanTakeNextToken();
  bool PumpTokenizer();
  void PumpTokenizerIfPossible();
  void DeferredPumpTokenizerIfPossible();
  void SchedulePumpTokenizer();
  void ScheduleEndIfDelayed();
  void ConstructTreeFromHTMLToken();
  void ConstructTreeFromCompactHTMLToken(const CompactHTMLToken&);

  void RunScriptsForPausedTreeBuilder();
  void ResumeParsingAfterPause();

  // AttemptToEnd stops document parsing if nothing's currently delaying the end
  // of parsing.
  void AttemptToEnd();
  // EndIfDelayed stops document parsing if AttemptToEnd was previously delayed,
  // or if there are no scripts/resources/nested pumps delaying the end of
  // parsing.
  void EndIfDelayed();
  void AttemptToRunDeferredScriptsAndEnd();
  void end();

  bool CanParseAsynchronously() const { return can_parse_asynchronously_; }
  bool IsParsingFragment() const;
  bool IsScheduledForUnpause() const;
  // True while at least one pump session is active, i.e. the nesting level is
  // non-zero.
  bool InPumpSession() const { return pump_session_nesting_level_ > 0; }
  // ShouldDelayEnd assesses whether any resources, scripts or nested pumps are
  // delaying the end of parsing.
  bool ShouldDelayEnd() const;

  std::unique_ptr<HTMLPreloadScanner> CreatePreloadScanner(
      TokenPreloadScanner::ScannerType);

  // Let the given HTMLPreloadScanner scan the input it has, and then preload
  // resources using the resulting PreloadRequests and |preloader_|.
  void ScanAndPreload(HTMLPreloadScanner*);
  void FetchQueuedPreloads();

  // Dereferences |token_|; callers must ensure it is set.
  HTMLToken& Token() { return *token_; }

  HTMLParserOptions options_;
  HTMLInputStream input_;
  scoped_refptr<HTMLParserReentryPermit> reentry_permit_;

  std::unique_ptr<HTMLToken> token_;
  std::unique_ptr<HTMLTokenizer> tokenizer_;
  Member<HTMLParserScriptRunner> script_runner_;
  Member<HTMLTreeBuilder> tree_builder_;

  std::unique_ptr<HTMLPreloadScanner> preload_scanner_;
  // A scanner used only for input provided to the insert() method.
  std::unique_ptr<HTMLPreloadScanner> insertion_preload_scanner_;

  scoped_refptr<base::SingleThreadTaskRunner> loading_task_runner_;
  Member<HTMLParserScheduler> parser_scheduler_;
  TextPosition text_position_;

  // FIXME: last_chunk_before_pause_, tokenizer_, token_, and input_ should be
  // combined into a single state object so they can be set and cleared together
  // and passed between threads together.
  std::unique_ptr<TokenizedChunk> last_chunk_before_pause_;
  Deque<std::unique_ptr<TokenizedChunk>> speculations_;
  // Using WeakPtr for GarbageCollected is discouraged. But in this case this is
  // ok because HTMLDocumentParser guarantees to revoke all WeakPtrs in the pre
  // finalizer.
  base::WeakPtr<BackgroundHTMLParser> background_parser_;
  Member<HTMLResourcePreloader> preloader_;
  Member<HTMLDocumentParserState> task_runner_state_;
  PreloadRequestStream queued_preloads_;

  // Metrics gathering and reporting
  std::unique_ptr<HTMLParserMetrics> metrics_reporter_;
  // A timer for how long we are inactive after yielding
  std::unique_ptr<base::ElapsedTimer> yield_timer_;

  // If this is non-null, then there is a meta CSP token somewhere in the
  // speculation buffer. Preloads will be deferred until a token matching this
  // pointer is parsed and the CSP policy is applied. Note that this pointer
  // tracks the *last* meta token in the speculation buffer, so it overestimates
  // how long to defer preloads. This is for simplicity, as the alternative
  // would require keeping track of token positions of preload requests.
  CompactHTMLToken* pending_csp_meta_token_;

  // Overridable in tests through SetMaxTokenizationBudgetForTesting().
  int max_tokenization_budget_;
  bool can_parse_asynchronously_;
  bool end_was_delayed_;
  bool have_background_parser_;
  // Non-zero while inside a pump session; see InPumpSession().
  unsigned pump_session_nesting_level_;
  unsigned pump_speculations_session_nesting_level_;
  bool is_parsing_at_line_number_;
  bool tried_loading_link_headers_;
  bool added_pending_parser_blocking_stylesheet_;
  // Contributes to IsPaused().
  bool is_waiting_for_stylesheets_;

  ThreadScheduler* scheduler_;
};
} // namespace blink
#endif // THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_DOCUMENT_PARSER_H_