/*
 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_TOKEN_H_
#define THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_TOKEN_H_

#include <memory>
#include <utility>

#include "base/macros.h"
#include "third_party/blink/renderer/core/dom/attribute.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h"
#include "third_party/blink/renderer/platform/wtf/forward.h"

namespace blink {

// Payload of a DOCTYPE token: the public and system identifiers, a flag for
// each recording whether it has been set, and the force-quirks flag.
// Instances are neither copyable nor copy-assignable.
class DoctypeData {
  USING_FAST_MALLOC(DoctypeData);

 public:
  DoctypeData() = default;
  DoctypeData(const DoctypeData&) = delete;
  DoctypeData& operator=(const DoctypeData&) = delete;

  // True once the corresponding identifier has been set (possibly to empty).
  bool has_public_identifier_ = false;
  bool has_system_identifier_ = false;
  // Identifier contents, stored as UTF-16 code units.
  WTF::Vector<UChar> public_identifier_;
  WTF::Vector<UChar> system_identifier_;
  // Starts out false; HTMLToken::SetForceQuirks() turns it on.
  bool force_quirks_ = false;
};

// Returns a pointer to the first element of |attributes| whose name matches
// |name|, or nullptr when no element matches. The returned pointer refers
// into |attributes|' own storage.
static inline Attribute* FindAttributeInVector(Vector<Attribute>& attributes,
                                               const QualifiedName& name) {
  for (Attribute& candidate : attributes) {
    if (candidate.GetName().Matches(name))
      return &candidate;
  }
  return nullptr;
}

// Holds the state of a single token while it is being assembled: its type,
// its source range, the accumulated character data and, depending on the
// type, its attribute list or its DOCTYPE data. A token starts out as
// kUninitialized, is switched to a concrete type by one of the Begin*() /
// Make*() / Ensure*() methods, and is recycled with Clear().
class HTMLToken {
  USING_FAST_MALLOC(HTMLToken);

 public:
  enum TokenType {
    kUninitialized,
    DOCTYPE,
    kStartTag,
    kEndTag,
    kComment,
    kCharacter,
    kEndOfFile,
  };

  // One attribute of a start/end tag token: name, value, and the source
  // ranges of both.
  class Attribute {
    DISALLOW_NEW();

   public:
    // A pair of integer offsets. |start| and |end| are NOT initialized on
    // construction; callers assign them explicitly.
    class Range {
      DISALLOW_NEW();

     public:
      static constexpr int kInvalidOffset = -1;

      // Marks both bounds as invalid so that the Check*() helpers can catch
      // use of a range that was never set. This only happens when
      // DCHECK_IS_ON(); otherwise the call is a no-op and the previous
      // values are left in place.
      inline void Clear() {
#if DCHECK_IS_ON()
        start = kInvalidOffset;
        end = kInvalidOffset;
#endif
      }

      // Check Range instance that is actively being parsed: |start| must
      // have been set to a non-negative offset.
      inline void CheckValidStart() const {
        DCHECK_NE(start, kInvalidOffset);
        DCHECK_GE(start, 0);
      }

      // Check Range instance which finished parse: both bounds are set,
      // non-negative, and ordered.
      inline void CheckValid() const {
        CheckValidStart();
        DCHECK_NE(end, kInvalidOffset);
        DCHECK_GE(end, 0);
        DCHECK_LE(start, end);
      }

      int start;
      int end;
    };

    // Returns the name as a newly created AtomicString.
    AtomicString GetName() const { return AtomicString(name_); }
    String NameAttemptStaticStringCreation() const {
      return AttemptStaticStringCreation(name_, kLikely8Bit);
    }
    const Vector<UChar, 32>& NameAsVector() const { return name_; }
    const Vector<UChar, 32>& ValueAsVector() const { return value_; }

    void AppendToName(UChar c) { name_.push_back(c); }

    scoped_refptr<StringImpl> Value8BitIfNecessary() const {
      return StringImpl::Create8BitIfPossible(value_);
    }
    // Returns the value as a newly created String.
    String Value() const { return String(value_); }

    void AppendToValue(UChar c) { value_.push_back(c); }
    void AppendToValue(const String& value) { value.AppendTo(value_); }
    void ClearValue() { value_.clear(); }

    const Range& NameRange() const { return name_range_; }
    const Range& ValueRange() const { return value_range_; }
    Range& MutableNameRange() { return name_range_; }
    Range& MutableValueRange() { return value_range_; }

   private:
    Vector<UChar, 32> name_;
    Vector<UChar, 32> value_;
    Range name_range_;
    Range value_range_;
  };

  using AttributeList = Vector<Attribute, 10>;

  // By using an inline capacity of 256, we avoid spilling over into a malloced
  // buffer approximately 99% of the time based on a non-scientific browse
  // around a number of popular web sites on 23 May 2013.
  using DataVector = Vector<UChar, 256>;

  HTMLToken() { Clear(); }

  // Returns the token to the kUninitialized state so it can be reused.
  // Resets the type, the range start, the base offset, the character data and
  // the 8-bit tracking accumulator. Attributes and the self-closing flag are
  // reset by BeginStartTag()/BeginEndTag() instead.
  void Clear() {
    type_ = kUninitialized;
    range_.Clear();
    range_.start = 0;
    base_offset_ = 0;
    // Don't call Vector::clear() as that would destroy the
    // alloced VectorBuffer. If the innerHTML'd content has
    // two 257 character text nodes in a row, we'll needlessly
    // thrash malloc. When we finally finish the parse the
    // HTMLToken will be destroyed and the VectorBuffer released.
    data_.Shrink(0);
    or_all_data_ = 0;
  }

  bool IsUninitialized() const { return type_ == kUninitialized; }
  TokenType GetType() const { return type_; }

  void MakeEndOfFile() {
    DCHECK_EQ(type_, kUninitialized);
    type_ = kEndOfFile;
  }

  // Range and offset methods exposed for HTMLSourceTracker and
  // HTMLViewSourceParser.
  int StartIndex() const { return range_.start; }
  int EndIndex() const { return range_.end; }

  // Sets the offset that is subtracted from every absolute offset passed to
  // end() and to the Begin/End attribute range methods.
  void SetBaseOffset(int offset) { base_offset_ = offset; }

  // Records the end of the token, stored relative to the base offset.
  void end(int end_offset) { range_.end = end_offset - base_offset_; }

  const DataVector& Data() const {
    DCHECK(type_ == kCharacter || type_ == kComment || type_ == kStartTag ||
           type_ == kEndTag);
    return data_;
  }

  // True when the OR of every code unit recorded in |or_all_data_| fits in
  // 8 bits. Only the UChar append paths update that accumulator.
  bool IsAll8BitData() const { return (or_all_data_ <= 0xff); }

  const DataVector& GetName() const {
    DCHECK(type_ == kStartTag || type_ == kEndTag || type_ == DOCTYPE);
    return data_;
  }

  void AppendToName(UChar character) {
    DCHECK(type_ == kStartTag || type_ == kEndTag || type_ == DOCTYPE);
    DCHECK(character);
    data_.push_back(character);
    or_all_data_ |= character;
  }

  /* DOCTYPE Tokens */

  bool ForceQuirks() const {
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    return doctype_data_->force_quirks_;
  }

  void SetForceQuirks() {
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    doctype_data_->force_quirks_ = true;
  }

  // Turns an uninitialized token into a DOCTYPE token with fresh, empty
  // DoctypeData.
  void BeginDOCTYPE() {
    DCHECK_EQ(type_, kUninitialized);
    type_ = DOCTYPE;
    doctype_data_ = std::make_unique<DoctypeData>();
  }

  // Same as BeginDOCTYPE(), additionally appending |character| as the first
  // code unit of the token's data.
  void BeginDOCTYPE(UChar character) {
    DCHECK(character);
    BeginDOCTYPE();
    data_.push_back(character);
    or_all_data_ |= character;
  }

  // FIXME: Distinguish between a missing public identifier and an empty one.
  const WTF::Vector<UChar>& PublicIdentifier() const {
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    return doctype_data_->public_identifier_;
  }

  // FIXME: Distinguish between a missing system identifier and an empty one.
  const WTF::Vector<UChar>& SystemIdentifier() const {
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    return doctype_data_->system_identifier_;
  }

  void SetPublicIdentifierToEmptyString() {
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    doctype_data_->has_public_identifier_ = true;
    doctype_data_->public_identifier_.clear();
  }

  void SetSystemIdentifierToEmptyString() {
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    doctype_data_->has_system_identifier_ = true;
    doctype_data_->system_identifier_.clear();
  }

  // Requires SetPublicIdentifierToEmptyString() to have been called first.
  void AppendToPublicIdentifier(UChar character) {
    DCHECK(character);
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    DCHECK(doctype_data_->has_public_identifier_);
    doctype_data_->public_identifier_.push_back(character);
  }

  // Requires SetSystemIdentifierToEmptyString() to have been called first.
  void AppendToSystemIdentifier(UChar character) {
    DCHECK(character);
    DCHECK_EQ(type_, DOCTYPE);
    DCHECK(doctype_data_);
    DCHECK(doctype_data_->has_system_identifier_);
    doctype_data_->system_identifier_.push_back(character);
  }

  // Transfers ownership of the DOCTYPE data to the caller. Afterwards this
  // token holds no DoctypeData until BeginDOCTYPE() is called again.
  std::unique_ptr<DoctypeData> ReleaseDoctypeData() {
    return std::move(doctype_data_);
  }

  /* Start/End Tag Tokens */

  bool SelfClosing() const {
    DCHECK(type_ == kStartTag || type_ == kEndTag);
    return self_closing_;
  }

  void SetSelfClosing() {
    DCHECK(type_ == kStartTag || type_ == kEndTag);
    self_closing_ = true;
  }

  // Turns an uninitialized token into a start tag whose name begins with
  // |character|, discarding any attributes left from a previous use.
  void BeginStartTag(UChar character) {
    DCHECK(character);
    DCHECK_EQ(type_, kUninitialized);
    type_ = kStartTag;
    self_closing_ = false;
    current_attribute_ = nullptr;
    attributes_.clear();

    data_.push_back(character);
    or_all_data_ |= character;
  }

  // Turns an uninitialized token into an end tag whose name begins with
  // |character|. |or_all_data_| is not updated on this LChar path.
  void BeginEndTag(LChar character) {
    DCHECK_EQ(type_, kUninitialized);
    type_ = kEndTag;
    self_closing_ = false;
    current_attribute_ = nullptr;
    attributes_.clear();

    data_.push_back(character);
  }

  // As above, but the initial name is the whole of |characters|.
  void BeginEndTag(const Vector<LChar, 32>& characters) {
    DCHECK_EQ(type_, kUninitialized);
    type_ = kEndTag;
    self_closing_ = false;
    current_attribute_ = nullptr;
    attributes_.clear();

    data_.AppendVector(characters);
  }

  // Appends a fresh attribute and makes it the current one. The pointer is
  // re-taken from attributes_.back() after growing the list.
  void AddNewAttribute() {
    DCHECK(type_ == kStartTag || type_ == kEndTag);
    attributes_.Grow(attributes_.size() + 1);
    current_attribute_ = &attributes_.back();
    current_attribute_->MutableNameRange().Clear();
    current_attribute_->MutableValueRange().Clear();
  }

  // The four range methods below take an absolute |offset| and store it
  // relative to the base offset. They require a current attribute, i.e. a
  // preceding AddNewAttribute() call.
  void BeginAttributeName(int offset) {
    DCHECK(current_attribute_);
    current_attribute_->MutableNameRange().start = offset - base_offset_;
    current_attribute_->NameRange().CheckValidStart();
  }

  // Closes the name range and initializes the value range to an empty range
  // at the same position.
  void EndAttributeName(int offset) {
    DCHECK(current_attribute_);
    int index = offset - base_offset_;
    current_attribute_->MutableNameRange().end = index;
    current_attribute_->NameRange().CheckValid();
    current_attribute_->MutableValueRange().start = index;
    current_attribute_->MutableValueRange().end = index;
  }

  void BeginAttributeValue(int offset) {
    DCHECK(current_attribute_);
    current_attribute_->MutableValueRange().Clear();
    current_attribute_->MutableValueRange().start = offset - base_offset_;
    current_attribute_->ValueRange().CheckValidStart();
  }

  void EndAttributeValue(int offset) {
    DCHECK(current_attribute_);
    current_attribute_->MutableValueRange().end = offset - base_offset_;
    current_attribute_->ValueRange().CheckValid();
  }

  void AppendToAttributeName(UChar character) {
    DCHECK(character);
    DCHECK(type_ == kStartTag || type_ == kEndTag);
    DCHECK(current_attribute_);
    current_attribute_->NameRange().CheckValidStart();
    current_attribute_->AppendToName(character);
  }

  void AppendToAttributeValue(UChar character) {
    DCHECK(character);
    DCHECK(type_ == kStartTag || type_ == kEndTag);
    DCHECK(current_attribute_);
    current_attribute_->ValueRange().CheckValidStart();
    current_attribute_->AppendToValue(character);
  }

  // Appends |value| to the value of the attribute at index |i| (not
  // necessarily the current attribute).
  void AppendToAttributeValue(wtf_size_t i, const String& value) {
    DCHECK(!value.IsEmpty());
    DCHECK(type_ == kStartTag || type_ == kEndTag);
    attributes_[i].AppendToValue(value);
  }

  const AttributeList& Attributes() const {
    DCHECK(type_ == kStartTag || type_ == kEndTag);
    return attributes_;
  }

  // Returns the first attribute whose name equals the local name of |name|,
  // or nullptr if there is none. Only the local name takes part in the
  // comparison.
  const Attribute* GetAttributeItem(const QualifiedName& name) const {
    for (const Attribute& attribute : attributes_) {
      if (attribute.GetName() == name.LocalName())
        return &attribute;
    }
    return nullptr;
  }

  /* Character Tokens */

  // Starting a character token works slightly differently than starting
  // other types of tokens because we want to save a per-character branch.
  void EnsureIsCharacterToken() {
    DCHECK(type_ == kUninitialized || type_ == kCharacter);
    type_ = kCharacter;
  }

  const DataVector& Characters() const {
    DCHECK_EQ(type_, kCharacter);
    return data_;
  }

  void AppendToCharacter(char character) {
    DCHECK_EQ(type_, kCharacter);
    // Convert through LChar first. Where plain char is signed, a negative
    // value would otherwise be sign-extended into a code unit above 0xff,
    // and since |or_all_data_| is not updated on this path IsAll8BitData()
    // would then report the wrong answer.
    data_.push_back(static_cast<LChar>(character));
  }

  void AppendToCharacter(UChar character) {
    DCHECK_EQ(type_, kCharacter);
    data_.push_back(character);
    or_all_data_ |= character;
  }

  void AppendToCharacter(const Vector<LChar, 32>& characters) {
    DCHECK_EQ(type_, kCharacter);
    data_.AppendVector(characters);
  }

  /* Comment Tokens */

  const DataVector& Comment() const {
    DCHECK_EQ(type_, kComment);
    return data_;
  }

  void BeginComment() {
    DCHECK_EQ(type_, kUninitialized);
    type_ = kComment;
  }

  void AppendToComment(UChar character) {
    DCHECK(character);
    DCHECK_EQ(type_, kComment);
    data_.push_back(character);
    or_all_data_ |= character;
  }

 private:
  TokenType type_ = kUninitialized;
  // Always starts at zero. Value-initialized so that |end| is never read as
  // an indeterminate value when Range::Clear() is a no-op.
  Attribute::Range range_{};
  int base_offset_ = 0;
  DataVector data_;
  // Bitwise OR of every code unit appended through the UChar paths; see
  // IsAll8BitData().
  UChar or_all_data_ = 0;

  // For StartTag and EndTag
  bool self_closing_ = false;
  AttributeList attributes_;

  // A pointer into attributes_ used during lexing.
  Attribute* current_attribute_ = nullptr;

  // For DOCTYPE
  std::unique_ptr<DoctypeData> doctype_data_;

  DISALLOW_COPY_AND_ASSIGN(HTMLToken);
};

#ifndef NDEBUG
// Maps a token type to a C string. Declared only when NDEBUG is not defined;
// the definition lives outside this header. Presumably returns a
// human-readable name of the enumerator for debug output (TODO confirm
// against the definition).
const char* ToString(HTMLToken::TokenType);
#endif

}  // namespace blink

#endif
