blob: 6b3c03319ebd94ed6be0e7494318840ed96f560f [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/wtf/text/atomic_string_table.h"
#include "base/notreached.h"
#include "third_party/blink/renderer/platform/wtf/text/character_visitor.h"
#include "third_party/blink/renderer/platform/wtf/text/string_hash.h"
#include "third_party/blink/renderer/platform/wtf/text/utf8.h"
namespace WTF {
namespace {
// TODO(ajwong): consider replacing with a span in the future.
template <typename CharacterType>
struct HashTranslatorCharBuffer {
const CharacterType* s;
unsigned length;
};
typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
struct UCharBufferTranslator {
static unsigned GetHash(const UCharBuffer& buf) {
return StringHasher::ComputeHashAndMaskTop8Bits(buf.s, buf.length);
}
static bool Equal(StringImpl* const& str, const UCharBuffer& buf) {
return WTF::Equal(str, buf.s, buf.length);
}
static void Translate(StringImpl*& location,
const UCharBuffer& buf,
unsigned hash) {
auto string = StringImpl::Create8BitIfPossible(buf.s, buf.length);
if (string)
string->AddRef();
location = string.get();
location->SetHash(hash);
location->SetIsAtomic();
}
};
struct HashAndUTF8Characters {
unsigned hash;
const char* characters;
unsigned length;
unsigned utf16_length;
};
struct HashAndUTF8CharactersTranslator {
static unsigned GetHash(const HashAndUTF8Characters& buffer) {
return buffer.hash;
}
static bool Equal(StringImpl* const& string,
const HashAndUTF8Characters& buffer) {
if (buffer.utf16_length != string->length())
return false;
// If buffer contains only ASCII characters UTF-8 and UTF16 length are the
// same.
if (buffer.utf16_length != buffer.length) {
if (string->Is8Bit()) {
const LChar* characters8 = string->Characters8();
return unicode::EqualLatin1WithUTF8(
characters8, characters8 + string->length(), buffer.characters,
buffer.characters + buffer.length);
}
const UChar* characters16 = string->Characters16();
return unicode::EqualUTF16WithUTF8(
characters16, characters16 + string->length(), buffer.characters,
buffer.characters + buffer.length);
}
if (string->Is8Bit()) {
const LChar* string_characters = string->Characters8();
for (unsigned i = 0; i < buffer.length; ++i) {
DCHECK(IsASCII(buffer.characters[i]));
if (string_characters[i] != buffer.characters[i])
return false;
}
return true;
}
const UChar* string_characters = string->Characters16();
for (unsigned i = 0; i < buffer.length; ++i) {
DCHECK(IsASCII(buffer.characters[i]));
if (string_characters[i] != buffer.characters[i])
return false;
}
return true;
}
static void Translate(StringImpl*& location,
const HashAndUTF8Characters& buffer,
unsigned hash) {
scoped_refptr<StringImpl> new_string;
// If buffer contains only ASCII characters, the UTF-8 and UTF-16 lengths
// are the same.
bool is_all_ascii = buffer.utf16_length == buffer.length;
if (!is_all_ascii) {
UChar* target;
new_string = StringImpl::CreateUninitialized(buffer.utf16_length, target);
const char* source = buffer.characters;
if (unicode::ConvertUTF8ToUTF16(&source, source + buffer.length, &target,
target + buffer.utf16_length,
&is_all_ascii) != unicode::kConversionOK)
NOTREACHED();
} else {
new_string = StringImpl::Create(buffer.characters, buffer.length);
}
new_string->AddRef();
location = new_string.get();
location->SetHash(hash);
location->SetIsAtomic();
}
};
struct StringViewLookupTranslator {
static unsigned GetHash(const StringView& buf) {
StringImpl* shared_impl = buf.SharedImpl();
if (LIKELY(shared_impl))
return shared_impl->GetHash();
if (buf.Is8Bit()) {
return StringHasher::ComputeHashAndMaskTop8Bits(buf.Characters8(),
buf.length());
} else {
return StringHasher::ComputeHashAndMaskTop8Bits(buf.Characters16(),
buf.length());
}
}
static bool Equal(StringImpl* const& str, const StringView& buf) {
return *str == buf;
}
};
// Allows lookups of the ASCII-lowercase version of a string without actually
// allocating memory to store it. Instead, the translator computes the results
// of hash and equality computations as if we had done so.
struct LowercaseStringViewLookupTranslator {
template <typename CharType>
static UChar ToASCIILowerUChar(CharType ch) {
return ToASCIILower(ch);
}
// Computes the hash that |query| would have if it were first converted to
// ASCII lowercase.
static unsigned GetHash(const StringView& query) {
// If possible, use cached hash if the string is lowercased.
StringImpl* shared_impl = query.SharedImpl();
if (LIKELY(shared_impl && query.IsLowerASCII()))
return shared_impl->GetHash();
if (query.Is8Bit()) {
return StringHasher::ComputeHashAndMaskTop8Bits<LChar,
ToASCIILowerUChar<LChar>>(
query.Characters8(), query.length());
} else {
return StringHasher::ComputeHashAndMaskTop8Bits<UChar,
ToASCIILowerUChar<UChar>>(
query.Characters16(), query.length());
}
}
// Returns true if the hashtable |bucket| contains a string which is the ASCII
// lowercase version of |query|.
static bool Equal(StringImpl* const& bucket, const StringView& query) {
// This is similar to EqualIgnoringASCIICase, but not the same.
// In particular, it validates that |bucket| is a lowercase version of
// |query|.
//
// Unlike EqualIgnoringASCIICase, it returns false if they are equal
// ignoring ASCII case but |bucket| contains an uppercase ASCII character.
//
// However, similar optimizations are used here as there, so these should
// have generally similar correctness and performance constraints.
if (bucket->length() != query.length())
return false;
if (bucket->Bytes() == query.Bytes() && bucket->Is8Bit() == query.Is8Bit())
return query.IsLowerASCII();
return WTF::VisitCharacters(*bucket, [&](const auto* bch, wtf_size_t) {
return WTF::VisitCharacters(query, [&](const auto* qch, wtf_size_t len) {
for (wtf_size_t i = 0; i < len; ++i) {
if (bch[i] != ToASCIILower(qch[i]))
return false;
}
return true;
});
});
}
};
} // namespace
AtomicStringTable::AtomicStringTable() {
for (StringImpl* string : StringImpl::AllStaticStrings().Values())
Add(string);
}
AtomicStringTable::~AtomicStringTable() {
for (StringImpl* string : table_) {
if (!string->IsStatic()) {
DCHECK(string->IsAtomic());
string->UnsetIsAtomic();
}
}
}
void AtomicStringTable::ReserveCapacity(unsigned size) {
table_.ReserveCapacityForSize(size);
}
template <typename T, typename HashTranslator>
scoped_refptr<StringImpl> AtomicStringTable::AddToStringTable(const T& value) {
HashSet<StringImpl*>::AddResult add_result =
table_.AddWithTranslator<HashTranslator>(value);
// If the string is newly-translated, then we need to adopt it.
// The boolean in the pair tells us if that is so.
return add_result.is_new_entry ? base::AdoptRef(*add_result.stored_value)
: *add_result.stored_value;
}
scoped_refptr<StringImpl> AtomicStringTable::Add(const UChar* s,
unsigned length) {
if (!s)
return nullptr;
if (!length)
return StringImpl::empty_;
UCharBuffer buffer = {s, length};
return AddToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
}
typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
struct LCharBufferTranslator {
static unsigned GetHash(const LCharBuffer& buf) {
return StringHasher::ComputeHashAndMaskTop8Bits(buf.s, buf.length);
}
static bool Equal(StringImpl* const& str, const LCharBuffer& buf) {
return WTF::Equal(str, buf.s, buf.length);
}
static void Translate(StringImpl*& location,
const LCharBuffer& buf,
unsigned hash) {
auto string = StringImpl::Create(buf.s, buf.length);
string->AddRef();
location = string.get();
location->SetHash(hash);
location->SetIsAtomic();
}
};
scoped_refptr<StringImpl> AtomicStringTable::Add(const LChar* s,
unsigned length) {
if (!s)
return nullptr;
if (!length)
return StringImpl::empty_;
LCharBuffer buffer = {s, length};
return AddToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
}
StringImpl* AtomicStringTable::Add(StringImpl* string) {
if (!string->length())
return StringImpl::empty_;
StringImpl* result = *table_.insert(string).stored_value;
if (!result->IsAtomic())
result->SetIsAtomic();
DCHECK(!string->IsStatic() || result->IsStatic());
return result;
}
scoped_refptr<StringImpl> AtomicStringTable::AddUTF8(
const char* characters_start,
const char* characters_end) {
HashAndUTF8Characters buffer;
buffer.characters = characters_start;
buffer.hash = unicode::CalculateStringHashAndLengthFromUTF8MaskingTop8Bits(
characters_start, characters_end, buffer.length, buffer.utf16_length);
if (!buffer.hash)
return nullptr;
return AddToStringTable<HashAndUTF8Characters,
HashAndUTF8CharactersTranslator>(buffer);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFindSlow(
StringImpl* string) {
DCHECK(string->length());
const auto& it = table_.find(string);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFindSlow(
const StringView& string) {
DCHECK(string.length());
const auto& it = table_.Find<StringViewLookupTranslator>(string);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFindLowercasedSlow(
const StringView& string) {
DCHECK(string.length());
const auto& it = table_.Find<LowercaseStringViewLookupTranslator>(string);
if (it == table_.end())
return WeakResult();
DCHECK(StringView(*it).IsLowerASCII());
DCHECK(EqualIgnoringASCIICase(*it, string));
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFind(const LChar* chars,
unsigned length) {
if (!chars)
return WeakResult();
// Mirror the empty logic in Add().
if (!length)
return WeakResult(StringImpl::empty_);
LCharBuffer buffer = {chars, length};
const auto& it = table_.Find<LCharBufferTranslator>(buffer);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFind(const UChar* chars,
unsigned length) {
if (!chars)
return WeakResult();
// Mirror the empty logic in Add().
if (!length)
return WeakResult(StringImpl::empty_);
UCharBuffer buffer = {chars, length};
const auto& it = table_.Find<UCharBufferTranslator>(buffer);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
void AtomicStringTable::Remove(StringImpl* string) {
DCHECK(string->IsAtomic());
auto iterator = table_.find(string);
CHECK_NE(iterator, table_.end());
table_.erase(iterator);
}
} // namespace WTF