blob: 027bf993a9900450e450c3ae85529dd3a880ceb3 [file] [log] [blame]
/*
* Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved.
* Copyright (C) 2007-2009 Torch Mobile, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/platform/wtf/text/text_encoding_registry.h"
#include <atomic>
#include <memory>
#include "third_party/blink/renderer/platform/wtf/hash_map.h"
#include "third_party/blink/renderer/platform/wtf/hash_set.h"
#include "third_party/blink/renderer/platform/wtf/std_lib_extras.h"
#include "third_party/blink/renderer/platform/wtf/text/ascii_ctype.h"
#include "third_party/blink/renderer/platform/wtf/text/string_view.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_icu.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_latin1.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_replacement.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_user_defined.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_utf16.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_utf8.h"
#include "third_party/blink/renderer/platform/wtf/text/text_encoding.h"
#include "third_party/blink/renderer/platform/wtf/threading_primitives.h"
#include "third_party/blink/renderer/platform/wtf/wtf.h"
namespace WTF {
// Longest encoding name, in characters and excluding the terminating NUL, that
// the registry handles. AddToTextEncodingNameMap() DCHECKs aliases against this
// limit, and the templated AtomicCanonicalTextEncodingName() sizes its stack
// buffer as this value plus one and rejects longer inputs.
const size_t kMaxEncodingNameLength = 63;
// Hash functions for NUL-terminated, all-ASCII encoding names. Both equality
// and hashing fold ASCII case through ToASCIILower(), so "UTF-8" and "utf-8"
// compare equal and hash to the same value.
struct TextEncodingNameHash {
  // Returns true when |s1| and |s2| have the same length and match character
  // by character after ASCII lower-casing.
  static bool Equal(const char* s1, const char* s2) {
    for (;; ++s1, ++s2) {
      const char left = *s1;
      const char right = *s2;
      if (ToASCIILower(left) != ToASCIILower(right))
        return false;
      // Stop at the first terminator; the strings are equal only if both
      // ended at the same position.
      if (!left || !right)
        return !left && !right;
    }
  }

  // This algorithm is the one-at-a-time hash from:
  // http://burtleburtle.net/bob/hash/hashfaq.html
  // http://burtleburtle.net/bob/hash/doobs.html
  static unsigned GetHash(const char* s) {
    unsigned hash = WTF::kStringHashingStartValue;
    // Mix in each character (case-folded) up to, but not including, the NUL.
    for (; *s; ++s) {
      hash += ToASCIILower(*s);
      hash += (hash << 10);
      hash ^= (hash >> 6);
    }
    // Final avalanche step, applied once after the last character.
    hash += (hash << 3);
    hash ^= (hash >> 11);
    hash += (hash << 15);
    return hash;
  }

  // NOTE(review): presumably tells the hash table not to hand empty/deleted
  // bucket values to Equal(), which dereferences both pointers - confirm
  // against the WTF hash traits documentation.
  static const bool safe_to_compare_to_empty_or_deleted = false;
};
struct TextCodecFactory {
NewTextCodecFunction function;
const void* additional_data;
TextCodecFactory(NewTextCodecFunction f = nullptr, const void* d = nullptr)
: function(f), additional_data(d) {}
};
// Maps an encoding alias to its atomic (canonical) name. Lookups use
// TextEncodingNameHash, so they ignore ASCII case.
using TextEncodingNameMap =
    HashMap<const char*, const char*, TextEncodingNameHash>;
// Maps an atomic name pointer to the factory that creates its codec. No custom
// hash is supplied here, unlike TextEncodingNameMap.
using TextCodecMap = HashMap<const char*, TextCodecFactory>;
// Returns the single mutex that the functions in this file lock (via
// MutexLocker) before reading or modifying g_text_encoding_name_map and
// g_text_codec_map.
static Mutex& EncodingRegistryMutex() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(Mutex, mutex, ());
return mutex;
}
// Alias -> atomic name table. Null until BuildBaseTextCodecMaps() allocates it.
static TextEncodingNameMap* g_text_encoding_name_map;
// Atomic name -> codec factory table. Null until BuildBaseTextCodecMaps()
// allocates it.
static TextCodecMap* g_text_codec_map;
namespace {

// True once ExtendTextCodecMaps() has been run. It is set while
// EncodingRegistryMutex() is held, but NoExtendedTextEncodingNameUsed() reads
// it without taking the lock, hence the atomic.
std::atomic<bool> g_did_extend_text_codec_maps{false};

// Acquire-load of the "maps were extended" flag.
ALWAYS_INLINE bool AtomicDidExtendTextCodecMaps() {
  return g_did_extend_text_codec_maps.load(std::memory_order_acquire);
}

// Release-store that marks the maps as extended; the flag is never reset.
ALWAYS_INLINE void AtomicSetDidExtendTextCodecMaps() {
  g_did_extend_text_codec_maps.store(true, std::memory_order_release);
}

}  // namespace
#if ERROR_DISABLED
// Error logging is compiled out, so the consistency check is a no-op. The stub
// must carry the same name as the real implementation below, because
// AddToTextEncodingNameMap() calls CheckExistingName() in both configurations.
static inline void CheckExistingName(const char*, const char*) {}
#else
// Logs an error if |alias| is already registered under an atomic name other
// than |atomic_name|. It only reports the conflict; it does not modify the
// map.
static void CheckExistingName(const char* alias, const char* atomic_name) {
  const char* old_atomic_name = g_text_encoding_name_map->at(alias);
  // Not registered yet: nothing to conflict with.
  if (!old_atomic_name)
    return;
  // Re-registering the same mapping is harmless.
  if (old_atomic_name == atomic_name)
    return;
  // Keep the warning silent about one case where we know this will happen.
  if (strcmp(alias, "ISO-8859-8-I") == 0 &&
      strcmp(old_atomic_name, "ISO-8859-8-I") == 0 &&
      EqualIgnoringASCIICase(atomic_name, "iso-8859-8"))
    return;
  LOG(ERROR) << "alias " << alias << " maps to " << old_atomic_name
             << " already, but someone is trying to make it map to "
             << atomic_name;
}
#endif
// Returns true for aliases that a back-end may report but that must not be
// added to the registry.
static bool IsUndesiredAlias(const char* alias) {
  // Some back-ends support names with embedded parameters such as version
  // numbers ("ISO_2022,locale=ja,version=0" in ICU). Any alias containing a
  // comma is rejected.
  const bool has_comma = strchr(alias, ',') != nullptr;
  // "8859_1" is known to (at least) ICU, but other browsers don't support this
  // name, and having it caused a compatibility problem; see bug 43554.
  const bool is_8859_1 = strcmp(alias, "8859_1") == 0;
  return has_comma || is_8859_1;
}
// Registration callback passed to the codecs' RegisterEncodingNames():
// records |alias| as another spelling of the encoding called |name|.
// Undesired aliases (see IsUndesiredAlias()) are ignored.
static void AddToTextEncodingNameMap(const char* alias, const char* name) {
  DCHECK_LE(strlen(alias), kMaxEncodingNameLength);
  if (IsUndesiredAlias(alias))
    return;
  // Resolve |name| to the atomic name it is already registered under, if any.
  const char* registered_name = g_text_encoding_name_map->at(name);
  // An unregistered |name| is only expected when an encoding registers its
  // own name, i.e. when |alias| and |name| are the same string.
  DCHECK(strcmp(alias, name) == 0 || registered_name);
  const char* atomic_name = registered_name ? registered_name : name;
  CheckExistingName(alias, atomic_name);
  g_text_encoding_name_map->insert(alias, atomic_name);
}
static void AddToTextCodecMap(const char* name,
NewTextCodecFunction function,
const void* additional_data) {
const char* atomic_name = g_text_encoding_name_map->at(name);
DCHECK(atomic_name);
g_text_codec_map->insert(atomic_name,
TextCodecFactory(function, additional_data));
}
// Allocates both registry maps and registers the Latin-1, UTF-8, UTF-16 and
// user-defined codecs. For each codec its names are registered before its
// factories, because AddToTextCodecMap() looks the atomic name up in the name
// map.
//
// Must be called with EncodingRegistryMutex() held (asserted below) and only
// while the maps do not exist yet.
// Note that this can be called on both the main thread and worker threads.
static void BuildBaseTextCodecMaps() {
DCHECK(!g_text_codec_map);
DCHECK(!g_text_encoding_name_map);
EncodingRegistryMutex().AssertAcquired();
g_text_codec_map = new TextCodecMap;
g_text_encoding_name_map = new TextEncodingNameMap;
TextCodecLatin1::RegisterEncodingNames(AddToTextEncodingNameMap);
TextCodecLatin1::RegisterCodecs(AddToTextCodecMap);
TextCodecUTF8::RegisterEncodingNames(AddToTextEncodingNameMap);
TextCodecUTF8::RegisterCodecs(AddToTextCodecMap);
TextCodecUTF16::RegisterEncodingNames(AddToTextEncodingNameMap);
TextCodecUTF16::RegisterCodecs(AddToTextCodecMap);
TextCodecUserDefined::RegisterEncodingNames(AddToTextEncodingNameMap);
TextCodecUserDefined::RegisterCodecs(AddToTextCodecMap);
}
// Adds the replacement and ICU codecs to the maps that
// BuildBaseTextCodecMaps() created. The callers in this file invoke it while
// holding EncodingRegistryMutex() and then call
// AtomicSetDidExtendTextCodecMaps(). As in the base set, each codec's names
// are registered before its factories.
static void ExtendTextCodecMaps() {
TextCodecReplacement::RegisterEncodingNames(AddToTextEncodingNameMap);
TextCodecReplacement::RegisterCodecs(AddToTextCodecMap);
TextCodecICU::RegisterEncodingNames(AddToTextEncodingNameMap);
TextCodecICU::RegisterCodecs(AddToTextCodecMap);
}
std::unique_ptr<TextCodec> NewTextCodec(const TextEncoding& encoding) {
MutexLocker lock(EncodingRegistryMutex());
DCHECK(g_text_codec_map);
TextCodecFactory factory = g_text_codec_map->at(encoding.GetName());
DCHECK(factory.function);
return factory.function(encoding, factory.additional_data);
}
// Returns the atomic (canonical) name registered for |name|, or nullptr when
// |name| is null, empty or unknown.
//
// The base maps are built on first use. The extended codecs are only
// registered when a lookup misses in the maps built so far, after which the
// lookup is retried once.
const char* AtomicCanonicalTextEncodingName(const char* name) {
  const bool is_null_or_empty = !name || !name[0];
  if (is_null_or_empty)
    return nullptr;

  MutexLocker lock(EncodingRegistryMutex());
  if (!g_text_encoding_name_map)
    BuildBaseTextCodecMaps();

  const char* atomic_name = g_text_encoding_name_map->at(name);
  // Either the name was found, or the maps are already fully extended and the
  // miss (nullptr) is final.
  if (atomic_name || AtomicDidExtendTextCodecMaps())
    return atomic_name;

  ExtendTextCodecMaps();
  AtomicSetDidExtendTextCodecMaps();
  return g_text_encoding_name_map->at(name);
}
template <typename CharacterType>
const char* AtomicCanonicalTextEncodingName(const CharacterType* characters,
size_t length) {
char buffer[kMaxEncodingNameLength + 1];
size_t j = 0;
for (size_t i = 0; i < length; ++i) {
char c = static_cast<char>(characters[i]);
if (j == kMaxEncodingNameLength || c != characters[i])
return nullptr;
buffer[j++] = c;
}
buffer[j] = 0;
return AtomicCanonicalTextEncodingName(buffer);
}
// String overload: returns the atomic name for |alias|, or nullptr when
// |alias| is empty, contains a NUL character, or is not a registered name.
const char* AtomicCanonicalTextEncodingName(const String& alias) {
  // An embedded NUL would truncate the C-string lookup, so reject it up front
  // together with the empty string.
  if (!alias.length() || alias.Contains('\0'))
    return nullptr;

  // Dispatch on the string's internal character width.
  return alias.Is8Bit()
             ? AtomicCanonicalTextEncodingName<LChar>(alias.Characters8(),
                                                      alias.length())
             : AtomicCanonicalTextEncodingName<UChar>(alias.Characters16(),
                                                      alias.length());
}
// Returns true while the extended codecs have not been registered yet. It
// reads the atomic flag without taking the registry mutex.
bool NoExtendedTextEncodingNameUsed() {
  const bool maps_were_extended = AtomicDidExtendTextCodecMaps();
  return !maps_were_extended;
}
// Returns every alias in the name map. The base and extended maps are built
// first if that has not happened yet, so the result covers all registered
// codecs.
Vector<String> TextEncodingAliasesForTesting() {
  Vector<String> aliases;
  {
    // The lock covers only the map construction and the key copy.
    MutexLocker lock(EncodingRegistryMutex());
    if (!g_text_encoding_name_map)
      BuildBaseTextCodecMaps();
    const bool already_extended = AtomicDidExtendTextCodecMaps();
    if (!already_extended) {
      ExtendTextCodecMaps();
      AtomicSetDidExtendTextCodecMaps();
    }
    CopyKeysToVector(*g_text_encoding_name_map, aliases);
  }
  return aliases;
}
#ifndef NDEBUG
// Debug-only helper that prints every alias => atomic-name pair in the name
// map to stderr.
void DumpTextEncodingNameMap() {
  // Take the registry lock before touching the map at all: the size read must
  // be synchronised with concurrent registration, just like the iteration.
  MutexLocker lock(EncodingRegistryMutex());
  // The map is created lazily, so it may not exist yet when this helper is
  // called; report that instead of dereferencing a null pointer.
  if (!g_text_encoding_name_map) {
    fprintf(stderr, "WTF::TextEncodingNameMap has not been built yet.\n");
    return;
  }
  unsigned size = g_text_encoding_name_map->size();
  fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size);
  for (const auto& entry : *g_text_encoding_name_map)
    fprintf(stderr, "'%s' => '%s'\n", entry.key, entry.value);
}
#endif
} // namespace WTF