chromium/src/third_party/blink/renderer/platform/wtf/text/wtf_string.cc - manifest_repos/chromium_src - Git at Google

 /*
  * (C) 1999 Lars Knoll (knoll@kde.org)
  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
  * reserved.
  * Copyright (C) 2007-2009 Torch Mobile, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  */

 #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"

 #include <locale.h>
 #include <stdarg.h>
 #include <algorithm>
 #include "base/callback.h"
 #include "base/strings/string_util.h"
 #include "build/build_config.h"
 #include "third_party/blink/renderer/platform/wtf/dtoa.h"
 #include "third_party/blink/renderer/platform/wtf/math_extras.h"
 #include "third_party/blink/renderer/platform/wtf/size_assertions.h"
 #include "third_party/blink/renderer/platform/wtf/text/ascii_ctype.h"
 #include "third_party/blink/renderer/platform/wtf/text/case_map.h"
 #include "third_party/blink/renderer/platform/wtf/text/character_names.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
 #include "third_party/blink/renderer/platform/wtf/text/unicode.h"
 #include "third_party/blink/renderer/platform/wtf/text/utf8.h"
 #include "third_party/blink/renderer/platform/wtf/vector.h"
 #include "third_party/perfetto/include/perfetto/tracing/traced_value.h"

 namespace WTF {

 ASSERT_SIZE(String, void*);

 // Builds a string from `length` UTF-16 code units. A null `characters`
 // pointer leaves impl_ unset.
 String::String(const UChar* characters, unsigned length) {
   if (characters)
     impl_ = StringImpl::Create(characters, length);
 }

 // Builds a string from null-terminated UTF-16 data. A null `str` leaves
 // impl_ unset.
 String::String(const UChar* str)
     : impl_(str ? StringImpl::Create(str, LengthOfNullTerminatedString(str))
                 : nullptr) {}

 // Builds a string from `length` latin1 characters. A null `characters`
 // pointer leaves impl_ unset.
 String::String(const LChar* characters, unsigned length) {
   if (characters)
     impl_ = StringImpl::Create(characters, length);
 }

 // Same as the LChar constructor, but accepts plain `char` data, which is
 // reinterpreted as LChar. A null `characters` pointer leaves impl_ unset.
 String::String(const char* characters, unsigned length) {
   if (!characters)
     return;
   impl_ =
       StringImpl::Create(reinterpret_cast<const LChar*>(characters), length);
 }

 #if defined(ARCH_CPU_64_BITS)
 // Overload taking a size_t length; only compiled when ARCH_CPU_64_BITS is
 // defined. The length is narrowed with SafeCast<unsigned> and construction is
 // delegated to the (const char*, unsigned) constructor.
 String::String(const char* characters, size_t length)
     : String(characters, SafeCast<unsigned>(length)) {}
 #endif  // defined(ARCH_CPU_64_BITS)

 // Compares two strings by handing their underlying impls to the impl-level
 // CodeUnitCompare overload.
 int CodeUnitCompare(const String& a, const String& b) {
   auto* const lhs = a.Impl();
   auto* const rhs = b.Impl();
   return CodeUnitCompare(lhs, rhs);
 }

 int CodeUnitCompareIgnoringASCIICase(const String& a, const char* b) {
   return CodeUnitCompareIgnoringASCIICase(a.Impl(),
                                           reinterpret_cast<const LChar*>(b));
 }

 // Searches from `index` using `match_callback`; returns kNotFound when there
 // is no impl to search.
 wtf_size_t String::Find(base::RepeatingCallback<bool(UChar)> match_callback,
                         wtf_size_t index) const {
   if (!impl_)
     return kNotFound;
   return impl_->Find(match_callback, index);
 }

 // Returns the character starting at code-unit index `i`, or 0 when there is
 // no impl or `i` is at/after the end.
 UChar32 String::CharacterStartingAt(unsigned i) const {
   const bool in_range = impl_ && i < impl_->length();
   return in_range ? impl_->CharacterStartingAt(i) : 0;
 }

 void String::Ensure16Bit() {
   if (IsNull())
     return;
   if (!Is8Bit())
     return;
   if (unsigned length = this->length())
     impl_ = Make16BitFrom8BitSource(impl_->Characters8(), length).ReleaseImpl();
   else
     impl_ = StringImpl::empty16_bit_;
 }

 // Replaces impl_ with the result of StringImpl::Truncate(length); no-op when
 // impl_ is null.
 void String::Truncate(unsigned length) {
   if (!impl_)
     return;
   impl_ = impl_->Truncate(length);
 }

 // Replaces impl_ with the result of StringImpl::Remove(start,
 // length_to_remove); no-op when impl_ is null.
 void String::Remove(unsigned start, unsigned length_to_remove) {
   if (!impl_)
     return;
   impl_ = impl_->Remove(start, length_to_remove);
 }

 // Forwards to StringImpl::Substring; yields a default-constructed String when
 // impl_ is null.
 String String::Substring(unsigned pos, unsigned len) const {
   if (impl_)
     return impl_->Substring(pos, len);
   return String();
 }

 // Lower-cases via CaseMap::FastToLowerInvariant; yields a default-constructed
 // String when impl_ is null.
 String String::DeprecatedLower() const {
   if (impl_)
     return CaseMap::FastToLowerInvariant(impl_.get());
   return String();
 }

 // Forwards to StringImpl::LowerASCII; yields a default-constructed String
 // when impl_ is null.
 String String::LowerASCII() const {
   if (impl_)
     return impl_->LowerASCII();
   return String();
 }

 // Forwards to StringImpl::UpperASCII; yields a default-constructed String
 // when impl_ is null.
 String String::UpperASCII() const {
   if (impl_)
     return impl_->UpperASCII();
   return String();
 }

 // Forwards to StringImpl::StripWhiteSpace(); yields a default-constructed
 // String when impl_ is null.
 String String::StripWhiteSpace() const {
   if (impl_)
     return impl_->StripWhiteSpace();
   return String();
 }

 // As StripWhiteSpace(), but passes the caller-supplied `is_white_space`
 // predicate through to StringImpl.
 String String::StripWhiteSpace(IsWhiteSpaceFunctionPtr is_white_space) const {
   if (impl_)
     return impl_->StripWhiteSpace(is_white_space);
   return String();
 }

 // Forwards to StringImpl::SimplifyWhiteSpace(strip_behavior); yields a
 // default-constructed String when impl_ is null.
 String String::SimplifyWhiteSpace(StripBehavior strip_behavior) const {
   if (impl_)
     return impl_->SimplifyWhiteSpace(strip_behavior);
   return String();
 }

 // As SimplifyWhiteSpace(strip_behavior), but passes the caller-supplied
 // `is_white_space` predicate through to StringImpl.
 String String::SimplifyWhiteSpace(IsWhiteSpaceFunctionPtr is_white_space,
                                   StripBehavior strip_behavior) const {
   if (impl_)
     return impl_->SimplifyWhiteSpace(is_white_space, strip_behavior);
   return String();
 }

 // Forwards to StringImpl::RemoveCharacters(find_match); yields a
 // default-constructed String when impl_ is null.
 String String::RemoveCharacters(CharacterMatchFunctionPtr find_match) const {
   if (impl_)
     return impl_->RemoveCharacters(find_match);
   return String();
 }

 // Forwards to StringImpl::FoldCase; yields a default-constructed String when
 // impl_ is null.
 String String::FoldCase() const {
   if (impl_)
     return impl_->FoldCase();
   return String();
 }

 // printf-style formatting into a String. The output is first written into a
 // 256-byte buffer; if that is too small the buffer is grown to the exact size
 // reported by vsnprintf and the formatting is repeated. Returns a
 // default-constructed String if vsnprintf reports an error.
 String String::Format(const char* format, ...) {
   // vsnprintf formats floats according to the current locale, and we need the
   // decimal separator to always be '.'. Verify that the separator is '.', and
   // also that the numeric locale really is the default "C" locale so that the
   // first check is not passing by luck. Android's locales work differently,
   // so the second check cannot be made there.
   DCHECK_EQ(strcmp(localeconv()->decimal_point, "."), 0);
 #if !defined(OS_ANDROID)
   DCHECK_EQ(strcmp(setlocale(LC_NUMERIC, nullptr), "C"), 0);
 #endif  // !OS_ANDROID

   // TODO(esprehn): base uses 1024, maybe we should use a bigger size too.
   static const unsigned kInitialCapacity = 256;
   Vector<char, kInitialCapacity> buffer(kInitialCapacity);

   va_list args;
   va_start(args, format);
   int length = base::vsnprintf(buffer.data(), buffer.size(), format, args);
   va_end(args);

   // TODO(esprehn): This can only happen if there's an encoding error, what's
   // the locale set to inside blink? Can this happen? We should probably CHECK
   // instead.
   if (length < 0)
     return String();

   const bool output_was_truncated =
       static_cast<unsigned>(length) >= buffer.size();
   if (output_was_truncated) {
     // The length reported by vsnprintf excludes the NUL terminator, so
     // reserve one extra byte for it.
     buffer.Grow(length + 1);

     // The contents of `args` are undefined after a call to vsnprintf
     // (see http://man.cx/snprintf(3)), so the list has to be restarted with
     // va_start() and closed with va_end() around every use. Skipping this
     // happens to work on lots of systems, but fails e.g. on 64bit Linux.
     va_start(args, format);
     length = base::vsnprintf(buffer.data(), buffer.size(), format, args);
     va_end(args);
   }

   CHECK_LT(static_cast<unsigned>(length), buffer.size());
   const LChar* const formatted = reinterpret_cast<const LChar*>(buffer.data());
   return String(formatted, length);
 }

 // Produces a double-quoted, escaped rendering of the string for debug output.
 // A null string is rendered as <null> (without quotes). Tab, newline,
 // carriage return, double quote and backslash get backslash shorthands; other
 // printable ASCII is copied through; everything else becomes \uXXXX.
 String String::EncodeForDebugging() const {
   if (IsNull())
     return "<null>";

   StringBuilder quoted;
   quoted.Append('"');
   for (unsigned i = 0; i < length(); ++i) {
     const UChar unit = (*impl_)[i];
     if (unit == '\t') {
       quoted.Append("\\t");
     } else if (unit == '\n') {
       quoted.Append("\\n");
     } else if (unit == '\r') {
       quoted.Append("\\r");
     } else if (unit == '"') {
       quoted.Append("\\\"");
     } else if (unit == '\\') {
       quoted.Append("\\\\");
     } else if (IsASCIIPrintable(unit)) {
       quoted.Append(static_cast<char>(unit));
     } else {
       // Control or non-ASCII code unit: print it as "\uXXXX".
       quoted.AppendFormat("\\u%04X", unit);
     }
   }
   quoted.Append('"');
   return quoted.ToString();
 }

 // Widens the float to double and defers to the double overload of Number().
 String String::Number(float number) {
   const double widened = static_cast<double>(number);
   return Number(widened);
 }

 // Formats `number` with NumberToFixedPrecisionString using `precision`, and
 // wraps the resulting text in a String.
 String String::Number(double number, unsigned precision) {
   NumberToStringBuffer scratch;
   const char* const text =
       NumberToFixedPrecisionString(number, precision, scratch);
   return String(text);
 }

 // Formats `number` with NumberToString and wraps the resulting text in a
 // String.
 String String::NumberToStringECMAScript(double number) {
   NumberToStringBuffer scratch;
   const char* const text = NumberToString(number, scratch);
   return String(text);
 }

 // Formats `number` with NumberToFixedWidthString using `decimal_places`, and
 // wraps the resulting text in a String.
 String String::NumberToStringFixedWidth(double number,
                                         unsigned decimal_places) {
   NumberToStringBuffer scratch;
   const char* const text =
       NumberToFixedWidthString(number, decimal_places, scratch);
   return String(text);
 }

 // Parses the string as an int with NumberParsingOptions::kStrict. With no
 // impl, returns 0 and reports failure through `ok` (when provided).
 int String::ToIntStrict(bool* ok) const {
   if (impl_)
     return impl_->ToInt(NumberParsingOptions::kStrict, ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Parses the string as an unsigned with NumberParsingOptions::kStrict. With
 // no impl, returns 0 and reports failure through `ok` (when provided).
 unsigned String::ToUIntStrict(bool* ok) const {
   if (impl_)
     return impl_->ToUInt(NumberParsingOptions::kStrict, ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Forwards to StringImpl::HexToUIntStrict. With no impl, returns 0 and
 // reports failure through `ok` (when provided).
 unsigned String::HexToUIntStrict(bool* ok) const {
   if (impl_)
     return impl_->HexToUIntStrict(ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Forwards to StringImpl::HexToUInt64Strict. With no impl, returns 0 and
 // reports failure through `ok` (when provided).
 uint64_t String::HexToUInt64Strict(bool* ok) const {
   if (impl_)
     return impl_->HexToUInt64Strict(ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Parses the string as an int64_t with NumberParsingOptions::kStrict. With
 // no impl, returns 0 and reports failure through `ok` (when provided).
 int64_t String::ToInt64Strict(bool* ok) const {
   if (impl_)
     return impl_->ToInt64(NumberParsingOptions::kStrict, ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Parses the string as a uint64_t with NumberParsingOptions::kStrict. With
 // no impl, returns 0 and reports failure through `ok` (when provided).
 uint64_t String::ToUInt64Strict(bool* ok) const {
   if (impl_)
     return impl_->ToUInt64(NumberParsingOptions::kStrict, ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Parses the string as an int with NumberParsingOptions::kLoose. With no
 // impl, returns 0 and reports failure through `ok` (when provided).
 int String::ToInt(bool* ok) const {
   if (impl_)
     return impl_->ToInt(NumberParsingOptions::kLoose, ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Parses the string as an unsigned with NumberParsingOptions::kLoose. With
 // no impl, returns 0 and reports failure through `ok` (when provided).
 unsigned String::ToUInt(bool* ok) const {
   if (impl_)
     return impl_->ToUInt(NumberParsingOptions::kLoose, ok);
   if (ok)
     *ok = false;
   return 0;
 }

 // Forwards to StringImpl::ToDouble. With no impl, returns 0.0 and reports
 // failure through `ok` (when provided).
 double String::ToDouble(bool* ok) const {
   if (impl_)
     return impl_->ToDouble(ok);
   if (ok)
     *ok = false;
   return 0.0;
 }

 // Forwards to StringImpl::ToFloat. With no impl, returns 0.0f and reports
 // failure through `ok` (when provided).
 float String::ToFloat(bool* ok) const {
   if (impl_)
     return impl_->ToFloat(ok);
   if (ok)
     *ok = false;
   return 0.0f;
 }

 // Forwards to StringImpl::IsolatedCopy; yields a default-constructed String
 // when impl_ is null.
 String String::IsolatedCopy() const {
   if (impl_)
     return impl_->IsolatedCopy();
   return String();
 }

 // A string without an impl is always reported as safe; otherwise the impl
 // decides.
 bool String::IsSafeToSendToAnotherThread() const {
   if (!impl_)
     return true;
   return impl_->IsSafeToSendToAnotherThread();
 }

 // Splits this string on every occurrence of `separator`, replacing the
 // contents of `result` with the pieces in order. Empty pieces are kept only
 // when `allow_empty_entries` is true.
 //
 // An empty separator performs no splitting: `result` receives the whole
 // string (subject to the `allow_empty_entries` rule for an empty string).
 void String::Split(const StringView& separator,
                    bool allow_empty_entries,
                    Vector<String>& result) const {
   result.clear();

   unsigned start_pos = 0;
   // Only scan when the separator has characters. With a zero-length
   // separator, any match Find() reports would advance `start_pos` by zero,
   // so the loop below could never terminate.
   if (const unsigned separator_length = separator.length()) {
     wtf_size_t end_pos;
     while ((end_pos = Find(separator, start_pos)) != kNotFound) {
       if (allow_empty_entries || start_pos != end_pos)
         result.push_back(Substring(start_pos, end_pos - start_pos));
       start_pos = end_pos + separator_length;
     }
   }
   // Whatever follows the last separator (or the entire string if none was
   // found) forms the final piece.
   if (allow_empty_entries || start_pos != length())
     result.push_back(Substring(start_pos));
 }

 // Splits this string on every occurrence of the single code unit
 // `separator`, replacing the contents of `result` with the pieces in order.
 // Empty pieces are kept only when `allow_empty_entries` is true.
 void String::Split(UChar separator,
                    bool allow_empty_entries,
                    Vector<String>& result) const {
   result.clear();

   unsigned token_start = 0;
   for (wtf_size_t hit = find(separator, token_start); hit != kNotFound;
        hit = find(separator, token_start)) {
     if (allow_empty_entries || hit != token_start)
       result.push_back(Substring(token_start, hit - token_start));
     token_start = hit + 1;
   }
   // Whatever follows the last separator forms the final piece.
   if (allow_empty_entries || token_start != length())
     result.push_back(Substring(token_start));
 }

 std::string String::Ascii() const {
   // Printable ASCII characters 32..127 and the null character are
   // preserved, characters outside of this range are converted to '?'.

   unsigned length = this->length();
   if (!length)
     return std::string();

   std::string ascii(length, '\0');
   if (this->Is8Bit()) {
     const LChar* characters = this->Characters8();

     for (unsigned i = 0; i < length; ++i) {
       LChar ch = characters[i];
       ascii[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
     }
     return ascii;
   }

   const UChar* characters = this->Characters16();
   for (unsigned i = 0; i < length; ++i) {
     UChar ch = characters[i];
     ascii[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : static_cast<char>(ch);
   }

   return ascii;
 }

 std::string String::Latin1() const {
   // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
   // preserved, characters outside of this range are converted to '?'.
   unsigned length = this->length();

   if (!length)
     return std::string();

   if (Is8Bit()) {
     return std::string(reinterpret_cast<const char*>(this->Characters8()),
                        length);
   }

   const UChar* characters = this->Characters16();
   std::string latin1(length, '\0');
   for (unsigned i = 0; i < length; ++i) {
     UChar ch = characters[i];
     latin1[i] = ch > 0xff ? '?' : static_cast<char>(ch);
   }

   return latin1;
 }

 // Writes `ch` as a three-byte UTF-8 sequence at `buffer` and advances
 // `buffer` past it. The caller is responsible for making sure three bytes of
 // room are available.
 static inline void PutUTF8Triple(char*& buffer, UChar ch) {
   DCHECK_GE(ch, 0x0800);
   const char lead = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
   const char middle = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
   const char trail = static_cast<char>((ch & 0x3F) | 0x80);
   buffer[0] = lead;
   buffer[1] = middle;
   buffer[2] = trail;
   buffer += 3;
 }

 // Converts this string to UTF-8 and returns it as a std::string.
 //
 // An empty string, or one whose length exceeds a third of the unsigned
 // range, yields an empty std::string. 8-bit strings are converted with
 // ConvertLatin1ToUTF8. For 16-bit strings the behaviour depends on `mode`:
 //  * kStrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD: each code unit
 //    on which strict conversion stops is replaced with kReplacementCharacter.
 //  * kStrictUTF8Conversion: a kSourceIllegal or kSourceExhausted conversion
 //    result makes the whole call return an empty std::string.
 //  * otherwise: conversion is non-strict, and a code unit left unconverted at
 //    the end of the source is emitted as a three-byte sequence.
 std::string String::Utf8(UTF8ConversionMode mode) const {
   unsigned length = this->length();

   if (!length)
     return std::string();

   // Allocate a buffer big enough to hold all the characters
   // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
   // Optimization ideas, if we find this function is hot:
   //  * We could speculatively create a std::string to contain 'length'
   //    characters, and resize if necessary (i.e. if the buffer contains
   //    non-ascii characters). (Alternatively, scan the buffer first for
   //    ascii characters, so we know this will be sufficient).
   //  * We could allocate a std::string with an appropriate size to
   //    have a good chance of being able to write the string into the
   //    buffer without reallocing (say, 1.5 x length).
   // Guard the `length * 3` computation below against unsigned overflow.
   if (length > std::numeric_limits<unsigned>::max() / 3)
     return std::string();
   Vector<char, 1024> buffer_vector(length * 3);

   // `buffer` is the write cursor; the conversion routines advance it.
   char* buffer = buffer_vector.data();

   if (Is8Bit()) {
     const LChar* characters = this->Characters8();

     unicode::ConversionResult result =
         unicode::ConvertLatin1ToUTF8(&characters, characters + length, &buffer,
                                      buffer + buffer_vector.size());
     // (length * 3) should be sufficient for any conversion
     DCHECK_NE(result, unicode::kTargetExhausted);
   } else {
     const UChar* characters = this->Characters16();

     if (mode == kStrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) {
       const UChar* characters_end = characters + length;
       char* buffer_end = buffer + buffer_vector.size();
       // Convert in runs: each iteration converts up to the next unpaired
       // surrogate, substitutes it, and resumes after it.
       while (characters < characters_end) {
         // Use strict conversion to detect unpaired surrogates.
         unicode::ConversionResult result = unicode::ConvertUTF16ToUTF8(
             &characters, characters_end, &buffer, buffer_end, true);
         DCHECK_NE(result, unicode::kTargetExhausted);
         // Conversion fails when there is an unpaired surrogate.  Put
         // replacement character (U+FFFD) instead of the unpaired
         // surrogate.
         if (result != unicode::kConversionOK) {
           DCHECK_LE(0xD800, *characters);
           DCHECK_LE(*characters, 0xDFFF);
           // There should be room left, since one UChar hasn't been
           // converted.
           DCHECK_LE(buffer + 3, buffer_end);
           PutUTF8Triple(buffer, kReplacementCharacter);
           ++characters;
         }
       }
     } else {
       bool strict = mode == kStrictUTF8Conversion;
       unicode::ConversionResult result =
           unicode::ConvertUTF16ToUTF8(&characters, characters + length, &buffer,
                                       buffer + buffer_vector.size(), strict);
       // (length * 3) should be sufficient for any conversion
       DCHECK_NE(result, unicode::kTargetExhausted);

       // Only produced from strict conversion.
       if (result == unicode::kSourceIllegal) {
         DCHECK(strict);
         return std::string();
       }

       // Check for an unconverted high surrogate.
       if (result == unicode::kSourceExhausted) {
         if (strict)
           return std::string();
         // This should be one unpaired high surrogate. Treat it the same
         // way as an unpaired high surrogate would have been handled in
         // the middle of a string with non-strict conversion - which is
         // to say, simply encode it to UTF-8.
         DCHECK_EQ(characters + 1, this->Characters16() + length);
         DCHECK_GE(*characters, 0xD800);
         DCHECK_LE(*characters, 0xDBFF);
         // There should be room left, since one UChar hasn't been
         // converted.
         DCHECK_LE(buffer + 3, buffer + buffer_vector.size());
         PutUTF8Triple(buffer, *characters);
       }
     }
   }

   // Only the bytes between the start of the buffer and the write cursor were
   // produced by the conversion.
   return std::string(buffer_vector.data(), buffer - buffer_vector.data());
 }

 // Creates an 8-bit string of `length` characters by copying from the 16-bit
 // `source` with CopyLCharsFromUCharSource. A zero length yields
 // g_empty_string.
 String String::Make8BitFrom16BitSource(const UChar* source, wtf_size_t length) {
   if (length == 0)
     return g_empty_string;

   LChar* narrow_chars;
   String narrowed = String::CreateUninitialized(length, narrow_chars);
   CopyLCharsFromUCharSource(narrow_chars, source, length);
   return narrowed;
 }

 // Creates a 16-bit string of `length` characters by copying from the 8-bit
 // `source` with StringImpl::CopyChars. A zero length yields
 // g_empty_string16_bit.
 String String::Make16BitFrom8BitSource(const LChar* source, wtf_size_t length) {
   if (length == 0)
     return g_empty_string16_bit;

   UChar* wide_chars;
   String widened = String::CreateUninitialized(length, wide_chars);
   StringImpl::CopyChars(wide_chars, source, length);
   return widened;
 }

 // Decodes `string_length` bytes of UTF-8 starting at `string_start`.
 //  * null `string_start`   -> default-constructed String
 //  * zero length           -> g_empty_string
 //  * ASCII-only input      -> created directly from the input bytes
 //  * otherwise             -> converted to UTF-16; any conversion result
 //                             other than kConversionOK yields a
 //                             default-constructed String
 String String::FromUTF8(const LChar* string_start, size_t string_length) {
   // The length is narrowed before the null check, as in the original order.
   const wtf_size_t byte_count = SafeCast<wtf_size_t>(string_length);

   if (!string_start)
     return String();
   if (byte_count == 0)
     return g_empty_string;

   ASCIIStringAttributes attributes =
       CharacterAttributes(string_start, byte_count);
   if (attributes.contains_only_ascii)
     return StringImpl::Create(string_start, byte_count, attributes);

   // The scratch buffer holds one UChar per input byte.
   Vector<UChar, 1024> utf16(byte_count);
   UChar* const utf16_begin = utf16.data();
   UChar* utf16_cursor = utf16_begin;
   const char* utf8_cursor = reinterpret_cast<const char*>(string_start);
   const char* const utf8_end =
       reinterpret_cast<const char*>(string_start + byte_count);

   const unicode::ConversionResult status = unicode::ConvertUTF8ToUTF16(
       &utf8_cursor, utf8_end, &utf16_cursor, utf16_begin + utf16.size());
   if (status != unicode::kConversionOK)
     return String();

   unsigned utf16_length = static_cast<wtf_size_t>(utf16_cursor - utf16_begin);
   DCHECK_LT(utf16_length, byte_count);
   return StringImpl::Create(utf16_begin, utf16_length);
 }

 // Decodes a null-terminated UTF-8 buffer; a null pointer yields a
 // default-constructed String.
 String String::FromUTF8(const LChar* string) {
   if (!string)
     return String();
   const size_t byte_count = strlen(reinterpret_cast<const char*>(string));
   return FromUTF8(string, byte_count);
 }

 // Decodes the bytes referenced by `s` as UTF-8.
 String String::FromUTF8(base::StringPiece s) {
   const LChar* const bytes = reinterpret_cast<const LChar*>(s.data());
   return FromUTF8(bytes, s.size());
 }

 // Tries to decode the input as UTF-8; if FromUTF8 hands back a String for
 // which operator! is true, the same bytes are used as latin1 data instead.
 String String::FromUTF8WithLatin1Fallback(const LChar* string, size_t size) {
   String decoded = FromUTF8(string, size);
   if (!decoded)
     decoded = String(string, SafeCast<wtf_size_t>(size));
   return decoded;
 }

 // Streams the debug encoding of `string` (see EncodeForDebugging) as UTF-8.
 std::ostream& operator<<(std::ostream& out, const String& string) {
   const std::string printable = string.EncodeForDebugging().Utf8();
   out << printable;
   return out;
 }

 #ifndef NDEBUG
 // Debugging aid, compiled only when NDEBUG is not defined: streams this
 // string to DLOG(INFO) using the operator<< overload for String.
 void String::Show() const {
   DLOG(INFO) << *this;
 }
 #endif

 // Writes this string into the traced value, using StringUTF8Adaptor to
 // supply the data pointer and size.
 void String::WriteIntoTracedValue(perfetto::TracedValue context) const {
   StringUTF8Adaptor utf8(*this);
   std::move(context).WriteString(utf8.data(), utf8.size());
 }

 }  // namespace WTF
	/*
	* (C) 1999 Lars Knoll (knoll@kde.org)
	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
	* reserved.
	* Copyright (C) 2007-2009 Torch Mobile, Inc.
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Library General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Library General Public License for more details.
	*
	* You should have received a copy of the GNU Library General Public License
	* along with this library; see the file COPYING.LIB. If not, write to
	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	* Boston, MA 02110-1301, USA.
	*/

	#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"

	#include <locale.h>
	#include <stdarg.h>
	#include <algorithm>
	#include "base/callback.h"
	#include "base/strings/string_util.h"
	#include "build/build_config.h"
	#include "third_party/blink/renderer/platform/wtf/dtoa.h"
	#include "third_party/blink/renderer/platform/wtf/math_extras.h"
	#include "third_party/blink/renderer/platform/wtf/size_assertions.h"
	#include "third_party/blink/renderer/platform/wtf/text/ascii_ctype.h"
	#include "third_party/blink/renderer/platform/wtf/text/case_map.h"
	#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
	#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
	#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
	#include "third_party/blink/renderer/platform/wtf/text/unicode.h"
	#include "third_party/blink/renderer/platform/wtf/text/utf8.h"
	#include "third_party/blink/renderer/platform/wtf/vector.h"
	#include "third_party/perfetto/include/perfetto/tracing/traced_value.h"

	namespace WTF {

	ASSERT_SIZE(String, void*);

	// Builds a string from `length` UTF-16 code units. A null `characters`
	// pointer leaves impl_ unset.
	String::String(const UChar* characters, unsigned length) {
	  if (characters)
	    impl_ = StringImpl::Create(characters, length);
	}

	// Builds a string from null-terminated UTF-16 data. A null `str` leaves
	// impl_ unset.
	String::String(const UChar* str)
	    : impl_(str ? StringImpl::Create(str, LengthOfNullTerminatedString(str))
	                : nullptr) {}

	// Builds a string from `length` latin1 characters. A null `characters`
	// pointer leaves impl_ unset.
	String::String(const LChar* characters, unsigned length) {
	  if (characters)
	    impl_ = StringImpl::Create(characters, length);
	}

	// Same as the LChar constructor, but accepts plain `char` data, which is
	// reinterpreted as LChar. A null `characters` pointer leaves impl_ unset.
	String::String(const char* characters, unsigned length) {
	  if (!characters)
	    return;
	  impl_ =
	      StringImpl::Create(reinterpret_cast<const LChar*>(characters), length);
	}

	#if defined(ARCH_CPU_64_BITS)
	// Overload taking a size_t length; only compiled when ARCH_CPU_64_BITS is
	// defined. The length is narrowed with SafeCast<unsigned> and construction
	// is delegated to the (const char*, unsigned) constructor.
	String::String(const char* characters, size_t length)
	    : String(characters, SafeCast<unsigned>(length)) {}
	#endif  // defined(ARCH_CPU_64_BITS)

	// Compares two strings by handing their underlying impls to the impl-level
	// CodeUnitCompare overload.
	int CodeUnitCompare(const String& a, const String& b) {
	  auto* const lhs = a.Impl();
	  auto* const rhs = b.Impl();
	  return CodeUnitCompare(lhs, rhs);
	}

	int CodeUnitCompareIgnoringASCIICase(const String& a, const char* b) {
	return CodeUnitCompareIgnoringASCIICase(a.Impl(),
	reinterpret_cast<const LChar*>(b));
	}

	// Searches from `index` using `match_callback`; returns kNotFound when
	// there is no impl to search.
	wtf_size_t String::Find(base::RepeatingCallback<bool(UChar)> match_callback,
	                        wtf_size_t index) const {
	  if (!impl_)
	    return kNotFound;
	  return impl_->Find(match_callback, index);
	}

	// Returns the character starting at code-unit index `i`, or 0 when there is
	// no impl or `i` is at/after the end.
	UChar32 String::CharacterStartingAt(unsigned i) const {
	  // The condition was previously spelled with markdown-escaped pipes
	  // ("\|\|"), which is not valid C++; it is the plain logical OR.
	  if (!impl_ || i >= impl_->length())
	    return 0;
	  return impl_->CharacterStartingAt(i);
	}

	void String::Ensure16Bit() {
	if (IsNull())
	return;
	if (!Is8Bit())
	return;
	if (unsigned length = this->length())
	impl_ = Make16BitFrom8BitSource(impl_->Characters8(), length).ReleaseImpl();
	else
	impl_ = StringImpl::empty16_bit_;
	}

	// Replaces impl_ with the result of StringImpl::Truncate(length); no-op
	// when impl_ is null.
	void String::Truncate(unsigned length) {
	  if (!impl_)
	    return;
	  impl_ = impl_->Truncate(length);
	}

	// Replaces impl_ with the result of StringImpl::Remove(start,
	// length_to_remove); no-op when impl_ is null.
	void String::Remove(unsigned start, unsigned length_to_remove) {
	  if (!impl_)
	    return;
	  impl_ = impl_->Remove(start, length_to_remove);
	}

	// Forwards to StringImpl::Substring; yields a default-constructed String
	// when impl_ is null.
	String String::Substring(unsigned pos, unsigned len) const {
	  if (impl_)
	    return impl_->Substring(pos, len);
	  return String();
	}

	// Lower-cases via CaseMap::FastToLowerInvariant; yields a
	// default-constructed String when impl_ is null.
	String String::DeprecatedLower() const {
	  if (impl_)
	    return CaseMap::FastToLowerInvariant(impl_.get());
	  return String();
	}

	// Forwards to StringImpl::LowerASCII; yields a default-constructed String
	// when impl_ is null.
	String String::LowerASCII() const {
	  if (impl_)
	    return impl_->LowerASCII();
	  return String();
	}

	// Forwards to StringImpl::UpperASCII; yields a default-constructed String
	// when impl_ is null.
	String String::UpperASCII() const {
	  if (impl_)
	    return impl_->UpperASCII();
	  return String();
	}

	// Forwards to StringImpl::StripWhiteSpace(); yields a default-constructed
	// String when impl_ is null.
	String String::StripWhiteSpace() const {
	  if (impl_)
	    return impl_->StripWhiteSpace();
	  return String();
	}

	// As StripWhiteSpace(), but passes the caller-supplied `is_white_space`
	// predicate through to StringImpl.
	String String::StripWhiteSpace(IsWhiteSpaceFunctionPtr is_white_space) const {
	  if (impl_)
	    return impl_->StripWhiteSpace(is_white_space);
	  return String();
	}

	// Forwards to StringImpl::SimplifyWhiteSpace(strip_behavior); yields a
	// default-constructed String when impl_ is null.
	String String::SimplifyWhiteSpace(StripBehavior strip_behavior) const {
	  if (impl_)
	    return impl_->SimplifyWhiteSpace(strip_behavior);
	  return String();
	}

	// As SimplifyWhiteSpace(strip_behavior), but passes the caller-supplied
	// `is_white_space` predicate through to StringImpl.
	String String::SimplifyWhiteSpace(IsWhiteSpaceFunctionPtr is_white_space,
	                                  StripBehavior strip_behavior) const {
	  if (impl_)
	    return impl_->SimplifyWhiteSpace(is_white_space, strip_behavior);
	  return String();
	}

	// Forwards to StringImpl::RemoveCharacters(find_match); yields a
	// default-constructed String when impl_ is null.
	String String::RemoveCharacters(CharacterMatchFunctionPtr find_match) const {
	  if (impl_)
	    return impl_->RemoveCharacters(find_match);
	  return String();
	}

	// Forwards to StringImpl::FoldCase; yields a default-constructed String
	// when impl_ is null.
	String String::FoldCase() const {
	  if (impl_)
	    return impl_->FoldCase();
	  return String();
	}

	// printf-style formatting into a String. The output is first written into
	// a 256-byte buffer; if that is too small the buffer is grown to the exact
	// size reported by vsnprintf and the formatting is repeated. Returns a
	// default-constructed String if vsnprintf reports an error.
	String String::Format(const char* format, ...) {
	  // vsnprintf formats floats according to the current locale, and we need
	  // the decimal separator to always be '.'. Verify that the separator is
	  // '.', and also that the numeric locale really is the default "C" locale
	  // so that the first check is not passing by luck. Android's locales work
	  // differently, so the second check cannot be made there.
	  DCHECK_EQ(strcmp(localeconv()->decimal_point, "."), 0);
	#if !defined(OS_ANDROID)
	  DCHECK_EQ(strcmp(setlocale(LC_NUMERIC, nullptr), "C"), 0);
	#endif  // !OS_ANDROID

	  // TODO(esprehn): base uses 1024, maybe we should use a bigger size too.
	  static const unsigned kInitialCapacity = 256;
	  Vector<char, kInitialCapacity> buffer(kInitialCapacity);

	  va_list args;
	  va_start(args, format);
	  int length = base::vsnprintf(buffer.data(), buffer.size(), format, args);
	  va_end(args);

	  // TODO(esprehn): This can only happen if there's an encoding error,
	  // what's the locale set to inside blink? Can this happen? We should
	  // probably CHECK instead.
	  if (length < 0)
	    return String();

	  const bool output_was_truncated =
	      static_cast<unsigned>(length) >= buffer.size();
	  if (output_was_truncated) {
	    // The length reported by vsnprintf excludes the NUL terminator, so
	    // reserve one extra byte for it.
	    buffer.Grow(length + 1);

	    // The contents of `args` are undefined after a call to vsnprintf
	    // (see http://man.cx/snprintf(3)), so the list has to be restarted
	    // with va_start() and closed with va_end() around every use. Skipping
	    // this happens to work on lots of systems, but fails e.g. on 64bit
	    // Linux.
	    va_start(args, format);
	    length = base::vsnprintf(buffer.data(), buffer.size(), format, args);
	    va_end(args);
	  }

	  CHECK_LT(static_cast<unsigned>(length), buffer.size());
	  const LChar* const formatted =
	      reinterpret_cast<const LChar*>(buffer.data());
	  return String(formatted, length);
	}

	// Produces a double-quoted, escaped rendering of the string for debug
	// output. A null string is rendered as <null> (without quotes). Tab,
	// newline, carriage return, double quote and backslash get backslash
	// shorthands; other printable ASCII is copied through; everything else
	// becomes \uXXXX.
	String String::EncodeForDebugging() const {
	  if (IsNull())
	    return "<null>";

	  StringBuilder quoted;
	  quoted.Append('"');
	  for (unsigned i = 0; i < length(); ++i) {
	    const UChar unit = (*impl_)[i];
	    if (unit == '\t') {
	      quoted.Append("\\t");
	    } else if (unit == '\n') {
	      quoted.Append("\\n");
	    } else if (unit == '\r') {
	      quoted.Append("\\r");
	    } else if (unit == '"') {
	      quoted.Append("\\\"");
	    } else if (unit == '\\') {
	      quoted.Append("\\\\");
	    } else if (IsASCIIPrintable(unit)) {
	      quoted.Append(static_cast<char>(unit));
	    } else {
	      // Control or non-ASCII code unit: print it as "\uXXXX".
	      quoted.AppendFormat("\\u%04X", unit);
	    }
	  }
	  quoted.Append('"');
	  return quoted.ToString();
	}

	// Widens the float to double and defers to the double overload of Number().
	String String::Number(float number) {
	  const double widened = static_cast<double>(number);
	  return Number(widened);
	}

	// Formats `number` with NumberToFixedPrecisionString using `precision`,
	// and wraps the resulting text in a String.
	String String::Number(double number, unsigned precision) {
	  NumberToStringBuffer scratch;
	  const char* const text =
	      NumberToFixedPrecisionString(number, precision, scratch);
	  return String(text);
	}

	// Formats `number` with NumberToString and wraps the resulting text in a
	// String.
	String String::NumberToStringECMAScript(double number) {
	  NumberToStringBuffer scratch;
	  const char* const text = NumberToString(number, scratch);
	  return String(text);
	}

	// Formats `number` with NumberToFixedWidthString using `decimal_places`,
	// and wraps the resulting text in a String.
	String String::NumberToStringFixedWidth(double number,
	                                        unsigned decimal_places) {
	  NumberToStringBuffer scratch;
	  const char* const text =
	      NumberToFixedWidthString(number, decimal_places, scratch);
	  return String(text);
	}

	// Parses the string as an int with NumberParsingOptions::kStrict. With no
	// impl, returns 0 and reports failure through `ok` (when provided).
	int String::ToIntStrict(bool* ok) const {
	  if (impl_)
	    return impl_->ToInt(NumberParsingOptions::kStrict, ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Parses the string as an unsigned with NumberParsingOptions::kStrict.
	// With no impl, returns 0 and reports failure through `ok` (when provided).
	unsigned String::ToUIntStrict(bool* ok) const {
	  if (impl_)
	    return impl_->ToUInt(NumberParsingOptions::kStrict, ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Forwards to StringImpl::HexToUIntStrict. With no impl, returns 0 and
	// reports failure through `ok` (when provided).
	unsigned String::HexToUIntStrict(bool* ok) const {
	  if (impl_)
	    return impl_->HexToUIntStrict(ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Forwards to StringImpl::HexToUInt64Strict. With no impl, returns 0 and
	// reports failure through `ok` (when provided).
	uint64_t String::HexToUInt64Strict(bool* ok) const {
	  if (impl_)
	    return impl_->HexToUInt64Strict(ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Parses the string as an int64_t with NumberParsingOptions::kStrict. With
	// no impl, returns 0 and reports failure through `ok` (when provided).
	int64_t String::ToInt64Strict(bool* ok) const {
	  if (impl_)
	    return impl_->ToInt64(NumberParsingOptions::kStrict, ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Parses the string as a uint64_t with NumberParsingOptions::kStrict. With
	// no impl, returns 0 and reports failure through `ok` (when provided).
	uint64_t String::ToUInt64Strict(bool* ok) const {
	  if (impl_)
	    return impl_->ToUInt64(NumberParsingOptions::kStrict, ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Parses the string as an int with NumberParsingOptions::kLoose. With no
	// impl, returns 0 and reports failure through `ok` (when provided).
	int String::ToInt(bool* ok) const {
	  if (impl_)
	    return impl_->ToInt(NumberParsingOptions::kLoose, ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Parses the string as an unsigned with NumberParsingOptions::kLoose. With
	// no impl, returns 0 and reports failure through `ok` (when provided).
	unsigned String::ToUInt(bool* ok) const {
	  if (impl_)
	    return impl_->ToUInt(NumberParsingOptions::kLoose, ok);
	  if (ok)
	    *ok = false;
	  return 0;
	}

	// Forwards to StringImpl::ToDouble. With no impl, returns 0.0 and reports
	// failure through `ok` (when provided).
	double String::ToDouble(bool* ok) const {
	  if (impl_)
	    return impl_->ToDouble(ok);
	  if (ok)
	    *ok = false;
	  return 0.0;
	}

	// Forwards to StringImpl::ToFloat. With no impl, returns 0.0f and reports
	// failure through `ok` (when provided).
	float String::ToFloat(bool* ok) const {
	  if (impl_)
	    return impl_->ToFloat(ok);
	  if (ok)
	    *ok = false;
	  return 0.0f;
	}

	// Forwards to StringImpl::IsolatedCopy; yields a default-constructed String
	// when impl_ is null.
	String String::IsolatedCopy() const {
	  if (impl_)
	    return impl_->IsolatedCopy();
	  return String();
	}

	// A string without an impl is always reported as safe; otherwise the impl
	// decides.
	bool String::IsSafeToSendToAnotherThread() const {
	  // The operator was previously spelled with markdown-escaped pipes
	  // ("\|\|"), which is not valid C++; it is the plain logical OR.
	  return !impl_ || impl_->IsSafeToSendToAnotherThread();
	}

	// Splits this string on every occurrence of `separator`, replacing the
	// contents of `result` with the pieces in order. Empty pieces are kept only
	// when `allow_empty_entries` is true.
	//
	// An empty separator performs no splitting: `result` receives the whole
	// string (subject to the `allow_empty_entries` rule for an empty string).
	//
	// The conditions were previously spelled with markdown-escaped pipes
	// ("\|\|"), which is not valid C++; they are plain logical ORs.
	void String::Split(const StringView& separator,
	                   bool allow_empty_entries,
	                   Vector<String>& result) const {
	  result.clear();

	  unsigned start_pos = 0;
	  // Only scan when the separator has characters. With a zero-length
	  // separator, any match Find() reports would advance `start_pos` by zero,
	  // so the loop below could never terminate.
	  if (const unsigned separator_length = separator.length()) {
	    wtf_size_t end_pos;
	    while ((end_pos = Find(separator, start_pos)) != kNotFound) {
	      if (allow_empty_entries || start_pos != end_pos)
	        result.push_back(Substring(start_pos, end_pos - start_pos));
	      start_pos = end_pos + separator_length;
	    }
	  }
	  // Whatever follows the last separator (or the entire string if none was
	  // found) forms the final piece.
	  if (allow_empty_entries || start_pos != length())
	    result.push_back(Substring(start_pos));
	}

	// Splits this string on every occurrence of the single code unit
	// `separator`, replacing the contents of `result` with the pieces in order.
	// Empty pieces are kept only when `allow_empty_entries` is true.
	//
	// The conditions were previously spelled with markdown-escaped pipes
	// ("\|\|"), which is not valid C++; they are plain logical ORs.
	void String::Split(UChar separator,
	                   bool allow_empty_entries,
	                   Vector<String>& result) const {
	  result.clear();

	  unsigned start_pos = 0;
	  wtf_size_t end_pos;
	  while ((end_pos = find(separator, start_pos)) != kNotFound) {
	    if (allow_empty_entries || start_pos != end_pos)
	      result.push_back(Substring(start_pos, end_pos - start_pos));
	    // Resume just past the separator that was found.
	    start_pos = end_pos + 1;
	  }
	  // Whatever follows the last separator forms the final piece.
	  if (allow_empty_entries || start_pos != length())
	    result.push_back(Substring(start_pos));
	}

	// Converts to a std::string of the same length. Code units 0x20..0x7f and
	// the null character are copied through; every other code unit is replaced
	// with '?'.
	//
	// The range checks were previously spelled with markdown-escaped pipes
	// ("\|\|"), which is not valid C++; they are plain logical ORs.
	std::string String::Ascii() const {
	  unsigned length = this->length();
	  if (!length)
	    return std::string();

	  std::string ascii(length, '\0');
	  if (this->Is8Bit()) {
	    const LChar* characters = this->Characters8();

	    for (unsigned i = 0; i < length; ++i) {
	      LChar ch = characters[i];
	      ascii[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
	    }
	    return ascii;
	  }

	  const UChar* characters = this->Characters16();
	  for (unsigned i = 0; i < length; ++i) {
	    UChar ch = characters[i];
	    ascii[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : static_cast<char>(ch);
	  }

	  return ascii;
	}

	std::string String::Latin1() const {
	// Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
	// preserved, characters outside of this range are converted to '?'.
	unsigned length = this->length();

	if (!length)
	return std::string();

	if (Is8Bit()) {
	return std::string(reinterpret_cast<const char*>(this->Characters8()),
	length);
	}

	const UChar* characters = this->Characters16();
	std::string latin1(length, '\0');
	for (unsigned i = 0; i < length; ++i) {
	UChar ch = characters[i];
	latin1[i] = ch > 0xff ? '?' : static_cast<char>(ch);
	}

	return latin1;
	}

	// Writes |ch| as a three-byte UTF-8 sequence at |buffer| and advances
	// |buffer| past the bytes written. The caller is responsible for making
	// sure at least three bytes of room are available.
	static inline void PutUTF8Triple(char*& buffer, UChar ch) {
	  DCHECK_GE(ch, 0x0800);
	  // Lead byte: 1110xxxx carrying the top four bits.
	  buffer[0] = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
	  // Two continuation bytes: 10xxxxxx carrying six bits each.
	  buffer[1] = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
	  buffer[2] = static_cast<char>((ch & 0x3F) | 0x80);
	  buffer += 3;
	}

	// Encodes this string as UTF-8 and returns it as a std::string.
	//
	// |mode| selects how 16-bit strings with unpaired surrogates are handled:
	//  * kStrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD: each
	//    unpaired surrogate is replaced with U+FFFD.
	//  * kStrictUTF8Conversion: any conversion failure yields an empty
	//    std::string.
	//  * otherwise (non-strict): a trailing unpaired high surrogate is encoded
	//    directly as a three-byte sequence.
	//
	// An empty string, or one too long for the 3x scratch buffer size to be
	// represented in an unsigned, also yields an empty std::string.
	std::string String::Utf8(UTF8ConversionMode mode) const {
	  unsigned length = this->length();

	  if (!length)
	    return std::string();

	  // Allocate a buffer big enough to hold all the characters
	  // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
	  // Optimization ideas, if we find this function is hot:
	  //  * We could speculatively create a std::string to contain 'length'
	  //    characters, and resize if necessary (i.e. if the buffer contains
	  //    non-ascii characters). (Alternatively, scan the buffer first for
	  //    ascii characters, so we know this will be sufficient).
	  //  * We could allocate a std::string with an appropriate size to
	  //    have a good chance of being able to write the string into the
	  //    buffer without reallocing (say, 1.5 x length).
	  if (length > std::numeric_limits<unsigned>::max() / 3)
	    return std::string();
	  Vector<char, 1024> buffer_vector(length * 3);

	  // |buffer| is the write cursor and is advanced by the converters.
	  // |buffer_end| is fixed here, while |buffer| still points at the start
	  // of the storage, so later bounds checks compare against the real end.
	  char* buffer = buffer_vector.data();
	  char* const buffer_end = buffer + buffer_vector.size();

	  if (Is8Bit()) {
	    const LChar* characters = this->Characters8();

	    unicode::ConversionResult result = unicode::ConvertLatin1ToUTF8(
	        &characters, characters + length, &buffer, buffer_end);
	    // (length * 3) should be sufficient for any conversion
	    DCHECK_NE(result, unicode::kTargetExhausted);
	  } else {
	    const UChar* characters = this->Characters16();

	    if (mode == kStrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) {
	      const UChar* characters_end = characters + length;
	      while (characters < characters_end) {
	        // Use strict conversion to detect unpaired surrogates.
	        unicode::ConversionResult result = unicode::ConvertUTF16ToUTF8(
	            &characters, characters_end, &buffer, buffer_end, true);
	        DCHECK_NE(result, unicode::kTargetExhausted);
	        // Conversion fails when there is an unpaired surrogate. Put
	        // replacement character (U+FFFD) instead of the unpaired
	        // surrogate.
	        if (result != unicode::kConversionOK) {
	          DCHECK_LE(0xD800, *characters);
	          DCHECK_LE(*characters, 0xDFFF);
	          // There should be room left, since one UChar hasn't been
	          // converted.
	          DCHECK_LE(buffer + 3, buffer_end);
	          PutUTF8Triple(buffer, kReplacementCharacter);
	          ++characters;
	        }
	      }
	    } else {
	      bool strict = mode == kStrictUTF8Conversion;
	      unicode::ConversionResult result = unicode::ConvertUTF16ToUTF8(
	          &characters, characters + length, &buffer, buffer_end, strict);
	      // (length * 3) should be sufficient for any conversion
	      DCHECK_NE(result, unicode::kTargetExhausted);

	      // Only produced from strict conversion.
	      if (result == unicode::kSourceIllegal) {
	        DCHECK(strict);
	        return std::string();
	      }

	      // Check for an unconverted high surrogate.
	      if (result == unicode::kSourceExhausted) {
	        if (strict)
	          return std::string();
	        // This should be one unpaired high surrogate. Treat it the same
	        // way as an unpaired high surrogate would have been handled in
	        // the middle of a string with non-strict conversion - which is
	        // to say, simply encode it to UTF-8.
	        DCHECK_EQ(characters + 1, this->Characters16() + length);
	        DCHECK_GE(*characters, 0xD800);
	        DCHECK_LE(*characters, 0xDBFF);
	        // There should be room left, since one UChar hasn't been
	        // converted. Compare against the fixed end of the storage:
	        // |buffer| has already been advanced by the conversion above.
	        DCHECK_LE(buffer + 3, buffer_end);
	        PutUTF8Triple(buffer, *characters);
	      }
	    }
	  }

	  // Only the bytes actually written (start of storage up to the cursor)
	  // are copied into the result.
	  return std::string(buffer_vector.data(), buffer - buffer_vector.data());
	}

	// Builds an 8-bit String of |length| characters from the 16-bit |source|
	// via CopyLCharsFromUCharSource(). A zero |length| returns g_empty_string.
	String String::Make8BitFrom16BitSource(const UChar* source, wtf_size_t length) {
	  if (length == 0)
	    return g_empty_string;

	  // CreateUninitialized() hands back the writable storage through |out|.
	  LChar* out;
	  String narrowed = String::CreateUninitialized(length, out);
	  CopyLCharsFromUCharSource(out, source, length);
	  return narrowed;
	}

	// Builds a 16-bit String of |length| characters from the 8-bit |source|
	// via StringImpl::CopyChars(). A zero |length| returns
	// g_empty_string16_bit.
	String String::Make16BitFrom8BitSource(const LChar* source, wtf_size_t length) {
	  if (length == 0)
	    return g_empty_string16_bit;

	  // CreateUninitialized() hands back the writable storage through |out|.
	  UChar* out;
	  String widened = String::CreateUninitialized(length, out);
	  StringImpl::CopyChars(out, source, length);
	  return widened;
	}

	// Decodes |string_length| bytes of UTF-8 starting at |string_start|.
	//
	// Returns a null String when |string_start| is null or the bytes fail to
	// convert, g_empty_string when the length is zero, and otherwise a String
	// holding the decoded text. Input that is entirely ASCII is created
	// directly from the bytes without going through the UTF-16 conversion.
	String String::FromUTF8(const LChar* string_start, size_t string_length) {
	  const wtf_size_t length = SafeCast<wtf_size_t>(string_length);

	  if (!string_start)
	    return String();
	  if (length == 0)
	    return g_empty_string;

	  ASCIIStringAttributes attributes = CharacterAttributes(string_start, length);
	  if (attributes.contains_only_ascii)
	    return StringImpl::Create(string_start, length, attributes);

	  // Scratch space for the decoded text, sized at one UChar per input byte.
	  Vector<UChar, 1024> utf16(length);
	  UChar* const utf16_begin = utf16.data();
	  UChar* utf16_cursor = utf16_begin;

	  const char* utf8_cursor = reinterpret_cast<const char*>(string_start);
	  const char* const utf8_end =
	      reinterpret_cast<const char*>(string_start + length);

	  const unicode::ConversionResult status = unicode::ConvertUTF8ToUTF16(
	      &utf8_cursor, utf8_end, &utf16_cursor, utf16_begin + utf16.size());
	  if (status != unicode::kConversionOK)
	    return String();

	  const unsigned utf16_length =
	      static_cast<wtf_size_t>(utf16_cursor - utf16_begin);
	  // The all-ASCII case returned above, so the decoded text is expected to
	  // be strictly shorter than the byte count.
	  DCHECK_LT(utf16_length, length);
	  return StringImpl::Create(utf16_begin, utf16_length);
	}

	// Decodes a NUL-terminated UTF-8 byte sequence. A null pointer yields a
	// null String; otherwise the length is taken with strlen() and the work is
	// forwarded to the (pointer, length) overload.
	String String::FromUTF8(const LChar* string) {
	  if (string) {
	    const size_t byte_count = strlen(reinterpret_cast<const char*>(string));
	    return FromUTF8(string, byte_count);
	  }
	  return String();
	}

	// Decodes the bytes viewed by |s| as UTF-8 by forwarding its data pointer
	// and size to the (pointer, length) overload.
	String String::FromUTF8(base::StringPiece s) {
	  const LChar* bytes = reinterpret_cast<const LChar*>(s.data());
	  return FromUTF8(bytes, s.size());
	}

	// Tries to decode |size| bytes at |string| as UTF-8. If that yields a null
	// String, falls back to constructing a String directly from the same
	// bytes.
	String String::FromUTF8WithLatin1Fallback(const LChar* string, size_t size) {
	  String decoded = FromUTF8(string, size);
	  if (!decoded) {
	    // UTF-8 decoding produced a null String; use the raw bytes instead.
	    decoded = String(string, SafeCast<wtf_size_t>(size));
	  }
	  return decoded;
	}

	std::ostream& operator<<(std::ostream& out, const String& string) {
	return out << string.EncodeForDebugging().Utf8();
	}

	#ifndef NDEBUG
	// Debugging aid, compiled only when NDEBUG is not defined: streams this
	// string to DLOG(INFO) using the String operator<< overload.
	void String::Show() const {
	DLOG(INFO) << *this;
	}
	#endif

	// Writes this string into the tracing |context| as a string value, using
	// a StringUTF8Adaptor to obtain the byte pointer and size. |context| is
	// consumed by the write.
	void String::WriteIntoTracedValue(perfetto::TracedValue context) const {
	  StringUTF8Adaptor utf8_view(*this);
	  std::move(context).WriteString(utf8_view.data(), utf8_view.size());
	}

	} // namespace WTF