| /* |
| * Copyright (c) 2008-2009 Brent Fulgham <bfulgham@gmail.org>. All rights reserved. |
| * |
| * This source code is a modified version of the CoreFoundation sources released by Apple Inc. under |
| * the terms of the APSL version 2.0 (see below). |
| * |
 * Information about changes from the original Apple source release can be found by reviewing the
 * source control system for the project at https://sourceforge.net/svn/?group_id=246198.
| * |
| * The original license information is as follows: |
| * |
| * Copyright (c) 2008 Apple Inc. All rights reserved. |
| * |
| * @APPLE_LICENSE_HEADER_START@ |
| * |
| * This file contains Original Code and/or Modifications of Original Code |
| * as defined in and that are subject to the Apple Public Source License |
| * Version 2.0 (the 'License'). You may not use this file except in |
| * compliance with the License. Please obtain a copy of the License at |
| * http://www.opensource.apple.com/apsl/ and read it before using this |
| * file. |
| * |
| * The Original Code and all software distributed under the License are |
| * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| * Please see the License for the specific language governing rights and |
| * limitations under the License. |
| * |
| * @APPLE_LICENSE_HEADER_END@ |
| */ |
| /* CFString.c |
| Copyright 1998-2002, Apple, Inc. All rights reserved. |
| Responsibility: Ali Ozer |
| |
| !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined. |
| */ |
| |
| #include <CoreFoundation/CFBase.h> |
| #include <CoreFoundation/CFString.h> |
| #include <CoreFoundation/CFDictionary.h> |
| #include "CFStringEncodingConverterExt.h" |
| #include "CFUniChar.h" |
| #include "CFUnicodeDecomposition.h" |
| #include "CFUnicodePrecomposition.h" |
| #include "CFPriv.h" |
| #include "CFInternal.h" |
| #include <stdarg.h> |
| #include <stdio.h> |
| #include <string.h> |
| #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD |
| #include <unistd.h> |
| #else |
| extern size_t strlcpy(char *dst, const char *src, size_t siz); |
| extern size_t strlcat(char *dst, const char *src, size_t siz); |
| #endif |
| |
/* Compile-time switches for this file. */

/* LONG_DOUBLE_SUPPORT is 1 only when the compiler defines __GNUC__. */
#ifdef __GNUC__
#define LONG_DOUBLE_SUPPORT 1
#else
#define LONG_DOUBLE_SUPPORT 0
#endif

/* The string ROM is always disabled here. */
#define USE_STRING_ROM 0

/* Shared-string instrumentation is off unless the build defines it beforehand. */
#if !defined(INSTRUMENT_SHARED_STRINGS)
#define INSTRUMENT_SHARED_STRINGS 0
#endif
| |
| #if DEPLOYMENT_TARGET_MACOSX |
| __private_extern__ |
| #else |
| CF_EXPORT const |
| #endif |
| CFStringRef __kCFLocaleCollatorID; |
| |
#if INSTRUMENT_SHARED_STRINGS
#include <sys/stat.h> /* for umask() */
#include <fcntl.h>    /* for open() and the O_* flags */
#include <errno.h>    /* for errno */

/* Appends one line describing a string allocation to a per-process log file,
   /tmp/CFSharedStringInstrumentation_<progname>_<pid>.txt.

   encoding  - NUL-terminated label written in the second column
   bytes     - string contents; at most byteCount bytes are written
   byteCount - number of bytes available at 'bytes'

   Strings containing a newline are skipped. The log file is opened lazily on the first
   call; if that open fails, the failure is reported once on stderr and later calls do
   nothing. All file access is serialized by a function-local spin lock.
*/
static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
    static CFSpinLock_t lock = CFSpinLockInit;
    static int fd;      // 0: not opened yet; < 0: open failed; > 0: log file descriptor

    if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM

    __CFSpinLock(&lock);
    if (! fd) {
        extern char **_NSGetProgname(void);
        const char *name = *_NSGetProgname();
        if (! name) name = "UNKNOWN";
        char path[1024];
        // Bounded formatting: the program name has no fixed maximum length.
        snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
        // Clear the umask only for the duration of open(), so the file gets mode 0666
        // without permanently changing the umask of the whole process.
        mode_t savedMask = umask(0);
        fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
        int error = errno;  // save before any later call can overwrite it
        umask(savedMask);
        if (fd <= 0) {
            const char *errString = strerror(error);
            fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
        }
    }
    if (fd > 0) {
        char *buffer = NULL;
        char formatString[256];
        // Build a format whose %.<N>s precision limits output to byteCount bytes of 'bytes'.
        snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", (long)byteCount);
        int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
        if (resultCount < 0) buffer = NULL; // contents of 'buffer' are unspecified after a failed asprintf()
        if (buffer && resultCount > 0) {
            if (write(fd, buffer, resultCount) < 0) puts("Couldn't record allocation event");
        } else {
            puts("Couldn't record allocation event");
        }
        free(buffer);
    }
    __CFSpinUnlock(&lock);
}
#endif //INSTRUMENT_SHARED_STRINGS
| |
| |
| |
| typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar); |
| |
| #if DEPLOYMENT_TARGET_MACOSX |
| extern size_t malloc_good_size(size_t size); |
| #endif |
| extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars); |
| |
#ifdef DEBUG

/* Placed into C & Pascal strings when a conversion cannot be performed. */
#define CONVERSIONFAILURESTR "CFString conversion failed"

/* Set to true while the constant string table is being purged, so that
   CFStringDeallocate does not assert. */
static Boolean __CFConstantStringTableBeingFreed = false;

#endif
| |
| |
| |
| // This section is for CFString compatibility and other behaviors... |
| |
| static CFOptionFlags _CFStringCompatibilityMask = 0; |
| |
| #define Bug2967272 1 |
| |
| void _CFStringSetCompatibility(CFOptionFlags mask) { |
| _CFStringCompatibilityMask |= mask; |
| } |
| |
| CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) { |
| return (_CFStringCompatibilityMask & mask) == mask; |
| } |
| |
| |
| |
| // Two constant strings used by CFString; these are initialized in CFStringInitialize |
| CONST_STRING_DECL(kCFEmptyString, "") |
| |
| // This is separate for C++ |
| struct __notInlineMutable { |
| void *buffer; |
| CFIndex length; |
| CFIndex capacity; // Capacity in bytes |
| unsigned int hasGap:1; // Currently unused |
| unsigned int isFixedCapacity:1; |
| unsigned int isExternalMutable:1; |
| unsigned int capacityProvidedExternally:1; |
| #if __LP64__ |
| unsigned long desiredCapacity:60; |
| #else |
| unsigned long desiredCapacity:28; |
| #endif |
| CFAllocatorRef contentsAllocator; // Optional |
| }; // The only mutable variant for CFString |
| |
| |
| /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields. |
| */ |
| struct __CFString { |
| CFRuntimeBase base; |
| union { // In many cases the allocated structs are smaller than these |
| struct __inline1 { |
| CFIndex length; |
| } inline1; // Bytes follow the length |
| struct __notInlineImmutable1 { |
| void *buffer; // Note that the buffer is in the same place for all non-inline variants of CFString |
| CFIndex length; |
| CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used |
| } notInlineImmutable1; // This is the usual not-inline immutable CFString |
| struct __notInlineImmutable2 { |
| void *buffer; |
| CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used |
| } notInlineImmutable2; // This is the not-inline immutable CFString when length is stored with the contents (first byte) |
| struct __notInlineMutable notInlineMutable; |
| } variants; |
| }; |
| |
| /* |
| I = is immutable |
| E = not inline contents |
| U = is Unicode |
| N = has NULL byte |
| L = has length byte |
| D = explicit deallocator for contents (for mutable objects, allocator) |
| C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really |
| if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit |
| |
| Also need (only for mutable) |
| F = is fixed |
| G = has gap |
| Cap, DesCap = capacity |
| |
| B7 B6 B5 B4 B3 B2 B1 B0 |
| U N L C I |
| |
| B6 B5 |
| 0 0 inline contents |
| 0 1 E (freed with default allocator) |
| 1 0 E (not freed) |
| 1 1 E D |
| |
| !!! Note: Constant CFStrings use the bit patterns: |
| C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable) |
| D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable) |
The bit usages should not be modified in a way that would affect these bit patterns.
| */ |
| |
/* Masks and values for the CFString info bits, grouped by the bits they occupy. */
enum {
    // Bit 0: mutability
    __kCFIsMutableMask                  = 0x01,
    __kCFIsMutable                      = 0x01,

    // !!! Bit 0x02 has been freed up

    // Bit 2: contents start with a length byte
    __kCFHasLengthByteMask              = 0x04,
    __kCFHasLengthByte                  = 0x04,

    // Bit 3: contents end with a NULL byte
    __kCFHasNullByteMask                = 0x08,
    __kCFHasNullByte                    = 0x08,

    // Bit 4: contents are Unicode rather than 8-bit
    __kCFIsUnicodeMask                  = 0x10,
    __kCFIsUnicode                      = 0x10,

    // Bit 5 alone: contents get freed when done
    __kCFFreeContentsWhenDoneMask       = 0x20,
    __kCFFreeContentsWhenDone           = 0x20,

    // Bits 5-6 together: where the contents live and how they are released
    __kCFContentsMask                   = 0x60,
    __kCFHasInlineContents              = 0x00,
    __kCFNotInlineContentsDefaultFree   = 0x20,     // Use allocator's free function
    __kCFNotInlineContentsNoFree        = 0x40,     // Don't free
    __kCFNotInlineContentsCustomFree    = 0x60,     // Use a specially provided free function
    __kCFHasContentsAllocatorMask       = 0x60,
    __kCFHasContentsAllocator           = 0x60,     // (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask     = 0x60,
    __kCFHasContentsDeallocator         = 0x60
};
| |
| |
| // !!! Assumptions: |
| // Mutable strings are not inline |
| // Compile-time constant strings are not inline |
| // Mutable strings always have explicit length (but they might also have length byte and null byte) |
| // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings) |
| // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2 |
| |
| /* The following set of functions and macros need to be updated on change to the bit configuration |
| */ |
| CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;} |
| CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;} |
| CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;} |
| CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;} |
| CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;} |
| CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;} |
| CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;} |
| CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;} |
| CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte |
| CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) { |
| #if __LP64__ |
| return str->base._rc == 0; |
| #else |
| return (str->base._cfinfo[CF_RC_BITS]) == 0; |
| #endif |
| } |
| |
| CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents |
| |
| /* Returns ptr to the buffer (which might include the length byte) |
| */ |
| CF_INLINE const void *__CFStrContents(CFStringRef str) { |
| if (__CFStrIsInline(str)) { |
| return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0)); |
| } else { // Not inline; pointer is always word 2 |
| return str->variants.notInlineImmutable1.buffer; |
| } |
| } |
| |
| static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) { |
| return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); } |
| |
| // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator |
| CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) { |
| return *__CFStrContentsDeallocatorPtr(str); |
| } |
| |
| // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator |
| CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef contentsAllocator) { |
| *__CFStrContentsDeallocatorPtr(str) = contentsAllocator; |
| } |
| |
| static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) { |
| CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string"); |
| CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string"); |
| return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator); |
| } |
| |
| // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom |
| CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) { |
| return *(__CFStrContentsAllocatorPtr(str)); |
| } |
| |
| // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom |
| CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef alloc) { |
| *(__CFStrContentsAllocatorPtr(str)) = alloc; |
| } |
| |
| /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed. |
| */ |
| CF_INLINE CFIndex __CFStrLength(CFStringRef str) { |
| if (__CFStrHasExplicitLength(str)) { |
| if (__CFStrIsInline(str)) { |
| return str->variants.inline1.length; |
| } else { |
| return str->variants.notInlineImmutable1.length; |
| } |
| } else { |
| return (CFIndex)(*((uint8_t *)__CFStrContents(str))); |
| } |
| } |
| |
| CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) { |
| if (__CFStrHasExplicitLength(str)) { |
| if (__CFStrIsInline(str)) { |
| return str->variants.inline1.length; |
| } else { |
| return str->variants.notInlineImmutable1.length; |
| } |
| } else { |
| return (CFIndex)(*((uint8_t *)buffer)); |
| } |
| } |
| |
| Boolean __CFStringIsMutable(CFStringRef str) { |
| return __CFStrIsMutable(str); |
| } |
| |
| Boolean __CFStringIsEightBit(CFStringRef str) { |
| return __CFStrIsEightBit(str); |
| } |
| |
| /* Sets the content pointer for immutable or mutable strings. |
| */ |
| CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) { |
| // XXX_PCB catch all writes for mutable string case. |
| CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str), str, ((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p); |
| } |
| CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);} |
| |
| CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) { |
| if (__CFStrIsInline(str)) { |
| ((CFMutableStringRef)str)->variants.inline1.length = v; |
| } else { |
| ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v; |
| } |
| } |
| |
| CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;} |
| CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;} |
| CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);} |
| CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);} |
| |
| |
| // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only |
| CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;} |
| CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;} |
| CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;} |
| CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;} |
| CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;} |
| CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;} |
| |
| // If capacity is provided externally, we only change it when we need to grow beyond it |
| CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;} |
| CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;} |
| CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;} |
| |
| // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer. |
| CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;} |
| CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;} |
| |
| // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store. |
| CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;} |
| CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;} |
| |
| |
| static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) { |
| void *ptr; |
| CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str); |
| ptr = CFAllocatorAllocate(alloc, size, 0); |
| if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)"); |
| return ptr; |
| } |
| |
| static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) { |
| CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str); |
| if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) { |
| // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle. |
| auto_zone_release(__CFCollectableZone, buffer); |
| } else { |
| CFAllocatorDeallocate(alloc, buffer); |
| } |
| } |
| |
| |
| |
| |
/* CFString specific init flags.
   You cannot count on the external buffer not being copied.
   If you specify an external buffer, you should not change it behind the CFString's back.
*/
enum {
    kCFStringPascal                       = 0x0010000,  /* The string data has a Pascal string structure (length byte at start) */
    kCFStringNoCopyProvidedContents       = 0x0020000,  /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x0030000,  /* Don't copy the provided string contents if possible; don't free it when no longer needed */
    __kCFThinUnicodeIfPossible            = 0x1000000   /* See if the Unicode contents can be thinned down to 8-bit */
};
| |
| /* System Encoding. |
| */ |
| static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId; |
| static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId; |
| CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId; |
| |
| CFStringEncoding CFStringGetSystemEncoding(void) { |
| |
| if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) { |
| const CFStringEncodingConverter *converter = NULL; |
| #if DEPLOYMENT_TARGET_MACOSX |
| __CFDefaultSystemEncoding = kCFStringEncodingMacRoman; // MacRoman is built-in so always available |
| #elif DEPLOYMENT_TARGET_WINDOWS |
| __CFDefaultSystemEncoding = kCFStringEncodingWindowsLatin1; // WinLatin1 is built-in so always available |
| #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD |
| __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default |
| #else // Solaris && HP-UX ? |
| __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default |
| #endif |
| converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding); |
| |
| __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL); |
| } |
| |
| return __CFDefaultSystemEncoding; |
| } |
| |
| // Fast version for internal use |
| |
| CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) { |
| if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding(); |
| return __CFDefaultSystemEncoding; |
| } |
| |
| CFStringEncoding CFStringFileSystemEncoding(void) { |
| if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) { |
| #if DEPLOYMENT_TARGET_MACOSX |
| __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8; |
| #else |
| __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding(); |
| #endif |
| } |
| |
| return __CFDefaultFileSystemEncoding; |
| } |
| |
| /* ??? Is returning length when no other answer is available the right thing? |
| */ |
| CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) { |
| if (encoding == kCFStringEncodingUTF8) { |
| return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? (length * 3) : (length * 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility |
| } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32 |
| return length * sizeof(UTF32Char); |
| } else { |
| encoding &= 0xFFF; // Mask off non-base part |
| } |
| switch (encoding) { |
| case kCFStringEncodingUnicode: |
| return length * sizeof(UniChar); |
| |
| case kCFStringEncodingNonLossyASCII: |
| return length * 6; // 1 Unichar could expand to 6 bytes |
| |
| case kCFStringEncodingMacRoman: |
| case kCFStringEncodingWindowsLatin1: |
| case kCFStringEncodingISOLatin1: |
| case kCFStringEncodingNextStepLatin: |
| case kCFStringEncodingASCII: |
| return length / sizeof(uint8_t); |
| |
| default: |
| return length / sizeof(uint8_t); |
| } |
| } |
| |
| |
| /* Returns whether the indicated encoding can be stored in 8-bit chars |
| */ |
| CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) { |
| switch (encoding & 0xFFF) { // just use encoding base |
| case kCFStringEncodingInvalidId: |
| case kCFStringEncodingUnicode: |
| case kCFStringEncodingNonLossyASCII: |
| return false; |
| |
| case kCFStringEncodingMacRoman: |
| case kCFStringEncodingWindowsLatin1: |
| case kCFStringEncodingISOLatin1: |
| case kCFStringEncodingNextStepLatin: |
| case kCFStringEncodingASCII: |
| return true; |
| |
| default: return false; |
| } |
| } |
| |
| /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode) |
| ??? Perhaps only ASCII fits the bill due to Unicode decomposition. |
| */ |
| CFStringEncoding __CFStringComputeEightBitStringEncoding(void) { |
| if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) { |
| CFStringEncoding systemEncoding = CFStringGetSystemEncoding(); |
| if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined. |
| return kCFStringEncodingASCII; |
| } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) { |
| __CFDefaultEightBitStringEncoding = systemEncoding; |
| } else { |
| __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII; |
| } |
| } |
| |
| return __CFDefaultEightBitStringEncoding; |
| } |
| |
| /* Returns whether the provided bytes can be stored in ASCII |
| */ |
| CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) { |
| while (len--) if ((uint8_t)(*bytes++) >= 128) return false; |
| return true; |
| } |
| |
| /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString. |
| */ |
| CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) { |
| if (encoding == __CFStringGetEightBitStringEncoding()) return true; |
| if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true; |
| return false; |
| } |
| |
| |
| /* Returns whether a length byte can be tacked on to a string of the indicated length. |
| */ |
| CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) { |
| #define __kCFMaxPascalStrLen 255 |
| return (len <= __kCFMaxPascalStrLen) ? true : false; |
| } |
| |
/* Various string assertions.
   Macro arguments are parenthesized wherever they appear inside an expression, so that
   callers may pass compound expressions (e.g. "start + n") without changing the meaning
   of the check.
   The two brace-style macros expand to a compound statement rather than an expression.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
| |
| |
/* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
Additional complications are applied in the following order:
- desiredCapacity, which is the minimum (except initially things can be at zero)
- rounding up to factor of 8
- compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
- we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C >= ULONG_MAX/3, we instead return the larger of C and LONG_MAX - 4095.

The macro argument is parenthesized at every use so that compound expressions such as
SHRINKFACTOR(a + b) are evaluated as a whole. Note that the 32-bit GROWFACTOR evaluates
its argument more than once; do not pass expressions with side effects.
*/
#define SHRINKFACTOR(c) ((c) / 2)

#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
| |
| CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, CFIndex reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) { |
| if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */ |
| if ((capacity < reqCapacity) || /* We definitely need the room... */ |
| (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */ |
| ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */ |
| (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */ |
| CFIndex newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */ |
| CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize; |
| if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */ |
| newCapacity = desiredCapacity; |
| } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */ |
| newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */ |
| } |
| if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator; should we do something for */ |
| newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0); |
| #if defined(__APPLE__) |
| } else { |
| newCapacity = malloc_good_size(newCapacity); |
| #endif |
| } |
| return newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity)); |
| } |
| } |
| return capacity; |
| } |
| |
| |
| /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result. |
| numBlocks is current total number of blocks within buffer. |
| blockSize is the size of each block in bytes |
| ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap |
| insertLength is the final spacing between the remaining blocks |
| |
| Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO |
| if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H |
| if insertLength = 0, result = A B D G H |
| |
| Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO |
| if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U |
| |
| */ |
| typedef struct _CFStringDeferredRange { |
| CFIndex beginning; |
| CFIndex length; |
| CFIndex shift; |
| } CFStringDeferredRange; |
| |
| typedef struct _CFStringStackInfo { |
| CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another) |
| CFIndex count; // Number of elements actually stored |
| CFStringDeferredRange *stack; |
| Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done |
| char _padding[3]; |
| } CFStringStackInfo; |
| |
| CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) { |
| si->count = si->count - 1; |
| *topRange = si->stack[si->count]; |
| } |
| |
| CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) { |
| if (si->count == si->capacity) { |
| // increase size of the stack |
| si->capacity = (si->capacity + 4) * 2; |
| if (si->hasMalloced) { |
| si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0); |
| } else { |
| CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0); |
| memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange)); |
| si->stack = newStack; |
| si->hasMalloced = true; |
| } |
| } |
| si->stack[si->count] = *newRange; |
| si->count = si->count + 1; |
| } |
| |
| static void rearrangeBlocks( |
| uint8_t *buffer, |
| CFIndex numBlocks, |
| CFIndex blockSize, |
| const CFRange *ranges, |
| CFIndex numRanges, |
| CFIndex insertLength) { |
| |
| #define origStackSize 10 |
| CFStringDeferredRange origStack[origStackSize]; |
| CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}}; |
| CFStringDeferredRange currentNonRange = {0, 0, 0}; |
| CFIndex currentRange = 0; |
| CFIndex amountShifted = 0; |
| |
| // must have at least 1 range left. |
| |
| while (currentRange < numRanges) { |
| currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize; |
| if ((numRanges - currentRange) == 1) { |
| // at the end. |
| currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning; |
| if (currentNonRange.length == 0) break; |
| } else { |
| currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning; |
| } |
| currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize); |
| amountShifted = currentNonRange.shift; |
| if (amountShifted <= 0) { |
| // process current item and rest of stack |
| if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); |
| while (si.count > 0) { |
| pop (&si, ¤tNonRange); // currentNonRange now equals the top element of the stack. |
| if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); |
| } |
| } else { |
| // add currentNonRange to stack. |
| push (&si, ¤tNonRange); |
| } |
| currentRange++; |
| } |
| |
| // no more ranges. if anything is on the stack, process. |
| |
| while (si.count > 0) { |
| pop (&si, ¤tNonRange); // currentNonRange now equals the top element of the stack. |
| if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); |
| } |
| if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack); |
| } |
| |
| /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.) |
| */ |
| static void copyBlocks( |
| const uint8_t *srcBuffer, |
| uint8_t *dstBuffer, |
| CFIndex srcLength, |
| Boolean srcIsUnicode, |
| Boolean dstIsUnicode, |
| const CFRange *ranges, |
| CFIndex numRanges, |
| CFIndex insertLength) { |
| |
| CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks |
| CFIndex dstLocationInBytes = 0; // ditto |
| CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t); |
| CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t)); |
| CFIndex rangeIndex = 0; |
| CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t)); |
| |
| // Loop over the ranges, copying the range to be preserved (right before each range) |
| while (rangeIndex < numRanges) { |
| CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved |
| if (srcLengthInBytes > 0) { |
| if (srcIsUnicode == dstIsUnicode) { |
| memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes); |
| } else { |
| __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes); |
| } |
| } |
| srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff |
| dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes; |
| rangeIndex++; |
| } |
| |
| // Do last range (the one beyond last range) |
| if (srcLocationInBytes < srcLength * srcBlockSize) { |
| if (srcIsUnicode == dstIsUnicode) { |
| memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes); |
| } else { |
| __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes); |
| } |
| } |
| } |
| |
| /* Call the callback; if it doesn't exist or returns false, then log |
| */ |
| static void __CFStringHandleOutOfMemory(CFTypeRef obj) { |
| CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save."); |
| CFBadErrorCallBack cb = _CFGetOutOfMemoryErrorCallBack(); |
| if (NULL == cb || !cb(obj, CFSTR("NS/CFString"), msg)) { |
| CFLog(kCFLogLevelCritical, CFSTR("%@"), msg); |
| } |
| } |
| |
| /* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.) |
| |
| str: the mutable string whose storage is being resized. |
| deleteRanges, numDeleteRanges: the character ranges to remove. NOTE(review): the block-moving helpers appear to expect these in ascending, non-overlapping order -- confirm against callers. |
| insertLength: number of characters of room opened up in place of EACH deleted range (the new length adds insertLength * numDeleteRanges). |
| makeUnicode: when true the resulting storage is Unicode. Storage also stays Unicode when the string already is Unicode or is external mutable. |
| |
| The new length is curLength + insertLength * numDeleteRanges - (sum of the deleted lengths). |
| - New length of 0: if __CFStrNewCapacity() asks for a capacity different from the current one, the contents are freed and the content pointer and capacity are reset; otherwise the buffer is kept and, unless the string is external mutable, turned back into an empty 8-bit string. |
| - Otherwise: a new buffer is allocated when the capacity changes or when existing 8-bit characters have to be widened to Unicode; the surviving characters are moved with rearrangeBlocks() (same buffer) or copyBlocks() (new buffer); then the length/null bytes, the Unicode flag, the explicit length and, for a new buffer, the capacity and content pointer are updated. |
| NOTE(review): if both allocation attempts fail, __CFStringHandleOutOfMemory() is called and, should it return, execution continues with newContents == NULL. |
| */ |
| static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) { |
| const uint8_t *curContents = (uint8_t *)__CFStrContents(str); |
| CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0; |
| CFIndex newLength; |
| |
| // Compute new length of the string |
| if (numDeleteRanges == 1) { |
| newLength = curLength + insertLength - deleteRanges[0].length; |
| } else { |
| CFIndex cnt; |
| newLength = curLength + insertLength * numDeleteRanges; |
| for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length; |
| } |
| |
| __CFAssertIfFixedLengthIsOK(str, newLength); |
| |
| if (newLength == 0) { |
| // A somewhat optimized code-path for this special case, with the following implicit values: |
| // newIsUnicode = false |
| // useLengthAndNullBytes = false |
| // newCharSize = sizeof(uint8_t) |
| // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally |
| // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway) |
| CFIndex curCapacity = __CFStrCapacity(str); |
| CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t)); |
| if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all |
| if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents); |
| __CFStrSetContentPtr(str, NULL); |
| __CFStrSetCapacity(str, 0); |
| __CFStrClearCapacityProvidedExternally(str); |
| __CFStrClearHasLengthAndNullBytes(str); |
| if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode |
| } else { |
| if (!__CFStrIsExternalMutable(str)) { |
| __CFStrClearUnicode(str); |
| if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room |
| __CFStrSetHasLengthAndNullBytes(str); |
| ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0; /* Length byte 0, immediately followed by the null byte */ |
| } else { |
| __CFStrClearHasLengthAndNullBytes(str); |
| } |
| } |
| } |
| __CFStrSetExplicitLength(str, 0); |
| } else { /* This else-clause assumes newLength > 0 */ |
| Boolean oldIsUnicode = __CFStrIsUnicode(str); |
| Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str); |
| CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t); |
| Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */; |
| CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */ |
| CFIndex curCapacity = __CFStrCapacity(str); |
| CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize); |
| Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */ |
| uint8_t *newContents; |
| if (allocNewBuffer) { |
| newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity); |
| if (!newContents) { // Try allocating without extra room |
| newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize); |
| newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity); |
| if (!newContents) { |
| __CFStringHandleOutOfMemory(str); |
| // Ideally control doesn't come here at all since we expect the above call to raise an exception. |
| // If control comes here, there isn't much we can do. |
| } |
| } |
| } else { |
| newContents = (uint8_t *)curContents; |
| } |
| |
| Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str); |
| |
| CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__); |
| |
| if (hasLengthAndNullBytes) curContents++; /* Step over the length byte so curContents points at the first character */ |
| if (useLengthAndNullBytes) newContents++; /* Leave the first byte of the new contents free for the length byte */ |
| |
| if (curContents) { |
| if (oldIsUnicode == newIsUnicode) { |
| if (newContents == curContents) { |
| rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength); |
| } else { |
| copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength); |
| } |
| } else if (newIsUnicode) { /* this implies we have a new buffer */ |
| copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength); |
| } |
| if (hasLengthAndNullBytes) curContents--; /* Undo the damage from above */ |
| if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents); /* The surviving characters now live in the new buffer */ |
| } |
| |
| if (!newIsUnicode) { |
| if (useLengthAndNullBytes) { |
| newContents[newLength] = 0; /* Always have null byte, if not unicode */ |
| newContents--; /* Undo the damage from above */ |
| newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0; |
| if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str); |
| } else { |
| if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str); |
| } |
| if (oldIsUnicode) __CFStrClearUnicode(str); |
| } else { // New is unicode... |
| if (!oldIsUnicode) __CFStrSetUnicode(str); |
| if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str); |
| } |
| __CFStrSetExplicitLength(str, newLength); |
| |
| if (allocNewBuffer) { /* Record the new buffer and its capacity on the string */ |
| __CFStrSetCapacity(str, newCapacity); |
| __CFStrClearCapacityProvidedExternally(str); |
| __CFStrSetContentPtr(str, newContents); |
| } |
| } |
| } |
| |
| /* Same as above, but takes one range (very common case) |
| */ |
| CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) { |
| __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode); |
| } |
| |
| |
| #if defined(DEBUG) |
| static Boolean __CFStrIsConstantString(CFStringRef str); |
| #endif |
| |
| static void __CFStringDeallocate(CFTypeRef cf) { |
| CFStringRef str = (CFStringRef)cf; |
| |
| // If in DEBUG mode, check to see if the string a CFSTR, and complain. |
| CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str); |
| |
| if (!__CFStrIsInline(str)) { |
| uint8_t *contents; |
| Boolean isMutable = __CFStrIsMutable(str); |
| if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) { |
| if (isMutable) { |
| __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents); |
| } else { |
| if (__CFStrHasContentsDeallocator(str)) { |
| CFAllocatorRef contentsDeallocator = __CFStrContentsDeallocator(str); |
| CFAllocatorDeallocate(contentsDeallocator, contents); |
| CFRelease(contentsDeallocator); |
| } else { |
| CFAllocatorRef alloc = __CFGetAllocator(str); |
| CFAllocatorDeallocate(alloc, contents); |
| } |
| } |
| } |
| if (isMutable && __CFStrHasContentsAllocator(str)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef)str)); |
| } |
| } |
| |
| static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) { |
| CFStringRef str1 = (CFStringRef)cf1; |
| CFStringRef str2 = (CFStringRef)cf2; |
| const uint8_t *contents1; |
| const uint8_t *contents2; |
| CFIndex len1; |
| |
| /* !!! We do not need IsString assertions, as the CFBase runtime assures this */ |
| /* !!! We do not need == test, as the CFBase runtime assures this */ |
| |
| contents1 = (uint8_t *)__CFStrContents(str1); |
| contents2 = (uint8_t *)__CFStrContents(str2); |
| len1 = __CFStrLength2(str1, contents1); |
| |
| if (len1 != __CFStrLength2(str2, contents2)) return false; |
| |
| contents1 += __CFStrSkipAnyLengthByte(str1); |
| contents2 += __CFStrSkipAnyLengthByte(str2); |
| |
| if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) { |
| return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true; |
| } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */ |
| CFStringInlineBuffer buf; |
| CFIndex buf_idx = 0; |
| |
| CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1)); |
| for (buf_idx = 0; buf_idx < len1; buf_idx++) { |
| if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false; |
| } |
| } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */ |
| CFStringInlineBuffer buf; |
| CFIndex buf_idx = 0; |
| |
| CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1)); |
| for (buf_idx = 0; buf_idx < len1; buf_idx++) { |
| if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false; |
| } |
| } else { /* Both strings have Unicode contents */ |
| CFIndex idx; |
| for (idx = 0; idx < len1; idx++) { |
| if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false; |
| } |
| } |
| return true; |
| } |
| |
| |
/* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
   If the length is less than or equal to 96, then the hash function is simply the
   following (n is the nth UniChar character, starting from 0):

     hash(-1) = length
     hash(n) = hash(n-1) * 257 + unichar(n);
     Hash = hash(length-1) + (hash(length-1) << (length & 31))

   If the length is greater than 96, then the above algorithm applies to
   characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
   thus the first, middle, and last 32 characters.

   Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
   (that is, 257^4 reduced modulo 2^32).
   If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
   !!! We haven't updated for LP64 yet

   NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.

   Hash function was changed between Panther and Tiger, and Tiger and Leopard.
*/
#define HashEverythingLimit 96

/* Both macros below fold characters into a variable named "result" that must exist at the point of use,
   and advance "pointer" past the characters consumed. The character at offset i is read with the
   expression formed by pasting accessStart, i and accessEnd together; for example the argument pair
   "p[" and "]" reads p[i].
*/

/* Four steps of hash(n) = hash(n-1) * 257 + unichar(n), folded into a single expression. */
#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
    {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}

/* A single step of hash(n) = hash(n-1) * 257 + unichar(n). */
#define HashNextUniChar(accessStart, accessEnd, pointer) \
    {result = result * 257 + (accessStart 0 accessEnd); pointer++;}
| |
| |
| /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing. |
| */ |
| CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) { |
| CFHashCode result = actualLen; |
| if (len <= HashEverythingLimit) { |
| const UniChar *end4 = uContents + (len & ~3); |
| const UniChar *end = uContents + len; |
| while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours |
| while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones... |
| } else { |
| const UniChar *contents, *end; |
| contents = uContents; |
| end = contents + 32; |
| while (contents < end) HashNextFourUniChars(contents[, ], contents); |
| contents = uContents + (len >> 1) - 16; |
| end = contents + 32; |
| while (contents < end) HashNextFourUniChars(contents[, ], contents); |
| end = uContents + len; |
| contents = end - 32; |
| while (contents < end) HashNextFourUniChars(contents[, ], contents); |
| } |
| return result + (result << (actualLen & 31)); |
| } |
| |
| /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check. |
| */ |
| CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) { |
| #if defined(DEBUG) |
| if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea |
| CFIndex cnt; |
| Boolean err = false; |
| if (len <= HashEverythingLimit) { |
| for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true; |
| } else { |
| for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true; |
| for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true; |
| for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true; |
| } |
| if (err) { |
| // Can't do log here, as it might be too early |
| fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n"); |
| } |
| } |
| #endif |
| CFHashCode result = len; |
| if (len <= HashEverythingLimit) { |
| const uint8_t *end4 = cContents + (len & ~3); |
| const uint8_t *end = cContents + len; |
| while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours |
| while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones... |
| } else { |
| const uint8_t *contents, *end; |
| contents = cContents; |
| end = contents + 32; |
| while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents); |
| contents = cContents + (len >> 1) - 16; |
| end = contents + 32; |
| while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents); |
| end = cContents + len; |
| contents = end - 32; |
| while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents); |
| } |
| return result + (result << (len & 31)); |
| } |
| |
| CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) { |
| CFHashCode result = len; |
| if (len <= HashEverythingLimit) { |
| const uint8_t *end4 = bytes + (len & ~3); |
| const uint8_t *end = bytes + len; |
| while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours |
| while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones... |
| } else { |
| const uint8_t *contents, *end; |
| contents = bytes; |
| end = contents + 32; |
| while (contents < end) HashNextFourUniChars(contents[, ], contents); |
| contents = bytes + (len >> 1) - 16; |
| end = contents + 32; |
| while (contents < end) HashNextFourUniChars(contents[, ], contents); |
| end = bytes + len; |
| contents = end - 32; |
| while (contents < end) HashNextFourUniChars(contents[, ], contents); |
| } |
| return result + (result << (len & 31)); |
| } |
| |
| CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) { |
| return __CFStrHashEightBit(bytes, len); |
| } |
| |
| CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) { |
| return __CFStrHashCharacters(characters, len, len); |
| } |
| |
| /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash. |
| */ |
| CFHashCode CFStringHashNSString(CFStringRef str) { |
| UniChar buffer[HashEverythingLimit]; |
| CFIndex bufLen; // Number of characters in the buffer for hashing |
| CFIndex len = 0; // Actual length of the string |
| |
| CF_OBJC_CALL0(CFIndex, len, str, "length"); |
| if (len <= HashEverythingLimit) { |
| CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, len)); |
| bufLen = len; |
| } else { |
| CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, 32)); |
| CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+32, CFRangeMake((len >> 1) - 16, 32)); |
| CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+64, CFRangeMake(len - 32, 32)); |
| bufLen = HashEverythingLimit; |
| } |
| return __CFStrHashCharacters(buffer, bufLen, len); |
| } |
| |
| CFHashCode __CFStringHash(CFTypeRef cf) { |
| /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */ |
| CFStringRef str = (CFStringRef)cf; |
| const uint8_t *contents = (uint8_t *)__CFStrContents(str); |
| CFIndex len = __CFStrLength2(str, contents); |
| |
| if (__CFStrIsEightBit(str)) { |
| contents += __CFStrSkipAnyLengthByte(str); |
| return __CFStrHashEightBit(contents, len); |
| } else { |
| return __CFStrHashCharacters((const UniChar *)contents, len, len); |
| } |
| } |
| |
| |
| static CFStringRef __CFStringCopyDescription(CFTypeRef cf) { |
| return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf); |
| } |
| |
| static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) { |
| return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf); |
| } |
| |
| static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID; |
| |
| typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString); |
| |
| static const CFRuntimeClass __CFStringClass = { |
| 0, |
| "CFString", |
| NULL, // init |
| (CF_STRING_CREATE_COPY)CFStringCreateCopy, |
| __CFStringDeallocate, |
| __CFStringEqual, |
| __CFStringHash, |
| __CFStringCopyFormattingDescription, |
| __CFStringCopyDescription |
| }; |
| |
| __private_extern__ void __CFStringInitialize(void) { |
| __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass); |
| } |
| |
| CFTypeID CFStringGetTypeID(void) { |
| return __kCFStringTypeID; |
| } |
| |
| |
| static Boolean CFStrIsUnicode(CFStringRef str) { |
| CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString"); |
| return __CFStrIsUnicode(str); |
| } |
| |
| |
| |
| #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1) /* Sentinel for a contentsDeallocator argument meaning "free with the string's own allocator"; __CFStringCreateImmutableFunnel3() swaps it for alloc before using it */ |
| |
| /* contentsDeallocator indicates how to free the data if it's noCopy == true: |
| kCFAllocatorNull: don't free |
| ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here) |
| NULL: default allocator |
| otherwise it's the allocator that should be used (it will be explicitly stored) |
| if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC |
| hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode |
| possiblyExternalFormat indicates that the bytes might have BOM and be swapped |
| tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so) |
| numBytes contains the actual number of bytes in "bytes", including Length byte, |
| BUT not the NULL byte at the end |
| bytes should not contain BOM characters |
| !!! Various flags should be combined to reduce number of arguments, if possible |
| */ |
| __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3( |
| CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding, |
| Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy, |
| CFAllocatorRef contentsDeallocator, UInt32 converterFlags) { |
| |
| CFMutableStringRef str; |
| CFVarWidthCharBuffer vBuf; |
| CFIndex size; |
| Boolean useLengthByte = false; |
| Boolean useNullByte = false; |
| Boolean useInlineData = false; |
| |
| #if INSTRUMENT_SHARED_STRINGS |
| const char *recordedEncoding; |
| char encodingBuffer[128]; |
| if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode"; |
| else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII"; |
| else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8"; |
| else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman"; |
| else { |
| sprintf(encodingBuffer, "0x%lX", (unsigned long)encoding); |
| recordedEncoding = encodingBuffer; |
| } |
| #endif |
| |
| if (alloc == NULL) alloc = __CFGetDefaultAllocator(); |
| |
| if (contentsDeallocator == ALLOCATORSFREEFUNC) { |
| contentsDeallocator = alloc; |
| } else if (contentsDeallocator == NULL) { |
| contentsDeallocator = __CFGetDefaultAllocator(); |
| } |
| |
| if ((NULL != kCFEmptyString) && (numBytes == 0) && (alloc == kCFAllocatorSystemDefault)) { // If we are using the system default allocator, and the string is empty, then use the empty string! |
| if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak). |
| CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); |
| } |
| return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most |
| } |
| |
| // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL |
| |
| vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode |
| |
| // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive. |
| Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding); |
| |
| // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways). |
| Boolean stringROMShouldIgnoreNoCopy = false; |
| |
| // First check to see if the data needs to be converted... |
| // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy |
| |
| if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && !stringSupportsEightBitCFRepresentation)) { |
| const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0); |
| CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0); |
| Boolean usingPassedInMemory = false; |
| |
| vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff |
| vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary |
| |
| if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) { |
| // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose. |
| return NULL; |
| } |
| |
| encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode; |
| |
| // Update our flag according to whether the decoded buffer is ASCII |
| stringSupportsEightBitCFRepresentation = vBuf.isASCII; |
| |
| if (!usingPassedInMemory) { |
| |
| // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM. |
| stringROMShouldIgnoreNoCopy = true; |
| |
| // Make the parameters fit the new situation |
| numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar)); |
| hasLengthByte = hasNullByte = false; |
| |
| // Get rid of the original buffer if its not being used |
| if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { |
| CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); |
| } |
| contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone... |
| |
| // See if we can reuse any storage the decode func might have allocated |
| // We do this only for Unicode, as otherwise we would not have NULL and Length bytes |
| |
| if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) { |
| vBuf.shouldFreeChars = false; // Transferring ownership to the CFString |
| bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage |
| noCopy = true; |
| #if INSTRUMENT_SHARED_STRINGS |
| if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy"; |
| else recordedEncoding = "ForeignUnicode-NoCopy"; |
| #endif |
| } else { |
| #if INSTRUMENT_SHARED_STRINGS |
| if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy"; |
| else recordedEncoding = "ForeignUnicode-Copy"; |
| #endif |
| bytes = vBuf.chars.unicode; |
| noCopy = false; // Can't do noCopy anymore |
| // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func |
| } |
| |
| } |
| |
| // At this point, all necessary input arguments have been changed to reflect the new state |
| |
| } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII |
| CFIndex cnt; |
| CFIndex len = numBytes / sizeof(UniChar); |
| Boolean allASCII = true; |
| |
| for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) { |
| allASCII = false; |
| break; |
| } |
| |
| if (allASCII) { // Yes we can! |
| uint8_t *ptr, *mem; |
| Boolean newHasLengthByte = __CFCanUseLengthByte(len); |
| numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte |
| // See if we can use that temporary local buffer in vBuf... |
| if (numBytes >= __kCFVarWidthLocalBufferSize) { |
| mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0); |
| if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)"); |
| } else { |
| mem = ptr = (uint8_t *)(vBuf.localBuffer); |
| } |
| if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII) |
| // Copy the Unicode bytes into the new ASCII buffer |
| hasLengthByte = newHasLengthByte; |
| hasNullByte = true; |
| if (hasLengthByte) *ptr++ = (uint8_t)len; |
| for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]); |
| ptr[len] = 0; |
| if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { |
| CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); |
| } |
| // Now make everything look like we had an ASCII buffer to start with |
| bytes = mem; |
| encoding = kCFStringEncodingASCII; |
| contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone... |
| noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around |
| numBytes--; // Should not contain the NULL byte at end... |
| stringSupportsEightBitCFRepresentation = true; // We're ASCII now! |
| stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM |
| #if INSTRUMENT_SHARED_STRINGS |
| recordedEncoding = "U->A"; |
| #endif |
| } |
| } |
| |
| // At this point, all necessary input arguments have been changed to reflect the new state |
| } |
| |
| // Now determine the necessary size |
| |
| Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation; |
| |
| #if INSTRUMENT_SHARED_STRINGS |
| if (stringSupportsROM) { |
| const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0); |
| CFIndex realNumBytes = numBytes - !! hasLengthByte; |
| __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes); |
| } |
| #endif |
| |
| CFStringRef romResult = NULL; |
| |
| #if USE_STRING_ROM |
| |
| if (stringSupportsROM) { |
| // Disable the string ROM if necessary |
| static char sDisableStringROM = -1; |
| if (sDisableStringROM == -1) sDisableStringROM = !! getenv("CFStringDisableROM"); |
| |
| if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte); |
| } |
| /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */ |
| if (romResult) { |
| if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { |
| CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); |
| } |
| |
| /* these don't get used again, but clear them for consistency */ |
| noCopy = false; |
| bytes = NULL; |
| |
| /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */ |
| str = (CFMutableStringRef)romResult; |
| } |
| #endif |
| |
| if (! romResult) { |
| // Now determine the necessary size |
| |
| if (noCopy) { |
| |
| size = sizeof(void *); // Pointer to the buffer |
| if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) { |
| size += sizeof(void *); // The contentsDeallocator |
| } |
| if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length |
| useLengthByte = hasLengthByte; |
| useNullByte = hasNullByte; |
| |
| } else { // Inline data; reserve space for it |
| |
| useInlineData = true; |
| size = numBytes; |
| |
| if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) { |
| useLengthByte = true; |
| if (!hasLengthByte) size += 1; |
| } else { |
| size += sizeof(CFIndex); // Explicit length |
| } |
| if (hasNullByte || encoding != kCFStringEncodingUnicode) { |
| useNullByte = true; |
| size += 1; |
| } |
| } |
| |
| #ifdef STRING_SIZE_STATS |
| // Dump alloced CFString size info every so often |
| static int cnt = 0; |
| static unsigned sizes[256] = {0}; |
| int allocedSize = size + sizeof(CFRuntimeBase); |
| if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++; |
| if ((++cnt % 1000) == 0) { |
| printf ("\nTotal: %d\n", cnt); |
| int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " "); |
| } |
| #endif |
| |
| // Finally, allocate! |
| |
| str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL); |
| if (str) { |
| if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)"); |
| |
| __CFStrSetInfoBits(str, |
| (useInlineData ? __kCFHasInlineContents : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree))) | |
| ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) | |
| (useNullByte ? __kCFHasNullByte : 0) | |
| (useLengthByte ? __kCFHasLengthByte : 0)); |
| |
| if (!useLengthByte) { |
| CFIndex length = numBytes - (hasLengthByte ? 1 : 0); |
| if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar); |
| __CFStrSetExplicitLength(str, length); |
| } |
| |
| if (useInlineData) { |
| uint8_t *contents = (uint8_t *)__CFStrContents(str); |
| if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes; |
| memmove(contents, bytes, numBytes); |
| if (useNullByte) contents[numBytes] = 0; |
| } else { |
| __CFStrSetContentPtr(str, bytes); |
| if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) __CFStrSetContentsDeallocator(str, (CFAllocatorRef)CFRetain(contentsDeallocator)); |
| } |
| } else { |
| if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { |
| CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); |
| } |
| } |
| } |
| if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes); |
| |
| return str; |
| } |
| |
| /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated |
| */ |
| CFStringRef __CFStringCreateImmutableFunnel2( |
| CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding, |
| Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy, |
| CFAllocatorRef contentsDeallocator) { |
| return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0); |
| } |
| |
| |
| |
| CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) { |
| CFIndex len = (CFIndex)(*(uint8_t *)pStr); |
| return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0); |
| } |
| |
| |
| CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) { |
| CFIndex len = (CFIndex)strlen(cStr); |
| return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0); |
| } |
| |
| CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) { |
| CFIndex len = (CFIndex)(*(uint8_t *)pStr); |
| return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0); |
| } |
| |
| |
| CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) { |
| CFIndex len = (CFIndex)strlen(cStr); |
| return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0); |
| } |
| |
| |
| CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) { |
| return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); |
| } |
| |
| |
| CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) { |
| return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0); |
| } |
| |
| |
| CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) { |
| return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0); |
| } |
| |
| CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) { |
| return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0); |
| } |
| |
| CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) { |
| return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0); |
| } |
| |
| CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) { |
| return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments); |
| } |
| |
| CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) { |
| CFStringRef str; |
| CFMutableStringRef outputString = CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release |
| __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine |
| _CFStringAppendFormatAndArgumentsAux(outputString, copyDescFunc, formatOptions, format, arguments); |
| // ??? copy/release should not be necessary here -- just make immutable, compress if possible |
| // (However, this does make the string inline, and cause the supplied allocator to be used...) |
| str = (CFStringRef)CFStringCreateCopy(alloc, outputString); |
| CFRelease(outputString); |
| return str; |
| } |
| |
| CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) { |
| CFStringRef result; |
| va_list argList; |
| |
| va_start(argList, format); |
| result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList); |
| va_end(argList); |
| |
| return result; |
| } |
| |
| CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) { |
| // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length)); |
| |
| __CFAssertIsString(str); |
| __CFAssertRangeIsInStringBounds(str, range.location, range.length); |
| |
| if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */ |
| return (CFStringRef)CFStringCreateCopy(alloc, str); |
| } else if (__CFStrIsEightBit(str)) { |
| const uint8_t *contents = (const uint8_t *)__CFStrContents(str); |
| return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0); |
| } else { |
| const UniChar *contents = (UniChar *)__CFStrContents(str); |
| return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); |
| } |
| } |
| |
| CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) { |
| // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy"); |
| |
| __CFAssertIsString(str); |
| if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable |
| ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using |
| (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant |
| CFRetain(str); // Then just retain instead of making a true copy |
| return str; |
| } |
| if (__CFStrIsEightBit((CFStringRef)str)) { |
| const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str); |
| return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0); |
| } else { |
| const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str); |
| return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); |
| } |
| } |
| |
| |
| |
| /*** Constant string stuff... ***/ |
| |
| /* Table which holds constant strings created with CFSTR, when the -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. Keys are compared and hashed as 8-bit C strings (see __cStrEqual / __cStrHash); the key stored for an entry is a pointer into the created CFString's own 8-bit contents, or a separately allocated copy of the C string when the created string is not 8-bit. The values are the CFStrings created for them. _CFSTRLock protects this table.
| */
| static CFMutableDictionaryRef constantStringTable = NULL; // Created on first use by __CFStringMakeConstantString()
| static CFSpinLock_t _CFSTRLock = CFSpinLockInit; // Taken around lookups and inserts on constantStringTable
| |
| static CFStringRef __cStrCopyDescription(const void *ptr) { |
| return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull); |
| } |
| |
| static Boolean __cStrEqual(const void *ptr1, const void *ptr2) { |
| return (strcmp((const char *)ptr1, (const char *)ptr2) == 0); |
| } |
| |
| static CFHashCode __cStrHash(const void *ptr) { |
| // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently |
| const char *cStr = (const char *)ptr; |
| CFIndex len = (CFIndex)strlen(cStr); |
| CFHashCode result = 0; |
| if (len <= 4) { // All chars |
| unsigned cnt = len; |
| while (cnt--) result += (result << 8) + *cStr++; |
| } else { // First and last 2 chars |
| result += (result << 8) + cStr[0]; |
| result += (result << 8) + cStr[1]; |
| result += (result << 8) + cStr[len-2]; |
| result += (result << 8) + cStr[len-1]; |
| } |
| result += (result << (len & 31)); |
| return result; |
| } |
| |
| |
| CFStringRef __CFStringMakeConstantString(const char *cStr) { |
| CFStringRef result; |
| #if defined(DEBUG) |
| // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging |
| if ('\0' == *cStr) return kCFEmptyString; |
| #endif |
| if (constantStringTable == NULL) { |
| CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash}; |
| CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks; |
| constantStringValueCallBacks.equal = NULL; // So that we only find strings that are == |
| CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks); |
| _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing |
| __CFSpinLock(&_CFSTRLock); |
| if (constantStringTable == NULL) constantStringTable = table; |
| __CFSpinUnlock(&_CFSTRLock); |
| if (constantStringTable != table) CFRelease(table); |
| } |
| |
| __CFSpinLock(&_CFSTRLock); |
| if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) { |
| __CFSpinUnlock(&_CFSTRLock); |
| } else { |
| __CFSpinUnlock(&_CFSTRLock); |
| |
| { |
| char *key; |
| Boolean isASCII = true; |
| // Given this code path is rarer these days, OK to do this extra work to verify the strings |
| const char *tmp = cStr; |
| while (*tmp) { |
| if (*(tmp++) & 0x80) { |
| isASCII = false; |
| break; |
| } |
| } |
| if (!isASCII) { |
| CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); |
| tmp = cStr; |
| while (*tmp) { |
| CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp); |
| tmp++; |
| } |
| CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms); |
| CFRelease(ms); |
| } |
| // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility |
| result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman); |
| if (result == NULL) { |
| CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing")); |
| HALT; |
| } |
| if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)"); |
| if (__CFStrIsEightBit(result)) { |
| key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result); |
| } else { // For some reason the string is not 8-bit! |
| key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, (CFIndex)strlen(cStr) + 1, 0); |
| if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)"); |
| strlcpy(key, cStr, strlen(cStr) + 1); // !!! We will leak this, if the string is removed from the table (or table is freed) |
| } |
| |
| { |
| CFStringRef resultToBeReleased = result; |
| CFIndex count; |
| __CFSpinLock(&_CFSTRLock); |
| count = CFDictionaryGetCount(constantStringTable); |
| CFDictionaryAddValue(constantStringTable, key, result); |
| if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there |
| result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key); |
| } else { |
| #if __LP64__ |
| ((struct __CFString *)result)->base._rc = 0; |
| #else |
| ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0; |
| #endif |
| } |
| __CFSpinUnlock(&_CFSTRLock); |
| // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table |
| CFRelease(resultToBeReleased); |
| } |
| } |
| } |
| return result; |
| } |
| |
#if defined(DEBUG)
/* Debug-only: reports whether str is one of the values stored in constantStringTable.
   Returns false when the table has not been created. The search runs under _CFSTRLock.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    if (!constantStringTable) return false;

    __CFSpinLock(&_CFSTRLock);
    Boolean isInTable = CFDictionaryContainsValue(constantStringTable, str);
    __CFSpinUnlock(&_CFSTRLock);
    return isInTable;
}
#endif
| |
| |
#if DEPLOYMENT_TARGET_WINDOWS
/* Releases the constant string table; meant for the case where the library is unloaded.
   In DEBUG builds __CFConstantStringTableBeingFreed is raised for the duration of the release.
   The global is reset to NULL afterwards so that it never refers to a released dictionary.
*/
__private_extern__ void __CFStringCleanup (void) {
    /* in case library is unloaded, release store for the constant string table */
    if (constantStringTable != NULL) {
#if defined(DEBUG)
        __CFConstantStringTableBeingFreed = true;
        CFRelease(constantStringTable);
        __CFConstantStringTableBeingFreed = false;
#else
        CFRelease(constantStringTable);
#endif
        // Cleared only after the release has completed, so code running during the release still sees the table
        constantStringTable = NULL;
    }
}
#endif
| |
| |
| // Can pass in NSString as replacement string |
| // Call with numRanges > 0, and incrementing ranges |
| |
| static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) { |
| int cnt; |
| CFStringRef copy = NULL; |
| if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case |
| CFIndex replacementLength = CFStringGetLength(replacement); |
| |
| __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement)); |
| |
| if (__CFStrIsUnicode(str)) { |
| UniChar *contents = (UniChar *)__CFStrContents(str); |
| UniChar *firstReplacement = contents + ranges[0].location; |
| // Extract the replacementString into the first location, then copy from there |
| CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement); |
| for (cnt = 1; cnt < numRanges; cnt++) { |
| // The ranges are in terms of the original string; so offset by the change in length due to insertion |
| contents += replacementLength - ranges[cnt - 1].length; |
| memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar)); |
| } |
| } else { |
| uint8_t *contents = (uint8_t *)__CFStrContents(str); |
| uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str); |
| // Extract the replacementString into the first location, then copy from there |
| CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL); |
| contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into |
| for (cnt = 1; cnt < numRanges; cnt++) { |
| // The ranges are in terms of the original string; so offset by the change in length due to insertion |
| contents += replacementLength - ranges[cnt - 1].length; |
| memmove(contents + ranges[cnt].location, firstReplacement, replacementLength); |
| } |
| } |
| if (copy) CFRelease(copy); |
| } |
| |
| // Can pass in NSString as replacement string |
| |
| CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) { |
| CFStringRef copy = NULL; |
| if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case |
| CFIndex replacementLength = CFStringGetLength(replacement); |
| |
| __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement)); |
| |
| if (__CFStrIsUnicode(str)) { |
| UniChar *contents = (UniChar *)__CFStrContents(str); |
| CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location); |
| } else { |
| uint8_t *contents = (uint8_t *)__CFStrContents(str); |
| CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL); |
| } |
| |
| if (copy) CFRelease(copy); |
| } |
| |
| /* Desired capacity used if the client does not provide a minimum capacity (__CFStringCreateMutableFunnel() applies it when its maxLength argument is 0).
| */
| #define DEFAULTMINCAPACITY 32
| |
| CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) { |
| CFMutableStringRef str; |
| Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false; |
| |
| if (alloc == NULL) alloc = __CFGetDefaultAllocator(); |
| |
| // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator... |
| str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL); |
| if (str) { |
| if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)"); |
| |
| __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits); |
| str->variants.notInlineMutable.buffer = NULL; |
| __CFStrSetExplicitLength(str, 0); |
| str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0; |
| if (maxLength != 0) __CFStrSetIsFixed(str); |
| __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength); |
| __CFStrSetCapacity(str, 0); |
| } |
| return str; |
| } |
| |
| CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) { |
| CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree; |
| CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode); |
| if (string) { |
| __CFStrSetIsExternalMutable(string); |
| if (contentsAllocationBits == __kCFHasContentsAllocator) __CFStrSetContentsAllocator(string, (CFAllocatorRef)CFRetain(externalCharactersAllocator)); |
| CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity); |
| } |
| return string; |
| } |
| |
| CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) { |
| return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree); |
| } |
| |
| CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) { |
| CFMutableStringRef newString; |
| |
| // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy"); |
| |
| __CFAssertIsString(string); |
| |
| newString = CFStringCreateMutable(alloc, maxLength); |
| __CFStringReplace(newString, CFRangeMake(0, 0), string); |
| |
| return newString; |
| } |
| |
| |
| __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) { |
| __CFAssertIsStringAndMutable(str); |
| __CFStrSetDesiredCapacity(str, len); |
| } |
| |
| |
| /* This one is for CF |
| */ |
| CFIndex CFStringGetLength(CFStringRef str) { |
| CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length"); |
| |
| __CFAssertIsString(str); |
| return __CFStrLength(str); |
| } |
| |
| /* This one is for NSCFString; it does not ObjC dispatch or assertion check |
| */ |
| CFIndex _CFStringGetLength2(CFStringRef str) { |
| return __CFStrLength(str); |
| } |
| |
| |
| /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere. |
| */ |
| CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) { |
| if (__CFStrIsEightBit(str)) { |
| contents += __CFStrSkipAnyLengthByte(str); |
| #if defined(DEBUG) |
| if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) { |
| // Can't do log here, as it might be too early |
| fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n"); |
| } |
| #endif |
| return __CFCharToUniCharTable[contents[idx]]; |
| } |
| |
| return ((UniChar *)contents)[idx]; |
| } |
| |
| /* This one is for the CF API |
| */ |
| UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) { |
| CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx); |
| |
| __CFAssertIsString(str); |
| __CFAssertIndexIsInStringBounds(str, idx); |
| return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str)); |
| } |
| |
| /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check |
| */ |
| int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) { |
| const uint8_t *contents = (const uint8_t *)__CFStrContents(str); |
| if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds; |
| *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents); |
| return _CFStringErrNone; |
| } |
| |
| |
| /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere. |
| */ |
| CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) { |
| if (__CFStrIsEightBit(str)) { |
| __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length); |
| } else { |
| const UniChar *uContents = ((UniChar *)contents) + range.location; |
| memmove(buffer, uContents, range.length * sizeof(UniChar)); |
| } |
| } |
| |
| /* This one is for the CF API |
| */ |
| void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) { |
| CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, CFRangeMake(range.location, range.length)); |
| |
| __CFAssertIsString(str); |
| __CFAssertRangeIsInStringBounds(str, range.location, range.length); |
| __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str)); |
| } |
| |
| /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check |
| */ |
| int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) { |
| const uint8_t *contents = (const uint8_t *)__CFStrContents(str); |
| if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds; |
| __CFStringGetCharactersGuts(str, range, buffer, contents); |
| return _CFStringErrNone; |
| } |
| |
| |
| CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) { |
| |
| /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */ |
| __CFAssertIsNotNegative(maxBufLen); |
| |
| if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it... |
| __CFAssertIsString(str); |
| __CFAssertRangeIsInStringBounds(str, range.location, range.length); |
| |
| if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string |
| const unsigned char *contents = (const unsigned char *)__CFStrContents(str); |
| CFIndex cLength = range.length; |
| |
| if (buffer) { |
| if (cLength > maxBufLen) cLength = maxBufLen; |
| memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength); |
| } |
| if (usedBufLen) *usedBufLen = cLength; |
| |
| return cLength; |
| } |
| } |
| |
| return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen); |
| } |
| |
| |
| ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) { |
| |
| if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ |
| __CFAssertIsString(str); |
| if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII |
| const uint8_t *contents = (const uint8_t *)__CFStrContents(str); |
| if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte |
| return (ConstStringPtr)contents; |
| } |
| // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII? |
| } |
| return NULL; |
| } |
| |
| |
| const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) { |
| |
| if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL; |
| // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII? |
| |
| CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true); |
| |
| __CFAssertIsString(str); |
| |
| if (__CFStrHasNullByte(str)) { |
| // Note: this is called a lot, 27000 times to open a small xcode project with one file open. |
| // Of these uses about 1500 are for cStrings/utf8strings. |
| return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str); |
| } else { |
| return NULL; |
| } |
| } |
| |
| |
| const UniChar *CFStringGetCharactersPtr(CFStringRef str) { |
| |
| CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents"); |
| |
| __CFAssertIsString(str); |
| if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str); |
| return NULL; |
| } |
| |
| |
| Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) { |
| CFIndex length; |
| CFIndex usedLen; |
| |
| __CFAssertIsNotNegative(bufferSize); |
| if (bufferSize < 1) return false; |
| |
| if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ |
| length = CFStringGetLength(str); |
| if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring |
| } else { |
| const uint8_t *contents; |
| |
| __CFAssertIsString(str); |
| |
| contents = (const uint8_t *)__CFStrContents(str); |
| length = __CFStrLength2(str, contents); |
| |
| if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring |
| |
| if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string |
| if (length >= bufferSize) return false; |
| memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length); |
| *buffer = (unsigned char)length; |
| return true; |
| } |
| } |
| |
| if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) { |
| |
| #if defined(DEBUG) |
| if (bufferSize > 0) { |
| strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1); |
| buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1)); |
| } |
| #else |
| if (bufferSize > 0) buffer[0] = 0; |
| #endif |
| return false; |
| } |
| *buffer = (unsigned char)usedLen; |
| return true; |
| } |
| |
| Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) { |
| const uint8_t *contents; |
| CFIndex len; |
| |
| __CFAssertIsNotNegative(bufferSize); |
| if (bufferSize < 1) return false; |
| |
| CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding); |
| |
| __CFAssertIsString(str); |
| |
| contents = (const uint8_t *)__CFStrContents(str); |
| len = __CFStrLength2(str, contents); |
| |
| if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string |
| if (len >= bufferSize) return false; |
| memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len); |
| buffer[len] = 0; |
| return true; |
| } else { |
| CFIndex usedLen; |
| |
| if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) { |
| buffer[usedLen] = '\0'; |
| return true; |
| } else { |
| #if defined(DEBUG) |
| strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize); |
| #else |
| if (bufferSize > 0) buffer[0] = 0; |
| #endif |
| return false; |
| } |
| } |
| } |
| |
| static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) { |
| CFStringRef collatorID; |
| const char *langID = NULL; |
| static const void *lastLocale = NULL; |
| static const char *lastLangID = NULL; |
| static CFSpinLock_t lock = CFSpinLockInit; |
| |
| __CFSpinLock(&lock); |
| if ((NULL != lastLocale) && (lastLocale == locale)) { |
| __CFSpinUnlock(&lock); |
| return lastLangID; |
| } |
| __CFSpinUnlock(&lock); |
| |
| collatorID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID); |
| |
| // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ??? |
| if (__CFStrLength(collatorID) > 1) { |
| const void *contents = __CFStrContents(collatorID); |
| const char *string; |
| char buffer[2]; |
| |
| if (__CFStrIsEightBit(collatorID)) { |
| string = ((const char *)contents) + __CFStrSkipAnyLengthByte(collatorID); |
| } else { |
| const UTF16Char *characters = (const UTF16Char *)contents; |
| |
| buffer[0] = (char)*(characters++); |
| buffer[1] = (char)*characters; |
| string = buffer; |
| } |
| |
| if (!strncmp(string, "az", 2)) { // Azerbaijani |
| langID = "az"; |
| } else if (!strncmp(string, "lt", 2)) { // Lithuanian |
| langID = "lt"; |
| } else if (!strncmp(string, "tr", 2)) { // Turkish |
| langID = "tr"; |
| } |
| } |
| |
| __CFSpinLock(&lock); |
| lastLocale = locale; |
| lastLangID = langID; |
| __CFSpinUnlock(&lock); |
| |
| return langID; |
| } |
| |
| static int8_t __CFCheckLocaleCFType = -1; |
| |
| CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) { |
| if (locale) { |
| if (__CFCheckLocaleCFType < 0) __CFCheckLocaleCFType = !_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther); |
| if (!__CFCheckLocaleCFType || (CFGetTypeID(locale) == CFLocaleGetTypeID())) return true; |
| } |
| return false; |
| } |
| |
/* Size, in UTF16Char units, of the scratch buffer that receives case-mapping output. */
#define MAX_CASE_MAPPING_BUF        (8)

/* Individual code points. */
#define ZERO_WIDTH_JOINER           (0x200D)    /* U+200D */
#define COMBINING_GRAPHEME_JOINER   (0x034F)    /* U+034F */

/* Hangul ranges: START/END pairs for the choseong, jungseong and jongseong jamo. */
#define HANGUL_CHOSEONG_START       (0x1100)
#define HANGUL_CHOSEONG_END         (0x115F)
#define HANGUL_JUNGSEONG_START      (0x1160)
#define HANGUL_JUNGSEONG_END        (0x11A2)
#define HANGUL_JONGSEONG_START      (0x11A8)
#define HANGUL_JONGSEONG_END        (0x11F9)

/* Hangul ranges: START/END pair for the precomposed syllables. */
#define HANGUL_SYLLABLE_START       (0xAC00)
#define HANGUL_SYLLABLE_END         (0xD7AF)
| |
| |
| // Returns the number of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8. |
| static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) { /* Folds the character cluster that begins with 'character' (taken to be the unit at position 'index' of 'buffer') according to the kCFCompare* bits in 'flags', writing the folded characters to outCharacters (capacity maxBufferLength). A non-NULL langCode of "tr" or "az" enables the Turkic dotted-I handling. Returns the number of UTF32Chars stored in outCharacters; when that number is non-zero and consumedLength is non-NULL, *consumedLength receives how many buffer positions the cluster covered. */ |
| CFIndex filledLength = 0, currentIndex = index; /* currentIndex is the next buffer position still to be read */ |
| |
| if (0 != character) { /* a zero character skips all folding: the function then returns 0 and leaves *consumedLength untouched */ |
| UTF16Char lowSurrogate; |
| CFIndex planeNo = (character >> 16); /* the bits above the low 16 select the Unicode plane */ |
| bool isTurkikCapitalI = false; /* set once an 'I' + U+0307 pair has been folded to 'i' below */ |
| static const uint8_t *decompBMP = NULL; /* plane-0 bitmap of canonically decomposable characters, fetched on first use */ |
| static const uint8_t *graphemeBMP = NULL; /* plane-0 bitmap of grapheme-extend characters, fetched together with decompBMP */ |
| |
| if (NULL == decompBMP) { |
| decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0); |
| graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0); |
| } |
| |
| ++currentIndex; /* step past 'character' itself */ |
| |
| if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII fast path; when a language code is given, capital I is sent down the general path so the Turkik special-casing below can see it |
| if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) { /* only A-Z change, and only for case-insensitive folding */ |
| character += ('a' - 'A'); |
| *outCharacters = character; |
| filledLength = 1; |
| } |
| } else { |
| // do width-insensitive mapping: a character in U+FF00..U+FFEF is replaced in place by its compatibility decomposition |
| if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) { |
| (void)CFUniCharCompatibilityDecompose(&character, 1, 1); |
| *outCharacters = character; |
| filledLength = 1; |
| } |
| |
| // map surrogates: a high surrogate followed in the buffer by a low surrogate becomes one UTF-32 character and the extra position is consumed |
| if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) { |
| character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate); |
| ++currentIndex; |
| planeNo = (character >> 16); |
| } |
| |
| // decompose (only when diacritics-insensitive or non-literal folding is requested) |
| if (flags & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) { |
| if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) { |
| UTF32Char original = character; /* kept so the decomposition can be undone below */ |
| |
| filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength); /* result is used as the count of characters now in outCharacters */ |
| character = *outCharacters; /* continue with the first character of the decomposition */ |
| |
| if ((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) { |
| filledLength = 1; // reset if Roman, Greek, Cyrillic (first decomposed character below U+0510): keep only that first character |
| } else if (0 == (flags & kCFCompareNonliteral)) { /* diacritics-insensitive only, and outside that range: throw the decomposition away */ |
| character = original; |
| filledLength = 0; |
| } |
| } |
| } |
| |
| // fold case |
| if (flags & kCFCompareCaseInsensitive) { |
| const uint8_t *nonBaseBitmap; |
| bool filterNonBase = (((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) ? true : false); /* when true, grapheme-extend characters produced by the case mapping are not stored */ |
| static const uint8_t *lowerBMP = NULL; /* plane-0 bitmaps for the two case tests below, fetched on first use */ |
| static const uint8_t *caseFoldBMP = NULL; |
| |
| if (NULL == lowerBMP) { |
| lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0); |
| caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0); |
| } |
| |
| if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing: for "tr"/"az", capital I followed by U+0307 (already in outCharacters after decomposition, or next in the buffer) folds to a plain i |
| if (filledLength > 1) { /* a decomposition is present in outCharacters */ |
| if (0x0307 == outCharacters[1]) { |
| if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1)); /* drop the U+0307 and close the gap it leaves */ |
| character = *outCharacters = 'i'; |
| isTurkikCapitalI = true; |
| } |
| } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) { /* the U+0307 is the next unit in the source buffer */ |
| character = *outCharacters = 'i'; |
| filledLength = 1; |
| ++currentIndex; /* consume the U+0307 */ |
| isTurkikCapitalI = true; |
| } |
| } |
| if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) { /* ordinary case folding, for characters found in the lowercase or case-folding bitmap */ |
| UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF]; |
| const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit; |
| UTF32Char *outCharactersP = outCharacters; |
| uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode); /* used below as the number of UTF16Char units placed in caseFoldBuffer */ |
| |
| bufferLimit = bufferP + bufferLength; |
| |
| if (filledLength > 0) --filledLength; // decrement filledLength (will add back later) |
| |
| // make space for casefold characters: count the scalars in the UTF-16 mapping, then move the characters that followed the first one so the mapping fits in front of them |
| if ((filledLength > 0) && (bufferLength > 1)) { |
| CFIndex totalScalerLength = 0; |
| |
| while (bufferP < bufferLimit) { |
| if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP; /* a surrogate pair counts as one scalar */ |
| ++totalScalerLength; |
| } |
| memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char)); |
| bufferP = caseFoldBuffer; /* rewind for the fill loop */ |
| } |
| |
| // fill: convert the UTF-16 mapping to UTF-32 and store it from the start of outCharacters, skipping grapheme-extend characters when filterNonBase is set |
| while (bufferP < bufferLimit) { |
| character = *(bufferP++); |
| if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) { |
| character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++)); |
| nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16)); |
| } else { |
| nonBaseBitmap = graphemeBMP; |
| } |
| |
| if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) { |
| *(outCharactersP++) = character; |
| ++filledLength; |
| } |
| } |
| } |
| } |
| } |
| |
| // collect following combining marks (only when diacritics-insensitive or non-literal folding is requested) |
| if (flags & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) { |
| const uint8_t *nonBaseBitmap; |
| const uint8_t *decompBitmap; |
| bool doFill = (((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) ? false : true); /* when false, following marks are consumed from the buffer but not stored */ |
| |
| if (0 == filledLength) { /* nothing has been stored so far */ |
| *outCharacters = character; // filledLength will be updated below on demand |
| |
| if (doFill) { // check if really needs to fill |
| UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex); |
| |
| if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) { |
| nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate); |
| nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16)); |
| decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16)); |
| } else { |
| nonBaseBitmap = graphemeBMP; |
| decompBitmap = decompBMP; |
| } |
| |
| if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) { |
| filledLength = 1; // For the base character |
| |
| if ((0 == (flags & kCFCompareDiacriticsInsensitiveCompatibilityMask)) || (nonBaseCharacter > 0x050F)) { /* this first mark is stored unless diacritics are ignored and the mark is at or below U+050F */ |
| if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) { |
| filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength); |
| } else { |
| outCharacters[filledLength++] = nonBaseCharacter; |
| } |
| } |
| currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2); /* one position on the non-surrogate path, two for a surrogate pair */ |
| } else { |
| doFill = false; |
| } |
| } |
| } |
| |
| while (filledLength < maxBufferLength) { // do the rest |
| character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex); |
| |
| if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) { |
| character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate); |
| nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16)); |
| decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16)); |
| } else { |
| nonBaseBitmap = graphemeBMP; |
| decompBitmap = decompBMP; |
| } |
| if (isTurkikCapitalI) { /* the first pass after the Turkik folding only clears the flag; currentIndex is unchanged, so the same position is examined again on the next pass */ |
| isTurkikCapitalI = false; |
| } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) { |
| if (doFill) { |
| if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) { |
| CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength); |
| |
| if (0 == currentLength) break; // didn't fit |
| |
| filledLength += currentLength; |
| } else { |
| outCharacters[filledLength++] = character; |
| } |
| } else if (0 == filledLength) { |
| filledLength = 1; // For the base character |
| } |
| currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2); /* one position on the non-surrogate path, two for a surrogate pair */ |
| } else { |
| break; /* the first character that is not grapheme-extend ends the cluster */ |
| } |
| } |
| |
| if (filledLength > 1) { /* put the trailing characters of the result into priority order */ |
| UTF32Char *sortCharactersLimit = outCharacters + filledLength; |
| UTF32Char *sortCharacters = sortCharactersLimit - 1; |
| |
| while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters; /* walk back from the end while the character is grapheme-extend, stopping at the first one that is not (or at the start of outCharacters) */ |
| |
| if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort |
| } |
| } |
| } |
| |
| if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index); /* reported only when something was produced */ |
| |
| return filledLength; |
| } |
| |
| #define kCFStringStackBufferLength (64) /* element count of the UTF32Char stack buffers (strBuf1, strBuf2) declared in CFStringCompareWithOptionsAndLocale */ |
| |
| CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions, CFLocaleRef locale) { |
| /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */ |
| UTF32Char strBuf1[kCFStringStackBufferLength]; |
| UTF32Char strBuf2[kCFStringStackBufferLength]; |
| CFStringInlineBuffer inlineBuf1, inlineBuf2; |
| UTF32Char str1Char, str2Char; |
| CFIndex str1UsedLen, str2UsedLen; |
| CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0; |
| CFIndex str2Len = CFStringGetLength(string2); |
| bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false); |
| bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitiveCompatibilityMask) ? true : false); |
| bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareWidthInsensitive)) ? true : false); |
| bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false); |
| const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0); |
| const uint8_t *langCode; |
| CFComparisonResult compareResult = kCFCompareEqualTo; |
| UTF16Char otherChar; |
| Boolean freeLocale = false; |
| |
| #define _CFCompareStringsWithLocale(A, B, C, D, E, F) (0) |
| locale = NULL; |
| |
| if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) { |
| locale = CFLocaleCopyCurrent(); |
| freeLocale = true; |
| } |
| |
| langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale)); |
| |
| if ((NULL == locale) && !numerically) { // could do binary comp (be careful when adding new flags) |
| CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding(); |
| const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding); |
| const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding); |
| CFIndex factor = sizeof(uint8_t); |
| |
| if ((NULL != str1Bytes) && (NULL != str2Bytes)) { |
| compareOptions &= ~kCFCompareNonliteral; // remove non-literal |
| |
| if (kCFStringEncodingASCII == eightBitEncoding) { |
| if (caseInsensitive) { |
| #if DEPLOYMENT_TARGET_MACOSX |
| int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL); |
| #elif DEPLOYMENT_TARGET_WINDOWS && !defined(__GNUC__) |
| int cmpResult = _strnicmp_l((const char*)str1Bytes + rangeToCompare.location, (const char*)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL); |
| #else |
| int cmpResult = strncasecmp((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len)); |
| #endif |
| |
| if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len; |
| |
| return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan)); |
| } |
| } else if (caseInsensitive || diacriticsInsensitive) { |
| CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len); |
| |
| str1Bytes += rangeToCompare.location; |
| |
| while (str1Index < limitLength) { |
| str1Char = str1Bytes[str1Index]; |
| str2Char = str2Bytes[str1Index]; |
| |
| if (str1Char != str2Char) { |
| if ((str1Char < 0x80) && (str2Char < 0x80)) { |
| if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A'); |
| if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A'); |
| |
| if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan); |
| } else { |
| str1Bytes = NULL; |
| break; |
| } |
| } |
| ++str1Index; |
| } |
| |
| str2Index = str1Index; |
| |
| if (str1Index == limitLength) { |
| int cmpResult = rangeToCompare.length - str2Len; |
| |
| return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan)); |
| } |
| } |
| } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) { |
| str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string); |
| str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2); |
| factor = sizeof(UTF16Char); |
| #if __LITTLE_ENDIAN__ |
| if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp |
| const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location; |
| const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len); |
| const UTF16Char *str2 = (const UTF16Char *)str2Bytes; |
| CFIndex cmpResult = 0; |
| |
| while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++); |
| |
| if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len; |
| |
| return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan)); |
| } |
| #endif /* __LITTLE_ENDIAN__ */ |
| } |
| if ((NULL != str1Bytes) && (NULL != str2Bytes)) { |
| int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor); |
| |
| if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len; |
| |
| return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan)); |
| } |
| } |
| |
| CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare); |
| CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len)); |
| |
| while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) { |
|