/* blob: 9163c79a982a50ff48c55e38286dc28f4d03babf [file] [log] [blame] */
/*
* Copyright (c) 2008-2009 Brent Fulgham <bfulgham@gmail.org>. All rights reserved.
*
* This source code is a modified version of the CoreFoundation sources released by Apple Inc. under
* the terms of the APSL version 2.0 (see below).
*
* Information about changes from the original Apple source release can be found by reviewing the
* source control system for the project at https://sourceforge.net/svn/?group_id=246198.
*
* The original license information is as follows:
*
* Copyright (c) 2008 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/* CFUnicodePrecomposition.c
Copyright 1999-2002, Apple, Inc. All rights reserved.
Responsibility: Aki Inoue
*/
#include <string.h>
#include <CoreFoundation/CFBase.h>
#include <CoreFoundation/CFCharacterSet.h>
#include "CFUniChar.h"
#include "CFUnicodePrecomposition.h"
#include "CFInternal.h"
#include "CFUniCharPriv.h"
// Canonical Precomposition
//
// File-scope lookup state. In the code visible here these variables are assigned only by
// __CFUniCharLoadPrecompositionTable(); until it succeeds they keep the initial values below.

// Start of the first-level table, taken from the mapping data right after its two header words.
// CFUniCharPrecomposeCharacter() searches it by combining character. Also serves as the
// "tables are loaded" indicator: callers test it against NULL before calling the loader.
static UTF32Char *__CFUniCharPrecompSourceTable = NULL;
// Number of entries in the first-level table (first 32-bit word of the mapping data).
static uint32_t __CFUniCharPrecompositionTableLength = 0;
// Second-level table used when the first-level value does not carry kCFUniCharNonBmpFlag.
static uint16_t *__CFUniCharBMPPrecompDestinationTable = NULL;
// Second-level table used when the first-level value carries kCFUniCharNonBmpFlag.
static uint32_t *__CFUniCharNonBMPPrecompDestinationTable = NULL;
static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P = NULL; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
// Plane 0 data for kCFUniCharCombiningProperty, used to look up combining classes of BMP characters.
static const uint8_t *__CFUniCharCombiningClassForBMP = NULL;
// Held by __CFUniCharLoadPrecompositionTable() while it inspects and fills in the variables above.
static CFSpinLock_t __CFUniCharPrecompositionTableLock = CFSpinLockInit;
/*
 * Loads the canonical precomposition mapping data and sets up the file-scope table
 * pointers. Does nothing if the tables are already set up, and leaves every pointer
 * untouched (NULL) when CFUniCharGetMappingData() returns NULL.
 *
 * Layout read from the mapping data, as 32-bit words:
 *   word 0      number of entries in the source table
 *   word 1      byte length of the BMP destination table
 *   then        source table (two 32-bit words per entry),
 *               followed by the BMP destination table,
 *               followed by the non-BMP destination table.
 */
static void __CFUniCharLoadPrecompositionTable(void) {
    __CFSpinLock(&__CFUniCharPrecompositionTableLock);

    if (NULL == __CFUniCharPrecompSourceTable) {
        const uint32_t *bytes = (const uint32_t *)CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping);
        uint32_t bmpMappingLength;

        if (NULL == bytes) {
            // No mapping data available: leave everything unset so a later call can retry.
            __CFSpinUnlock(&__CFUniCharPrecompositionTableLock);
            return;
        }

        __CFUniCharPrecompositionTableLength = *(bytes++);
        bmpMappingLength = *(bytes++);

        // `bytes` now points at the source table; the destination tables follow it.
        __CFUniCharBMPPrecompDestinationTable = (uint16_t *)((intptr_t)bytes + (__CFUniCharPrecompositionTableLength * sizeof(UTF32Char) * 2));
        __CFUniCharNonBMPPrecompDestinationTable = (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable) + bmpMappingLength);

        __CFUniCharNonBaseBitmapForBMP_P = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
        __CFUniCharCombiningClassForBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);

        // Set the source table last. Callers in this file test this pointer for NULL
        // without taking the lock, so it must not look "loaded" before the variables
        // above have been assigned.
        // NOTE(review): these are plain stores; confirm whether the target platforms
        // need an explicit barrier for readers that do not take the lock.
        __CFUniCharPrecompSourceTable = (UTF32Char *)bytes;
    }

    __CFSpinUnlock(&__CFUniCharPrecompositionTableLock);
}
// Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
#define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P
/*
 * Tests `character` against the non-base character set bitmap. Characters below
 * 0x10000 use the plane 0 bitmap stored by the table loader; all others fetch the
 * bitmap for the plane given by bits 16-23 of the character.
 */
CF_INLINE bool __CFUniCharIsNonBaseCharacter(UTF32Char character) {
    const uint8_t *planeBitmap;

    if (character < 0x10000) {
        planeBitmap = __CFUniCharNonBaseBitmapForBMP_P;
    } else {
        planeBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, ((character >> 16) & 0xFF));
    }

    return CFUniCharIsMemberOfBitmap(character, planeBitmap);
}
// One entry of the BMP destination table as read by __CFUniCharGetMappedBMPValue():
// _key is the character being looked up (the base character when called from
// CFUniCharPrecomposeCharacter()) and _value is the result returned for it.
typedef struct {
UTF16Char _key;
UTF16Char _value;
} __CFUniCharPrecomposeBMPMappings;
/*
 * Binary-searches `theTable` (numElem entries) for an entry whose _key equals
 * `character` and returns that entry's _value, or 0 when there is no such entry.
 * The search assumes the entries are ordered by ascending _key.
 *
 * A NULL or empty table yields 0 instead of reading theTable[numElem - 1] with a
 * wrapped-around index.
 */
static UTF16Char __CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings *theTable, uint32_t numElem, UTF16Char character) {
    int64_t first, last;

    if ((NULL == theTable) || (0 == numElem)) {
        return 0;
    }

    // Quick rejection: outside the range covered by the table.
    if ((character < theTable[0]._key) || (character > theTable[numElem - 1]._key)) {
        return 0;
    }

    // Closed interval [first, last]; signed so that `last` may drop below zero.
    first = 0;
    last = (int64_t)numElem - 1;

    while (first <= last) {
        int64_t middle = first + ((last - first) >> 1);
        UTF16Char key = theTable[middle]._key;

        if (character < key) {
            last = middle - 1;
        } else if (character > key) {
            first = middle + 1;
        } else {
            return theTable[middle]._value;
        }
    }

    return 0;
}
// One 32-bit key/value entry as read by __CFUniCharGetMappedValue_P().
// CFUniCharPrecomposeCharacter() uses this layout for two tables: the source table
// (_key is the combining character, _value is a packed descriptor of a destination
// sub-table) and the non-BMP destination table (_key is the base character, _value
// is the lookup result).
typedef struct {
UTF32Char _key;
uint32_t _value;
} __CFUniCharPrecomposeMappings;
/*
 * Binary-searches `theTable` (numElem entries) for an entry whose _key equals
 * `character` and returns that entry's _value, or 0 when there is no such entry.
 * The search assumes the entries are ordered by ascending _key.
 *
 * A NULL or empty table yields 0 instead of dereferencing NULL or reading
 * theTable[numElem - 1] with a wrapped-around index.
 */
static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings *theTable, uint32_t numElem, UTF32Char character) {
    int64_t first, last;

    if ((NULL == theTable) || (0 == numElem)) {
        return 0;
    }

    // Quick rejection: outside the range covered by the table.
    if ((character < theTable[0]._key) || (character > theTable[numElem - 1]._key)) {
        return 0;
    }

    // Closed interval [first, last]; signed so that `last` may drop below zero.
    first = 0;
    last = (int64_t)numElem - 1;

    while (first <= last) {
        int64_t middle = first + ((last - first) >> 1);
        UTF32Char key = theTable[middle]._key;

        if (character < key) {
            last = middle - 1;
        } else if (character > key) {
            first = middle + 1;
        } else {
            return theTable[middle]._value;
        }
    }

    return 0;
}
/*
 * Looks up the precomposed form of `base` followed by `combining`.
 *
 * Returns the precomposed character, or 0xFFFD when the pair has no mapping or
 * when the precomposition tables could not be loaded.
 */
__private_extern__
UTF32Char CFUniCharPrecomposeCharacter(UTF32Char base, UTF32Char combining) {
    uint32_t value;

    if (NULL == __CFUniCharPrecompSourceTable) {
        __CFUniCharLoadPrecompositionTable();
        // The loader leaves the table NULL when no mapping data is available;
        // report "no composition" rather than searching through a NULL table.
        if (NULL == __CFUniCharPrecompSourceTable) return 0xFFFD;
    }

    // First level: keyed by the combining character. The value describes the
    // sub-table of base characters that combine with it.
    value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)__CFUniCharPrecompSourceTable, __CFUniCharPrecompositionTableLength, combining);
    if (!value) return 0xFFFD;

    // Second level: keyed by the base character. The low 16 bits of `value` are an
    // offset in 32-bit words into the destination table; the bits above are the
    // entry count. kCFUniCharNonBmpFlag selects the table with 32-bit entries.
    if (value & kCFUniCharNonBmpFlag) {
        value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16) & 0x7FFF, base);
    } else {
        value = __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings *)((uint32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16), base);
    }

    return (value ? value : 0xFFFD);
}
// Constants for the arithmetic Hangul syllable composition in CFUniCharPrecompose():
//   syllable = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE
// Code point that the composed syllable value is offset from.
#define HANGUL_SBASE 0xAC00
// Code point subtracted from the leading jamo to get lIndex.
#define HANGUL_LBASE 0x1100
// Code point subtracted from the following code unit to get vIndex.
#define HANGUL_VBASE 0x1161
// Code point subtracted from the third code unit to get tIndex; tIndex 0 is also
// the value used when no trailing jamo is consumed.
#define HANGUL_TBASE 0x11A7
// Not referenced in the code visible here.
#define HANGUL_SCOUNT 11172
// Bound applied to lIndex.
#define HANGUL_LCOUNT 19
// Bound applied to vIndex; also a multiplier in the syllable formula.
#define HANGUL_VCOUNT 21
// Bound applied to tIndex; also a multiplier in the syllable formula.
#define HANGUL_TCOUNT 28
// Not referenced in the code visible here.
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
/*
 * Copies the `length` code units starting at `convertedChars` to the position
 * `delta` units further along, working from the last unit back to the first.
 * Nothing is copied when `length` is not positive.
 */
CF_INLINE void __CFUniCharMoveBufferFromEnd0(UTF16Char *convertedChars, CFIndex length, CFIndex delta) {
    CFIndex remaining;

    for (remaining = length; remaining > 0; --remaining) {
        convertedChars[remaining - 1 + delta] = convertedChars[remaining - 1];
    }
}
/*
 * Precomposes a run of UTF-16 code units into `precomposed`.
 *
 *   characters      input code units
 *   length          number of input code units
 *   consumedLength  if non-NULL, receives the number of input code units read
 *   precomposed     output buffer
 *   maxLength       capacity of the output buffer, in code units
 *   filledLength    if non-NULL, receives the output length counter
 *
 * Each base character is written to the output as it is read. Combining characters
 * that compose with it are absorbed into a pending composite, which replaces the
 * base in the output when the next base character arrives or the input ends.
 * Combining characters that do not compose are copied through. A leading Hangul
 * jamo followed by a vowel jamo (and optionally a trailing jamo) is composed
 * arithmetically into one syllable.
 *
 * When the output counter would exceed maxLength inside the loop, processing stops
 * early; the counter reported through filledLength has already been advanced past
 * maxLength at that point. Returns false only when the final replacement of the
 * base needs one more code unit than maxLength allows; otherwise returns true.
 */
bool CFUniCharPrecompose(const UTF16Char *characters, CFIndex length, CFIndex *consumedLength, UTF16Char *precomposed, CFIndex maxLength, CFIndex *filledLength) {
    UTF32Char currentChar = 0, lastChar = 0, precomposedChar = 0xFFFD; // 0xFFFD == no pending composite
    CFIndex originalLength = length, usedLength = 0;
    UTF16Char *currentBase = precomposed; // where the current base character was written
    uint8_t currentClass, lastClass = 0;  // lastClass: class of the last mark that failed to compose, 0 if none
    bool currentBaseIsBMP = true;         // whether the base at currentBase was written as a single code unit
    bool isPrecomposed;

    if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable();

    while (length > 0) {
        currentChar = *(characters++);
        --length;

        // Join a surrogate pair into a single character.
        if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*characters)) {
            currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(characters++));
            --length;
        }

        if (lastChar && __CFUniCharIsNonBaseCharacter(currentChar)) {
            // Combining character following a base: try to fold it into the base
            // (or into the composite built so far).
            isPrecomposed = (precomposedChar == 0xFFFD ? false : true);

            if (isPrecomposed) lastChar = precomposedChar;

            currentClass = (currentChar > 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(currentChar, __CFUniCharCombiningClassForBMP));

            // Only attempt composition when no earlier mark was left uncomposed, or
            // this mark's combining class is higher than that mark's.
            if ((lastClass == 0) || (currentClass > lastClass)) {
                if ((precomposedChar = CFUniCharPrecomposeCharacter(lastChar, currentChar)) == 0xFFFD) {
                    // No composition: keep the composite we already had, remember this class.
                    if (isPrecomposed) precomposedChar = lastChar;
                    lastClass = currentClass;
                } else {
                    // Composed: the mark is absorbed and nothing is written for it.
                    continue;
                }
            }

            // The mark was not absorbed; copy it to the output.
            if (currentChar > 0xFFFF) { // Non-BMP
                usedLength += 2;
                if (usedLength > maxLength) break;
                currentChar -= 0x10000;
                *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
                *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
            } else {
                ++usedLength;
                if (usedLength > maxLength) break;
                *(precomposed++) = (UTF16Char)currentChar;
            }
        } else {
            if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
                // Index ranges follow the Unicode Standard's Hangul composition:
                //   0 <= L < LCount, 0 <= V < VCount, 0 < T < TCount.
                // Accepting an index equal to the count would produce values past the
                // last syllable, and treating T index 0 (the code point at HANGUL_TBASE
                // itself) as a trailing consonant would consume and drop that code unit.
                int32_t lIndex = (int32_t)currentChar - HANGUL_LBASE;

                if ((length > 0) && (0 <= lIndex) && (lIndex < HANGUL_LCOUNT)) {
                    int32_t vIndex = (int32_t)*characters - HANGUL_VBASE;

                    if ((vIndex >= 0) && (vIndex < HANGUL_VCOUNT)) {
                        int32_t tIndex = 0;

                        ++characters; --length;

                        if (length > 0) {
                            tIndex = (int32_t)*characters - HANGUL_TBASE;
                            if ((tIndex <= 0) || (tIndex >= HANGUL_TCOUNT)) {
                                tIndex = 0; // not a trailing consonant: leave it in the input
                            } else {
                                ++characters; --length;
                            }
                        }
                        currentChar = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
                    }
                }
            }

            // A new base starts here: first write the pending composite over the previous base.
            if (precomposedChar != 0xFFFD) {
                if (currentBaseIsBMP) { // Previous base was written as one code unit
                    if (lastChar > 0xFFFF) { // Last char was Non-BMP
                        --usedLength;
                        memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char));
                    }
                    *(currentBase) = (UTF16Char)precomposedChar;
                } else {
                    if (lastChar < 0x10000) { // Last char was BMP
                        // Make room for the second surrogate by shifting what follows the base.
                        ++usedLength;
                        if (usedLength > maxLength) break;
                        __CFUniCharMoveBufferFromEnd0(currentBase + 1, precomposed - (currentBase + 1), 1);
                    }
                    precomposedChar -= 0x10000;
                    *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL);
                    *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL);
                }
                precomposedChar = 0xFFFD;
            }

            // Write the new base and remember where it went.
            currentBase = precomposed;

            lastChar = currentChar;
            lastClass = 0;

            if (currentChar > 0xFFFF) { // Non-BMP
                usedLength += 2;
                if (usedLength > maxLength) break;
                currentChar -= 0x10000;
                *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
                *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
                currentBaseIsBMP = false;
            } else {
                ++usedLength;
                if (usedLength > maxLength) break;
                *(precomposed++) = (UTF16Char)currentChar;
                currentBaseIsBMP = true;
            }
        }
    }

    // Write any composite still pending for the last base.
    // NOTE(review): unlike the in-loop case this branch selects the output width from
    // currentChar rather than currentBaseIsBMP; kept as in the original — confirm intent.
    if (precomposedChar != 0xFFFD) {
        if (currentChar > 0xFFFF) { // Non-BMP
            if (lastChar < 0x10000) { // Last char was BMP
                ++usedLength;
                if (usedLength > maxLength) {
                    if (consumedLength) *consumedLength = originalLength - length;
                    if (filledLength) *filledLength = usedLength;
                    return false;
                }
                __CFUniCharMoveBufferFromEnd0(currentBase + 1, precomposed - (currentBase + 1), 1);
            }
            precomposedChar -= 0x10000;
            *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL);
            *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL);
        } else {
            if (lastChar > 0xFFFF) { // Last char was Non-BMP
                --usedLength;
                memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char));
            }
            *(currentBase) = (UTF16Char)precomposedChar;
        }
    }

    if (consumedLength) *consumedLength = originalLength - length;
    if (filledLength) *filledLength = usedLength;

    return true;
}
// Remove the macros defined earlier in this file so they do not stay defined for any code that follows.
#undef __CFUniCharIsNonBaseCharacter
#undef HANGUL_SBASE
#undef HANGUL_LBASE
#undef HANGUL_VBASE
#undef HANGUL_TBASE
#undef HANGUL_SCOUNT
#undef HANGUL_LCOUNT
#undef HANGUL_VCOUNT
#undef HANGUL_TCOUNT
#undef HANGUL_NCOUNT