// UTFConvert.cpp
#include "StdAfx.h" | |
#include "MyTypes.h" | |
#include "UTFConvert.h" | |
// Lead-byte thresholds shared by the UTF-8 routines in this file.
// A lead byte c with kUtf8Limits[n - 1] <= c < kUtf8Limits[n] is followed by
// n trailing bytes; bytes at or above the last entry take 5 trailing bytes.
// kUtf8Limits[n - 1] is also the marker value that is subtracted from the
// lead byte when decoding and added to it when encoding.
static const Byte kUtf8Limits[5] =
{
  0xC0, // 1 trailing byte
  0xE0, // 2 trailing bytes
  0xF0, // 3 trailing bytes
  0xF8, // 4 trailing bytes
  0xFC  // 5 trailing bytes
};
bool CheckUTF8(const char *src) throw() | |
{ | |
for (;;) | |
{ | |
Byte c; | |
unsigned numAdds; | |
c = *src++; | |
if (c == 0) | |
return true; | |
if (c < 0x80) | |
continue; | |
if (c < 0xC0) | |
return false; | |
for (numAdds = 1; numAdds < 5; numAdds++) | |
if (c < kUtf8Limits[numAdds]) | |
break; | |
UInt32 value = (c - kUtf8Limits[numAdds - 1]); | |
do | |
{ | |
Byte c2 = *src++; | |
if (c2 < 0x80 || c2 >= 0xC0) | |
return false; | |
value <<= 6; | |
value |= (c2 - 0x80); | |
} | |
while (--numAdds); | |
if (value >= 0x110000) | |
return false; | |
} | |
} | |
// Decodes srcLen bytes of UTF-8 from src into UTF-16 code units.
//
//   dest    - output buffer, or NULL to only count the units that would be
//             written (nothing is stored in that case).
//   destLen - receives the number of code units produced; on failure this is
//             the count produced before the offending sequence.
//   src     - input bytes (not required to be NUL-terminated).
//   srcLen  - number of input bytes.
//
// Returns True when the whole input was consumed, False when decoding stops
// at: a trailing byte (0x80..0xBF) in lead position, a sequence cut short by
// the end of input, a sequence whose trailing byte is not 10xxxxxx, or a
// decoded value of 0x110000 or above.
// Values below 0x10000 are stored as one unit; larger values are stored as a
// surrogate pair. The decoded value is not checked for shortest-form encoding.
static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) throw()
{
  size_t destPos = 0, srcPos = 0;
  for (;;)
  {
    Byte c;
    unsigned numAdds;
    if (srcPos == srcLen)
    {
      *destLen = destPos;
      return True;
    }
    c = (Byte)src[srcPos++];

    if (c < 0x80)
    {
      if (dest)
        dest[destPos] = (wchar_t)c;
      destPos++;
      continue;
    }
    // A trailing byte cannot start a sequence.
    if (c < 0xC0)
      break;

    // Number of trailing bytes announced by the lead byte (1..5).
    for (numAdds = 1; numAdds < 5; numAdds++)
      if (c < kUtf8Limits[numAdds])
        break;
    UInt32 value = (c - kUtf8Limits[numAdds - 1]);

    // A plain 'break' inside the do-loop would only leave that loop and let
    // a half-decoded value be emitted as if it were valid, so the failure is
    // recorded in a flag and re-checked after the loop.
    bool malformed = false;
    do
    {
      Byte c2;
      if (srcPos == srcLen)
      {
        malformed = true; // input ended in the middle of a sequence
        break;
      }
      c2 = (Byte)src[srcPos++];
      if (c2 < 0x80 || c2 >= 0xC0)
      {
        malformed = true; // not a 10xxxxxx trailing byte
        break;
      }
      value <<= 6;
      value |= (c2 - 0x80);
    }
    while (--numAdds);
    if (malformed)
      break;

    if (value < 0x10000)
    {
      if (dest)
        dest[destPos] = (wchar_t)value;
      destPos++;
    }
    else
    {
      value -= 0x10000;
      // Anything at or above 0x110000 cannot be expressed as a surrogate pair.
      if (value >= 0x100000)
        break;
      if (dest)
      {
        dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
        dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
      }
      destPos += 2;
    }
  }
  // Error exit: report how much was produced before the bad sequence.
  *destLen = destPos;
  return False;
}
// Encodes srcLen UTF-16 code units from src as UTF-8 bytes.
//
//   dest    - output buffer, or NULL to only count the bytes that would be
//             written (nothing is stored in that case).
//   destLen - receives the number of bytes produced; on failure this is the
//             count produced before the offending unit.
//   src     - input code units.
//   srcLen  - number of input code units.
//
// Returns True when all input was consumed. Returns False when a unit in
// 0xDC00..0xDFFF appears without a preceding high surrogate, or when a high
// surrogate (0xD800..0xDBFF) is the last unit or is not followed by a unit in
// 0xDC00..0xDFFF.
static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
{
  size_t outPos = 0;
  size_t inPos = 0;

  while (inPos != srcLen)
  {
    UInt32 code = src[inPos++];

    // ASCII is copied through unchanged.
    if (code < 0x80)
    {
      if (dest)
        dest[outPos] = (char)code;
      outPos++;
      continue;
    }

    // Surrogate range: combine a high/low pair into one value.
    if (code >= 0xD800 && code < 0xE000)
    {
      if (code >= 0xDC00 || inPos == srcLen)
      {
        *destLen = outPos;
        return False;
      }
      const UInt32 low = src[inPos++];
      if (low < 0xDC00 || low >= 0xE000)
      {
        *destLen = outPos;
        return False;
      }
      code = (((code - 0xD800) << 10) | (low - 0xDC00)) + 0x10000;
    }

    // Pick the smallest count of trailing bytes (1..5) whose payload,
    // 5 * tail + 6 bits, can hold the value.
    unsigned tail = 1;
    while (tail < 5 && code >= (((UInt32)1) << (tail * 5 + 6)))
      tail++;

    // Lead byte: length marker plus the topmost payload bits.
    if (dest)
      dest[outPos] = (char)(kUtf8Limits[tail - 1] + (code >> (6 * tail)));
    outPos++;

    // Trailing bytes, most significant 6-bit group first.
    while (tail != 0)
    {
      tail--;
      if (dest)
        dest[outPos] = (char)(0x80 + ((code >> (6 * tail)) & 0x3F));
      outPos++;
    }
  }

  *destLen = outPos;
  return True;
}
bool ConvertUTF8ToUnicode(const AString &src, UString &dest) | |
{ | |
dest.Empty(); | |
size_t destLen = 0; | |
Utf8_To_Utf16(NULL, &destLen, src, src.Len()); | |
Bool res = Utf8_To_Utf16(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len()); | |
dest.ReleaseBuffer((unsigned)destLen); | |
return res ? true : false; | |
} | |
bool ConvertUnicodeToUTF8(const UString &src, AString &dest) | |
{ | |
dest.Empty(); | |
size_t destLen = 0; | |
Utf16_To_Utf8(NULL, &destLen, src, src.Len()); | |
Bool res = Utf16_To_Utf8(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len()); | |
dest.ReleaseBuffer((unsigned)destLen); | |
return res ? true : false; | |
} |