| /* |
| ****************************************************************************** |
| * |
| * Copyright (C) 2000-2009, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ****************************************************************************** |
| * file name: ucnvscsu.c |
| * encoding: US-ASCII |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2000nov18 |
| * created by: Markus W. Scherer |
| * |
| * This is an implementation of the Standard Compression Scheme for Unicode |
| * as defined in http://www.unicode.org/unicode/reports/tr6/ . |
| * Reserved commands and window settings are treated as illegal sequences and |
| * will result in callback calls. |
| */ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_CONVERSION |
| |
| #include "unicode/ucnv.h" |
| #include "unicode/ucnv_cb.h" |
| #include "ucnv_bld.h" |
| #include "ucnv_cnv.h" |
| #include "cmemory.h" |
| |
| /* SCSU definitions --------------------------------------------------------- */ |
| |
/*
 * SCSU tag (command) byte values.
 * The S* tags are interpreted in single-byte mode, the U* tags in Unicode mode.
 * For the contiguous tag ranges (SQn, SCn, SDn, UCn, UDn) only the first
 * and the last value are named.
 */
enum {
    /* single-byte mode tags */
    SQ0=0x01,   /* SQ0..SQ7: quote from window pair 0..7 */
    SQ7=0x08,
    SDX=0x0b,   /* define a window as extended */
    Srs=0x0c,   /* reserved */
    SQU=0x0e,   /* quote a single Unicode character */
    SCU=0x0f,   /* change to Unicode mode */
    SC0=0x10,   /* SC0..SC7: select window 0..7 */
    SC7=0x17,
    SD0=0x18,   /* SD0..SD7: define and select window 0..7 */
    SD7=0x1f,

    /* Unicode mode tags */
    UC0=0xe0,   /* UC0..UC7: select window 0..7 */
    UC7=0xe7,
    UD0=0xe8,   /* UD0..UD7: define and select window 0..7 */
    UD7=0xef,
    UQU=0xf0,   /* quote a single Unicode character */
    UDX=0xf1,   /* define a window as extended */
    Urs=0xf2    /* reserved */
};
| |
/* Limits used to interpret the window offset byte of an SDn/UDn command. */
enum {
    /*
     * Unicode code points from 3400 to E000 are not addressable by a
     * dynamic window, since no short-run alphabets are found in these areas.
     * Therefore gapOffset is added to all values from gapThreshold on.
     */
    gapThreshold=0x68,
    gapOffset=0xac00,

    /* values from reservedStart up to (not including) fixedThreshold are reserved */
    reservedStart=0xa8,

    /* values from fixedThreshold on index the table of predefined fixed offsets */
    fixedThreshold=0xf9
};
| |
/*
 * Start offsets of the 8 static windows.
 * These never change; the decoder uses them for quoted (SQn) bytes below 0x80.
 */
static const uint32_t staticOffsets[8]={
    /* 0 */ 0x0000, /* ASCII for quoted tags */
    /* 1 */ 0x0080, /* Latin - 1 Supplement (for access to punctuation) */
    /* 2 */ 0x0100, /* Latin Extended-A */
    /* 3 */ 0x0300, /* Combining Diacritical Marks */
    /* 4 */ 0x2000, /* General Punctuation */
    /* 5 */ 0x2080, /* Currency Symbols */
    /* 6 */ 0x2100, /* Letterlike Symbols and Number Forms */
    /* 7 */ 0x3000  /* CJK Symbols and punctuation */
};
| |
/*
 * Start offsets that the 8 dynamic (sliding) windows have after a reset.
 * _SCSUReset() copies this table into the per-converter window offsets.
 */
static const uint32_t initialDynamicOffsets[8]={
    /* 0 */ 0x0080, /* Latin-1 */
    /* 1 */ 0x00c0, /* Latin Extended A */
    /* 2 */ 0x0400, /* Cyrillic */
    /* 3 */ 0x0600, /* Arabic */
    /* 4 */ 0x0900, /* Devanagari */
    /* 5 */ 0x3040, /* Hiragana */
    /* 6 */ 0x30a0, /* Katakana */
    /* 7 */ 0xff00  /* Fullwidth ASCII */
};
| |
/*
 * Predefined window offsets, selected by the window offset byte values
 * 0xF9..0xFF (index = byte value - fixedThreshold).
 */
static const uint32_t fixedOffsets[]={
    0x00c0, /* 0xF9: Latin-1 Letters + half of Latin Extended A */
    0x0250, /* 0xFA: IPA extensions */
    0x0370, /* 0xFB: Greek */
    0x0530, /* 0xFC: Armenian */
    0x3040, /* 0xFD: Hiragana */
    0x30a0, /* 0xFE: Katakana */
    0xff60  /* 0xFF: Halfwidth Katakana */
};
| |
/*
 * States of the toUnicode state machine (stored in SCSUData.toUState).
 * readCommand is the idle state between characters/commands; every other
 * state means that a multi-byte sequence has been started.
 */
enum {
    readCommand=0,      /* expect a tag or a directly encoded byte */
    quotePairOne=1,     /* expect the first byte of a quoted UTF-16 code unit */
    quotePairTwo=2,     /* expect the second byte of a UTF-16 code unit */
    quoteOne=3,         /* expect the byte quoted by SQn */
    definePairOne=4,    /* expect the first parameter byte of SDX/UDX */
    definePairTwo=5,    /* expect the second parameter byte of SDX/UDX */
    defineOne=6         /* expect the window offset byte of SDn/UDn */
};
| |
| typedef struct SCSUData { |
| /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ |
| uint32_t toUDynamicOffsets[8]; |
| uint32_t fromUDynamicOffsets[8]; |
| |
| /* state machine state - toUnicode */ |
| UBool toUIsSingleByteMode; |
| uint8_t toUState; |
| int8_t toUQuoteWindow, toUDynamicWindow; |
| uint8_t toUByteOne; |
| uint8_t toUPadding[3]; |
| |
| /* state machine state - fromUnicode */ |
| UBool fromUIsSingleByteMode; |
| int8_t fromUDynamicWindow; |
| |
| /* |
| * windowUse[] keeps track of the use of the dynamic windows: |
| * At nextWindowUseIndex there is the least recently used window, |
| * and the following windows (in a wrapping manner) are more and more |
| * recently used. |
| * At nextWindowUseIndex-1 there is the most recently used window. |
| */ |
| uint8_t locale; |
| int8_t nextWindowUseIndex; |
| int8_t windowUse[8]; |
| } SCSUData; |
| |
/*
 * Initial contents of SCSUData.windowUse[] (least recently used window first),
 * for the generic case and for the Japanese locale.
 */
static const int8_t initialWindowUse[8]={
    7, 0, 3, 2, 4, 5, 6, 1
};
static const int8_t initialWindowUse_ja[8]={
    3, 2, 4, 1, 0, 7, 5, 6
};
| |
/* Values for SCSUData.locale; _SCSUOpen() sets l_ja for a "ja" locale ID. */
enum {
    lGeneric=0,
    l_ja=1
};
| |
| /* SCSU setup functions ----------------------------------------------------- */ |
| |
| static void |
| _SCSUReset(UConverter *cnv, UConverterResetChoice choice) { |
| SCSUData *scsu=(SCSUData *)cnv->extraInfo; |
| |
| if(choice<=UCNV_RESET_TO_UNICODE) { |
| /* reset toUnicode */ |
| uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); |
| |
| scsu->toUIsSingleByteMode=TRUE; |
| scsu->toUState=readCommand; |
| scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; |
| scsu->toUByteOne=0; |
| |
| cnv->toULength=0; |
| } |
| if(choice!=UCNV_RESET_TO_UNICODE) { |
| /* reset fromUnicode */ |
| uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); |
| |
| scsu->fromUIsSingleByteMode=TRUE; |
| scsu->fromUDynamicWindow=0; |
| |
| scsu->nextWindowUseIndex=0; |
| switch(scsu->locale) { |
| case l_ja: |
| uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); |
| break; |
| default: |
| uprv_memcpy(scsu->windowUse, initialWindowUse, 8); |
| break; |
| } |
| |
| cnv->fromUChar32=0; |
| } |
| } |
| |
| static void |
| _SCSUOpen(UConverter *cnv, |
| UConverterLoadArgs *pArgs, |
| UErrorCode *pErrorCode) { |
| const char *locale=pArgs->locale; |
| if(pArgs->onlyTestIsLoadable) { |
| return; |
| } |
| cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); |
| if(cnv->extraInfo!=NULL) { |
| if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { |
| ((SCSUData *)cnv->extraInfo)->locale=l_ja; |
| } else { |
| ((SCSUData *)cnv->extraInfo)->locale=lGeneric; |
| } |
| _SCSUReset(cnv, UCNV_RESET_BOTH); |
| } else { |
| *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| } |
| |
| /* Set the substitution character U+fffd as a Unicode string. */ |
| cnv->subUChars[0]=0xfffd; |
| cnv->subCharLen=-1; |
| } |
| |
| static void |
| _SCSUClose(UConverter *cnv) { |
| if(cnv->extraInfo!=NULL) { |
| if(!cnv->isExtraLocal) { |
| uprv_free(cnv->extraInfo); |
| } |
| cnv->extraInfo=NULL; |
| } |
| } |
| |
| /* SCSU-to-Unicode conversion functions ------------------------------------- */ |
| |
| static void |
| _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, |
| UErrorCode *pErrorCode) { |
| UConverter *cnv; |
| SCSUData *scsu; |
| const uint8_t *source, *sourceLimit; |
| UChar *target; |
| const UChar *targetLimit; |
| int32_t *offsets; |
| UBool isSingleByteMode; |
| uint8_t state, byteOne; |
| int8_t quoteWindow, dynamicWindow; |
| |
| int32_t sourceIndex, nextSourceIndex; |
| |
| uint8_t b; |
| |
| /* set up the local pointers */ |
| cnv=pArgs->converter; |
| scsu=(SCSUData *)cnv->extraInfo; |
| |
| source=(const uint8_t *)pArgs->source; |
| sourceLimit=(const uint8_t *)pArgs->sourceLimit; |
| target=pArgs->target; |
| targetLimit=pArgs->targetLimit; |
| offsets=pArgs->offsets; |
| |
| /* get the state machine state */ |
| isSingleByteMode=scsu->toUIsSingleByteMode; |
| state=scsu->toUState; |
| quoteWindow=scsu->toUQuoteWindow; |
| dynamicWindow=scsu->toUDynamicWindow; |
| byteOne=scsu->toUByteOne; |
| |
| /* sourceIndex=-1 if the current character began in the previous buffer */ |
| sourceIndex=state==readCommand ? 0 : -1; |
| nextSourceIndex=0; |
| |
| /* |
| * conversion "loop" |
| * |
| * For performance, this is not a normal C loop. |
| * Instead, there are two code blocks for the two SCSU modes. |
| * The function branches to either one, and a change of the mode is done with a goto to |
| * the other branch. |
| * |
| * Each branch has two conventional loops: |
| * - a fast-path loop for the most common codes in the mode |
| * - a loop for all other codes in the mode |
| * When the fast-path runs into a code that it cannot handle, its loop ends and it |
| * runs into the following loop to handle the other codes. |
| * The end of the input or output buffer is also handled by the slower loop. |
| * The slow loop jumps (goto) to the fast-path loop again as soon as possible. |
| * |
| * The callback handling is done by returning with an error code. |
| * The conversion framework actually calls the callback function. |
| */ |
| if(isSingleByteMode) { |
| /* fast path for single-byte mode */ |
| if(state==readCommand) { |
| fastSingle: |
| while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { |
| ++source; |
| ++nextSourceIndex; |
| if(b<=0x7f) { |
| /* write US-ASCII graphic character or DEL */ |
| *target++=(UChar)b; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| } else { |
| /* write from dynamic window */ |
| uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); |
| if(c<=0xffff) { |
| *target++=(UChar)c; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| } else { |
| /* output surrogate pair */ |
| *target++=(UChar)(0xd7c0+(c>>10)); |
| if(target<targetLimit) { |
| *target++=(UChar)(0xdc00|(c&0x3ff)); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| *offsets++=sourceIndex; |
| } |
| } else { |
| /* target overflow */ |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
| cnv->UCharErrorBufferLength=1; |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| goto endloop; |
| } |
| } |
| } |
| sourceIndex=nextSourceIndex; |
| } |
| } |
| |
| /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ |
| singleByteMode: |
| while(source<sourceLimit) { |
| if(target>=targetLimit) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| b=*source++; |
| ++nextSourceIndex; |
| switch(state) { |
| case readCommand: |
| /* redundant conditions are commented out */ |
| /* here: b<0x20 because otherwise we would be in fastSingle */ |
| if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
| /* CR/LF/TAB/NUL */ |
| *target++=(UChar)b; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| sourceIndex=nextSourceIndex; |
| goto fastSingle; |
| } else if(SC0<=b) { |
| if(b<=SC7) { |
| dynamicWindow=(int8_t)(b-SC0); |
| sourceIndex=nextSourceIndex; |
| goto fastSingle; |
| } else /* if(SD0<=b && b<=SD7) */ { |
| dynamicWindow=(int8_t)(b-SD0); |
| state=defineOne; |
| } |
| } else if(/* SQ0<=b && */ b<=SQ7) { |
| quoteWindow=(int8_t)(b-SQ0); |
| state=quoteOne; |
| } else if(b==SDX) { |
| state=definePairOne; |
| } else if(b==SQU) { |
| state=quotePairOne; |
| } else if(b==SCU) { |
| sourceIndex=nextSourceIndex; |
| isSingleByteMode=FALSE; |
| goto fastUnicode; |
| } else /* Srs */ { |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| goto endloop; |
| } |
| |
| /* store the first byte of a multibyte sequence in toUBytes[] */ |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| break; |
| case quotePairOne: |
| byteOne=b; |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| state=quotePairTwo; |
| break; |
| case quotePairTwo: |
| *target++=(UChar)((byteOne<<8)|b); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| sourceIndex=nextSourceIndex; |
| state=readCommand; |
| goto fastSingle; |
| case quoteOne: |
| if(b<0x80) { |
| /* all static offsets are in the BMP */ |
| *target++=(UChar)(staticOffsets[quoteWindow]+b); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| } else { |
| /* write from dynamic window */ |
| uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); |
| if(c<=0xffff) { |
| *target++=(UChar)c; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| } else { |
| /* output surrogate pair */ |
| *target++=(UChar)(0xd7c0+(c>>10)); |
| if(target<targetLimit) { |
| *target++=(UChar)(0xdc00|(c&0x3ff)); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| *offsets++=sourceIndex; |
| } |
| } else { |
| /* target overflow */ |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
| cnv->UCharErrorBufferLength=1; |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| goto endloop; |
| } |
| } |
| } |
| sourceIndex=nextSourceIndex; |
| state=readCommand; |
| goto fastSingle; |
| case definePairOne: |
| dynamicWindow=(int8_t)((b>>5)&7); |
| byteOne=(uint8_t)(b&0x1f); |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| state=definePairTwo; |
| break; |
| case definePairTwo: |
| scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); |
| sourceIndex=nextSourceIndex; |
| state=readCommand; |
| goto fastSingle; |
| case defineOne: |
| if(b==0) { |
| /* callback(illegal): Reserved window offset value 0 */ |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| goto endloop; |
| } else if(b<gapThreshold) { |
| scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; |
| } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { |
| scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; |
| } else if(b>=fixedThreshold) { |
| scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; |
| } else { |
| /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| goto endloop; |
| } |
| sourceIndex=nextSourceIndex; |
| state=readCommand; |
| goto fastSingle; |
| } |
| } |
| } else { |
| /* fast path for Unicode mode */ |
| if(state==readCommand) { |
| fastUnicode: |
| while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { |
| *target++=(UChar)((b<<8)|source[1]); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| sourceIndex=nextSourceIndex; |
| nextSourceIndex+=2; |
| source+=2; |
| } |
| } |
| |
| /* normal state machine for Unicode mode */ |
| /* unicodeByteMode: */ |
| while(source<sourceLimit) { |
| if(target>=targetLimit) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| b=*source++; |
| ++nextSourceIndex; |
| switch(state) { |
| case readCommand: |
| if((uint8_t)(b-UC0)>(Urs-UC0)) { |
| byteOne=b; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=quotePairTwo; |
| } else if(/* UC0<=b && */ b<=UC7) { |
| dynamicWindow=(int8_t)(b-UC0); |
| sourceIndex=nextSourceIndex; |
| isSingleByteMode=TRUE; |
| goto fastSingle; |
| } else if(/* UD0<=b && */ b<=UD7) { |
| dynamicWindow=(int8_t)(b-UD0); |
| isSingleByteMode=TRUE; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=defineOne; |
| goto singleByteMode; |
| } else if(b==UDX) { |
| isSingleByteMode=TRUE; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=definePairOne; |
| goto singleByteMode; |
| } else if(b==UQU) { |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=quotePairOne; |
| } else /* Urs */ { |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| goto endloop; |
| } |
| break; |
| case quotePairOne: |
| byteOne=b; |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| state=quotePairTwo; |
| break; |
| case quotePairTwo: |
| *target++=(UChar)((byteOne<<8)|b); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| sourceIndex=nextSourceIndex; |
| state=readCommand; |
| goto fastUnicode; |
| } |
| } |
| } |
| endloop: |
| |
| /* set the converter state back into UConverter */ |
| if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { |
| /* reset to deal with the next character */ |
| state=readCommand; |
| } else if(state==readCommand) { |
| /* not in a multi-byte sequence, reset toULength */ |
| cnv->toULength=0; |
| } |
| scsu->toUIsSingleByteMode=isSingleByteMode; |
| scsu->toUState=state; |
| scsu->toUQuoteWindow=quoteWindow; |
| scsu->toUDynamicWindow=dynamicWindow; |
| scsu->toUByteOne=byteOne; |
| |
| /* write back the updated pointers */ |
| pArgs->source=(const char *)source; |
| pArgs->target=target; |
| pArgs->offsets=offsets; |
| return; |
| } |
| |
| /* |
| * Identical to _SCSUToUnicodeWithOffsets but without offset handling. |
| * If a change is made in the original function, then either |
| * change this function the same way or |
| * re-copy the original function and remove the variables |
| * offsets, sourceIndex, and nextSourceIndex. |
| */ |
| static void |
| _SCSUToUnicode(UConverterToUnicodeArgs *pArgs, |
| UErrorCode *pErrorCode) { |
| UConverter *cnv; |
| SCSUData *scsu; |
| const uint8_t *source, *sourceLimit; |
| UChar *target; |
| const UChar *targetLimit; |
| UBool isSingleByteMode; |
| uint8_t state, byteOne; |
| int8_t quoteWindow, dynamicWindow; |
| |
| uint8_t b; |
| |
| /* set up the local pointers */ |
| cnv=pArgs->converter; |
| scsu=(SCSUData *)cnv->extraInfo; |
| |
| source=(const uint8_t *)pArgs->source; |
| sourceLimit=(const uint8_t *)pArgs->sourceLimit; |
| target=pArgs->target; |
| targetLimit=pArgs->targetLimit; |
| |
| /* get the state machine state */ |
| isSingleByteMode=scsu->toUIsSingleByteMode; |
| state=scsu->toUState; |
| quoteWindow=scsu->toUQuoteWindow; |
| dynamicWindow=scsu->toUDynamicWindow; |
| byteOne=scsu->toUByteOne; |
| |
| /* |
| * conversion "loop" |
| * |
| * For performance, this is not a normal C loop. |
| * Instead, there are two code blocks for the two SCSU modes. |
| * The function branches to either one, and a change of the mode is done with a goto to |
| * the other branch. |
| * |
| * Each branch has two conventional loops: |
| * - a fast-path loop for the most common codes in the mode |
| * - a loop for all other codes in the mode |
| * When the fast-path runs into a code that it cannot handle, its loop ends and it |
| * runs into the following loop to handle the other codes. |
| * The end of the input or output buffer is also handled by the slower loop. |
| * The slow loop jumps (goto) to the fast-path loop again as soon as possible. |
| * |
| * The callback handling is done by returning with an error code. |
| * The conversion framework actually calls the callback function. |
| */ |
| if(isSingleByteMode) { |
| /* fast path for single-byte mode */ |
| if(state==readCommand) { |
| fastSingle: |
| while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { |
| ++source; |
| if(b<=0x7f) { |
| /* write US-ASCII graphic character or DEL */ |
| *target++=(UChar)b; |
| } else { |
| /* write from dynamic window */ |
| uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); |
| if(c<=0xffff) { |
| *target++=(UChar)c; |
| } else { |
| /* output surrogate pair */ |
| *target++=(UChar)(0xd7c0+(c>>10)); |
| if(target<targetLimit) { |
| *target++=(UChar)(0xdc00|(c&0x3ff)); |
| } else { |
| /* target overflow */ |
| cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
| cnv->UCharErrorBufferLength=1; |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| goto endloop; |
| } |
| } |
| } |
| } |
| } |
| |
| /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ |
| singleByteMode: |
| while(source<sourceLimit) { |
| if(target>=targetLimit) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| b=*source++; |
| switch(state) { |
| case readCommand: |
| /* redundant conditions are commented out */ |
| /* here: b<0x20 because otherwise we would be in fastSingle */ |
| if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
| /* CR/LF/TAB/NUL */ |
| *target++=(UChar)b; |
| goto fastSingle; |
| } else if(SC0<=b) { |
| if(b<=SC7) { |
| dynamicWindow=(int8_t)(b-SC0); |
| goto fastSingle; |
| } else /* if(SD0<=b && b<=SD7) */ { |
| dynamicWindow=(int8_t)(b-SD0); |
| state=defineOne; |
| } |
| } else if(/* SQ0<=b && */ b<=SQ7) { |
| quoteWindow=(int8_t)(b-SQ0); |
| state=quoteOne; |
| } else if(b==SDX) { |
| state=definePairOne; |
| } else if(b==SQU) { |
| state=quotePairOne; |
| } else if(b==SCU) { |
| isSingleByteMode=FALSE; |
| goto fastUnicode; |
| } else /* Srs */ { |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| goto endloop; |
| } |
| |
| /* store the first byte of a multibyte sequence in toUBytes[] */ |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| break; |
| case quotePairOne: |
| byteOne=b; |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| state=quotePairTwo; |
| break; |
| case quotePairTwo: |
| *target++=(UChar)((byteOne<<8)|b); |
| state=readCommand; |
| goto fastSingle; |
| case quoteOne: |
| if(b<0x80) { |
| /* all static offsets are in the BMP */ |
| *target++=(UChar)(staticOffsets[quoteWindow]+b); |
| } else { |
| /* write from dynamic window */ |
| uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); |
| if(c<=0xffff) { |
| *target++=(UChar)c; |
| } else { |
| /* output surrogate pair */ |
| *target++=(UChar)(0xd7c0+(c>>10)); |
| if(target<targetLimit) { |
| *target++=(UChar)(0xdc00|(c&0x3ff)); |
| } else { |
| /* target overflow */ |
| cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
| cnv->UCharErrorBufferLength=1; |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| goto endloop; |
| } |
| } |
| } |
| state=readCommand; |
| goto fastSingle; |
| case definePairOne: |
| dynamicWindow=(int8_t)((b>>5)&7); |
| byteOne=(uint8_t)(b&0x1f); |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| state=definePairTwo; |
| break; |
| case definePairTwo: |
| scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); |
| state=readCommand; |
| goto fastSingle; |
| case defineOne: |
| if(b==0) { |
| /* callback(illegal): Reserved window offset value 0 */ |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| goto endloop; |
| } else if(b<gapThreshold) { |
| scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; |
| } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { |
| scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; |
| } else if(b>=fixedThreshold) { |
| scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; |
| } else { |
| /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| goto endloop; |
| } |
| state=readCommand; |
| goto fastSingle; |
| } |
| } |
| } else { |
| /* fast path for Unicode mode */ |
| if(state==readCommand) { |
| fastUnicode: |
| while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { |
| *target++=(UChar)((b<<8)|source[1]); |
| source+=2; |
| } |
| } |
| |
| /* normal state machine for Unicode mode */ |
| /* unicodeByteMode: */ |
| while(source<sourceLimit) { |
| if(target>=targetLimit) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| b=*source++; |
| switch(state) { |
| case readCommand: |
| if((uint8_t)(b-UC0)>(Urs-UC0)) { |
| byteOne=b; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=quotePairTwo; |
| } else if(/* UC0<=b && */ b<=UC7) { |
| dynamicWindow=(int8_t)(b-UC0); |
| isSingleByteMode=TRUE; |
| goto fastSingle; |
| } else if(/* UD0<=b && */ b<=UD7) { |
| dynamicWindow=(int8_t)(b-UD0); |
| isSingleByteMode=TRUE; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=defineOne; |
| goto singleByteMode; |
| } else if(b==UDX) { |
| isSingleByteMode=TRUE; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=definePairOne; |
| goto singleByteMode; |
| } else if(b==UQU) { |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| state=quotePairOne; |
| } else /* Urs */ { |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| cnv->toUBytes[0]=b; |
| cnv->toULength=1; |
| goto endloop; |
| } |
| break; |
| case quotePairOne: |
| byteOne=b; |
| cnv->toUBytes[1]=b; |
| cnv->toULength=2; |
| state=quotePairTwo; |
| break; |
| case quotePairTwo: |
| *target++=(UChar)((byteOne<<8)|b); |
| state=readCommand; |
| goto fastUnicode; |
| } |
| } |
| } |
| endloop: |
| |
| /* set the converter state back into UConverter */ |
| if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { |
| /* reset to deal with the next character */ |
| state=readCommand; |
| } else if(state==readCommand) { |
| /* not in a multi-byte sequence, reset toULength */ |
| cnv->toULength=0; |
| } |
| scsu->toUIsSingleByteMode=isSingleByteMode; |
| scsu->toUState=state; |
| scsu->toUQuoteWindow=quoteWindow; |
| scsu->toUDynamicWindow=dynamicWindow; |
| scsu->toUByteOne=byteOne; |
| |
| /* write back the updated pointers */ |
| pArgs->source=(const char *)source; |
| pArgs->target=target; |
| return; |
| } |
| |
| /* SCSU-from-Unicode conversion functions ----------------------------------- */ |
| |
| /* |
| * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve |
| * reasonable results. The lookahead is minimal. |
| * Many cases are simple: |
| * A character fits directly into the current mode, a dynamic or static window, |
| * or is not compressible. These cases are tested first. |
| * Real compression heuristics are applied to the rest, in code branches for |
| * single/Unicode mode and BMP/supplementary code points. |
| * The heuristics used here are extremely simple. |
| */ |
| |
/*
 * Find the first of the 8 windows that contains c.
 * A window covers the 128 code points offsets[i]..offsets[i]+0x7f.
 *
 * Returns the window number 0..7, or -1 if no window contains c.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t window;

    for(window=0; window<8; ++window) {
        /* unsigned subtraction: c below the window start wraps to a large value */
        uint32_t delta=c-offsets[window];
        if(delta<0x80) {
            return window;
        }
    }
    return -1;
}
| |
| /* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */ |
| static UBool |
| isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { |
| return (UBool)(c<=offset+0x7f && |
| (c>=offset || (c<=0x7f && |
| (c>=0x20 || (1UL<<c)&0x2601)))); |
| /* binary 0010 0110 0000 0001, |
| check for b==0xd || b==0xa || b==9 || b==0 */ |
| } |
| |
| /* |
| * getNextDynamicWindow returns the next dynamic window to be redefined |
| */ |
| static int8_t |
| getNextDynamicWindow(SCSUData *scsu) { |
| int8_t window=scsu->windowUse[scsu->nextWindowUseIndex]; |
| if(++scsu->nextWindowUseIndex==8) { |
| scsu->nextWindowUseIndex=0; |
| } |
| return window; |
| } |
| |
| /* |
| * useDynamicWindow() adjusts |
| * windowUse[] and nextWindowUseIndex for the algorithm to choose |
| * the next dynamic window to be defined; |
| * a subclass may override it and provide its own algorithm. |
| */ |
| static void |
| useDynamicWindow(SCSUData *scsu, int8_t window) { |
| /* |
| * move the existing window, which just became the most recently used one, |
| * up in windowUse[] to nextWindowUseIndex-1 |
| */ |
| |
| /* first, find the index of the window - backwards to favor the more recently used windows */ |
| int i, j; |
| |
| i=scsu->nextWindowUseIndex; |
| do { |
| if(--i<0) { |
| i=7; |
| } |
| } while(scsu->windowUse[i]!=window); |
| |
| /* now copy each windowUse[i+1] to [i] */ |
| j=i+1; |
| if(j==8) { |
| j=0; |
| } |
| while(j!=scsu->nextWindowUseIndex) { |
| scsu->windowUse[i]=scsu->windowUse[j]; |
| i=j; |
| if(++j==8) { j=0; } |
| } |
| |
| /* finally, set the window into the most recently used index */ |
| scsu->windowUse[i]=window; |
| } |
| |
| /* |
| * calculate the offset and the code for a dynamic window that contains the character |
| * takes fixed offsets into account |
| * the offset of the window is stored in the offset variable, |
| * the code is returned |
| * |
| * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code |
| */ |
| static int |
| getDynamicOffset(uint32_t c, uint32_t *pOffset) { |
| int i; |
| |
| for(i=0; i<7; ++i) { |
| if((uint32_t)(c-fixedOffsets[i])<=0x7f) { |
| *pOffset=fixedOffsets[i]; |
| return 0xf9+i; |
| } |
| } |
| |
| if(c<0x80) { |
| /* No dynamic window for US-ASCII. */ |
| return -1; |
| } else if(c<0x3400 || |
| (uint32_t)(c-0x10000)<(0x14000-0x10000) || |
| (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) |
| ) { |
| /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ |
| *pOffset=c&0x7fffff80; |
| return (int)(c>>7); |
| } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { |
| /* For these characters we need to take the gapOffset into account. */ |
| *pOffset=c&0x7fffff80; |
| return (int)((c-gapOffset)>>7); |
| } else { |
| return -1; |
| } |
| } |
| |
| /* |
| * Idea for compression: |
| * - save SCSUData and other state before really starting work |
| * - at endloop, see if compression could be better with just unicode mode |
| * - don't do this if a callback has been called |
| * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning |
| * - different buffer handling! |
| * |
| * Drawback or need for corrective handling: |
| * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and |
| * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible |
| * not only for compression but also for HTML/XML documents with following charset/encoding announcers. |
| * |
| * How to achieve both? |
| * - Only replace the result after an SDX or SCU? |
| */ |
| |
| static void |
| _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, |
| UErrorCode *pErrorCode) { |
| UConverter *cnv; |
| SCSUData *scsu; |
| const UChar *source, *sourceLimit; |
| uint8_t *target; |
| int32_t targetCapacity; |
| int32_t *offsets; |
| |
| UBool isSingleByteMode; |
| uint8_t dynamicWindow; |
| uint32_t currentOffset; |
| |
| uint32_t c, delta; |
| |
| int32_t sourceIndex, nextSourceIndex; |
| |
| int32_t length; |
| |
| /* variables for compression heuristics */ |
| uint32_t offset; |
| UChar lead, trail; |
| int code; |
| int8_t window; |
| |
| /* set up the local pointers */ |
| cnv=pArgs->converter; |
| scsu=(SCSUData *)cnv->extraInfo; |
| |
| /* set up the local pointers */ |
| source=pArgs->source; |
| sourceLimit=pArgs->sourceLimit; |
| target=(uint8_t *)pArgs->target; |
| targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); |
| offsets=pArgs->offsets; |
| |
| /* get the state machine state */ |
| isSingleByteMode=scsu->fromUIsSingleByteMode; |
| dynamicWindow=scsu->fromUDynamicWindow; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| |
| c=cnv->fromUChar32; |
| |
| /* sourceIndex=-1 if the current character began in the previous buffer */ |
| sourceIndex= c==0 ? 0 : -1; |
| nextSourceIndex=0; |
| |
| /* similar conversion "loop" as in toUnicode */ |
| loop: |
| if(isSingleByteMode) { |
| if(c!=0 && targetCapacity>0) { |
| goto getTrailSingle; |
| } |
| |
| /* state machine for single-byte mode */ |
| /* singleByteMode: */ |
| while(source<sourceLimit) { |
| if(targetCapacity<=0) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| c=*source++; |
| ++nextSourceIndex; |
| |
| if((c-0x20)<=0x5f) { |
| /* pass US-ASCII graphic character through */ |
| *target++=(uint8_t)c; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| --targetCapacity; |
| } else if(c<0x20) { |
| if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
| /* CR/LF/TAB/NUL */ |
| *target++=(uint8_t)c; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| --targetCapacity; |
| } else { |
| /* quote C0 control character */ |
| c|=SQ0<<8; |
| length=2; |
| goto outputBytes; |
| } |
| } else if((delta=c-currentOffset)<=0x7f) { |
| /* use the current dynamic window */ |
| *target++=(uint8_t)(delta|0x80); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| --targetCapacity; |
| } else if(UTF_IS_SURROGATE(c)) { |
| if(UTF_IS_SURROGATE_FIRST(c)) { |
| getTrailSingle: |
| lead=(UChar)c; |
| if(source<sourceLimit) { |
| /* test the following code unit */ |
| trail=*source; |
| if(UTF_IS_SECOND_SURROGATE(trail)) { |
| ++source; |
| ++nextSourceIndex; |
| c=UTF16_GET_PAIR_VALUE(c, trail); |
| /* convert this surrogate code point */ |
| /* exit this condition tree */ |
| } else { |
| /* this is an unmatched lead code unit (1st surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| } else { |
| /* no more input */ |
| break; |
| } |
| } else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| |
| /* compress supplementary character U+10000..U+10ffff */ |
| if((delta=c-currentOffset)<=0x7f) { |
| /* use the current dynamic window */ |
| *target++=(uint8_t)(delta|0x80); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| --targetCapacity; |
| } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
| /* there is a dynamic window that contains this character, change to it */ |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else if((code=getDynamicOffset(c, &offset))>=0) { |
| /* might check if there are more characters in this window to come */ |
| /* define an extended window with this character */ |
| code-=0x200; |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=4; |
| goto outputBytes; |
| } else { |
| /* change to Unicode mode and output this (lead, trail) pair */ |
| isSingleByteMode=FALSE; |
| *target++=(uint8_t)SCU; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| --targetCapacity; |
| c=((uint32_t)lead<<16)|trail; |
| length=4; |
| goto outputBytes; |
| } |
| } else if(c<0xa0) { |
| /* quote C1 control character */ |
| c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ |
| length=2; |
| goto outputBytes; |
| } else if(c==0xfeff || c>=0xfff0) { |
| /* quote signature character=byte order mark and specials */ |
| c|=SQU<<16; |
| length=3; |
| goto outputBytes; |
| } else { |
| /* compress all other BMP characters */ |
| if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
| /* there is a window defined that contains this character - switch to it or quote from it? */ |
| if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { |
| /* change to dynamic window */ |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else { |
| /* quote from dynamic window */ |
| c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; |
| length=2; |
| goto outputBytes; |
| } |
| } else if((window=getWindow(staticOffsets, c))>=0) { |
| /* quote from static window */ |
| c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); |
| length=2; |
| goto outputBytes; |
| } else if((code=getDynamicOffset(c, &offset))>=0) { |
| /* define a dynamic window with this character */ |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=3; |
| goto outputBytes; |
| } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && |
| (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
| ) { |
| /* |
| * this character is not compressible (a BMP ideograph or similar); |
| * switch to Unicode mode if this is the last character in the block |
| * or there is at least one more ideograph following immediately |
| */ |
| isSingleByteMode=FALSE; |
| c|=SCU<<16; |
| length=3; |
| goto outputBytes; |
| } else { |
| /* quote Unicode */ |
| c|=SQU<<16; |
| length=3; |
| goto outputBytes; |
| } |
| } |
| |
| /* normal end of conversion: prepare for a new character */ |
| c=0; |
| sourceIndex=nextSourceIndex; |
| } |
| } else { |
| if(c!=0 && targetCapacity>0) { |
| goto getTrailUnicode; |
| } |
| |
| /* state machine for Unicode mode */ |
| /* unicodeByteMode: */ |
| while(source<sourceLimit) { |
| if(targetCapacity<=0) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| c=*source++; |
| ++nextSourceIndex; |
| |
| if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { |
| /* not compressible, write character directly */ |
| if(targetCapacity>=2) { |
| *target++=(uint8_t)(c>>8); |
| *target++=(uint8_t)c; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| *offsets++=sourceIndex; |
| } |
| targetCapacity-=2; |
| } else { |
| length=2; |
| goto outputBytes; |
| } |
| } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { |
| /* compress BMP character if the following one is not an uncompressible ideograph */ |
| if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { |
| if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { |
| /* ASCII digit or letter */ |
| isSingleByteMode=TRUE; |
| c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; |
| length=2; |
| goto outputBytes; |
| } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
| /* there is a dynamic window that contains this character, change to it */ |
| isSingleByteMode=TRUE; |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else if((code=getDynamicOffset(c, &offset))>=0) { |
| /* define a dynamic window with this character */ |
| isSingleByteMode=TRUE; |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=3; |
| goto outputBytes; |
| } |
| } |
| |
| /* don't know how to compress this character, just write it directly */ |
| length=2; |
| goto outputBytes; |
| } else if(c<0xe000) { |
| /* c is a surrogate */ |
| if(UTF_IS_SURROGATE_FIRST(c)) { |
| getTrailUnicode: |
| lead=(UChar)c; |
| if(source<sourceLimit) { |
| /* test the following code unit */ |
| trail=*source; |
| if(UTF_IS_SECOND_SURROGATE(trail)) { |
| ++source; |
| ++nextSourceIndex; |
| c=UTF16_GET_PAIR_VALUE(c, trail); |
| /* convert this surrogate code point */ |
| /* exit this condition tree */ |
| } else { |
| /* this is an unmatched lead code unit (1st surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| } else { |
| /* no more input */ |
| break; |
| } |
| } else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| |
| /* compress supplementary character */ |
| if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && |
| !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
| ) { |
| /* |
| * there is a dynamic window that contains this character and |
| * the following character is not uncompressible, |
| * change to the window |
| */ |
| isSingleByteMode=TRUE; |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ |
| (code=getDynamicOffset(c, &offset))>=0 |
| ) { |
| /* two supplementary characters in (probably) the same window - define an extended one */ |
| isSingleByteMode=TRUE; |
| code-=0x200; |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=4; |
| goto outputBytes; |
| } else { |
| /* don't know how to compress this character, just write it directly */ |
| c=((uint32_t)lead<<16)|trail; |
| length=4; |
| goto outputBytes; |
| } |
| } else /* 0xe000<=c<0xf300 */ { |
| /* quote to avoid SCSU tags */ |
| c|=UQU<<16; |
| length=3; |
| goto outputBytes; |
| } |
| |
| /* normal end of conversion: prepare for a new character */ |
| c=0; |
| sourceIndex=nextSourceIndex; |
| } |
| } |
| endloop: |
| |
| /* set the converter state back into UConverter */ |
| scsu->fromUIsSingleByteMode=isSingleByteMode; |
| scsu->fromUDynamicWindow=dynamicWindow; |
| |
| cnv->fromUChar32=c; |
| |
| /* write back the updated pointers */ |
| pArgs->source=source; |
| pArgs->target=(char *)target; |
| pArgs->offsets=offsets; |
| return; |
| |
| outputBytes: |
| /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ |
| /* from the first if in the loop we know that targetCapacity>0 */ |
| if(length<=targetCapacity) { |
| if(offsets==NULL) { |
| switch(length) { |
| /* each branch falls through to the next one */ |
| case 4: |
| *target++=(uint8_t)(c>>24); |
| case 3: |
| *target++=(uint8_t)(c>>16); |
| case 2: |
| *target++=(uint8_t)(c>>8); |
| case 1: |
| *target++=(uint8_t)c; |
| default: |
| /* will never occur */ |
| break; |
| } |
| } else { |
| switch(length) { |
| /* each branch falls through to the next one */ |
| case 4: |
| *target++=(uint8_t)(c>>24); |
| *offsets++=sourceIndex; |
| case 3: |
| *target++=(uint8_t)(c>>16); |
| *offsets++=sourceIndex; |
| case 2: |
| *target++=(uint8_t)(c>>8); |
| *offsets++=sourceIndex; |
| case 1: |
| *target++=(uint8_t)c; |
| *offsets++=sourceIndex; |
| default: |
| /* will never occur */ |
| break; |
| } |
| } |
| targetCapacity-=length; |
| |
| /* normal end of conversion: prepare for a new character */ |
| c=0; |
| sourceIndex=nextSourceIndex; |
| goto loop; |
| } else { |
| uint8_t *p; |
| |
| /* |
| * We actually do this backwards here: |
| * In order to save an intermediate variable, we output |
| * first to the overflow buffer what does not fit into the |
| * regular target. |
| */ |
| /* we know that 0<=targetCapacity<length<=4 */ |
| /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ |
| length-=targetCapacity; |
| p=(uint8_t *)cnv->charErrorBuffer; |
| switch(length) { |
| /* each branch falls through to the next one */ |
| case 4: |
| *p++=(uint8_t)(c>>24); |
| case 3: |
| *p++=(uint8_t)(c>>16); |
| case 2: |
| *p++=(uint8_t)(c>>8); |
| case 1: |
| *p=(uint8_t)c; |
| default: |
| /* will never occur */ |
| break; |
| } |
| cnv->charErrorBufferLength=(int8_t)length; |
| |
| /* now output what fits into the regular target */ |
| c>>=8*length; /* length was reduced by targetCapacity */ |
| switch(targetCapacity) { |
| /* each branch falls through to the next one */ |
| case 3: |
| *target++=(uint8_t)(c>>16); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| case 2: |
| *target++=(uint8_t)(c>>8); |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| case 1: |
| *target++=(uint8_t)c; |
| if(offsets!=NULL) { |
| *offsets++=sourceIndex; |
| } |
| default: |
| break; |
| } |
| |
| /* target overflow */ |
| targetCapacity=0; |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| c=0; |
| goto endloop; |
| } |
| } |
| |
| /* |
| * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. |
| * If a change is made in the original function, then either |
| * change this function the same way or |
| * re-copy the original function and remove the variables |
| * offsets, sourceIndex, and nextSourceIndex. |
| */ |
| static void |
| _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, |
| UErrorCode *pErrorCode) { |
| UConverter *cnv; |
| SCSUData *scsu; |
| const UChar *source, *sourceLimit; |
| uint8_t *target; |
| int32_t targetCapacity; |
| |
| UBool isSingleByteMode; |
| uint8_t dynamicWindow; |
| uint32_t currentOffset; |
| |
| uint32_t c, delta; |
| |
| int32_t length; |
| |
| /* variables for compression heuristics */ |
| uint32_t offset; |
| UChar lead, trail; |
| int code; |
| int8_t window; |
| |
| /* set up the local pointers */ |
| cnv=pArgs->converter; |
| scsu=(SCSUData *)cnv->extraInfo; |
| |
| /* set up the local pointers */ |
| source=pArgs->source; |
| sourceLimit=pArgs->sourceLimit; |
| target=(uint8_t *)pArgs->target; |
| targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); |
| |
| /* get the state machine state */ |
| isSingleByteMode=scsu->fromUIsSingleByteMode; |
| dynamicWindow=scsu->fromUDynamicWindow; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| |
| c=cnv->fromUChar32; |
| |
| /* similar conversion "loop" as in toUnicode */ |
| loop: |
| if(isSingleByteMode) { |
| if(c!=0 && targetCapacity>0) { |
| goto getTrailSingle; |
| } |
| |
| /* state machine for single-byte mode */ |
| /* singleByteMode: */ |
| while(source<sourceLimit) { |
| if(targetCapacity<=0) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| c=*source++; |
| |
| if((c-0x20)<=0x5f) { |
| /* pass US-ASCII graphic character through */ |
| *target++=(uint8_t)c; |
| --targetCapacity; |
| } else if(c<0x20) { |
| if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
| /* CR/LF/TAB/NUL */ |
| *target++=(uint8_t)c; |
| --targetCapacity; |
| } else { |
| /* quote C0 control character */ |
| c|=SQ0<<8; |
| length=2; |
| goto outputBytes; |
| } |
| } else if((delta=c-currentOffset)<=0x7f) { |
| /* use the current dynamic window */ |
| *target++=(uint8_t)(delta|0x80); |
| --targetCapacity; |
| } else if(UTF_IS_SURROGATE(c)) { |
| if(UTF_IS_SURROGATE_FIRST(c)) { |
| getTrailSingle: |
| lead=(UChar)c; |
| if(source<sourceLimit) { |
| /* test the following code unit */ |
| trail=*source; |
| if(UTF_IS_SECOND_SURROGATE(trail)) { |
| ++source; |
| c=UTF16_GET_PAIR_VALUE(c, trail); |
| /* convert this surrogate code point */ |
| /* exit this condition tree */ |
| } else { |
| /* this is an unmatched lead code unit (1st surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| } else { |
| /* no more input */ |
| break; |
| } |
| } else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| |
| /* compress supplementary character U+10000..U+10ffff */ |
| if((delta=c-currentOffset)<=0x7f) { |
| /* use the current dynamic window */ |
| *target++=(uint8_t)(delta|0x80); |
| --targetCapacity; |
| } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
| /* there is a dynamic window that contains this character, change to it */ |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else if((code=getDynamicOffset(c, &offset))>=0) { |
| /* might check if there are more characters in this window to come */ |
| /* define an extended window with this character */ |
| code-=0x200; |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=4; |
| goto outputBytes; |
| } else { |
| /* change to Unicode mode and output this (lead, trail) pair */ |
| isSingleByteMode=FALSE; |
| *target++=(uint8_t)SCU; |
| --targetCapacity; |
| c=((uint32_t)lead<<16)|trail; |
| length=4; |
| goto outputBytes; |
| } |
| } else if(c<0xa0) { |
| /* quote C1 control character */ |
| c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ |
| length=2; |
| goto outputBytes; |
| } else if(c==0xfeff || c>=0xfff0) { |
| /* quote signature character=byte order mark and specials */ |
| c|=SQU<<16; |
| length=3; |
| goto outputBytes; |
| } else { |
| /* compress all other BMP characters */ |
| if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
| /* there is a window defined that contains this character - switch to it or quote from it? */ |
| if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { |
| /* change to dynamic window */ |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else { |
| /* quote from dynamic window */ |
| c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; |
| length=2; |
| goto outputBytes; |
| } |
| } else if((window=getWindow(staticOffsets, c))>=0) { |
| /* quote from static window */ |
| c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); |
| length=2; |
| goto outputBytes; |
| } else if((code=getDynamicOffset(c, &offset))>=0) { |
| /* define a dynamic window with this character */ |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=3; |
| goto outputBytes; |
| } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && |
| (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
| ) { |
| /* |
| * this character is not compressible (a BMP ideograph or similar); |
| * switch to Unicode mode if this is the last character in the block |
| * or there is at least one more ideograph following immediately |
| */ |
| isSingleByteMode=FALSE; |
| c|=SCU<<16; |
| length=3; |
| goto outputBytes; |
| } else { |
| /* quote Unicode */ |
| c|=SQU<<16; |
| length=3; |
| goto outputBytes; |
| } |
| } |
| |
| /* normal end of conversion: prepare for a new character */ |
| c=0; |
| } |
| } else { |
| if(c!=0 && targetCapacity>0) { |
| goto getTrailUnicode; |
| } |
| |
| /* state machine for Unicode mode */ |
| /* unicodeByteMode: */ |
| while(source<sourceLimit) { |
| if(targetCapacity<=0) { |
| /* target is full */ |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| break; |
| } |
| c=*source++; |
| |
| if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { |
| /* not compressible, write character directly */ |
| if(targetCapacity>=2) { |
| *target++=(uint8_t)(c>>8); |
| *target++=(uint8_t)c; |
| targetCapacity-=2; |
| } else { |
| length=2; |
| goto outputBytes; |
| } |
| } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { |
| /* compress BMP character if the following one is not an uncompressible ideograph */ |
| if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { |
| if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { |
| /* ASCII digit or letter */ |
| isSingleByteMode=TRUE; |
| c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; |
| length=2; |
| goto outputBytes; |
| } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
| /* there is a dynamic window that contains this character, change to it */ |
| isSingleByteMode=TRUE; |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else if((code=getDynamicOffset(c, &offset))>=0) { |
| /* define a dynamic window with this character */ |
| isSingleByteMode=TRUE; |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=3; |
| goto outputBytes; |
| } |
| } |
| |
| /* don't know how to compress this character, just write it directly */ |
| length=2; |
| goto outputBytes; |
| } else if(c<0xe000) { |
| /* c is a surrogate */ |
| if(UTF_IS_SURROGATE_FIRST(c)) { |
| getTrailUnicode: |
| lead=(UChar)c; |
| if(source<sourceLimit) { |
| /* test the following code unit */ |
| trail=*source; |
| if(UTF_IS_SECOND_SURROGATE(trail)) { |
| ++source; |
| c=UTF16_GET_PAIR_VALUE(c, trail); |
| /* convert this surrogate code point */ |
| /* exit this condition tree */ |
| } else { |
| /* this is an unmatched lead code unit (1st surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| } else { |
| /* no more input */ |
| break; |
| } |
| } else { |
| /* this is an unmatched trail code unit (2nd surrogate) */ |
| /* callback(illegal) */ |
| *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
| goto endloop; |
| } |
| |
| /* compress supplementary character */ |
| if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && |
| !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
| ) { |
| /* |
| * there is a dynamic window that contains this character and |
| * the following character is not uncompressible, |
| * change to the window |
| */ |
| isSingleByteMode=TRUE; |
| dynamicWindow=window; |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
| length=2; |
| goto outputBytes; |
| } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ |
| (code=getDynamicOffset(c, &offset))>=0 |
| ) { |
| /* two supplementary characters in (probably) the same window - define an extended one */ |
| isSingleByteMode=TRUE; |
| code-=0x200; |
| dynamicWindow=getNextDynamicWindow(scsu); |
| currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
| useDynamicWindow(scsu, dynamicWindow); |
| c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
| length=4; |
| goto outputBytes; |
| } else { |
| /* don't know how to compress this character, just write it directly */ |
| c=((uint32_t)lead<<16)|trail; |
| length=4; |
| goto outputBytes; |
| } |
| } else /* 0xe000<=c<0xf300 */ { |
| /* quote to avoid SCSU tags */ |
| c|=UQU<<16; |
| length=3; |
| goto outputBytes; |
| } |
| |
| /* normal end of conversion: prepare for a new character */ |
| c=0; |
| } |
| } |
| endloop: |
| |
| /* set the converter state back into UConverter */ |
| scsu->fromUIsSingleByteMode=isSingleByteMode; |
| scsu->fromUDynamicWindow=dynamicWindow; |
| |
| cnv->fromUChar32=c; |
| |
| /* write back the updated pointers */ |
| pArgs->source=source; |
| pArgs->target=(char *)target; |
| return; |
| |
| outputBytes: |
| /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ |
| /* from the first if in the loop we know that targetCapacity>0 */ |
| if(length<=targetCapacity) { |
| switch(length) { |
| /* each branch falls through to the next one */ |
| case 4: |
| *target++=(uint8_t)(c>>24); |
| case 3: |
| *target++=(uint8_t)(c>>16); |
| case 2: |
| *target++=(uint8_t)(c>>8); |
| case 1: |
| *target++=(uint8_t)c; |
| default: |
| /* will never occur */ |
| break; |
| } |
| targetCapacity-=length; |
| |
| /* normal end of conversion: prepare for a new character */ |
| c=0; |
| goto loop; |
| } else { |
| uint8_t *p; |
| |
| /* |
| * We actually do this backwards here: |
| * In order to save an intermediate variable, we output |
| * first to the overflow buffer what does not fit into the |
| * regular target. |
| */ |
| /* we know that 0<=targetCapacity<length<=4 */ |
| /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ |
| length-=targetCapacity; |
| p=(uint8_t *)cnv->charErrorBuffer; |
| switch(length) { |
| /* each branch falls through to the next one */ |
| case 4: |
| *p++=(uint8_t)(c>>24); |
| case 3: |
| *p++=(uint8_t)(c>>16); |
| case 2: |
| *p++=(uint8_t)(c>>8); |
| case 1: |
| *p=(uint8_t)c; |
| default: |
| /* will never occur */ |
| break; |
| } |
| cnv->charErrorBufferLength=(int8_t)length; |
| |
| /* now output what fits into the regular target */ |
| c>>=8*length; /* length was reduced by targetCapacity */ |
| switch(targetCapacity) { |
| /* each branch falls through to the next one */ |
| case 3: |
| *target++=(uint8_t)(c>>16); |
| case 2: |
| *target++=(uint8_t)(c>>8); |
| case 1: |
| *target++=(uint8_t)c; |
| default: |
| break; |
| } |
| |
| /* target overflow */ |
| targetCapacity=0; |
| *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| c=0; |
| goto endloop; |
| } |
| } |
| |
| /* miscellaneous ------------------------------------------------------------ */ |
| |
| static const char * |
| _SCSUGetName(const UConverter *cnv) { |
| SCSUData *scsu=(SCSUData *)cnv->extraInfo; |
| |
| switch(scsu->locale) { |
| case l_ja: |
| return "SCSU,locale=ja"; |
| default: |
| return "SCSU"; |
| } |
| } |
| |
| /* structure for SafeClone calculations */ |
| struct cloneSCSUStruct |
| { |
| UConverter cnv; |
| SCSUData mydata; |
| }; |
| |
| static UConverter * |
| _SCSUSafeClone(const UConverter *cnv, |
| void *stackBuffer, |
| int32_t *pBufferSize, |
| UErrorCode *status) |
| { |
| struct cloneSCSUStruct * localClone; |
| int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); |
| |
| if (U_FAILURE(*status)){ |
| return 0; |
| } |
| |
| if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ |
| *pBufferSize = bufferSizeNeeded; |
| return 0; |
| } |
| |
| localClone = (struct cloneSCSUStruct *)stackBuffer; |
| /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
| |
| uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); |
| localClone->cnv.extraInfo = &localClone->mydata; |
| localClone->cnv.isExtraLocal = TRUE; |
| |
| return &localClone->cnv; |
| } |
| |
| |
| static const UConverterImpl _SCSUImpl={ |
| UCNV_SCSU, |
| |
| NULL, |
| NULL, |
| |
| _SCSUOpen, |
| _SCSUClose, |
| _SCSUReset, |
| |
| _SCSUToUnicode, |
| _SCSUToUnicodeWithOffsets, |
| _SCSUFromUnicode, |
| _SCSUFromUnicodeWithOffsets, |
| NULL, |
| |
| NULL, |
| _SCSUGetName, |
| NULL, |
| _SCSUSafeClone, |
| ucnv_getCompleteUnicodeSet |
| }; |
| |
| static const UConverterStaticData _SCSUStaticData={ |
| sizeof(UConverterStaticData), |
| "SCSU", |
| 1212, /* CCSID for SCSU */ |
| UCNV_IBM, UCNV_SCSU, |
| 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ |
| /* |
| * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode |
| * substitution string. |
| */ |
| { 0x0e, 0xff, 0xfd, 0 }, 3, |
| FALSE, FALSE, |
| 0, |
| 0, |
| { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
| }; |
| |
| const UConverterSharedData _SCSUData={ |
| sizeof(UConverterSharedData), ~((uint32_t)0), |
| NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl, |
| 0 |
| }; |
| |
| #endif |