| /* This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| #include "seccomon.h" |
| #include "secport.h" |
| |
| /* |
| * From RFC 2044: |
| * |
| * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
| * 0000 0000-0000 007F 0xxxxxxx |
| * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
| * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
| * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
| * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
| * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx |
| */ |
| |
| /* |
| * From http://www.imc.org/draft-hoffman-utf16 |
| * |
| * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 |
| * |
| * U' = yyyyyyyyyyxxxxxxxxxx |
| * W1 = 110110yyyyyyyyyy |
| * W2 = 110111xxxxxxxxxx |
| */ |
| |
| /* |
| * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit |
| * character values. If you wish to use this code for working with |
| * host byte order values, define the following: |
| * |
| * #if IS_BIG_ENDIAN |
| * #define L_0 0 |
| * #define L_1 1 |
| * #define L_2 2 |
| * #define L_3 3 |
| * #define H_0 0 |
| * #define H_1 1 |
| * #else / * not everyone has elif * / |
| * #if IS_LITTLE_ENDIAN |
| * #define L_0 3 |
| * #define L_1 2 |
| * #define L_2 1 |
| * #define L_3 0 |
| * #define H_0 1 |
| * #define H_1 0 |
| * #else |
| * #error "PDP and NUXI support deferred" |
| * #endif / * IS_LITTLE_ENDIAN * / |
| * #endif / * IS_BIG_ENDIAN * / |
| */ |
| |
| #define L_0 0 /* offset, within a 4-byte UCS-4 unit, of the most significant byte */ |
| #define L_1 1 /* offset of the second most significant byte of a UCS-4 unit */ |
| #define L_2 2 /* offset of the third most significant byte of a UCS-4 unit */ |
| #define L_3 3 /* offset of the least significant byte of a UCS-4 unit */ |
| #define H_0 0 /* offset, within a 2-byte UCS-2/UTF-16 unit, of the high byte */ |
| #define H_1 1 /* offset of the low byte of a UCS-2/UTF-16 unit */ |
| |
| #define BAD_UTF8 ((PRUint32)-1) /* sentinel returned by sec_port_read_utf8 when decoding fails */ |
| |
| /* |
| * Parse a single UTF-8 character per the spec. in section 3.9 (D36) |
| * of Unicode 4.0.0. |
| * |
| * Parameters: |
| * index - Points to the byte offset in inBuf of character to read. On success, |
| * updated to the offset of the following character. |
| * inBuf - Input buffer, UTF-8 encoded |
| * inbufLen - Length of input buffer, in bytes. |
| * |
| * Returns: |
| * Success - The UCS4 encoded character |
| * Failure - BAD_UTF8 |
| */ |
| static PRUint32 |
| sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen) |
| { |
| PRUint32 result; |
| unsigned int i = *index; |
| int bytes_left; |
| PRUint32 min_value; |
| |
| PORT_Assert(i < inBufLen); |
| |
| if ((inBuf[i] & 0x80) == 0x00) { |
| result = inBuf[i++]; |
| bytes_left = 0; |
| min_value = 0; |
| } else if ((inBuf[i] & 0xE0) == 0xC0) { |
| result = inBuf[i++] & 0x1F; |
| bytes_left = 1; |
| min_value = 0x80; |
| } else if ((inBuf[i] & 0xF0) == 0xE0) { |
| result = inBuf[i++] & 0x0F; |
| bytes_left = 2; |
| min_value = 0x800; |
| } else if ((inBuf[i] & 0xF8) == 0xF0) { |
| result = inBuf[i++] & 0x07; |
| bytes_left = 3; |
| min_value = 0x10000; |
| } else { |
| return BAD_UTF8; |
| } |
| |
| while (bytes_left--) { |
| if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) |
| return BAD_UTF8; |
| result = (result << 6) | (inBuf[i++] & 0x3F); |
| } |
| |
| /* Check for overlong sequences, surrogates, and outside unicode range */ |
| if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) { |
| return BAD_UTF8; |
| } |
| |
| *index = i; |
| return result; |
| } |
| |
| PRBool |
| sec_port_ucs4_utf8_conversion_function( |
| PRBool toUnicode, |
| unsigned char *inBuf, |
| unsigned int inBufLen, |
| unsigned char *outBuf, |
| unsigned int maxOutBufLen, |
| unsigned int *outBufLen) |
| { |
| PORT_Assert((unsigned int *)NULL != outBufLen); |
| |
| if (toUnicode) { |
| unsigned int i, len = 0; |
| |
| for (i = 0; i < inBufLen;) { |
| if ((inBuf[i] & 0x80) == 0x00) |
| i += 1; |
| else if ((inBuf[i] & 0xE0) == 0xC0) |
| i += 2; |
| else if ((inBuf[i] & 0xF0) == 0xE0) |
| i += 3; |
| else if ((inBuf[i] & 0xF8) == 0xF0) |
| i += 4; |
| else |
| return PR_FALSE; |
| |
| len += 4; |
| } |
| |
| if (len > maxOutBufLen) { |
| *outBufLen = len; |
| return PR_FALSE; |
| } |
| |
| len = 0; |
| |
| for (i = 0; i < inBufLen;) { |
| PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); |
| |
| if (ucs4 == BAD_UTF8) |
| return PR_FALSE; |
| |
| outBuf[len + L_0] = 0x00; |
| outBuf[len + L_1] = (unsigned char)(ucs4 >> 16); |
| outBuf[len + L_2] = (unsigned char)(ucs4 >> 8); |
| outBuf[len + L_3] = (unsigned char)ucs4; |
| |
| len += 4; |
| } |
| |
| *outBufLen = len; |
| return PR_TRUE; |
| } else { |
| unsigned int i, len = 0; |
| PORT_Assert((inBufLen % 4) == 0); |
| if ((inBufLen % 4) != 0) { |
| *outBufLen = 0; |
| return PR_FALSE; |
| } |
| |
| for (i = 0; i < inBufLen; i += 4) { |
| if ((inBuf[i + L_0] > 0x00) || (inBuf[i + L_1] > 0x10)) { |
| *outBufLen = 0; |
| return PR_FALSE; |
| } else if (inBuf[i + L_1] >= 0x01) |
| len += 4; |
| else if (inBuf[i + L_2] >= 0x08) |
| len += 3; |
| else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) |
| len += 2; |
| else |
| len += 1; |
| } |
| |
| if (len > maxOutBufLen) { |
| *outBufLen = len; |
| return PR_FALSE; |
| } |
| |
| len = 0; |
| |
| for (i = 0; i < inBufLen; i += 4) { |
| if (inBuf[i + L_1] >= 0x01) { |
| /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
| /* 00000000 000abcde fghijklm nopqrstu -> |
| 11110abc 10defghi 10jklmno 10pqrstu */ |
| |
| outBuf[len + 0] = 0xF0 | ((inBuf[i + L_1] & 0x1C) >> 2); |
| outBuf[len + 1] = 0x80 | ((inBuf[i + L_1] & 0x03) << 4) | ((inBuf[i + L_2] & 0xF0) >> 4); |
| outBuf[len + 2] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
| outBuf[len + 3] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
| |
| len += 4; |
| } else if (inBuf[i + L_2] >= 0x08) { |
| /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ |
| /* 00000000 00000000 abcdefgh ijklmnop -> |
| 1110abcd 10efghij 10klmnop */ |
| |
| outBuf[len + 0] = 0xE0 | ((inBuf[i + L_2] & 0xF0) >> 4); |
| outBuf[len + 1] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
| outBuf[len + 2] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
| |
| len += 3; |
| } else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) { |
| /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ |
| /* 00000000 00000000 00000abc defghijk -> |
| 110abcde 10fghijk */ |
| |
| outBuf[len + 0] = 0xC0 | ((inBuf[i + L_2] & 0x07) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
| outBuf[len + 1] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
| |
| len += 2; |
| } else { |
| /* 0000 0000-0000 007F -> 0xxxxxx */ |
| /* 00000000 00000000 00000000 0abcdefg -> |
| 0abcdefg */ |
| |
| outBuf[len + 0] = (inBuf[i + L_3] & 0x7F); |
| |
| len += 1; |
| } |
| } |
| |
| *outBufLen = len; |
| return PR_TRUE; |
| } |
| } |
| |
| PRBool |
| sec_port_ucs2_utf8_conversion_function( |
| PRBool toUnicode, |
| unsigned char *inBuf, |
| unsigned int inBufLen, |
| unsigned char *outBuf, |
| unsigned int maxOutBufLen, |
| unsigned int *outBufLen) |
| { |
| PORT_Assert((unsigned int *)NULL != outBufLen); |
| |
| if (toUnicode) { |
| unsigned int i, len = 0; |
| |
| for (i = 0; i < inBufLen;) { |
| if ((inBuf[i] & 0x80) == 0x00) { |
| i += 1; |
| len += 2; |
| } else if ((inBuf[i] & 0xE0) == 0xC0) { |
| i += 2; |
| len += 2; |
| } else if ((inBuf[i] & 0xF0) == 0xE0) { |
| i += 3; |
| len += 2; |
| } else if ((inBuf[i] & 0xF8) == 0xF0) { |
| i += 4; |
| len += 4; |
| } else |
| return PR_FALSE; |
| } |
| |
| if (len > maxOutBufLen) { |
| *outBufLen = len; |
| return PR_FALSE; |
| } |
| |
| len = 0; |
| |
| for (i = 0; i < inBufLen;) { |
| PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); |
| |
| if (ucs4 == BAD_UTF8) |
| return PR_FALSE; |
| |
| if (ucs4 < 0x10000) { |
| outBuf[len + H_0] = (unsigned char)(ucs4 >> 8); |
| outBuf[len + H_1] = (unsigned char)ucs4; |
| len += 2; |
| } else { |
| ucs4 -= 0x10000; |
| outBuf[len + 0 + H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); |
| outBuf[len + 0 + H_1] = (unsigned char)(ucs4 >> 10); |
| outBuf[len + 2 + H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); |
| outBuf[len + 2 + H_1] = (unsigned char)ucs4; |
| len += 4; |
| } |
| } |
| |
| *outBufLen = len; |
| return PR_TRUE; |
| } else { |
| unsigned int i, len = 0; |
| PORT_Assert((inBufLen % 2) == 0); |
| if ((inBufLen % 2) != 0) { |
| *outBufLen = 0; |
| return PR_FALSE; |
| } |
| |
| for (i = 0; i < inBufLen; i += 2) { |
| if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) |
| len += 1; |
| else if (inBuf[i + H_0] < 0x08) |
| len += 2; |
| else if (((inBuf[i + H_0] & 0xFC) == 0xD8)) { |
| if (((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)) { |
| i += 2; |
| len += 4; |
| } else { |
| return PR_FALSE; |
| } |
| } else if ((inBuf[i + H_0] & 0xFC) == 0xDC) { |
| return PR_FALSE; |
| } else { |
| len += 3; |
| } |
| } |
| |
| if (len > maxOutBufLen) { |
| *outBufLen = len; |
| return PR_FALSE; |
| } |
| |
| len = 0; |
| |
| for (i = 0; i < inBufLen; i += 2) { |
| if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) { |
| /* 0000-007F -> 0xxxxxx */ |
| /* 00000000 0abcdefg -> 0abcdefg */ |
| |
| outBuf[len] = inBuf[i + H_1] & 0x7F; |
| |
| len += 1; |
| } else if (inBuf[i + H_0] < 0x08) { |
| /* 0080-07FF -> 110xxxxx 10xxxxxx */ |
| /* 00000abc defghijk -> 110abcde 10fghijk */ |
| |
| outBuf[len + 0] = 0xC0 | ((inBuf[i + H_0] & 0x07) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
| outBuf[len + 1] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0); |
| |
| len += 2; |
| } else if ((inBuf[i + H_0] & 0xFC) == 0xD8) { |
| int abcde, BCDE; |
| |
| PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)); |
| |
| /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
| /* 110110BC DEfghijk 110111lm nopqrstu -> |
| { Let abcde = BCDE + 1 } |
| 11110abc 10defghi 10jklmno 10pqrstu */ |
| |
| BCDE = ((inBuf[i + H_0] & 0x03) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
| abcde = BCDE + 1; |
| |
| outBuf[len + 0] = 0xF0 | ((abcde & 0x1C) >> 2); |
| outBuf[len + 1] = 0x80 | ((abcde & 0x03) << 4) | ((inBuf[i + 0 + H_1] & 0x3C) >> 2); |
| outBuf[len + 2] = 0x80 | ((inBuf[i + 0 + H_1] & 0x03) << 4) | ((inBuf[i + 2 + H_0] & 0x03) << 2) | ((inBuf[i + 2 + H_1] & 0xC0) >> 6); |
| outBuf[len + 3] = 0x80 | ((inBuf[i + 2 + H_1] & 0x3F) >> 0); |
| |
| i += 2; |
| len += 4; |
| } else { |
| /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ |
| /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ |
| |
| outBuf[len + 0] = 0xE0 | ((inBuf[i + H_0] & 0xF0) >> 4); |
| outBuf[len + 1] = 0x80 | ((inBuf[i + H_0] & 0x0F) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
| outBuf[len + 2] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0); |
| |
| len += 3; |
| } |
| } |
| |
| *outBufLen = len; |
| return PR_TRUE; |
| } |
| } |
| |
| PRBool |
| sec_port_iso88591_utf8_conversion_function( |
| const unsigned char *inBuf, |
| unsigned int inBufLen, |
| unsigned char *outBuf, |
| unsigned int maxOutBufLen, |
| unsigned int *outBufLen) |
| { |
| unsigned int i, len = 0; |
| |
| PORT_Assert((unsigned int *)NULL != outBufLen); |
| |
| for (i = 0; i < inBufLen; i++) { |
| if ((inBuf[i] & 0x80) == 0x00) |
| len += 1; |
| else |
| len += 2; |
| } |
| |
| if (len > maxOutBufLen) { |
| *outBufLen = len; |
| return PR_FALSE; |
| } |
| |
| len = 0; |
| |
| for (i = 0; i < inBufLen; i++) { |
| if ((inBuf[i] & 0x80) == 0x00) { |
| /* 00-7F -> 0xxxxxxx */ |
| /* 0abcdefg -> 0abcdefg */ |
| |
| outBuf[len] = inBuf[i]; |
| len += 1; |
| } else { |
| /* 80-FF <- 110xxxxx 10xxxxxx */ |
| /* 00000000 abcdefgh -> 110000ab 10cdefgh */ |
| |
| outBuf[len + 0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); |
| outBuf[len + 1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); |
| |
| len += 2; |
| } |
| } |
| |
| *outBufLen = len; |
| return PR_TRUE; |
| } |