| /* This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| #ifdef FREEBL_NO_DEPEND |
| #include "stubs.h" |
| #endif |
| |
| #include "prinit.h" |
| #include "prenv.h" |
| #include "prerr.h" |
| #include "secerr.h" |
| |
| #include "prtypes.h" |
| #include "blapi.h" |
| #include "rijndael.h" |
| |
| #include "cts.h" |
| #include "ctr.h" |
| #include "gcm.h" |
| #include "mpi.h" |
| |
| #ifdef USE_HW_AES |
| #include "intel-aes.h" |
| #endif |
| #ifdef INTEL_GCM |
| #include "intel-gcm.h" |
| #endif /* INTEL_GCM */ |
| |
| /* Forward declarations */ |
| void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, |
| unsigned int Nk); |
| void rijndael_native_encryptBlock(AESContext *cx, |
| unsigned char *output, |
| const unsigned char *input); |
| |
| /* Stub definitions for the above rijndael_native_* functions, which |
| * shouldn't be used unless NSS_X86_OR_X64 is defined */ |
| #ifndef NSS_X86_OR_X64 |
| void |
| rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, |
| unsigned int Nk) |
| { |
| PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); |
| PORT_Assert(0); |
| } |
| |
| void |
| rijndael_native_encryptBlock(AESContext *cx, |
| unsigned char *output, |
| const unsigned char *input) |
| { |
| PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); |
| PORT_Assert(0); |
| } |
| #endif /* NSS_X86_OR_X64 */ |
| |
| /* |
| * There are currently three ways to build this code, varying in performance |
| * and code size. |
| * |
| * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab |
| * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table |
| * values "on-the-fly", using gfm |
| * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros |
| * |
| * The default is RIJNDAEL_INCLUDE_TABLES. |
| */ |
| |
/*
 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..3],
 * T**-1[0..3], IMXC[0..3]
 * When building anything else, includes S, S**-1, Rcon
 */
| #include "rijndael32.tab" |
| |
| #if defined(RIJNDAEL_INCLUDE_TABLES) |
/*
 * RIJNDAEL_INCLUDE_TABLES
 *
 * Every T-table, inverse T-table and inverse-MixColumn value is a direct
 * lookup into the corresponding stored table.
 */
#define T0(n) _T0[n]
#define T1(n) _T1[n]
#define T2(n) _T2[n]
#define T3(n) _T3[n]
#define TInv0(n) _TInv0[n]
#define TInv1(n) _TInv1[n]
#define TInv2(n) _TInv2[n]
#define TInv3(n) _TInv3[n]
#define IMXC0(n) _IMXC0[n]
#define IMXC1(n) _IMXC1[n]
#define IMXC2(n) _IMXC2[n]
#define IMXC3(n) _IMXC3[n]
/* S(b) is taken from the low byte of a T-table entry; which table is used
 * depends on the byte order. */
#ifdef IS_LITTLE_ENDIAN
#define SBOX(n) ((PRUint8)_T3[n])
#else
#define SBOX(n) ((PRUint8)_T1[n])
#endif
#define SINV(n) (_SInv[n])
| |
| #else /* not RIJNDAEL_INCLUDE_TABLES */ |
| |
/*
 * Code for generating T-table values.
 *
 * WORD4 packs four byte values into one 32-bit word.  The first argument
 * goes into the least significant byte on little-endian builds and into
 * the most significant byte otherwise.
 */

#ifdef IS_LITTLE_ENDIAN
#define WORD4(p0, p1, p2, p3)  \
    (((PRUint32)p0) |          \
     (((PRUint32)p1) << 8) |   \
     (((PRUint32)p2) << 16) |  \
     (((PRUint32)p3) << 24))
#else
#define WORD4(p0, p1, p2, p3)  \
    (((PRUint32)p3) |          \
     (((PRUint32)p2) << 8) |   \
     (((PRUint32)p1) << 16) |  \
     (((PRUint32)p0) << 24))
#endif
| |
/*
 * Define the S and S**-1 tables (both have been stored)
 */
#define SBOX(b) (_S[b])
#define SINV(b) (_SInv[b])

/*
 * The function xtime, used for Galois field multiplication: shift left by
 * one bit and, when bit 7 of the input was set, XOR in 0x1b.  The result is
 * NOT masked to 8 bits here; callers either mask it (GFM02) or store it in
 * an 8-bit variable.
 *
 * The argument is fully parenthesized so that compound expressions such as
 * XTIME(x | y) are evaluated as a unit.  It is still evaluated more than
 * once, so it must be free of side effects.
 */
#define XTIME(a) \
    (((a) & 0x80) ? (((a) << 1) ^ 0x1b) : ((a) << 1))
| |
| /* Choose GFM method (macros or function) */ |
| #if defined(RIJNDAEL_GENERATE_VALUES_MACRO) |
| |
/*
 * Galois field GF(2**8) multipliers, in macro form.
 * Each multiplier is built from repeated XTIME steps (x02, x04, x08)
 * combined with XOR.
 */
#define GFM01(x) \
    (x) /* x * 01 = x, the identity */
#define GFM02(x) \
    (XTIME(x) & 0xff) /* x * 02 = xtime(x) */
#define GFM04(x) \
    (GFM02(GFM02(x))) /* x * 04 = xtime**2(x) */
#define GFM08(x) \
    (GFM02(GFM04(x))) /* x * 08 = xtime**3(x) */
#define GFM03(x) \
    (GFM01(x) ^ GFM02(x)) /* x * 03 = x * (01 + 02) */
#define GFM09(x) \
    (GFM01(x) ^ GFM08(x)) /* x * 09 = x * (01 + 08) */
#define GFM0B(x) \
    (GFM01(x) ^ GFM02(x) ^ GFM08(x)) /* x * 0B = x * (01 + 02 + 08) */
#define GFM0D(x) \
    (GFM01(x) ^ GFM04(x) ^ GFM08(x)) /* x * 0D = x * (01 + 04 + 08) */
#define GFM0E(x) \
    (GFM02(x) ^ GFM04(x) ^ GFM08(x)) /* x * 0E = x * (02 + 04 + 08) */
| |
| #else /* RIJNDAEL_GENERATE_VALUES */ |
| |
| /* GF_MULTIPLY |
| * |
| * multiply two bytes represented in GF(2**8), mod (x**4 + 1) |
| */ |
| PRUint8 |
| gfm(PRUint8 a, PRUint8 b) |
| { |
| PRUint8 res = 0; |
| while (b > 0) { |
| res = (b & 0x01) ? res ^ a : res; |
| a = XTIME(a); |
| b >>= 1; |
| } |
| return res; |
| } |
| |
/* Multipliers for the function-based build: 01 and 02 stay inline, the
 * remaining constants are delegated to gfm(). */
#define GFM01(x) \
    (x) /* x * 01 = x, the identity */
#define GFM02(x) \
    (XTIME(x) & 0xff) /* x * 02 = xtime(x) */
#define GFM03(x) \
    (gfm(x, 0x03)) /* x * 03 */
#define GFM09(x) \
    (gfm(x, 0x09)) /* x * 09 */
#define GFM0B(x) \
    (gfm(x, 0x0B)) /* x * 0B */
#define GFM0D(x) \
    (gfm(x, 0x0D)) /* x * 0D */
#define GFM0E(x) \
    (gfm(x, 0x0E)) /* x * 0E */
| |
| #endif /* choosing GFM function */ |
| |
/*
 * The T-tables: S(n) scaled by the multipliers 02, 01, 01, 03, with the
 * four products rotated one position per table.
 */
#define G_T0(n) \
    (WORD4(GFM02(SBOX(n)), GFM01(SBOX(n)), GFM01(SBOX(n)), GFM03(SBOX(n))))
#define G_T1(n) \
    (WORD4(GFM03(SBOX(n)), GFM02(SBOX(n)), GFM01(SBOX(n)), GFM01(SBOX(n))))
#define G_T2(n) \
    (WORD4(GFM01(SBOX(n)), GFM03(SBOX(n)), GFM02(SBOX(n)), GFM01(SBOX(n))))
#define G_T3(n) \
    (WORD4(GFM01(SBOX(n)), GFM01(SBOX(n)), GFM03(SBOX(n)), GFM02(SBOX(n))))

/*
 * The inverse T-tables: S**-1(n) scaled by 0E, 09, 0D, 0B, rotated one
 * position per table.
 */
#define G_TInv0(n) \
    (WORD4(GFM0E(SINV(n)), GFM09(SINV(n)), GFM0D(SINV(n)), GFM0B(SINV(n))))
#define G_TInv1(n) \
    (WORD4(GFM0B(SINV(n)), GFM0E(SINV(n)), GFM09(SINV(n)), GFM0D(SINV(n))))
#define G_TInv2(n) \
    (WORD4(GFM0D(SINV(n)), GFM0B(SINV(n)), GFM0E(SINV(n)), GFM09(SINV(n))))
#define G_TInv3(n) \
    (WORD4(GFM09(SINV(n)), GFM0D(SINV(n)), GFM0B(SINV(n)), GFM0E(SINV(n))))

/*
 * The inverse mix column tables: same multipliers as the inverse T-tables,
 * applied to n itself rather than to S**-1(n).
 */
#define G_IMXC0(n) \
    (WORD4(GFM0E(n), GFM09(n), GFM0D(n), GFM0B(n)))
#define G_IMXC1(n) \
    (WORD4(GFM0B(n), GFM0E(n), GFM09(n), GFM0D(n)))
#define G_IMXC2(n) \
    (WORD4(GFM0D(n), GFM0B(n), GFM0E(n), GFM09(n)))
#define G_IMXC3(n) \
    (WORD4(GFM09(n), GFM0D(n), GFM0B(n), GFM0E(n)))
| |
| /* Now choose the T-table indexing method */ |
| #if defined(RIJNDAEL_GENERATE_VALUES) |
| /* generate values for the tables with a function*/ |
| static PRUint32 |
| gen_TInvXi(PRUint8 tx, PRUint8 i) |
| { |
| PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; |
| si01 = SINV(i); |
| si02 = XTIME(si01); |
| si04 = XTIME(si02); |
| si08 = XTIME(si04); |
| si03 = si02 ^ si01; |
| si09 = si08 ^ si01; |
| si0B = si08 ^ si03; |
| si0D = si09 ^ si04; |
| si0E = si08 ^ si04 ^ si02; |
| switch (tx) { |
| case 0: |
| return WORD4(si0E, si09, si0D, si0B); |
| case 1: |
| return WORD4(si0B, si0E, si09, si0D); |
| case 2: |
| return WORD4(si0D, si0B, si0E, si09); |
| case 3: |
| return WORD4(si09, si0D, si0B, si0E); |
| } |
| return -1; |
| } |
/* Forward tables and IMXC come from the generator macros; the inverse
 * T-tables go through gen_TInvXi(). */
#define T0(n) G_T0(n)
#define T1(n) G_T1(n)
#define T2(n) G_T2(n)
#define T3(n) G_T3(n)
#define TInv0(n) gen_TInvXi(0, n)
#define TInv1(n) gen_TInvXi(1, n)
#define TInv2(n) gen_TInvXi(2, n)
#define TInv3(n) gen_TInvXi(3, n)
#define IMXC0(n) G_IMXC0(n)
#define IMXC1(n) G_IMXC1(n)
#define IMXC2(n) G_IMXC2(n)
#define IMXC3(n) G_IMXC3(n)
| #else /* RIJNDAEL_GENERATE_VALUES_MACRO */ |
/* All table values are produced by the generator macros. */
#define T0(n) G_T0(n)
#define T1(n) G_T1(n)
#define T2(n) G_T2(n)
#define T3(n) G_T3(n)
#define TInv0(n) G_TInv0(n)
#define TInv1(n) G_TInv1(n)
#define TInv2(n) G_TInv2(n)
#define TInv3(n) G_TInv3(n)
#define IMXC0(n) G_IMXC0(n)
#define IMXC1(n) G_IMXC1(n)
#define IMXC2(n) G_IMXC2(n)
#define IMXC3(n) G_IMXC3(n)
| #endif /* choose T-table indexing method */ |
| |
| #endif /* not RIJNDAEL_INCLUDE_TABLES */ |
| |
| /************************************************************************** |
| * |
| * Stuff related to the Rijndael key schedule |
| * |
| *************************************************************************/ |
| |
/* SUBBYTE: apply the S-box to each of the four bytes of a 32-bit word.
 * The argument is parenthesized at every use so that compound expressions
 * are handled correctly; it is evaluated several times and must be free of
 * side effects. */
#define SUBBYTE(w)                                    \
    ((((PRUint32)SBOX(((w) >> 24) & 0xff)) << 24) |   \
     (((PRUint32)SBOX(((w) >> 16) & 0xff)) << 16) |   \
     (((PRUint32)SBOX(((w) >> 8) & 0xff)) << 8) |     \
     (((PRUint32)SBOX((w) & 0xff))))

/* ROTBYTE: rotate a 32-bit word by one byte.  The direction depends on the
 * byte order.  The argument is expected to be a 32-bit unsigned value; it is
 * parenthesized at every use and evaluated twice. */
#ifdef IS_LITTLE_ENDIAN
#define ROTBYTE(b) \
    (((b) >> 8) | ((b) << 24))
#else
#define ROTBYTE(b) \
    (((b) << 8) | ((b) >> 24))
#endif
| |
| /* rijndael_key_expansion7 |
| * |
| * Generate the expanded key from the key input by the user. |
| * XXX |
| * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte |
| * transformation is done periodically. The period is every 4 bytes, and |
| * since 7%4 != 0 this happens at different times for each key word (unlike |
| * Nk == 8 where it happens twice in every key word, in the same positions). |
| * For now, I'm implementing this case "dumbly", w/o any unrolling. |
| */ |
| static void |
| rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) |
| { |
| unsigned int i; |
| PRUint32 *W; |
| PRUint32 *pW; |
| PRUint32 tmp; |
| W = cx->expandedKey; |
| /* 1. the first Nk words contain the cipher key */ |
| memcpy(W, key, Nk * 4); |
| i = Nk; |
| /* 2. loop until full expanded key is obtained */ |
| pW = W + i - 1; |
| for (; i < cx->Nb * (cx->Nr + 1); ++i) { |
| tmp = *pW++; |
| if (i % Nk == 0) |
| tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; |
| else if (i % Nk == 4) |
| tmp = SUBBYTE(tmp); |
| *pW = W[i - Nk] ^ tmp; |
| } |
| } |
| |
| /* rijndael_key_expansion |
| * |
| * Generate the expanded key from the key input by the user. |
| */ |
| static void |
| rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) |
| { |
| unsigned int i; |
| PRUint32 *W; |
| PRUint32 *pW; |
| PRUint32 tmp; |
| unsigned int round_key_words = cx->Nb * (cx->Nr + 1); |
| if (Nk == 7) { |
| rijndael_key_expansion7(cx, key, Nk); |
| return; |
| } |
| W = cx->expandedKey; |
| /* The first Nk words contain the input cipher key */ |
| memcpy(W, key, Nk * 4); |
| i = Nk; |
| pW = W + i - 1; |
| /* Loop over all sets of Nk words, except the last */ |
| while (i < round_key_words - Nk) { |
| tmp = *pW++; |
| tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; |
| *pW = W[i++ - Nk] ^ tmp; |
| tmp = *pW++; |
| *pW = W[i++ - Nk] ^ tmp; |
| tmp = *pW++; |
| *pW = W[i++ - Nk] ^ tmp; |
| tmp = *pW++; |
| *pW = W[i++ - Nk] ^ tmp; |
| if (Nk == 4) |
| continue; |
| switch (Nk) { |
| case 8: |
| tmp = *pW++; |
| tmp = SUBBYTE(tmp); |
| *pW = W[i++ - Nk] ^ tmp; |
| case 7: |
| tmp = *pW++; |
| *pW = W[i++ - Nk] ^ tmp; |
| case 6: |
| tmp = *pW++; |
| *pW = W[i++ - Nk] ^ tmp; |
| case 5: |
| tmp = *pW++; |
| *pW = W[i++ - Nk] ^ tmp; |
| } |
| } |
| /* Generate the last word */ |
| tmp = *pW++; |
| tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; |
| *pW = W[i++ - Nk] ^ tmp; |
| /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However, |
| * since the above loop generated all but the last Nk key words, there |
| * is no more need for the SubByte transformation. |
| */ |
| if (Nk < 8) { |
| for (; i < round_key_words; ++i) { |
| tmp = *pW++; |
| *pW = W[i - Nk] ^ tmp; |
| } |
| } else { |
| /* except in the case when Nk == 8. Then one more SubByte may have |
| * to be performed, at i % Nk == 4. |
| */ |
| for (; i < round_key_words; ++i) { |
| tmp = *pW++; |
| if (i % Nk == 4) |
| tmp = SUBBYTE(tmp); |
| *pW = W[i - Nk] ^ tmp; |
| } |
| } |
| } |
| |
| /* rijndael_invkey_expansion |
| * |
| * Generate the expanded key for the inverse cipher from the key input by |
| * the user. |
| */ |
| static void |
| rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) |
| { |
| unsigned int r; |
| PRUint32 *roundkeyw; |
| PRUint8 *b; |
| int Nb = cx->Nb; |
| /* begins like usual key expansion ... */ |
| rijndael_key_expansion(cx, key, Nk); |
| /* ... but has the additional step of InvMixColumn, |
| * excepting the first and last round keys. |
| */ |
| roundkeyw = cx->expandedKey + cx->Nb; |
| for (r = 1; r < cx->Nr; ++r) { |
| /* each key word, roundkeyw, represents a column in the key |
| * matrix. Each column is multiplied by the InvMixColumn matrix. |
| * [ 0E 0B 0D 09 ] [ b0 ] |
| * [ 09 0E 0B 0D ] * [ b1 ] |
| * [ 0D 09 0E 0B ] [ b2 ] |
| * [ 0B 0D 09 0E ] [ b3 ] |
| */ |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); |
| if (Nb <= 4) |
| continue; |
| switch (Nb) { |
| case 8: |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ |
| IMXC2(b[2]) ^ IMXC3(b[3]); |
| case 7: |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ |
| IMXC2(b[2]) ^ IMXC3(b[3]); |
| case 6: |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ |
| IMXC2(b[2]) ^ IMXC3(b[3]); |
| case 5: |
| b = (PRUint8 *)roundkeyw; |
| *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ |
| IMXC2(b[2]) ^ IMXC3(b[3]); |
| } |
| } |
| } |
| |
| /************************************************************************** |
| * |
| * Stuff related to Rijndael encryption/decryption. |
| * |
| *************************************************************************/ |
| |
/* BYTEnWORD(word) keeps byte n of a 32-bit word in place and clears the
 * other three.  Byte 0 is the least significant byte on little-endian
 * builds and the most significant byte otherwise. */
#ifdef IS_LITTLE_ENDIAN
#define BYTE0WORD(word) ((word) & 0xff)
#define BYTE1WORD(word) ((word) & 0xff00)
#define BYTE2WORD(word) ((word) & 0xff0000)
#define BYTE3WORD(word) ((word) & 0xff000000)
#else
#define BYTE0WORD(word) ((word) & 0xff000000)
#define BYTE1WORD(word) ((word) & 0xff0000)
#define BYTE2WORD(word) ((word) & 0xff00)
#define BYTE3WORD(word) ((word) & 0xff)
#endif
| |
| typedef union { |
| PRUint32 w[4]; |
| PRUint8 b[16]; |
| } rijndael_state; |
| |
| #define COLUMN_0(state) state.w[0] |
| #define COLUMN_1(state) state.w[1] |
| #define COLUMN_2(state) state.w[2] |
| #define COLUMN_3(state) state.w[3] |
| |
| #define STATE_BYTE(i) state.b[i] |
| |
| static void NO_SANITIZE_ALIGNMENT |
| rijndael_encryptBlock128(AESContext *cx, |
| unsigned char *output, |
| const unsigned char *input) |
| { |
| unsigned int r; |
| PRUint32 *roundkeyw; |
| rijndael_state state; |
| PRUint32 C0, C1, C2, C3; |
| #if defined(NSS_X86_OR_X64) |
| #define pIn input |
| #define pOut output |
| #else |
| unsigned char *pIn, *pOut; |
| PRUint32 inBuf[4], outBuf[4]; |
| |
| if ((ptrdiff_t)input & 0x3) { |
| memcpy(inBuf, input, sizeof inBuf); |
| pIn = (unsigned char *)inBuf; |
| } else { |
| pIn = (unsigned char *)input; |
| } |
| if ((ptrdiff_t)output & 0x3) { |
| pOut = (unsigned char *)outBuf; |
| } else { |
| pOut = (unsigned char *)output; |
| } |
| #endif |
| roundkeyw = cx->expandedKey; |
| /* Step 1: Add Round Key 0 to initial state */ |
| COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++; |
| COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++; |
| COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++; |
| COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; |
| /* Step 2: Loop over rounds [1..NR-1] */ |
| for (r = 1; r < cx->Nr; ++r) { |
| /* Do ShiftRow, ByteSub, and MixColumn all at once */ |
| C0 = T0(STATE_BYTE(0)) ^ |
| T1(STATE_BYTE(5)) ^ |
| T2(STATE_BYTE(10)) ^ |
| T3(STATE_BYTE(15)); |
| C1 = T0(STATE_BYTE(4)) ^ |
| T1(STATE_BYTE(9)) ^ |
| T2(STATE_BYTE(14)) ^ |
| T3(STATE_BYTE(3)); |
| C2 = T0(STATE_BYTE(8)) ^ |
| T1(STATE_BYTE(13)) ^ |
| T2(STATE_BYTE(2)) ^ |
| T3(STATE_BYTE(7)); |
| C3 = T0(STATE_BYTE(12)) ^ |
| T1(STATE_BYTE(1)) ^ |
| T2(STATE_BYTE(6)) ^ |
| T3(STATE_BYTE(11)); |
| /* Round key addition */ |
| COLUMN_0(state) = C0 ^ *roundkeyw++; |
| COLUMN_1(state) = C1 ^ *roundkeyw++; |
| COLUMN_2(state) = C2 ^ *roundkeyw++; |
| COLUMN_3(state) = C3 ^ *roundkeyw++; |
| } |
| /* Step 3: Do the last round */ |
| /* Final round does not employ MixColumn */ |
| C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | |
| (BYTE1WORD(T3(STATE_BYTE(5)))) | |
| (BYTE2WORD(T0(STATE_BYTE(10)))) | |
| (BYTE3WORD(T1(STATE_BYTE(15))))) ^ |
| *roundkeyw++; |
| C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | |
| (BYTE1WORD(T3(STATE_BYTE(9)))) | |
| (BYTE2WORD(T0(STATE_BYTE(14)))) | |
| (BYTE3WORD(T1(STATE_BYTE(3))))) ^ |
| *roundkeyw++; |
| C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | |
| (BYTE1WORD(T3(STATE_BYTE(13)))) | |
| (BYTE2WORD(T0(STATE_BYTE(2)))) | |
| (BYTE3WORD(T1(STATE_BYTE(7))))) ^ |
| *roundkeyw++; |
| C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | |
| (BYTE1WORD(T3(STATE_BYTE(1)))) | |
| (BYTE2WORD(T0(STATE_BYTE(6)))) | |
| (BYTE3WORD(T1(STATE_BYTE(11))))) ^ |
| *roundkeyw++; |
| *((PRUint32 *)pOut) = C0; |
| *((PRUint32 *)(pOut + 4)) = C1; |
| *((PRUint32 *)(pOut + 8)) = C2; |
| *((PRUint32 *)(pOut + 12)) = C3; |
| #if defined(NSS_X86_OR_X64) |
| #undef pIn |
| #undef pOut |
| #else |
| if ((ptrdiff_t)output & 0x3) { |
| memcpy(output, outBuf, sizeof outBuf); |
| } |
| #endif |
| } |
| |
| static SECStatus NO_SANITIZE_ALIGNMENT |
| rijndael_decryptBlock128(AESContext *cx, |
| unsigned char *output, |
| const unsigned char *input) |
| { |
| int r; |
| PRUint32 *roundkeyw; |
| rijndael_state state; |
| PRUint32 C0, C1, C2, C3; |
| #if defined(NSS_X86_OR_X64) |
| #define pIn input |
| #define pOut output |
| #else |
| unsigned char *pIn, *pOut; |
| PRUint32 inBuf[4], outBuf[4]; |
| |
| if ((ptrdiff_t)input & 0x3) { |
| memcpy(inBuf, input, sizeof inBuf); |
| pIn = (unsigned char *)inBuf; |
| } else { |
| pIn = (unsigned char *)input; |
| } |
| if ((ptrdiff_t)output & 0x3) { |
| pOut = (unsigned char *)outBuf; |
| } else { |
| pOut = (unsigned char *)output; |
| } |
| #endif |
| roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; |
| /* reverse the final key addition */ |
| COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; |
| COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; |
| COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; |
| COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--; |
| /* Loop over rounds in reverse [NR..1] */ |
| for (r = cx->Nr; r > 1; --r) { |
| /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ |
| C0 = TInv0(STATE_BYTE(0)) ^ |
| TInv1(STATE_BYTE(13)) ^ |
| TInv2(STATE_BYTE(10)) ^ |
| TInv3(STATE_BYTE(7)); |
| C1 = TInv0(STATE_BYTE(4)) ^ |
| TInv1(STATE_BYTE(1)) ^ |
| TInv2(STATE_BYTE(14)) ^ |
| TInv3(STATE_BYTE(11)); |
| C2 = TInv0(STATE_BYTE(8)) ^ |
| TInv1(STATE_BYTE(5)) ^ |
| TInv2(STATE_BYTE(2)) ^ |
| TInv3(STATE_BYTE(15)); |
| C3 = TInv0(STATE_BYTE(12)) ^ |
| TInv1(STATE_BYTE(9)) ^ |
| TInv2(STATE_BYTE(6)) ^ |
| TInv3(STATE_BYTE(3)); |
| /* Invert the key addition step */ |
| COLUMN_3(state) = C3 ^ *roundkeyw--; |
| COLUMN_2(state) = C2 ^ *roundkeyw--; |
| COLUMN_1(state) = C1 ^ *roundkeyw--; |
| COLUMN_0(state) = C0 ^ *roundkeyw--; |
| } |
| /* inverse sub */ |
| pOut[0] = SINV(STATE_BYTE(0)); |
| pOut[1] = SINV(STATE_BYTE(13)); |
| pOut[2] = SINV(STATE_BYTE(10)); |
| pOut[3] = SINV(STATE_BYTE(7)); |
| pOut[4] = SINV(STATE_BYTE(4)); |
| pOut[5] = SINV(STATE_BYTE(1)); |
| pOut[6] = SINV(STATE_BYTE(14)); |
| pOut[7] = SINV(STATE_BYTE(11)); |
| pOut[8] = SINV(STATE_BYTE(8)); |
| pOut[9] = SINV(STATE_BYTE(5)); |
| pOut[10] = SINV(STATE_BYTE(2)); |
| pOut[11] = SINV(STATE_BYTE(15)); |
| pOut[12] = SINV(STATE_BYTE(12)); |
| pOut[13] = SINV(STATE_BYTE(9)); |
| pOut[14] = SINV(STATE_BYTE(6)); |
| pOut[15] = SINV(STATE_BYTE(3)); |
| /* final key addition */ |
| *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; |
| *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; |
| *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--; |
| *((PRUint32 *)pOut) ^= *roundkeyw--; |
| #if defined(NSS_X86_OR_X64) |
| #undef pIn |
| #undef pOut |
| #else |
| if ((ptrdiff_t)output & 0x3) { |
| memcpy(output, outBuf, sizeof outBuf); |
| } |
| #endif |
| return SECSuccess; |
| } |
| |
| /************************************************************************** |
| * |
| * Rijndael modes of operation (ECB and CBC) |
| * |
| *************************************************************************/ |
| |
| static SECStatus |
| rijndael_encryptECB(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, unsigned int maxOutputLen, |
| const unsigned char *input, unsigned int inputLen) |
| { |
| AESBlockFunc *encryptor; |
| |
| if (aesni_support()) { |
| /* Use hardware acceleration for normal AES parameters. */ |
| encryptor = &rijndael_native_encryptBlock; |
| } else { |
| encryptor = &rijndael_encryptBlock128; |
| } |
| while (inputLen > 0) { |
| (*encryptor)(cx, output, input); |
| output += AES_BLOCK_SIZE; |
| input += AES_BLOCK_SIZE; |
| inputLen -= AES_BLOCK_SIZE; |
| } |
| return SECSuccess; |
| } |
| |
| static SECStatus |
| rijndael_encryptCBC(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, unsigned int maxOutputLen, |
| const unsigned char *input, unsigned int inputLen) |
| { |
| unsigned int j; |
| unsigned char *lastblock; |
| unsigned char inblock[AES_BLOCK_SIZE * 8]; |
| |
| if (!inputLen) |
| return SECSuccess; |
| lastblock = cx->iv; |
| while (inputLen > 0) { |
| /* XOR with the last block (IV if first block) */ |
| for (j = 0; j < AES_BLOCK_SIZE; ++j) { |
| inblock[j] = input[j] ^ lastblock[j]; |
| } |
| /* encrypt */ |
| rijndael_encryptBlock128(cx, output, inblock); |
| /* move to the next block */ |
| lastblock = output; |
| output += AES_BLOCK_SIZE; |
| input += AES_BLOCK_SIZE; |
| inputLen -= AES_BLOCK_SIZE; |
| } |
| memcpy(cx->iv, lastblock, AES_BLOCK_SIZE); |
| return SECSuccess; |
| } |
| |
| static SECStatus |
| rijndael_decryptECB(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, unsigned int maxOutputLen, |
| const unsigned char *input, unsigned int inputLen) |
| { |
| while (inputLen > 0) { |
| if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) { |
| return SECFailure; |
| } |
| output += AES_BLOCK_SIZE; |
| input += AES_BLOCK_SIZE; |
| inputLen -= AES_BLOCK_SIZE; |
| } |
| return SECSuccess; |
| } |
| |
| static SECStatus |
| rijndael_decryptCBC(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, unsigned int maxOutputLen, |
| const unsigned char *input, unsigned int inputLen) |
| { |
| const unsigned char *in; |
| unsigned char *out; |
| unsigned int j; |
| unsigned char newIV[AES_BLOCK_SIZE]; |
| |
| if (!inputLen) |
| return SECSuccess; |
| PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); |
| in = input + (inputLen - AES_BLOCK_SIZE); |
| memcpy(newIV, in, AES_BLOCK_SIZE); |
| out = output + (inputLen - AES_BLOCK_SIZE); |
| while (inputLen > AES_BLOCK_SIZE) { |
| if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { |
| return SECFailure; |
| } |
| for (j = 0; j < AES_BLOCK_SIZE; ++j) |
| out[j] ^= in[(int)(j - AES_BLOCK_SIZE)]; |
| out -= AES_BLOCK_SIZE; |
| in -= AES_BLOCK_SIZE; |
| inputLen -= AES_BLOCK_SIZE; |
| } |
| if (in == input) { |
| if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { |
| return SECFailure; |
| } |
| for (j = 0; j < AES_BLOCK_SIZE; ++j) |
| out[j] ^= cx->iv[j]; |
| } |
| memcpy(cx->iv, newIV, AES_BLOCK_SIZE); |
| return SECSuccess; |
| } |
| |
| /************************************************************************ |
| * |
| * BLAPI Interface functions |
| * |
| * The following functions implement the encryption routines defined in |
| * BLAPI for the AES cipher, Rijndael. |
| * |
| ***********************************************************************/ |
| |
| AESContext * |
| AES_AllocateContext(void) |
| { |
| return PORT_ZNewAligned(AESContext, 16, mem); |
| } |
| |
| /* |
| ** Initialize a new AES context suitable for AES encryption/decryption in |
| ** the ECB or CBC mode. |
| ** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC |
| */ |
| static SECStatus |
| aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, |
| const unsigned char *iv, int mode, unsigned int encrypt) |
| { |
| unsigned int Nk; |
| PRBool use_hw_aes; |
| /* According to AES, block lengths are 128 and key lengths are 128, 192, or |
| * 256 bits. We support other key sizes as well [128, 256] as long as the |
| * length in bytes is divisible by 4. |
| */ |
| |
| if (key == NULL || |
| keysize < AES_BLOCK_SIZE || |
| keysize > 32 || |
| keysize % 4 != 0) { |
| PORT_SetError(SEC_ERROR_INVALID_ARGS); |
| return SECFailure; |
| } |
| if (mode != NSS_AES && mode != NSS_AES_CBC) { |
| PORT_SetError(SEC_ERROR_INVALID_ARGS); |
| return SECFailure; |
| } |
| if (mode == NSS_AES_CBC && iv == NULL) { |
| PORT_SetError(SEC_ERROR_INVALID_ARGS); |
| return SECFailure; |
| } |
| if (!cx) { |
| PORT_SetError(SEC_ERROR_INVALID_ARGS); |
| return SECFailure; |
| } |
| use_hw_aes = aesni_support() && (keysize % 8) == 0; |
| /* Nb = (block size in bits) / 32 */ |
| cx->Nb = AES_BLOCK_SIZE / 4; |
| /* Nk = (key size in bits) / 32 */ |
| Nk = keysize / 4; |
| /* Obtain number of rounds from "table" */ |
| cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); |
| /* copy in the iv, if neccessary */ |
| if (mode == NSS_AES_CBC) { |
| memcpy(cx->iv, iv, AES_BLOCK_SIZE); |
| #ifdef USE_HW_AES |
| if (use_hw_aes) { |
| cx->worker = (freeblCipherFunc) |
| intel_aes_cbc_worker(encrypt, keysize); |
| } else |
| #endif |
| { |
| cx->worker = (freeblCipherFunc)(encrypt |
| ? &rijndael_encryptCBC |
| : &rijndael_decryptCBC); |
| } |
| } else { |
| #ifdef USE_HW_AES |
| if (use_hw_aes) { |
| cx->worker = (freeblCipherFunc) |
| intel_aes_ecb_worker(encrypt, keysize); |
| } else |
| #endif |
| { |
| cx->worker = (freeblCipherFunc)(encrypt |
| ? &rijndael_encryptECB |
| : &rijndael_decryptECB); |
| } |
| } |
| PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); |
| if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { |
| PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); |
| return SECFailure; |
| } |
| #ifdef USE_HW_AES |
| if (use_hw_aes) { |
| intel_aes_init(encrypt, keysize); |
| } else |
| #endif |
| { |
| /* Generate expanded key */ |
| if (encrypt) { |
| if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES || |
| cx->mode == NSS_AES_CTR)) { |
| PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32); |
| /* Prepare hardware key for normal AES parameters. */ |
| rijndael_native_key_expansion(cx, key, Nk); |
| } else { |
| rijndael_key_expansion(cx, key, Nk); |
| } |
| } else { |
| rijndael_invkey_expansion(cx, key, Nk); |
| } |
| } |
| cx->worker_cx = cx; |
| cx->destroy = NULL; |
| cx->isBlock = PR_TRUE; |
| return SECSuccess; |
| } |
| |
| SECStatus |
| AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, |
| const unsigned char *iv, int mode, unsigned int encrypt, |
| unsigned int blocksize) |
| { |
| int basemode = mode; |
| PRBool baseencrypt = encrypt; |
| SECStatus rv; |
| |
| if (blocksize != AES_BLOCK_SIZE) { |
| PORT_SetError(SEC_ERROR_INVALID_ARGS); |
| return SECFailure; |
| } |
| |
| switch (mode) { |
| case NSS_AES_CTS: |
| basemode = NSS_AES_CBC; |
| break; |
| case NSS_AES_GCM: |
| case NSS_AES_CTR: |
| basemode = NSS_AES; |
| baseencrypt = PR_TRUE; |
| break; |
| } |
| /* Make sure enough is initialized so we can safely call Destroy. */ |
| cx->worker_cx = NULL; |
| cx->destroy = NULL; |
| cx->mode = mode; |
| rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt); |
| if (rv != SECSuccess) { |
| AES_DestroyContext(cx, PR_FALSE); |
| return rv; |
| } |
| |
| /* finally, set up any mode specific contexts */ |
| switch (mode) { |
| case NSS_AES_CTS: |
| cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv); |
| cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); |
| cx->destroy = (freeblDestroyFunc)CTS_DestroyContext; |
| cx->isBlock = PR_FALSE; |
| break; |
| case NSS_AES_GCM: |
| #if defined(INTEL_GCM) && defined(USE_HW_AES) |
| if (aesni_support() && (keysize % 8) == 0 && avx_support() && |
| clmul_support()) { |
| cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv); |
| cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate |
| : intel_AES_GCM_DecryptUpdate); |
| cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext; |
| cx->isBlock = PR_FALSE; |
| } else |
| #endif |
| { |
| cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv); |
| cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate |
| : GCM_DecryptUpdate); |
| cx->destroy = (freeblDestroyFunc)GCM_DestroyContext; |
| cx->isBlock = PR_FALSE; |
| } |
| break; |
| case NSS_AES_CTR: |
| cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv); |
| #if defined(USE_HW_AES) && defined(_MSC_VER) |
| if (aesni_support() && (keysize % 8) == 0) { |
| cx->worker = (freeblCipherFunc)CTR_Update_HW_AES; |
| } else |
| #endif |
| { |
| cx->worker = (freeblCipherFunc)CTR_Update; |
| } |
| cx->destroy = (freeblDestroyFunc)CTR_DestroyContext; |
| cx->isBlock = PR_FALSE; |
| break; |
| default: |
| /* everything has already been set up by aes_InitContext, just |
| * return */ |
| return SECSuccess; |
| } |
| /* check to see if we succeeded in getting the worker context */ |
| if (cx->worker_cx == NULL) { |
| /* no, just destroy the existing context */ |
| cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ |
| /* below that this isn't necessary */ |
| AES_DestroyContext(cx, PR_FALSE); |
| return SECFailure; |
| } |
| return SECSuccess; |
| } |
| |
| /* AES_CreateContext |
| * |
| * create a new context for Rijndael operations |
| */ |
| AESContext * |
| AES_CreateContext(const unsigned char *key, const unsigned char *iv, |
| int mode, int encrypt, |
| unsigned int keysize, unsigned int blocksize) |
| { |
| AESContext *cx = AES_AllocateContext(); |
| if (cx) { |
| SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, |
| blocksize); |
| if (rv != SECSuccess) { |
| AES_DestroyContext(cx, PR_TRUE); |
| cx = NULL; |
| } |
| } |
| return cx; |
| } |
| |
| /* |
| * AES_DestroyContext |
| * |
| * Zero an AES cipher context. If freeit is true, also free the pointer |
| * to the context. |
| */ |
| void |
| AES_DestroyContext(AESContext *cx, PRBool freeit) |
| { |
| if (cx->worker_cx && cx->destroy) { |
| (*cx->destroy)(cx->worker_cx, PR_TRUE); |
| cx->worker_cx = NULL; |
| cx->destroy = NULL; |
| } |
| if (freeit) { |
| PORT_Free(cx->mem); |
| } |
| } |
| |
| /* |
| * AES_Encrypt |
| * |
| * Encrypt an arbitrary-length buffer. The output buffer must already be |
| * allocated to at least inputLen. |
| */ |
| SECStatus |
| AES_Encrypt(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, unsigned int maxOutputLen, |
| const unsigned char *input, unsigned int inputLen) |
| { |
| /* Check args */ |
| if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { |
| PORT_SetError(SEC_ERROR_INVALID_ARGS); |
| return SECFailure; |
| } |
| if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { |
| PORT_SetError(SEC_ERROR_INPUT_LEN); |
| return SECFailure; |
| } |
| if (maxOutputLen < inputLen) { |
| PORT_SetError(SEC_ERROR_OUTPUT_LEN); |
| return SECFailure; |
| } |
| *outputLen = inputLen; |
| #if UINT_MAX > MP_32BIT_MAX |
| /* |
| * we can guarentee that GSM won't overlfow if we limit the input to |
| * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now. |
| * |
| * We do it here to cover both hardware and software GCM operations. |
| */ |
| { |
| PR_STATIC_ASSERT(sizeof(unsigned int) > 4); |
| } |
| if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) { |
| PORT_SetError(SEC_ERROR_OUTPUT_LEN); |
| return SECFailure; |
| } |
| #else |
| /* if we can't pass in a 32_bit number, then no such check needed */ |
| { |
| PR_STATIC_ASSERT(sizeof(unsigned int) <= 4); |
| } |
| #endif |
| |
| return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, |
| input, inputLen, AES_BLOCK_SIZE); |
| } |
| |
| /* |
| * AES_Decrypt |
| * |
| * Decrypt and arbitrary-length buffer. The output buffer must already be |
| * allocated to at least inputLen. |
| */ |
| SECStatus |
| AES_Decrypt(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, unsigned int maxOutputLen, |
| const unsigned char *input, unsigned int inputLen) |
| { |
| /* Check args */ |
| if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { |
| PORT_SetError(SEC_ERROR_INVALID_ARGS); |
| return SECFailure; |
| } |
| if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { |
| PORT_SetError(SEC_ERROR_INPUT_LEN); |
| return SECFailure; |
| } |
| if (maxOutputLen < inputLen) { |
| PORT_SetError(SEC_ERROR_OUTPUT_LEN); |
| return SECFailure; |
| } |
| *outputLen = inputLen; |
| return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, |
| input, inputLen, AES_BLOCK_SIZE); |
| } |