| /* This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| #include "secerr.h" |
| #include "rijndael.h" |
| |
| #if ((defined(__clang__) || \ |
| (defined(__GNUC__) && defined(__GNUC_MINOR__) && \ |
| (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \ |
| defined(IS_LITTLE_ENDIAN)) |
| |
| #ifndef __ARM_FEATURE_CRYPTO |
| #error "Compiler option is invalid" |
| #endif |
| |
| #include <arm_neon.h> |
| |
| SECStatus |
| arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaeseq_u8(state, key1); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key2); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key3); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key4); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key5); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key6); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key7); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key8); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key9); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key10); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key11); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| } |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (inputLen == 0) { |
| return SECSuccess; |
| } |
| |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaesdq_u8(state, key11); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key10); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key9); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key8); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key7); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key6); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key5); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key4); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key3); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key2); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key1); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| } |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11; |
| uint8x16_t iv; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| /* iv */ |
| iv = vld1q_u8(cx->iv); |
| |
| /* expanedKey */ |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| state = veorq_u8(state, iv); |
| |
| /* Rounds */ |
| state = vaeseq_u8(state, key1); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key2); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key3); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key4); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key5); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key6); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key7); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key8); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key9); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key10); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key11); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| iv = state; |
| } |
| vst1q_u8(cx->iv, iv); |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t iv; |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| /* iv */ |
| iv = vld1q_u8(cx->iv); |
| |
| /* expanedKey */ |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| |
| while (inputLen > 0) { |
| uint8x16_t state, old_state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| old_state = state; |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaesdq_u8(state, key11); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key10); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key9); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key8); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key7); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key6); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key5); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key4); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key3); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key2); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key1); |
| |
| state = veorq_u8(state, iv); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| |
| iv = old_state; |
| } |
| vst1q_u8(cx->iv, iv); |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13; |
| PRUint8 *key = (PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaeseq_u8(state, key1); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key2); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key3); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key4); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key5); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key6); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key7); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key8); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key9); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key10); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key11); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key12); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key13); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| } |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaesdq_u8(state, key13); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key12); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key11); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key10); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key9); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key8); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key7); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key6); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key5); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key4); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key3); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key2); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key1); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| } |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13; |
| uint8x16_t iv; |
| PRUint8 *key = (PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| /* iv */ |
| iv = vld1q_u8(cx->iv); |
| |
| /* expanedKey */ |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| state = veorq_u8(state, iv); |
| |
| /* Rounds */ |
| state = vaeseq_u8(state, key1); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key2); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key3); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key4); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key5); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key6); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key7); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key8); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key9); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key10); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key11); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key12); |
| state = veorq_u8(state, key13); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| iv = state; |
| } |
| vst1q_u8(cx->iv, iv); |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t iv; |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| /* iv */ |
| iv = vld1q_u8(cx->iv); |
| |
| /* expanedKey */ |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| |
| while (inputLen > 0) { |
| uint8x16_t state, old_state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| old_state = state; |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaesdq_u8(state, key13); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key12); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key11); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key10); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key9); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key8); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key7); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key6); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key5); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key4); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key3); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key2); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key1); |
| |
| state = veorq_u8(state, iv); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| |
| iv = old_state; |
| } |
| vst1q_u8(cx->iv, iv); |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13, key14, key15; |
| PRUint8 *key = (PRUint8 *)cx->k.expandedKey; |
| |
| if (inputLen == 0) { |
| return SECSuccess; |
| } |
| |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| key14 = vld1q_u8(key + 208); |
| key15 = vld1q_u8(key + 224); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaeseq_u8(state, key1); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key2); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key3); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key4); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key5); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key6); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key7); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key8); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key9); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key10); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key11); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key12); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key13); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key14); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key15); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| } |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13, key14, key15; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| key14 = vld1q_u8(key + 208); |
| key15 = vld1q_u8(key + 224); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaesdq_u8(state, key15); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key14); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key13); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key12); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key11); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key10); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key9); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key8); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key7); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key6); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key5); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key4); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key3); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key2); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key1); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| } |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13, key14, key15; |
| uint8x16_t iv; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| /* iv */ |
| iv = vld1q_u8(cx->iv); |
| |
| /* expanedKey */ |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| key14 = vld1q_u8(key + 208); |
| key15 = vld1q_u8(key + 224); |
| |
| while (inputLen > 0) { |
| uint8x16_t state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| input += 16; |
| inputLen -= 16; |
| |
| state = veorq_u8(state, iv); |
| |
| /* Rounds */ |
| state = vaeseq_u8(state, key1); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key2); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key3); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key4); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key5); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key6); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key7); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key8); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key9); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key10); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key11); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key12); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key13); |
| state = vaesmcq_u8(state); |
| state = vaeseq_u8(state, key14); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key15); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| iv = state; |
| } |
| vst1q_u8(cx->iv, iv); |
| |
| return SECSuccess; |
| } |
| |
| SECStatus |
| arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output, |
| unsigned int *outputLen, |
| unsigned int maxOutputLen, |
| const unsigned char *input, |
| unsigned int inputLen, |
| unsigned int blocksize) |
| { |
| #if !defined(HAVE_UNALIGNED_ACCESS) |
| pre_align unsigned char buf[16] post_align; |
| #endif |
| uint8x16_t iv; |
| uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; |
| uint8x16_t key11, key12, key13, key14, key15; |
| const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; |
| |
| if (!inputLen) { |
| return SECSuccess; |
| } |
| |
| /* iv */ |
| iv = vld1q_u8(cx->iv); |
| |
| /* expanedKey */ |
| key1 = vld1q_u8(key); |
| key2 = vld1q_u8(key + 16); |
| key3 = vld1q_u8(key + 32); |
| key4 = vld1q_u8(key + 48); |
| key5 = vld1q_u8(key + 64); |
| key6 = vld1q_u8(key + 80); |
| key7 = vld1q_u8(key + 96); |
| key8 = vld1q_u8(key + 112); |
| key9 = vld1q_u8(key + 128); |
| key10 = vld1q_u8(key + 144); |
| key11 = vld1q_u8(key + 160); |
| key12 = vld1q_u8(key + 176); |
| key13 = vld1q_u8(key + 192); |
| key14 = vld1q_u8(key + 208); |
| key15 = vld1q_u8(key + 224); |
| |
| while (inputLen > 0) { |
| uint8x16_t state, old_state; |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| state = vld1q_u8(input); |
| #else |
| if ((uintptr_t)input & 0x7) { |
| memcpy(buf, input, 16); |
| state = vld1q_u8(__builtin_assume_aligned(buf, 16)); |
| } else { |
| state = vld1q_u8(__builtin_assume_aligned(input, 8)); |
| } |
| #endif |
| old_state = state; |
| input += 16; |
| inputLen -= 16; |
| |
| /* Rounds */ |
| state = vaesdq_u8(state, key15); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key14); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key13); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key12); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key11); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key10); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key9); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key8); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key7); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key6); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key5); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key4); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key3); |
| state = vaesimcq_u8(state); |
| state = vaesdq_u8(state, key2); |
| /* AddRoundKey */ |
| state = veorq_u8(state, key1); |
| |
| state = veorq_u8(state, iv); |
| |
| #if defined(HAVE_UNALIGNED_ACCESS) |
| vst1q_u8(output, state); |
| #else |
| if ((uintptr_t)output & 0x7) { |
| vst1q_u8(__builtin_assume_aligned(buf, 16), state); |
| memcpy(output, buf, 16); |
| } else { |
| vst1q_u8(__builtin_assume_aligned(output, 8), state); |
| } |
| #endif |
| output += 16; |
| |
| iv = old_state; |
| } |
| vst1q_u8(cx->iv, iv); |
| |
| return SECSuccess; |
| } |
| |
| #endif |