From 5ac03256db0fe4ca7e3ad1117d096c3a76368b76 Mon Sep 17 00:00:00 2001
From: Andreas Baumann
Date: Fri, 9 Jan 2015 09:46:07 +0100
Subject: backported CyaSSL/OpenSSL support for internal webserver instead of MatrixSSL

---
 release/src/router/cyassl/ctaocrypt/src/aes_asm.s | 484 ++++++++++++++++++++++
 1 file changed, 484 insertions(+)
 create mode 100755 release/src/router/cyassl/ctaocrypt/src/aes_asm.s

diff --git a/release/src/router/cyassl/ctaocrypt/src/aes_asm.s b/release/src/router/cyassl/ctaocrypt/src/aes_asm.s
new file mode 100755
index 00000000..4aa35861
--- /dev/null
+++ b/release/src/router/cyassl/ctaocrypt/src/aes_asm.s
@@ -0,0 +1,484 @@
+/* aes_asm.s
+ *
+ * Copyright (C) 2006-2011 Sawtooth Consulting Ltd.
+ *
+ * This file is part of CyaSSL.
+ *
+ * CyaSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * CyaSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/* See Intel® Advanced Encryption Standard (AES) Instructions Set White Paper
+ * by Intel Mobility Group, Israel Development Center, Israel; Shay Gueron
+ */
+
+
+//AES_CBC_encrypt (const unsigned char *in,
+// unsigned char *out,
+// unsigned char ivec[16],
+// unsigned long length,
+// const unsigned char *KS,
+// int nr)
+.globl AES_CBC_encrypt
+AES_CBC_encrypt:       # one block at a time; CBC chaining is serial
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8
+# parameter 6: %r9d
+movq %rcx, %r10        # save byte length
+shrq $4, %rcx          # rcx = number of 16-byte blocks
+shlq $60, %r10         # ZF set iff length is a multiple of 16
+je NO_PARTS
+addq $1, %rcx          # round up for a trailing partial block
+NO_PARTS:
+subq $16, %rsi         # bias out pointer; the loop pre-increments it
+movdqa (%rdx), %xmm1   # xmm1 = IV
+LOOP:
+pxor (%rdi), %xmm1     # CBC: XOR plaintext into previous ciphertext/IV
+pxor (%r8), %xmm1      # AddRoundKey with round key 0
+addq $16,%rsi
+addq $16,%rdi
+cmpl $12, %r9d         # nr < 12 -> AES-128, 10 rounds
+aesenc 16(%r8),%xmm1
+aesenc 32(%r8),%xmm1
+aesenc 48(%r8),%xmm1
+aesenc 64(%r8),%xmm1
+aesenc 80(%r8),%xmm1
+aesenc 96(%r8),%xmm1
+aesenc 112(%r8),%xmm1
+aesenc 128(%r8),%xmm1
+aesenc 144(%r8),%xmm1
+movdqa 160(%r8),%xmm2  # last round key for AES-128
+jb LAST
+cmpl $14, %r9d         # nr < 14 -> AES-192, 12 rounds
+
+aesenc 160(%r8),%xmm1
+aesenc 176(%r8),%xmm1
+movdqa 192(%r8),%xmm2  # last round key for AES-192
+jb LAST
+aesenc 192(%r8),%xmm1
+aesenc 208(%r8),%xmm1
+movdqa 224(%r8),%xmm2  # last round key for AES-256
+LAST:
+decq %rcx
+aesenclast %xmm2,%xmm1
+movdqu %xmm1,(%rsi)    # store ciphertext; it chains into the next block
+jne LOOP
+ret
+
+
+
+//AES_CBC_decrypt (const unsigned char *in,
+// unsigned char *out,
+// unsigned char ivec[16],
+// unsigned long length,
+// const unsigned char *KS,
+// int nr)
+.globl AES_CBC_decrypt
+AES_CBC_decrypt:       # KS must be the inverse (aesimc-transformed) schedule
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8
+# parameter 6: %r9d
+
+movq %rcx, %r10
+shrq $4, %rcx          # rcx = number of 16-byte blocks
+shlq $60, %r10
+je DNO_PARTS_4
+addq $1, %rcx
+DNO_PARTS_4:
+movq %rcx, %r10
+shlq $62, %r10
+shrq $62, %r10         # r10 = block count mod 4 (single-block tail)
+shrq $2, %rcx          # rcx = number of 4-block groups
+movdqu (%rdx),%xmm5    # xmm5 = IV (CBC chain value)
+je DREMAINDER_4
+subq $64, %rsi
+DLOOP_4:               # four blocks in flight to hide aesdec latency
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm2
+movdqu 32(%rdi), %xmm3
+movdqu 48(%rdi), %xmm4
+movdqa %xmm1, %xmm6    # save ciphertext blocks for the CBC XOR below
+movdqa %xmm2, %xmm7
+movdqa %xmm3, %xmm8
+movdqa %xmm4, %xmm15
+movdqa (%r8), %xmm9
+movdqa 16(%r8), %xmm10
+movdqa 32(%r8), %xmm11
+movdqa 48(%r8), %xmm12
+pxor %xmm9, %xmm1      # AddRoundKey with round key 0
+pxor %xmm9, %xmm2
+pxor %xmm9, %xmm3
+
+pxor %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+aesdec %xmm11, %xmm1
+aesdec %xmm11, %xmm2
+aesdec %xmm11, %xmm3
+aesdec %xmm11, %xmm4
+aesdec %xmm12, %xmm1
+aesdec %xmm12, %xmm2
+aesdec %xmm12, %xmm3
+aesdec %xmm12, %xmm4
+movdqa 64(%r8), %xmm9
+movdqa 80(%r8), %xmm10
+movdqa 96(%r8), %xmm11
+movdqa 112(%r8), %xmm12
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+aesdec %xmm11, %xmm1
+aesdec %xmm11, %xmm2
+aesdec %xmm11, %xmm3
+aesdec %xmm11, %xmm4
+aesdec %xmm12, %xmm1
+aesdec %xmm12, %xmm2
+aesdec %xmm12, %xmm3
+aesdec %xmm12, %xmm4
+movdqa 128(%r8), %xmm9
+movdqa 144(%r8), %xmm10
+movdqa 160(%r8), %xmm11  # last round key if AES-128
+cmpl $12, %r9d
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+jb DLAST_4
+movdqa 160(%r8), %xmm9
+movdqa 176(%r8), %xmm10
+movdqa 192(%r8), %xmm11  # last round key if AES-192
+cmpl $14, %r9d
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+jb DLAST_4
+
+movdqa 192(%r8), %xmm9
+movdqa 208(%r8), %xmm10
+movdqa 224(%r8), %xmm11  # last round key for AES-256
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+DLAST_4:
+addq $64, %rdi
+addq $64, %rsi
+decq %rcx
+aesdeclast %xmm11, %xmm1
+aesdeclast %xmm11, %xmm2
+aesdeclast %xmm11, %xmm3
+aesdeclast %xmm11, %xmm4
+pxor %xmm5 ,%xmm1      # CBC: XOR the preceding ciphertext (or IV)
+pxor %xmm6 ,%xmm2
+pxor %xmm7 ,%xmm3
+pxor %xmm8 ,%xmm4
+movdqu %xmm1, (%rsi)
+movdqu %xmm2, 16(%rsi)
+movdqu %xmm3, 32(%rsi)
+movdqu %xmm4, 48(%rsi)
+movdqa %xmm15,%xmm5    # last ciphertext of the group chains forward
+jne DLOOP_4
+addq $64, %rsi
+DREMAINDER_4:
+cmpq $0, %r10
+je DEND_4
+DLOOP_4_2:             # tail: decrypt the remaining 1-3 blocks one at a time
+movdqu (%rdi), %xmm1
+movdqa %xmm1 ,%xmm15   # keep ciphertext for chaining
+addq $16, %rdi
+pxor (%r8), %xmm1
+movdqu 160(%r8), %xmm2 # last round key if AES-128
+cmpl $12, %r9d
+aesdec 16(%r8), %xmm1
+aesdec 32(%r8), %xmm1
+aesdec 48(%r8), %xmm1
+aesdec 64(%r8), %xmm1
+aesdec 80(%r8), %xmm1
+aesdec 96(%r8), %xmm1
+aesdec 112(%r8), %xmm1
+aesdec 128(%r8), %xmm1
+aesdec 144(%r8), %xmm1
+jb DLAST_4_2
+movdqu 192(%r8), %xmm2 # last round key if AES-192
+cmpl $14, %r9d
+aesdec 160(%r8), %xmm1
+aesdec 176(%r8), %xmm1
+jb DLAST_4_2
+movdqu 224(%r8), %xmm2 # last round key for AES-256
+aesdec 192(%r8), %xmm1
+aesdec 208(%r8), %xmm1
+DLAST_4_2:
+aesdeclast %xmm2, %xmm1
+pxor %xmm5, %xmm1
+movdqa %xmm15, %xmm5
+movdqu %xmm1, (%rsi)
+
+addq $16, %rsi
+decq %r10
+jne DLOOP_4_2
+DEND_4:
+ret
+
+
+
+
+//void AES_128_Key_Expansion(const unsigned char* userkey,
+// unsigned char* key_schedule);
+.align 16,0x90
+.globl AES_128_Key_Expansion
+AES_128_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+movl $10, 240(%rsi)    # store the round count at offset 240
+
+movdqu (%rdi), %xmm1
+movdqa %xmm1, (%rsi)   # round key 0 is the user key itself
+
+
+ASSISTS:
+aeskeygenassist $1, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 16(%rsi)
+aeskeygenassist $2, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $4, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 48(%rsi)
+aeskeygenassist $8, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $16, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 80(%rsi)
+aeskeygenassist $32, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $64, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 112(%rsi)
+aeskeygenassist $0x80, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x1b, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 144(%rsi)
+aeskeygenassist $0x36, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 160(%rsi)
+ret
+
+PREPARE_ROUNDKEY_128:      # derive the next round key from xmm1 and xmm2
+pshufd $255, %xmm2, %xmm2  # broadcast RotWord(SubWord(w3))^RCON (dword 3)
+movdqa %xmm1, %xmm3
+pslldq $4, %xmm3           # three shift/XOR steps form the running word XOR
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pxor %xmm2, %xmm1
+ret
+
+
+//void AES_192_Key_Expansion (const unsigned char *userkey,
+// unsigned char *key)
+.globl AES_192_Key_Expansion
+AES_192_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm3   # upper 64 key bits sit in the low half of xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5  # repack the 192-bit halves into 128-bit round keys
+movdqa %xmm5, 16(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 32(%rsi)
+
+aeskeygenassist $0x2, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 48(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x4, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 64(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 80(%rsi)
+
+aeskeygenassist $0x8, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 96(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x10, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 112(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 128(%rsi)
+
+aeskeygenassist $0x20, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 144(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x40, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 160(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 176(%rsi)
+
+aeskeygenassist $0x80, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 192(%rsi)
+movdqa %xmm3, 208(%rsi)
+ret
+
+PREPARE_ROUNDKEY_192:
+pshufd $0x55, %xmm2, %xmm2  # broadcast RotWord(SubWord(w5))^RCON (dword 1)
+movdqu %xmm1, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pxor %xmm2, %xmm1
+pshufd $0xff, %xmm1, %xmm2
+movdqu %xmm3, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pxor %xmm2, %xmm3
+ret
+
+
+//void AES_256_Key_Expansion (const unsigned char *userkey,
+// unsigned char *key)
+.globl AES_256_Key_Expansion
+AES_256_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, 16(%rsi)
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 48(%rsi)
+aeskeygenassist $0x2, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 80(%rsi)
+aeskeygenassist $0x4, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 112(%rsi)
+aeskeygenassist $0x8, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 144(%rsi)
+aeskeygenassist $0x10, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 160(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 176(%rsi)
+aeskeygenassist $0x20, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 192(%rsi)
+
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 208(%rsi)
+aeskeygenassist $0x40, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 224(%rsi)
+
+ret
+
+MAKE_RK256_a:  # next even round key (the xmm1 half of the 256-bit key)
+pshufd $0xff, %xmm2, %xmm2
+movdqa %xmm1, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pxor %xmm2, %xmm1
+ret
+
+MAKE_RK256_b:  # next odd round key (the xmm3 half); SubWord only, no RCON
+pshufd $0xaa, %xmm2, %xmm2
+movdqa %xmm3, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pxor %xmm2, %xmm3
+ret
+
--
cgit v1.2.3-54-g00ecf
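
Usage note (a sketch, not part of the patch above): the file only exports raw AES-NI primitives; inside CyaSSL they are driven from ctaocrypt/src/aes.c. For a quick sanity check of the backport they can also be called directly. The sketch below assumes a GCC toolchain on x86-64 with AES-NI, a hypothetical file name aes_asm_demo.c, 16-byte aligned buffers wherever the assembly touches memory with movdqa or pxor (key schedule, IV, plaintext), and a length that is a multiple of 16. AES_CBC_decrypt is not exercised because aesdec needs an inverse (aesimc-transformed) key schedule, which this file does not build.

    /* aes_asm_demo.c - hypothetical test harness for the routines above */
    #include <stdio.h>

    /* Prototypes transcribed from the comment headers in aes_asm.s. */
    void AES_128_Key_Expansion(const unsigned char *userkey,
                               unsigned char *key_schedule);
    void AES_CBC_encrypt(const unsigned char *in, unsigned char *out,
                         unsigned char ivec[16], unsigned long length,
                         const unsigned char *KS, int nr);

    int main(void)
    {
        /* Up to 15 round keys plus the round count at offset 240; the
           expansion stores with movdqa, so keep it 16-byte aligned. */
        static unsigned char ks[244] __attribute__((aligned(16)));
        /* AES_CBC_encrypt loads the IV and XORs the plaintext with
           aligned instructions, so align those buffers as well. */
        static unsigned char iv[16] __attribute__((aligned(16))); /* zero IV, demo only */
        static unsigned char pt[32] __attribute__((aligned(16))) =
            "two 16-byte blocks of plaintext";
        unsigned char ct[32];
        const unsigned char key[16] = "0123456789abcdef";

        AES_128_Key_Expansion(key, ks);        /* 11 round keys; sets ks[240] = 10 */
        AES_CBC_encrypt(pt, ct, iv, sizeof pt, ks, 10);  /* nr = 10 for AES-128 */

        for (int i = 0; i < 32; i++)           /* print the ciphertext */
            printf("%02x", ct[i]);
        putchar('\n');
        return 0;
    }

Assuming the assembler accepts the file as-is, something like "gcc -o aes_asm_demo aes_asm_demo.c aes_asm.s" links the demo; the binary must run on an AES-NI capable CPU or it will fault on the first aesenc instruction.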