Diffstat (limited to 'release/src/router/cyassl/ctaocrypt/src/aes_asm.s')
-rwxr-xr-x  release/src/router/cyassl/ctaocrypt/src/aes_asm.s  484
1 files changed, 484 insertions, 0 deletions
diff --git a/release/src/router/cyassl/ctaocrypt/src/aes_asm.s b/release/src/router/cyassl/ctaocrypt/src/aes_asm.s
new file mode 100755
index 00000000..4aa35861
--- /dev/null
+++ b/release/src/router/cyassl/ctaocrypt/src/aes_asm.s
@@ -0,0 +1,484 @@
+/* aes_asm.s
+ *
+ * Copyright (C) 2006-2011 Sawtooth Consulting Ltd.
+ *
+ * This file is part of CyaSSL.
+ *
+ * CyaSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * CyaSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/* See the Intel® Advanced Encryption Standard (AES) Instructions Set White Paper
+ * by Shay Gueron, Intel Mobility Group, Israel Development Center, Israel.
+ */
+
+
+//AES_CBC_encrypt (const unsigned char *in,
+// unsigned char *out,
+// unsigned char ivec[16],
+// unsigned long length,
+// const unsigned char *KS,
+// int nr)
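+//
+// A hedged C-side usage sketch (illustrative only; buffer names, alignment
+// attributes, and sizes are assumptions, not taken from this file):
+//
+//   extern void AES_128_Key_Expansion(const unsigned char*, unsigned char*);
+//   extern void AES_CBC_encrypt(const unsigned char*, unsigned char*,
+//                               unsigned char*, unsigned long,
+//                               const unsigned char*, int);
+//
+//   unsigned char rk[256] __attribute__((aligned(16))); /* 11 round keys + count */
+//   unsigned char iv[16]  __attribute__((aligned(16))); /* caller-chosen IV */
+//   AES_128_Key_Expansion(userKey, rk);                 /* 10-round schedule */
+//   AES_CBC_encrypt(plain, cipher, iv, len, rk, 10);    /* len a multiple of 16 */
+//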
+.globl AES_CBC_encrypt
+AES_CBC_encrypt:
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8
+# parameter 6: %r9d
+# %rcx = number of 16-byte blocks, rounded up when the length has a partial tail
+movq %rcx, %r10
+shrq $4, %rcx
+shlq $60, %r10
+je NO_PARTS
+addq $1, %rcx
+NO_PARTS:
+subq $16, %rsi
+# %xmm1 carries the CBC chaining value, starting with the IV
+movdqa (%rdx), %xmm1
+LOOP:
+pxor (%rdi), %xmm1
+pxor (%r8), %xmm1
+addq $16,%rsi
+addq $16,%rdi
+cmpl $12, %r9d
+aesenc 16(%r8),%xmm1
+aesenc 32(%r8),%xmm1
+aesenc 48(%r8),%xmm1
+aesenc 64(%r8),%xmm1
+aesenc 80(%r8),%xmm1
+aesenc 96(%r8),%xmm1
+aesenc 112(%r8),%xmm1
+aesenc 128(%r8),%xmm1
+aesenc 144(%r8),%xmm1
+movdqa 160(%r8),%xmm2
+jb LAST
+cmpl $14, %r9d
+
+aesenc 160(%r8),%xmm1
+aesenc 176(%r8),%xmm1
+movdqa 192(%r8),%xmm2
+jb LAST
+aesenc 192(%r8),%xmm1
+aesenc 208(%r8),%xmm1
+movdqa 224(%r8),%xmm2
+LAST:
+decq %rcx
+aesenclast %xmm2,%xmm1
+movdqu %xmm1,(%rsi)
+jne LOOP
+ret
+
+
+
+//AES_CBC_decrypt (const unsigned char *in,
+// unsigned char *out,
+// unsigned char ivec[16],
+// unsigned long length,
+// const unsigned char *KS,
+// int nr)
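+//
+// Usage sketch (illustrative): mirrors the encrypt call above, but the caller
+// is assumed to pass a key schedule prepared for decryption (AESDEC expects
+// inverse-cipher round keys), the same 16-byte IV, and the ciphertext length:
+//
+//   AES_CBC_decrypt(cipher, plain, iv, len, decryptKS, 10);
+//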
+.globl AES_CBC_decrypt
+AES_CBC_decrypt:
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8
+# parameter 6: %r9d
+
+# total block count, rounded up, then split into groups of four (%rcx)
+# and a remainder of 0-3 single blocks (%r10)
+movq %rcx, %r10
+shrq $4, %rcx
+shlq $60, %r10
+je DNO_PARTS_4
+addq $1, %rcx
+DNO_PARTS_4:
+movq %rcx, %r10
+shlq $62, %r10
+shrq $62, %r10
+shrq $2, %rcx
+# %xmm5 carries the CBC chaining value, starting with the IV
+movdqu (%rdx),%xmm5
+je DREMAINDER_4
+subq $64, %rsi
+DLOOP_4:
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm2
+movdqu 32(%rdi), %xmm3
+movdqu 48(%rdi), %xmm4
+movdqa %xmm1, %xmm6
+movdqa %xmm2, %xmm7
+movdqa %xmm3, %xmm8
+movdqa %xmm4, %xmm15
+movdqa (%r8), %xmm9
+movdqa 16(%r8), %xmm10
+movdqa 32(%r8), %xmm11
+movdqa 48(%r8), %xmm12
+pxor %xmm9, %xmm1
+pxor %xmm9, %xmm2
+pxor %xmm9, %xmm3
+
+pxor %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+aesdec %xmm11, %xmm1
+aesdec %xmm11, %xmm2
+aesdec %xmm11, %xmm3
+aesdec %xmm11, %xmm4
+aesdec %xmm12, %xmm1
+aesdec %xmm12, %xmm2
+aesdec %xmm12, %xmm3
+aesdec %xmm12, %xmm4
+movdqa 64(%r8), %xmm9
+movdqa 80(%r8), %xmm10
+movdqa 96(%r8), %xmm11
+movdqa 112(%r8), %xmm12
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+aesdec %xmm11, %xmm1
+aesdec %xmm11, %xmm2
+aesdec %xmm11, %xmm3
+aesdec %xmm11, %xmm4
+aesdec %xmm12, %xmm1
+aesdec %xmm12, %xmm2
+aesdec %xmm12, %xmm3
+aesdec %xmm12, %xmm4
+movdqa 128(%r8), %xmm9
+movdqa 144(%r8), %xmm10
+movdqa 160(%r8), %xmm11
+cmpl $12, %r9d
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+jb DLAST_4
+movdqa 160(%r8), %xmm9
+movdqa 176(%r8), %xmm10
+movdqa 192(%r8), %xmm11
+cmpl $14, %r9d
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+jb DLAST_4
+
+movdqa 192(%r8), %xmm9
+movdqa 208(%r8), %xmm10
+movdqa 224(%r8), %xmm11
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec %xmm10, %xmm2
+aesdec %xmm10, %xmm3
+aesdec %xmm10, %xmm4
+DLAST_4:
+addq $64, %rdi
+addq $64, %rsi
+decq %rcx
+aesdeclast %xmm11, %xmm1
+aesdeclast %xmm11, %xmm2
+aesdeclast %xmm11, %xmm3
+aesdeclast %xmm11, %xmm4
+# xor the previous ciphertext blocks (chaining values) into the decrypted blocks
+pxor %xmm5, %xmm1
+pxor %xmm6, %xmm2
+pxor %xmm7, %xmm3
+pxor %xmm8, %xmm4
+movdqu %xmm1, (%rsi)
+movdqu %xmm2, 16(%rsi)
+movdqu %xmm3, 32(%rsi)
+movdqu %xmm4, 48(%rsi)
+# the last ciphertext of this group chains into the next iteration
+movdqa %xmm15,%xmm5
+jne DLOOP_4
+addq $64, %rsi
+DREMAINDER_4:
+cmpq $0, %r10
+je DEND_4
+DLOOP_4_2:
+movdqu (%rdi), %xmm1
+# keep this ciphertext block as the next chaining value
+movdqa %xmm1, %xmm15
+addq $16, %rdi
+pxor (%r8), %xmm1
+movdqu 160(%r8), %xmm2
+cmpl $12, %r9d
+aesdec 16(%r8), %xmm1
+aesdec 32(%r8), %xmm1
+aesdec 48(%r8), %xmm1
+aesdec 64(%r8), %xmm1
+aesdec 80(%r8), %xmm1
+aesdec 96(%r8), %xmm1
+aesdec 112(%r8), %xmm1
+aesdec 128(%r8), %xmm1
+aesdec 144(%r8), %xmm1
+jb DLAST_4_2
+movdqu 192(%r8), %xmm2
+cmpl $14, %r9d
+aesdec 160(%r8), %xmm1
+aesdec 176(%r8), %xmm1
+jb DLAST_4_2
+movdqu 224(%r8), %xmm2
+aesdec 192(%r8), %xmm1
+aesdec 208(%r8), %xmm1
+DLAST_4_2:
+aesdeclast %xmm2, %xmm1
+pxor %xmm5, %xmm1
+movdqa %xmm15, %xmm5
+movdqu %xmm1, (%rsi)
+
+addq $16, %rsi
+decq %r10
+jne DLOOP_4_2
+DEND_4:
+ret
+
+
+
+
+//void AES_128_Key_Expansion(const unsigned char* userkey,
+// unsigned char* key_schedule);
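+//
+// Usage sketch (illustrative; names are assumptions): the schedule buffer must
+// be 16-byte aligned (the movdqa stores below fault otherwise) and large enough
+// for the 11 round keys at offsets 0-160 plus the 32-bit round count (10)
+// written at offset 240:
+//
+//   unsigned char rk[256] __attribute__((aligned(16)));
+//   AES_128_Key_Expansion(userkey, rk);
+//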
+.align 16,0x90
+.globl AES_128_Key_Expansion
+AES_128_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+movl $10, 240(%rsi)
+
+movdqu (%rdi), %xmm1
+movdqa %xmm1, (%rsi)
+
+
+ASSISTS:
+aeskeygenassist $1, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 16(%rsi)
+aeskeygenassist $2, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $4, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 48(%rsi)
+aeskeygenassist $8, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $16, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 80(%rsi)
+aeskeygenassist $32, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $64, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 112(%rsi)
+aeskeygenassist $0x80, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x1b, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 144(%rsi)
+aeskeygenassist $0x36, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 160(%rsi)
+ret
+
+# fold the previous round key with three 4-byte left shifts of itself, then
+# xor in the keygenassist word broadcast by pshufd
+PREPARE_ROUNDKEY_128:
+pshufd $255, %xmm2, %xmm2
+movdqa %xmm1, %xmm3
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pxor %xmm2, %xmm1
+ret
+
+
+//void AES_192_Key_Expansion (const unsigned char *userkey,
+// unsigned char *key)
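+//
+// Usage sketch (illustrative): expands a 24-byte key for 12-round AES-192; the
+// last store below lands at offset 208, so the (16-byte aligned) buffer should
+// hold at least 224 bytes. The round count is not stored here, so the caller
+// passes nr = 12 itself:
+//
+//   AES_192_Key_Expansion(userkey, rk);
+//   AES_CBC_encrypt(plain, cipher, iv, len, rk, 12);
+//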
+.globl AES_192_Key_Expansion
+AES_192_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 16(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 32(%rsi)
+
+aeskeygenassist $0x2, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 48(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x4, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 64(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 80(%rsi)
+
+aeskeygenassist $0x8, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 96(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x10, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 112(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 128(%rsi)
+
+aeskeygenassist $0x20, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 144(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x40, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 160(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 176(%rsi)
+
+aeskeygenassist $0x80, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 192(%rsi)
+movdqa %xmm3, 208(%rsi)
+ret
+
+PREPARE_ROUNDKEY_192:
+pshufd $0x55, %xmm2, %xmm2
+movdqu %xmm1, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pxor %xmm2, %xmm1
+pshufd $0xff, %xmm1, %xmm2
+movdqu %xmm3, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pxor %xmm2, %xmm3
+ret
+
+
+//void AES_256_Key_Expansion (const unsigned char *userkey,
+// unsigned char *key)
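+//
+// Usage sketch (illustrative): expands a 32-byte key into the 15 round keys of
+// 14-round AES-256 (offsets 0-224, 240 bytes, 16-byte aligned buffer); the
+// caller passes nr = 14:
+//
+//   AES_256_Key_Expansion(userkey, rk);
+//   AES_CBC_encrypt(plain, cipher, iv, len, rk, 14);
+//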
+.globl AES_256_Key_Expansion
+AES_256_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, 16(%rsi)
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 48(%rsi)
+aeskeygenassist $0x2, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 80(%rsi)
+aeskeygenassist $0x4, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 112(%rsi)
+aeskeygenassist $0x8, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 144(%rsi)
+aeskeygenassist $0x10, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 160(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 176(%rsi)
+aeskeygenassist $0x20, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 192(%rsi)
+
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 208(%rsi)
+aeskeygenassist $0x40, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 224(%rsi)
+
+ret
+
+MAKE_RK256_a:
+pshufd $0xff, %xmm2, %xmm2
+movdqa %xmm1, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pxor %xmm2, %xmm1
+ret
+
+MAKE_RK256_b:
+pshufd $0xaa, %xmm2, %xmm2
+movdqa %xmm3, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pxor %xmm2, %xmm3
+ret
+