diff serpent.c @ 0:d7da3b1e1540 libtomcrypt

put back the 0.95 makefile which was inadvertently merged over
author Matt Johnston <matt@ucc.asn.au>
date Mon, 31 May 2004 18:21:40 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/serpent.c	Mon May 31 18:21:40 2004 +0000
@@ -0,0 +1,698 @@
+#include "mycrypt.h"
+
+#ifdef SERPENT
+
+const struct _cipher_descriptor serpent_desc =
+{
+    "serpent",
+    5,
+    16, 32, 16, 32,
+    &serpent_setup,
+    &serpent_ecb_encrypt,
+    &serpent_ecb_decrypt,
+    &serpent_test,
+    &serpent_keysize
+};
+
+/* These defines are derived from Brian Gladman's work.  Contact him at [email protected] 
+ *
+ * Available on the web at http://fp.gladman.plus.com/cryptography_technology/aes/index.htm
+ */
+#define sb0(a,b,c,d,e,f,g,h)    \
+    t1 = a ^ d;     \
+    t2 = a & d;     \
+    t3 = c ^ t1;    \
+    t6 = b & t1;    \
+    t4 = b ^ t3;    \
+    t10 = ~t3;      \
+    h = t2 ^ t4;    \
+    t7 = a ^ t6;    \
+    t14 = ~t7;      \
+    t8 = c | t7;    \
+    t11 = t3 ^ t7;  \
+    g = t4 ^ t8;    \
+    t12 = h & t11;  \
+    f = t10 ^ t12;  \
+    e = t12 ^ t14
+
+/* 15 terms */
+
+#define ib0(a,b,c,d,e,f,g,h)    \
+    t1 = ~a;        \
+    t2 = a ^ b;     \
+    t3 = t1 | t2;   \
+    t4 = d ^ t3;    \
+    t7 = d & t2;    \
+    t5 = c ^ t4;    \
+    t8 = t1 ^ t7;   \
+    g = t2 ^ t5;    \
+    t11 = a & t4;   \
+    t9 = g & t8;    \
+    t14 = t5 ^ t8;  \
+    f = t4 ^ t9;    \
+    t12 = t5 | f;   \
+    h = t11 ^ t12;  \
+    e = h ^ t14
+
+/* 14 terms!  */
+
+#define sb1(a,b,c,d,e,f,g,h)    \
+    t1 = ~a;        \
+    t2 = b ^ t1;    \
+    t3 = a | t2;    \
+    t4 = d | t2;    \
+    t5 = c ^ t3;    \
+    g = d ^ t5;     \
+    t7 = b ^ t4;    \
+    t8 = t2 ^ g;    \
+    t9 = t5 & t7;   \
+    h = t8 ^ t9;    \
+    t11 = t5 ^ t7;  \
+    f = h ^ t11;    \
+    t13 = t8 & t11; \
+    e = t5 ^ t13
+
+/* 17 terms */
+
+#define ib1(a,b,c,d,e,f,g,h)    \
+    t1 = a ^ d;     \
+    t2 = a & b;     \
+    t3 = b ^ c;     \
+    t4 = a ^ t3;    \
+    t5 = b | d;     \
+    t7 = c | t1;    \
+    h = t4 ^ t5;    \
+    t8 = b ^ t7;    \
+    t11 = ~t2;      \
+    t9 = t4 & t8;   \
+    f = t1 ^ t9;    \
+    t13 = t9 ^ t11; \
+    t12 = h & f;    \
+    g = t12 ^ t13;  \
+    t15 = a & d;    \
+    t16 = c ^ t13;  \
+    e = t15 ^ t16
+
+/* 16 terms */
+
+#define sb2(a,b,c,d,e,f,g,h)    \
+    t1 = ~a;        \
+    t2 = b ^ d;     \
+    t3 = c & t1;    \
+    t13 = d | t1;   \
+    e = t2 ^ t3;    \
+    t5 = c ^ t1;    \
+    t6 = c ^ e;     \
+    t7 = b & t6;    \
+    t10 = e | t5;   \
+    h = t5 ^ t7;    \
+    t9 = d | t7;    \
+    t11 = t9 & t10; \
+    t14 = t2 ^ h;   \
+    g = a ^ t11;    \
+    t15 = g ^ t13;  \
+    f = t14 ^ t15
+
+/* 16 terms */
+
+#define ib2(a,b,c,d,e,f,g,h)    \
+    t1 = b ^ d;     \
+    t2 = ~t1;       \
+    t3 = a ^ c;     \
+    t4 = c ^ t1;    \
+    t7 = a | t2;    \
+    t5 = b & t4;    \
+    t8 = d ^ t7;    \
+    t11 = ~t4;      \
+    e = t3 ^ t5;    \
+    t9 = t3 | t8;   \
+    t14 = d & t11;  \
+    h = t1 ^ t9;    \
+    t12 = e | h;    \
+    f = t11 ^ t12;  \
+    t15 = t3 ^ t12; \
+    g = t14 ^ t15
+
+/* 17 terms */
+
+#define sb3(a,b,c,d,e,f,g,h)    \
+    t1 = a ^ c;     \
+    t2 = d ^ t1;    \
+    t3 = a & t2;    \
+    t4 = d ^ t3;    \
+    t5 = b & t4;    \
+    g = t2 ^ t5;    \
+    t7 = a | g;     \
+    t8 = b | d;     \
+    t11 = a | d;    \
+    t9 = t4 & t7;   \
+    f = t8 ^ t9;    \
+    t12 = b ^ t11;  \
+    t13 = g ^ t9;   \
+    t15 = t3 ^ t8;  \
+    h = t12 ^ t13;  \
+    t16 = c & t15;  \
+    e = t12 ^ t16
+
+/* 16 term solution that performs less well than 17 term one
+   in my environment (PPro/PII)                                  
+
+#define sb3(a,b,c,d,e,f,g,h)    \
+    t1 = a ^ b;     \
+    t2 = a & c;     \
+    t3 = a | d;     \
+    t4 = c ^ d;     \
+    t5 = t1 & t3;   \
+    t6 = t2 | t5;   \
+    g = t4 ^ t6;    \
+    t8 = b ^ t3;    \
+    t9 = t6 ^ t8;   \
+    t10 = t4 & t9;  \
+    e = t1 ^ t10;   \
+    t12 = g & e;    \
+    f = t9 ^ t12;   \
+    t14 = b | d;    \
+    t15 = t4 ^ t12; \
+    h = t14 ^ t15
+*/
+
+/* 17 terms */
+
+#define ib3(a,b,c,d,e,f,g,h)    \
+    t1 = b ^ c;     \
+    t2 = b | c;     \
+    t3 = a ^ c;     \
+    t7 = a ^ d;     \
+    t4 = t2 ^ t3;   \
+    t5 = d | t4;    \
+    t9 = t2 ^ t7;   \
+    e = t1 ^ t5;    \
+    t8 = t1 | t5;   \
+    t11 = a & t4;   \
+    g = t8 ^ t9;    \
+    t12 = e | t9;   \
+    f = t11 ^ t12;  \
+    t14 = a & g;    \
+    t15 = t2 ^ t14; \
+    t16 = e & t15;  \
+    h = t4 ^ t16
+
+/* 15 terms */
+
+#define sb4(a,b,c,d,e,f,g,h)    \
+    t1 = a ^ d;     \
+    t2 = d & t1;    \
+    t3 = c ^ t2;    \
+    t4 = b | t3;    \
+    h = t1 ^ t4;    \
+    t6 = ~b;        \
+    t7 = t1 | t6;   \
+    e = t3 ^ t7;    \
+    t9 = a & e;     \
+    t10 = t1 ^ t6;  \
+    t11 = t4 & t10; \
+    g = t9 ^ t11;   \
+    t13 = a ^ t3;   \
+    t14 = t10 & g;  \
+    f = t13 ^ t14
+
+/* 17 terms */
+
+#define ib4(a,b,c,d,e,f,g,h)    \
+    t1 = c ^ d;     \
+    t2 = c | d;     \
+    t3 = b ^ t2;    \
+    t4 = a & t3;    \
+    f = t1 ^ t4;    \
+    t6 = a ^ d;     \
+    t7 = b | d;     \
+    t8 = t6 & t7;   \
+    h = t3 ^ t8;    \
+    t10 = ~a;       \
+    t11 = c ^ h;    \
+    t12 = t10 | t11;\
+    e = t3 ^ t12;   \
+    t14 = c | t4;   \
+    t15 = t7 ^ t14; \
+    t16 = h | t10;  \
+    g = t15 ^ t16
+
+/* 16 terms */
+
+#define sb5(a,b,c,d,e,f,g,h)    \
+    t1 = ~a;        \
+    t2 = a ^ b;     \
+    t3 = a ^ d;     \
+    t4 = c ^ t1;    \
+    t5 = t2 | t3;   \
+    e = t4 ^ t5;    \
+    t7 = d & e;     \
+    t8 = t2 ^ e;    \
+    t10 = t1 | e;   \
+    f = t7 ^ t8;    \
+    t11 = t2 | t7;  \
+    t12 = t3 ^ t10; \
+    t14 = b ^ t7;   \
+    g = t11 ^ t12;  \
+    t15 = f & t12;  \
+    h = t14 ^ t15
+
+/* 16 terms */
+
+#define ib5(a,b,c,d,e,f,g,h)    \
+    t1 = ~c;        \
+    t2 = b & t1;    \
+    t3 = d ^ t2;    \
+    t4 = a & t3;    \
+    t5 = b ^ t1;    \
+    h = t4 ^ t5;    \
+    t7 = b | h;     \
+    t8 = a & t7;    \
+    f = t3 ^ t8;    \
+    t10 = a | d;    \
+    t11 = t1 ^ t7;  \
+    e = t10 ^ t11;  \
+    t13 = a ^ c;    \
+    t14 = b & t10;  \
+    t15 = t4 | t13; \
+    g = t14 ^ t15
+
+/* 15 terms */
+
+#define sb6(a,b,c,d,e,f,g,h)    \
+    t1 = ~a;        \
+    t2 = a ^ d;     \
+    t3 = b ^ t2;    \
+    t4 = t1 | t2;   \
+    t5 = c ^ t4;    \
+    f = b ^ t5;     \
+    t13 = ~t5;      \
+    t7 = t2 | f;    \
+    t8 = d ^ t7;    \
+    t9 = t5 & t8;   \
+    g = t3 ^ t9;    \
+    t11 = t5 ^ t8;  \
+    e = g ^ t11;    \
+    t14 = t3 & t11; \
+    h = t13 ^ t14
+
+/* 15 terms */
+
+#define ib6(a,b,c,d,e,f,g,h)    \
+    t1 = ~a;        \
+    t2 = a ^ b;     \
+    t3 = c ^ t2;    \
+    t4 = c | t1;    \
+    t5 = d ^ t4;    \
+    t13 = d & t1;   \
+    f = t3 ^ t5;    \
+    t7 = t3 & t5;   \
+    t8 = t2 ^ t7;   \
+    t9 = b | t8;    \
+    h = t5 ^ t9;    \
+    t11 = b | h;    \
+    e = t8 ^ t11;   \
+    t14 = t3 ^ t11; \
+    g = t13 ^ t14
+
+/* 17 terms */
+
+#define sb7(a,b,c,d,e,f,g,h)    \
+    t1 = ~c;        \
+    t2 = b ^ c;     \
+    t3 = b | t1;    \
+    t4 = d ^ t3;    \
+    t5 = a & t4;    \
+    t7 = a ^ d;     \
+    h = t2 ^ t5;    \
+    t8 = b ^ t5;    \
+    t9 = t2 | t8;   \
+    t11 = d & t3;   \
+    f = t7 ^ t9;    \
+    t12 = t5 ^ f;   \
+    t15 = t1 | t4;  \
+    t13 = h & t12;  \
+    g = t11 ^ t13;  \
+    t16 = t12 ^ g;  \
+    e = t15 ^ t16
+
+/* 17 terms */
+
+#define ib7(a,b,c,d,e,f,g,h)    \
+    t1 = a & b;     \
+    t2 = a | b;     \
+    t3 = c | t1;    \
+    t4 = d & t2;    \
+    h = t3 ^ t4;    \
+    t6 = ~d;        \
+    t7 = b ^ t4;    \
+    t8 = h ^ t6;    \
+    t11 = c ^ t7;   \
+    t9 = t7 | t8;   \
+    f = a ^ t9;     \
+    t12 = d | f;    \
+    e = t11 ^ t12;  \
+    t14 = a & h;    \
+    t15 = t3 ^ f;   \
+    t16 = e ^ t14;  \
+    g = t15 ^ t16
+
+#define k_xor(r,a,b,c,d)             \
+    a ^= skey->serpent.K[4 * (r) + 0]; \
+    b ^= skey->serpent.K[4 * (r) + 1]; \
+    c ^= skey->serpent.K[4 * (r) + 2]; \
+    d ^= skey->serpent.K[4 * (r) + 3]
+
+#define k_set(r,a,b,c,d)   \
+    a = lkey[4 * (r) +  8];  \
+    b = lkey[4 * (r) +  9];  \
+    c = lkey[4 * (r) + 10];  \
+    d = lkey[4 * (r) + 11]
+
+#define k_get(r,a,b,c,d)            \
+    skey->serpent.K[4 * (r) + 0] = a; \
+    skey->serpent.K[4 * (r) + 1] = b; \
+    skey->serpent.K[4 * (r) + 2] = c; \
+    skey->serpent.K[4 * (r) + 3] = d
+
+/* the linear transformation and its inverse    */
+
+#define rot(a,b,c,d)    \
+    a = ROL(a, 13);    \
+    c = ROL(c, 3);     \
+    d ^= c ^ (a << 3);  \
+    b ^= a ^ c;         \
+    d = ROL(d, 7);     \
+    b = ROL(b, 1);     \
+    a ^= b ^ d;         \
+    c ^= d ^ (b << 7);  \
+    a = ROL(a, 5);     \
+    c = ROL(c, 22)
+
+#define irot(a,b,c,d)   \
+    c = ROR(c, 22);    \
+    a = ROR(a, 5);     \
+    c ^= d ^ (b << 7);  \
+    a ^= b ^ d;         \
+    d = ROR(d, 7);     \
+    b = ROR(b, 1);     \
+    d ^= c ^ (a << 3);  \
+    b ^= a ^ c;         \
+    c = ROR(c, 3);     \
+    a = ROR(a, 13)
+    
+#ifdef CLEAN_STACK
+static int _serpent_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
+#else
+int serpent_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
+#endif
+{
+    unsigned long lkey[140], t, a, b, c, d, e, f, g, h, x;
+    unsigned long t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
+    unsigned char buf[32];
+
+    _ARGCHK(key != NULL);
+    _ARGCHK(skey != NULL);
+
+    /* check rounds */
+    if (num_rounds != 0 && num_rounds != 32) {
+       return CRYPT_INVALID_ROUNDS;
+    }
+
+    /* check keylen */
+    if (keylen < 16 || keylen > 32) {
+       return CRYPT_INVALID_KEYSIZE;
+    }
+
+    /* copy key and expand to 32bytes as required */
+    for (x = 0; x < (unsigned long)keylen; x++) {
+        buf[x] = key[x];
+    }
+
+    if (x < 32) {
+       buf[x++] = (unsigned char)0x01;
+       while (x < 32) {
+           buf[x++] = (unsigned char)0;
+       }
+    }
+
+    /* copy key into 32-bit words */
+    for (x = 0; x < 8; x++) {
+        LOAD32L(lkey[x], &buf[x*4]);
+    }
+
+    /* expand using the LFSR to 140 words */
+    for (x = 0; x < 132; x++) {
+        t = lkey[x] ^ lkey[x+3] ^ lkey[x+5] ^ lkey[x+7] ^ x ^ 0x9E3779B9UL;
+        lkey[x + 8] = ROL(t, 11);
+    }
+
+    /* perform the substituions */
+    for (x = 0; x < 32; ) {
+       k_set( x,a,b,c,d);sb3(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+       k_set( x,a,b,c,d);sb2(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+       k_set( x,a,b,c,d);sb1(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+       k_set( x,a,b,c,d);sb0(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+       k_set( x,a,b,c,d);sb7(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+       k_set( x,a,b,c,d);sb6(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+       k_set( x,a,b,c,d);sb5(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+       k_set( x,a,b,c,d);sb4(a,b,c,d,e,f,g,h);k_get( x,e,f,g,h); ++x;
+    }
+    k_set(32,a,b,c,d);sb3(a,b,c,d,e,f,g,h);k_get(32,e,f,g,h);
+    return CRYPT_OK;
+}
+
+#ifdef CLEAN_STACK
+int serpent_setup(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
+{
+   int x;
+   x = _serpent_setup(key, keylen, num_rounds, skey);
+   burn_stack(sizeof(unsigned long)*166 + sizeof(unsigned char)*32);
+   return x;
+}
+#endif
+
+#ifdef CLEAN_STACK
+static void _serpent_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
+#else
+void serpent_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
+#endif
+{
+    unsigned long a,b,c,d,e,f,g,h;
+    unsigned long t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
+
+    _ARGCHK(pt != NULL);
+    _ARGCHK(ct != NULL);
+    _ARGCHK(skey != NULL);
+
+    LOAD32L(a, &pt[0]);LOAD32L(b, &pt[4]);LOAD32L(c, &pt[8]);LOAD32L(d, &pt[12]);
+    k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); 
+    k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(25,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
+    k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
+    k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d); k_xor(32,a,b,c,d);
+    STORE32L(a, &ct[0]);STORE32L(b, &ct[4]);STORE32L(c, &ct[8]);STORE32L(d, &ct[12]);
+}
+
+#ifdef CLEAN_STACK
+void serpent_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
+{
+   _serpent_ecb_encrypt(pt, ct, skey);
+   burn_stack(sizeof(unsigned long)*24);
+}
+#endif
+
+#ifdef CLEAN_STACK
+static void _serpent_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
+#else
+void serpent_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
+#endif
+{
+    unsigned long a,b,c,d,e,f,g,h;
+    unsigned long t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
+
+    _ARGCHK(pt != NULL);
+    _ARGCHK(ct != NULL);
+    _ARGCHK(skey != NULL);
+
+    LOAD32L(a, &ct[0]);LOAD32L(b, &ct[4]);LOAD32L(c, &ct[8]);LOAD32L(d, &ct[12]);
+    k_xor(32,a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h);
+    irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d);
+    irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h);
+    irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d);
+    irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h);
+    irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d);
+    irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h);
+    irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d);
+    irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h);
+    irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d);
+    irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h);
+    irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d);
+    irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h);
+    irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d);
+    irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h);
+    irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d);
+    irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h);
+    irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d);
+    irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h);
+    irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d);
+    irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h);
+    irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d);
+    irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 9,e,f,g,h);
+    irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d);
+    irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h);
+    irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d);
+    irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h);
+    irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d);
+    irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h);
+    irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d);
+    irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h);
+    irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d);
+    STORE32L(a, &pt[0]);STORE32L(b, &pt[4]);STORE32L(c, &pt[8]);STORE32L(d, &pt[12]);
+}
+
+#ifdef CLEAN_STACK
+void serpent_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
+{
+   _serpent_ecb_decrypt(ct, pt, skey);
+   burn_stack(sizeof(unsigned long)*24);
+}
+#endif
+
+int serpent_test(void)
+{
+   static const struct {
+       int keylen;
+       unsigned char key[32], pt[16], ct[16];
+   } tests[] = {
+   {
+      16,
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0xdd, 0xd2, 0x6b, 0x98, 0xa5, 0xff, 0xd8, 0x2c,
+        0x05, 0x34, 0x5a, 0x9d, 0xad, 0xbf, 0xaf, 0x49 }
+   },
+   {
+      16,
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
+      { 0x4a, 0xe9, 0xa2, 0x0b, 0x2b, 0x14, 0xa1, 0x02,
+        0x90, 0xcb, 0xb8, 0x20, 0xb7, 0xff, 0xb5, 0x10 }
+   },
+   {
+      24,
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08 },
+      { 0xe1, 0x1b, 0x01, 0x52, 0x4e, 0xa1, 0xf4, 0x65, 
+        0xa2, 0xa2, 0x00, 0x43, 0xeb, 0x9f, 0x7e, 0x8a }
+   },
+   {
+      32,
+      { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0xe0, 0x88, 0x5d, 0x44, 0x60, 0x37, 0x34, 0x69,
+        0xd1, 0xfa, 0x6c, 0x36, 0xa6, 0xe1, 0xc5, 0x2f }
+   },
+   {
+      32,
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x17, 0xc6, 0x25, 0x8e, 0x60, 0x09, 0xe2, 0x82,
+        0x66, 0x18, 0x69, 0xd5, 0x25, 0xf7, 0xd2, 0x04 }
+   },
+   {
+      32,
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x9f, 0xe1, 0x43, 0x25, 0x0d, 0x00, 0xe2, 0x56, 
+        0x96, 0xb0, 0x1e, 0x0a, 0x2e, 0xd0, 0x5d, 0xb3 }
+   }
+   };
+
+   unsigned char buf[2][16];
+   int x, err;
+   symmetric_key key;
+
+   for (x = 0; x < (int)(sizeof(tests) / sizeof(tests[0])); x++) {
+      /* setup key */
+      if ((err = serpent_setup(tests[x].key, tests[x].keylen, 0, &key))!= CRYPT_OK) {
+         return err;
+      }
+
+      /* encrypt and decrypt */
+      serpent_ecb_encrypt(tests[x].pt, buf[0], &key);
+      serpent_ecb_decrypt(buf[0], buf[1], &key);
+
+      /* compare */
+      if (memcmp(buf[0], tests[x].ct, 16) != 0 || memcmp(buf[1], tests[x].pt, 16) != 0) {
+         return CRYPT_FAIL_TESTVECTOR;
+      }
+   }
+   return CRYPT_OK;
+}
+
+int serpent_keysize(int *desired_keysize)
+{
+   _ARGCHK(desired_keysize != NULL);
+
+   if (*desired_keysize < 16)
+      return CRYPT_INVALID_KEYSIZE;
+   if (*desired_keysize > 32)
+      *desired_keysize = 32;
+   return CRYPT_OK;
+}
+
+#endif
+
+