# HG changeset patch # User Matt Johnston # Date 1371049064 -28800 # Node ID 439b7aaaec9e75b166104b2d1201e17bee038089 # Parent 3aa92c7f379c49fa9613bf712d740e185b77b7f8 Get aes from avr231 appnote instead diff -r 3aa92c7f379c -r 439b7aaaec9e aes.c --- a/aes.c Thu Jun 06 00:05:13 2013 +0800 +++ b/aes.c Wed Jun 12 22:57:44 2013 +0800 @@ -1,539 +1,481 @@ -// advanced encryption standard -// Original author: Karl Malbrain, malbrain@yahoo.com -// Ported to Atmel AVR by: Jiri Pittner, jiri@pittnerovi.com +#include "aes.h" +//#include "loader.h" // -/* -This work, including the source code, documentation -and related data, is placed into the public domain. +#define KEY_COUNT 1 + +#if KEY_COUNT > 0 + +//#include "aeskeys.inc" + + + + +typedef unsigned char byte; + + + +#define BPOLY 0x1b //!< Lower 8 bits of (x^8+x^4+x^3+x+1), ie. (x^4+x^3+x+1). +#define BLOCKSIZE 16 //!< Block size in number of bytes. + + + +#if KEY_COUNT == 1 + #define KEYBITS 128 //!< Use AES128. +#elif KEY_COUNT == 2 + #define KEYBITS 192 //!< Use AES196. +#elif KEY_COUNT == 3 + #define KEYBITS 256 //!< Use AES256. +#else + #error Use 1, 2 or 3 keys! +#endif -The original author is Karl Malbrain. +#if KEYBITS == 128 + #define ROUNDS 10 //!< Number of rounds. + #define KEYLENGTH 16 //!< Key length in number of bytes. +#elif KEYBITS == 192 + #define ROUNDS 12 //!< Number of rounds. + #define KEYLENGTH 24 //!< // Key length in number of bytes. +#elif KEYBITS == 256 + #define ROUNDS 14 //!< Number of rounds. + #define KEYLENGTH 32 //!< Key length in number of bytes. +#else + #error Key must be 128, 192 or 256 bits! +#endif + +#define EXPANDED_KEY_SIZE (BLOCKSIZE * (ROUNDS+1)) //!< 176, 208 or 240 bytes. + + + +byte block1[ 256 ]; //!< Workspace 1. +byte block2[ 256 ]; //!< Worksapce 2. + + + +byte * powTbl; //!< Final location of exponentiation lookup table. +byte * logTbl; //!< Final location of logarithm lookup table. +byte * sBox; //!< Final location of s-box. +byte * sBoxInv; //!< Final location of inverse s-box. +byte * expandedKey; //!< Final location of expanded key. + + + +void CalcPowLog( byte * powTbl, byte * logTbl ) +{ + byte i = 0; + byte t = 1; + + do { + // Use 0x03 as root for exponentiation and logarithms. + powTbl[i] = t; + logTbl[t] = i; + i++; -THIS SOFTWARE IS PROVIDED AS-IS WITHOUT WARRANTY -OF ANY KIND, NOT EVEN THE IMPLIED WARRANTY OF -MERCHANTABILITY. THE AUTHOR OF THIS SOFTWARE, -ASSUMES _NO_ RESPONSIBILITY FOR ANY CONSEQUENCE -RESULTING FROM THE USE, MODIFICATION, OR -REDISTRIBUTION OF THIS SOFTWARE. -*/ + // Muliply t by 3 in GF(2^8). + t ^= (t << 1) ^ (t & 0x80 ? BPOLY : 0); + } while( t != 1 ); // Cyclic properties ensure that i < 255. + + powTbl[255] = powTbl[0]; // 255 = '-0', 254 = -1, etc. +} + + + +void CalcSBox( byte * sBox ) +{ + byte i, rot; + byte temp; + byte result; + + // Fill all entries of sBox[]. + i = 0; + do { + // Inverse in GF(2^8). + if( i > 0 ) { + temp = powTbl[ 255 - logTbl[i] ]; + } else { + temp = 0; + } + + // Affine transformation in GF(2). + result = temp ^ 0x63; // Start with adding a vector in GF(2). + for( rot = 0; rot < 4; rot++ ) { + // Rotate left. + temp = (temp<<1) | (temp>>7); -#include -#include + // Add rotated byte in GF(2). + result ^= temp; + } + + // Put result in table. + sBox[i] = result; + } while( ++i != 0 ); +} + + + +void CalcSBoxInv( byte * sBox, byte * sBoxInv ) +{ + byte i = 0; + byte j = 0; -#include //tables have to reside in flash memory + // Iterate through all elements in sBoxInv using i. + do { + // Search through sBox using j. + do { + // Check if current j is the inverse of current i. + if( sBox[ j ] == i ) { + // If so, set sBoxInc and indicate search finished. + sBoxInv[ i ] = j; + j = 255; + } + } while( ++j != 0 ); + } while( ++i != 0 ); +} + + + +void CycleLeft( byte * row ) +{ + // Cycle 4 bytes in an array left once. + byte temp = row[0]; + row[0] = row[1]; + row[1] = row[2]; + row[2] = row[3]; + row[3] = temp; +} + -// AES only supports Nb=4 -#define Nb 4 // number of columns in the state & expanded key - -#define Nk 4 // number of columns in a key -#define Nr 10 // number of rounds in encryption +void InvMixColumn( byte * column ) +{ + byte result0, result1, result2, result3; + byte column0, column1, column2, column3; + byte xor; -#define Sbox(i) (pgm_read_byte(&P_Sbox[i])) -const unsigned char P_Sbox[256] __attribute__ ((__progmem__)) = { // forward s-box -0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, -0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, -0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, -0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, -0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, -0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, -0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, -0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, -0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, -0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, -0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, -0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, -0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, -0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, -0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, -0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; + // This generates more effective code, at least + // with the IAR C compiler. + column0 = column[0]; + column1 = column[1]; + column2 = column[2]; + column3 = column[3]; -#define InvSbox(i) (pgm_read_byte(&P_InvSbox[i])) -const unsigned char P_InvSbox[256] __attribute__ ((__progmem__)) = { // inverse s-box -0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, -0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, -0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, -0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, -0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, -0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, -0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, -0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, -0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, -0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, -0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, -0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, -0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, -0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, -0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, -0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}; + // Partial sums (modular addition using XOR). + result0 = column1 ^ column2 ^ column3; + result1 = column0 ^ column2 ^ column3; + result2 = column0 ^ column1 ^ column3; + result3 = column0 ^ column1 ^ column2; -// combined Xtimes2[Sbox[]] -#define Xtime2Sbox(i) (pgm_read_byte(&P_Xtime2Sbox[i])) -const unsigned char P_Xtime2Sbox[256] __attribute__ ((__progmem__)) = { -0xc6, 0xf8, 0xee, 0xf6, 0xff, 0xd6, 0xde, 0x91, 0x60, 0x02, 0xce, 0x56, 0xe7, 0xb5, 0x4d, 0xec, -0x8f, 0x1f, 0x89, 0xfa, 0xef, 0xb2, 0x8e, 0xfb, 0x41, 0xb3, 0x5f, 0x45, 0x23, 0x53, 0xe4, 0x9b, -0x75, 0xe1, 0x3d, 0x4c, 0x6c, 0x7e, 0xf5, 0x83, 0x68, 0x51, 0xd1, 0xf9, 0xe2, 0xab, 0x62, 0x2a, -0x08, 0x95, 0x46, 0x9d, 0x30, 0x37, 0x0a, 0x2f, 0x0e, 0x24, 0x1b, 0xdf, 0xcd, 0x4e, 0x7f, 0xea, -0x12, 0x1d, 0x58, 0x34, 0x36, 0xdc, 0xb4, 0x5b, 0xa4, 0x76, 0xb7, 0x7d, 0x52, 0xdd, 0x5e, 0x13, -0xa6, 0xb9, 0x00, 0xc1, 0x40, 0xe3, 0x79, 0xb6, 0xd4, 0x8d, 0x67, 0x72, 0x94, 0x98, 0xb0, 0x85, -0xbb, 0xc5, 0x4f, 0xed, 0x86, 0x9a, 0x66, 0x11, 0x8a, 0xe9, 0x04, 0xfe, 0xa0, 0x78, 0x25, 0x4b, -0xa2, 0x5d, 0x80, 0x05, 0x3f, 0x21, 0x70, 0xf1, 0x63, 0x77, 0xaf, 0x42, 0x20, 0xe5, 0xfd, 0xbf, -0x81, 0x18, 0x26, 0xc3, 0xbe, 0x35, 0x88, 0x2e, 0x93, 0x55, 0xfc, 0x7a, 0xc8, 0xba, 0x32, 0xe6, -0xc0, 0x19, 0x9e, 0xa3, 0x44, 0x54, 0x3b, 0x0b, 0x8c, 0xc7, 0x6b, 0x28, 0xa7, 0xbc, 0x16, 0xad, -0xdb, 0x64, 0x74, 0x14, 0x92, 0x0c, 0x48, 0xb8, 0x9f, 0xbd, 0x43, 0xc4, 0x39, 0x31, 0xd3, 0xf2, -0xd5, 0x8b, 0x6e, 0xda, 0x01, 0xb1, 0x9c, 0x49, 0xd8, 0xac, 0xf3, 0xcf, 0xca, 0xf4, 0x47, 0x10, -0x6f, 0xf0, 0x4a, 0x5c, 0x38, 0x57, 0x73, 0x97, 0xcb, 0xa1, 0xe8, 0x3e, 0x96, 0x61, 0x0d, 0x0f, -0xe0, 0x7c, 0x71, 0xcc, 0x90, 0x06, 0xf7, 0x1c, 0xc2, 0x6a, 0xae, 0x69, 0x17, 0x99, 0x3a, 0x27, -0xd9, 0xeb, 0x2b, 0x22, 0xd2, 0xa9, 0x07, 0x33, 0x2d, 0x3c, 0x15, 0xc9, 0x87, 0xaa, 0x50, 0xa5, -0x03, 0x59, 0x09, 0x1a, 0x65, 0xd7, 0x84, 0xd0, 0x82, 0x29, 0x5a, 0x1e, 0x7b, 0xa8, 0x6d, 0x2c -}; + // Multiply column bytes by 2 modulo BPOLY. + // This operation is done the following way to ensure cycle count + // independent from data contents. Take care when changing this code. + xor = 0; + if (column0 & 0x80) { + xor = BPOLY; + } + column0 <<= 1; + column0 ^= xor; + + xor = 0; + if (column1 & 0x80) { + xor = BPOLY; + } + column1 <<= 1; + column1 ^= xor; + + xor = 0; + if (column2 & 0x80) { + xor = BPOLY; + } + column2 <<= 1; + column2 ^= xor; + + xor = 0; + if (column3 & 0x80) { + xor = BPOLY; + } + column3 <<= 1; + column3 ^= xor; -// combined Xtimes3[Sbox[]] -#define Xtime3Sbox(i) (pgm_read_byte(&P_Xtime3Sbox[i])) -const unsigned char P_Xtime3Sbox[256] __attribute__ ((__progmem__)) = { -0xa5, 0x84, 0x99, 0x8d, 0x0d, 0xbd, 0xb1, 0x54, 0x50, 0x03, 0xa9, 0x7d, 0x19, 0x62, 0xe6, 0x9a, -0x45, 0x9d, 0x40, 0x87, 0x15, 0xeb, 0xc9, 0x0b, 0xec, 0x67, 0xfd, 0xea, 0xbf, 0xf7, 0x96, 0x5b, -0xc2, 0x1c, 0xae, 0x6a, 0x5a, 0x41, 0x02, 0x4f, 0x5c, 0xf4, 0x34, 0x08, 0x93, 0x73, 0x53, 0x3f, -0x0c, 0x52, 0x65, 0x5e, 0x28, 0xa1, 0x0f, 0xb5, 0x09, 0x36, 0x9b, 0x3d, 0x26, 0x69, 0xcd, 0x9f, -0x1b, 0x9e, 0x74, 0x2e, 0x2d, 0xb2, 0xee, 0xfb, 0xf6, 0x4d, 0x61, 0xce, 0x7b, 0x3e, 0x71, 0x97, -0xf5, 0x68, 0x00, 0x2c, 0x60, 0x1f, 0xc8, 0xed, 0xbe, 0x46, 0xd9, 0x4b, 0xde, 0xd4, 0xe8, 0x4a, -0x6b, 0x2a, 0xe5, 0x16, 0xc5, 0xd7, 0x55, 0x94, 0xcf, 0x10, 0x06, 0x81, 0xf0, 0x44, 0xba, 0xe3, -0xf3, 0xfe, 0xc0, 0x8a, 0xad, 0xbc, 0x48, 0x04, 0xdf, 0xc1, 0x75, 0x63, 0x30, 0x1a, 0x0e, 0x6d, -0x4c, 0x14, 0x35, 0x2f, 0xe1, 0xa2, 0xcc, 0x39, 0x57, 0xf2, 0x82, 0x47, 0xac, 0xe7, 0x2b, 0x95, -0xa0, 0x98, 0xd1, 0x7f, 0x66, 0x7e, 0xab, 0x83, 0xca, 0x29, 0xd3, 0x3c, 0x79, 0xe2, 0x1d, 0x76, -0x3b, 0x56, 0x4e, 0x1e, 0xdb, 0x0a, 0x6c, 0xe4, 0x5d, 0x6e, 0xef, 0xa6, 0xa8, 0xa4, 0x37, 0x8b, -0x32, 0x43, 0x59, 0xb7, 0x8c, 0x64, 0xd2, 0xe0, 0xb4, 0xfa, 0x07, 0x25, 0xaf, 0x8e, 0xe9, 0x18, -0xd5, 0x88, 0x6f, 0x72, 0x24, 0xf1, 0xc7, 0x51, 0x23, 0x7c, 0x9c, 0x21, 0xdd, 0xdc, 0x86, 0x85, -0x90, 0x42, 0xc4, 0xaa, 0xd8, 0x05, 0x01, 0x12, 0xa3, 0x5f, 0xf9, 0xd0, 0x91, 0x58, 0x27, 0xb9, -0x38, 0x13, 0xb3, 0x33, 0xbb, 0x70, 0x89, 0xa7, 0xb6, 0x22, 0x92, 0x20, 0x49, 0xff, 0x78, 0x7a, -0x8f, 0xf8, 0x80, 0x17, 0xda, 0x31, 0xc6, 0xb8, 0xc3, 0xb0, 0x77, 0x11, 0xcb, 0xfc, 0xd6, 0x3a -}; - -// modular multiplication tables -// based on: - -// Xtime2[x] = (x & 0x80 ? 0x1b : 0) ^ (x + x) -// Xtime3[x] = x^Xtime2[x]; + // More partial sums. + result0 ^= column0 ^ column1; + result1 ^= column1 ^ column2; + result2 ^= column2 ^ column3; + result3 ^= column0 ^ column3; -#define Xtime2(i) (pgm_read_byte(&P_Xtime2[i])) -const unsigned char P_Xtime2[256] __attribute__ ((__progmem__)) = { -0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, -0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, -0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, -0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, -0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, -0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, -0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, -0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, -0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, -0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, -0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, -0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, -0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, -0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, -0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, -0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5}; + // Multiply column bytes by 2 modulo BPOLY. + // This operation is done the following way to ensure cycle count + // independent from data contents. Take care when changing this code. + xor = 0; + if (column0 & 0x80) { + xor = BPOLY; + } + column0 <<= 1; + column0 ^= xor; + + xor = 0; + if (column1 & 0x80) { + xor = BPOLY; + } + column1 <<= 1; + column1 ^= xor; + + xor = 0; + if (column2 & 0x80) { + xor = BPOLY; + } + column2 <<= 1; + column2 ^= xor; + + xor = 0; + if (column3 & 0x80) { + xor = BPOLY; + } + column3 <<= 1; + column3 ^= xor; -#define Xtime9(i) (pgm_read_byte(&P_Xtime9[i])) -const unsigned char P_Xtime9[256] __attribute__ ((__progmem__)) = { -0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, -0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, -0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, -0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, -0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, -0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, -0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, -0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, -0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, -0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, -0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, -0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, -0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, -0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, -0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, -0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46}; - -#define XtimeB(i) (pgm_read_byte(&P_XtimeB[i])) -const unsigned char P_XtimeB[256] __attribute__ ((__progmem__)) = { -0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, -0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, -0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, -0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, -0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, -0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, -0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, -0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, -0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, -0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, -0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, -0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, -0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, -0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, -0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, -0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3}; + // More partial sums. + result0 ^= column0 ^ column2; + result1 ^= column1 ^ column3; + result2 ^= column0 ^ column2; + result3 ^= column1 ^ column3; -#define XtimeD(i) (pgm_read_byte(&P_XtimeD[i])) -const unsigned char P_XtimeD[256] __attribute__ ((__progmem__)) = { -0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, -0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, -0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, -0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, -0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, -0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, -0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, -0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, -0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, -0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, -0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, -0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, -0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, -0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, -0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, -0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97}; + // Multiply column bytes by 2 modulo BPOLY. + // This operation is done the following way to ensure cycle count + // independent from data contents. Take care when changing this code. + xor = 0; + if (column0 & 0x80) { + xor = BPOLY; + } + column0 <<= 1; + column0 ^= xor; + + xor = 0; + if (column1 & 0x80) { + xor = BPOLY; + } + column1 <<= 1; + column1 ^= xor; + + xor = 0; + if (column2 & 0x80) { + xor = BPOLY; + } + column2 <<= 1; + column2 ^= xor; + + xor = 0; + if (column3 & 0x80) { + xor = BPOLY; + } + column3 <<= 1; + column3 ^= xor; -#define XtimeE(i) (pgm_read_byte(&P_XtimeE[i])) -const unsigned char P_XtimeE[256] __attribute__ ((__progmem__)) = { -0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, -0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, -0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, -0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, -0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, -0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, -0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, -0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, -0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, -0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, -0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, -0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, -0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, -0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, -0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, -0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d}; + // Final partial sum. + column0 ^= column1 ^ column2 ^ column3; + + // Final sums stored into original column bytes. + column[0] = result0 ^ column0; + column[1] = result1 ^ column0; + column[2] = result2 ^ column0; + column[3] = result3 ^ column0; +} + + -// exchanges columns in each of 4 rows -// row0 - unchanged, row1- shifted left 1, -// row2 - shifted left 2 and row3 - shifted left 3 -void ShiftRows (unsigned char *state) +void SubBytes( byte * bytes, byte count ) { -unsigned char tmp; + do { + *bytes = sBox[ *bytes ]; // Substitute every byte in state. + bytes++; + } while( --count ); +} - // just substitute row 0 - state[0] = Sbox(state[0]), state[4] = Sbox(state[4]); - state[8] = Sbox(state[8]), state[12] = Sbox(state[12]); - // rotate row 1 - tmp = Sbox(state[1]), state[1] = Sbox(state[5]); - state[5] = Sbox(state[9]), state[9] = Sbox(state[13]), state[13] = tmp; - // rotate row 2 - tmp = Sbox(state[2]), state[2] = Sbox(state[10]), state[10] = tmp; - tmp = Sbox(state[6]), state[6] = Sbox(state[14]), state[14] = tmp; - - // rotate row 3 - tmp = Sbox(state[15]), state[15] = Sbox(state[11]); - state[11] = Sbox(state[7]), state[7] = Sbox(state[3]), state[3] = tmp; +void InvSubBytesAndXOR( byte * bytes, byte * key, byte count ) +{ + do { +// *bytes = sBoxInv[ *bytes ] ^ *key; // Inverse substitute every byte in state and add key. + *bytes = block2[ *bytes ] ^ *key; // Use block2 directly. Increases speed. + bytes++; + key++; + } while( --count ); } -// restores columns in each of 4 rows -// row0 - unchanged, row1- shifted right 1, -// row2 - shifted right 2 and row3 - shifted right 3 -void InvShiftRows (unsigned char *state) + + +void InvShiftRows( byte * state ) { -unsigned char tmp; + byte temp; + + // Note: State is arranged column by column. - // restore row 0 - state[0] = InvSbox(state[0]), state[4] = InvSbox(state[4]); - state[8] = InvSbox(state[8]), state[12] = InvSbox(state[12]); + // Cycle second row right one time. + temp = state[ 1 + 3*4 ]; + state[ 1 + 3*4 ] = state[ 1 + 2*4 ]; + state[ 1 + 2*4 ] = state[ 1 + 1*4 ]; + state[ 1 + 1*4 ] = state[ 1 + 0*4 ]; + state[ 1 + 0*4 ] = temp; - // restore row 1 - tmp = InvSbox(state[13]), state[13] = InvSbox(state[9]); - state[9] = InvSbox(state[5]), state[5] = InvSbox(state[1]), state[1] = tmp; + // Cycle third row right two times. + temp = state[ 2 + 0*4 ]; + state[ 2 + 0*4 ] = state[ 2 + 2*4 ]; + state[ 2 + 2*4 ] = temp; + temp = state[ 2 + 1*4 ]; + state[ 2 + 1*4 ] = state[ 2 + 3*4 ]; + state[ 2 + 3*4 ] = temp; - // restore row 2 - tmp = InvSbox(state[2]), state[2] = InvSbox(state[10]), state[10] = tmp; - tmp = InvSbox(state[6]), state[6] = InvSbox(state[14]), state[14] = tmp; - - // restore row 3 - tmp = InvSbox(state[3]), state[3] = InvSbox(state[7]); - state[7] = InvSbox(state[11]), state[11] = InvSbox(state[15]), state[15] = tmp; + // Cycle fourth row right three times, ie. left once. + temp = state[ 3 + 0*4 ]; + state[ 3 + 0*4 ] = state[ 3 + 1*4 ]; + state[ 3 + 1*4 ] = state[ 3 + 2*4 ]; + state[ 3 + 2*4 ] = state[ 3 + 3*4 ]; + state[ 3 + 3*4 ] = temp; } -// recombine and mix each row in a column -void MixSubColumns (unsigned char *state) -{ -unsigned char tmp[4 * Nb]; + - // mixing column 0 - tmp[0] = Xtime2Sbox(state[0]) ^ Xtime3Sbox(state[5]) ^ Sbox(state[10]) ^ Sbox(state[15]); - tmp[1] = Sbox(state[0]) ^ Xtime2Sbox(state[5]) ^ Xtime3Sbox(state[10]) ^ Sbox(state[15]); - tmp[2] = Sbox(state[0]) ^ Sbox(state[5]) ^ Xtime2Sbox(state[10]) ^ Xtime3Sbox(state[15]); - tmp[3] = Xtime3Sbox(state[0]) ^ Sbox(state[5]) ^ Sbox(state[10]) ^ Xtime2Sbox(state[15]); +void InvMixColumns( byte * state ) +{ + InvMixColumn( state + 0*4 ); + InvMixColumn( state + 1*4 ); + InvMixColumn( state + 2*4 ); + InvMixColumn( state + 3*4 ); +} + + - // mixing column 1 - tmp[4] = Xtime2Sbox(state[4]) ^ Xtime3Sbox(state[9]) ^ Sbox(state[14]) ^ Sbox(state[3]); - tmp[5] = Sbox(state[4]) ^ Xtime2Sbox(state[9]) ^ Xtime3Sbox(state[14]) ^ Sbox(state[3]); - tmp[6] = Sbox(state[4]) ^ Sbox(state[9]) ^ Xtime2Sbox(state[14]) ^ Xtime3Sbox(state[3]); - tmp[7] = Xtime3Sbox(state[4]) ^ Sbox(state[9]) ^ Sbox(state[14]) ^ Xtime2Sbox(state[3]); +void XORBytes( byte * bytes1, byte * bytes2, byte count ) +{ + do { + *bytes1 ^= *bytes2; // Add in GF(2), ie. XOR. + bytes1++; + bytes2++; + } while( --count ); +} - // mixing column 2 - tmp[8] = Xtime2Sbox(state[8]) ^ Xtime3Sbox(state[13]) ^ Sbox(state[2]) ^ Sbox(state[7]); - tmp[9] = Sbox(state[8]) ^ Xtime2Sbox(state[13]) ^ Xtime3Sbox(state[2]) ^ Sbox(state[7]); - tmp[10] = Sbox(state[8]) ^ Sbox(state[13]) ^ Xtime2Sbox(state[2]) ^ Xtime3Sbox(state[7]); - tmp[11] = Xtime3Sbox(state[8]) ^ Sbox(state[13]) ^ Sbox(state[2]) ^ Xtime2Sbox(state[7]); + - // mixing column 3 - tmp[12] = Xtime2Sbox(state[12]) ^ Xtime3Sbox(state[1]) ^ Sbox(state[6]) ^ Sbox(state[11]); - tmp[13] = Sbox(state[12]) ^ Xtime2Sbox(state[1]) ^ Xtime3Sbox(state[6]) ^ Sbox(state[11]); - tmp[14] = Sbox(state[12]) ^ Sbox(state[1]) ^ Xtime2Sbox(state[6]) ^ Xtime3Sbox(state[11]); - tmp[15] = Xtime3Sbox(state[12]) ^ Sbox(state[1]) ^ Sbox(state[6]) ^ Xtime2Sbox(state[11]); - - memcpy (state, tmp, sizeof(tmp)); +void CopyBytes( byte * to, byte * from, byte count ) +{ + do { + *to = *from; + to++; + from++; + } while( --count ); } -// restore and un-mix each row in a column -void InvMixSubColumns (unsigned char *state) + + +void KeyExpansion( byte * key, byte * expandedKey ) { -unsigned char tmp[4 * Nb]; -int i; + byte temp[4]; + byte i; + byte Rcon[4] = { 0x01, 0x00, 0x00, 0x00 }; // Round constant. + +#if 0 + // matt + unsigned char BOOTFLASH * key = kTable; +#endif - // restore column 0 - tmp[0] = XtimeE(state[0]) ^ XtimeB(state[1]) ^ XtimeD(state[2]) ^ Xtime9(state[3]); - tmp[5] = Xtime9(state[0]) ^ XtimeE(state[1]) ^ XtimeB(state[2]) ^ XtimeD(state[3]); - tmp[10] = XtimeD(state[0]) ^ Xtime9(state[1]) ^ XtimeE(state[2]) ^ XtimeB(state[3]); - tmp[15] = XtimeB(state[0]) ^ XtimeD(state[1]) ^ Xtime9(state[2]) ^ XtimeE(state[3]); + // Copy key to start of expanded key. + i = KEYLENGTH; + do { + *expandedKey = *key; + expandedKey++; + key++; + } while( --i ); + + // Prepare last 4 bytes of key in temp. + expandedKey -= 4; + temp[0] = *(expandedKey++); + temp[1] = *(expandedKey++); + temp[2] = *(expandedKey++); + temp[3] = *(expandedKey++); - // restore column 1 - tmp[4] = XtimeE(state[4]) ^ XtimeB(state[5]) ^ XtimeD(state[6]) ^ Xtime9(state[7]); - tmp[9] = Xtime9(state[4]) ^ XtimeE(state[5]) ^ XtimeB(state[6]) ^ XtimeD(state[7]); - tmp[14] = XtimeD(state[4]) ^ Xtime9(state[5]) ^ XtimeE(state[6]) ^ XtimeB(state[7]); - tmp[3] = XtimeB(state[4]) ^ XtimeD(state[5]) ^ Xtime9(state[6]) ^ XtimeE(state[7]); + // Expand key. + i = KEYLENGTH; + while( i < BLOCKSIZE*(ROUNDS+1) ) { + // Are we at the start of a multiple of the key size? + if( (i % KEYLENGTH) == 0 ) { + CycleLeft( temp ); // Cycle left once. + SubBytes( temp, 4 ); // Substitute each byte. + XORBytes( temp, Rcon, 4 ); // Add constant in GF(2). + *Rcon = (*Rcon << 1) ^ (*Rcon & 0x80 ? BPOLY : 0); + } - // restore column 2 - tmp[8] = XtimeE(state[8]) ^ XtimeB(state[9]) ^ XtimeD(state[10]) ^ Xtime9(state[11]); - tmp[13] = Xtime9(state[8]) ^ XtimeE(state[9]) ^ XtimeB(state[10]) ^ XtimeD(state[11]); - tmp[2] = XtimeD(state[8]) ^ Xtime9(state[9]) ^ XtimeE(state[10]) ^ XtimeB(state[11]); - tmp[7] = XtimeB(state[8]) ^ XtimeD(state[9]) ^ Xtime9(state[10]) ^ XtimeE(state[11]); + // Keysize larger than 24 bytes, ie. larger that 192 bits? + #if KEYLENGTH > 24 + // Are we right past a block size? + else if( (i % KEYLENGTH) == BLOCKSIZE ) { + SubBytes( temp, 4 ); // Substitute each byte. + } + #endif - // restore column 3 - tmp[12] = XtimeE(state[12]) ^ XtimeB(state[13]) ^ XtimeD(state[14]) ^ Xtime9(state[15]); - tmp[1] = Xtime9(state[12]) ^ XtimeE(state[13]) ^ XtimeB(state[14]) ^ XtimeD(state[15]); - tmp[6] = XtimeD(state[12]) ^ Xtime9(state[13]) ^ XtimeE(state[14]) ^ XtimeB(state[15]); - tmp[11] = XtimeB(state[12]) ^ XtimeD(state[13]) ^ Xtime9(state[14]) ^ XtimeE(state[15]); + // Add bytes in GF(2) one KEYLENGTH away. + XORBytes( temp, expandedKey - KEYLENGTH, 4 ); - for( i=0; i < 4 * Nb; i++ ) - state[i] = InvSbox(tmp[i]); + // Copy result to current 4 bytes. + *(expandedKey++) = temp[ 0 ]; + *(expandedKey++) = temp[ 1 ]; + *(expandedKey++) = temp[ 2 ]; + *(expandedKey++) = temp[ 3 ]; + + i += 4; // Next 4 bytes. + } } -// encrypt/decrypt columns of the key -// n.b. you can replace this with -// byte-wise xor if you wish. + + +void InvCipher( byte * block, byte * expandedKey ) +{ + byte round = ROUNDS-1; + expandedKey += BLOCKSIZE * ROUNDS; + + XORBytes( block, expandedKey, 16 ); + expandedKey -= BLOCKSIZE; -void AddRoundKey (unsigned *state, unsigned *key) -{ -int idx; + do { + InvShiftRows( block ); + InvSubBytesAndXOR( block, expandedKey, 16 ); + expandedKey -= BLOCKSIZE; + InvMixColumns( block ); + } while( --round ); - for( idx = 0; idx < 4; idx++ ) - state[idx] ^= key[idx]; + InvShiftRows( block ); + InvSubBytesAndXOR( block, expandedKey, 16 ); } -unsigned char Rcon[11] = { -0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36}; + -// produce Nb bytes for each round -void ExpandKey (unsigned char *key, unsigned char *expkey) +void aesInit( unsigned char *key, unsigned char * tempbuf ) { -unsigned char tmp0, tmp1, tmp2, tmp3, tmp4; -unsigned idx; + powTbl = block1; + logTbl = block2; + CalcPowLog( powTbl, logTbl ); - memcpy (expkey, key, Nk * 4); + sBox = tempbuf; + CalcSBox( sBox ); - for( idx = Nk; idx < Nb * (Nr + 1); idx++ ) { - tmp0 = expkey[4*idx - 4]; - tmp1 = expkey[4*idx - 3]; - tmp2 = expkey[4*idx - 2]; - tmp3 = expkey[4*idx - 1]; - if( !(idx % Nk) ) { - tmp4 = tmp3; - tmp3 = Sbox(tmp0); - tmp0 = Sbox(tmp1) ^ Rcon[idx/Nk]; - tmp1 = Sbox(tmp2); - tmp2 = Sbox(tmp4); - } else if( Nk > 6 && idx % Nk == 4 ) { - tmp0 = Sbox(tmp0); - tmp1 = Sbox(tmp1); - tmp2 = Sbox(tmp2); - tmp3 = Sbox(tmp3); - } + expandedKey = block1; + KeyExpansion( key, expandedKey ); + + sBoxInv = block2; // Must be block2. + CalcSBoxInv( sBox, sBoxInv ); +} + + - expkey[4*idx+0] = expkey[4*idx - 4*Nk + 0] ^ tmp0; - expkey[4*idx+1] = expkey[4*idx - 4*Nk + 1] ^ tmp1; - expkey[4*idx+2] = expkey[4*idx - 4*Nk + 2] ^ tmp2; - expkey[4*idx+3] = expkey[4*idx - 4*Nk + 3] ^ tmp3; +void aesDecrypt( unsigned char * buffer, unsigned char * chainBlock ) +{ + byte temp[ BLOCKSIZE ]; + + CopyBytes( temp, buffer, BLOCKSIZE ); + InvCipher( buffer, expandedKey ); + if (chainBlock) + { + XORBytes( buffer, chainBlock, BLOCKSIZE ); + CopyBytes( chainBlock, temp, BLOCKSIZE ); } } -// encrypt one 128 bit block -void Encrypt (unsigned char *in, unsigned char *expkey, unsigned char *out) -{ -unsigned char state[Nb * 4]; -unsigned round; - - memcpy (state, in, Nb * 4); - AddRoundKey ((unsigned *)state, (unsigned *)expkey); - - for( round = 1; round < Nr + 1; round++ ) { - if( round < Nr ) - MixSubColumns (state); - else - ShiftRows (state); - - AddRoundKey ((unsigned *)state, (unsigned *)expkey + round * Nb); - } - - memcpy (out, state, sizeof(state)); -} - -void Decrypt (unsigned char *in, unsigned char *expkey, unsigned char *out) -{ -unsigned char state[Nb * 4]; -unsigned round; - - memcpy (state, in, sizeof(state)); - - AddRoundKey ((unsigned *)state, (unsigned *)expkey + Nr * Nb); - InvShiftRows(state); - - for( round = Nr; round--; ) - { - AddRoundKey ((unsigned *)state, (unsigned *)expkey + round * Nb); - if( round ) - InvMixSubColumns (state); - } - - memcpy (out, state, sizeof(state)); -} - - -#if 0 - -/*avr specific routines*/ -#include "backward.h" -#include -#include -#include -#include -#include - - - -#if defined(at90s2313) || defined(at90s8535) -#else -#define ATmega #endif - -#ifdef ATmega -#define USR UCSRA -#endif - - - -void printP (PGM_P string){ - char c; - c=pgm_read_byte(string); - while (c) { - loop_until_bit_is_set(USR, UDRE); - UDR = c; - c=pgm_read_byte(++string); - } - return; - } - - - -void print (const char *string){ - while (*string) { - loop_until_bit_is_set(USR, UDRE); - UDR = *string++; - } - return; - } - -void scan(char *string){ -char c; - do { - do { - loop_until_bit_is_set(USR, RXC); - c =UDR; - } while bit_is_set(USR, FE); - *string++ = c; - //echo the character - loop_until_bit_is_set(USR, UDRE); - UDR = c; - } while ( c != '\r' ); - loop_until_bit_is_set(USR, UDRE); - UDR = '\n'; - string[-1]=0; - } - - -//UART initialize -#ifdef ATmega -#define UCR UCSRB -#define UART_INIT(baud) { \ -UBRRH=0; \ -UBRRL= (XTAL/baud+15)/16-1; \ -UCSRB=(1<