struggling with inline assembler
salsa via digitalmars-d-ldc
digitalmars-d-ldc at puremagic.com
Thu Apr 9 08:31:33 PDT 2015
I'm trying to use Intel's AES instruction set for AES encryption.
The following piece of code works well with DMD2 but won't
compile with LDC. ldc2 tells me this:
Basic Block in function
'_D4main48__T21AES_128_KEY_EXPANSIONVAyaa7_656e6372797074Z21AES_128_KEY_EXPANSIONFNaNbNiNexPhPhZv'
does not have terminator!
label %endentry
LLVM ERROR: Broken function found, compilation aborted!
Flow control in the asm block might be the problem.
By the way, how could I access arrays (ubyte[]) instead of
pointers in inline assembly? Couldn't find a single piece of
documentation...
I preferrably omit the use of 'naked' assembler functions. I
tried to do it as in biguintx86.d but was confused with the
calling conventions. Registers are used in reverse order compared
to the C calling convention, aren't they?
******************
module main;
import std.stdio;
import core.cpuid;
void main(string[] args)
{
assert(sse2 && aes, "hardware does not support sse2 and aes!");
// test vectors
immutable ubyte[16] plaintext = cast(const
ubyte[])x"6bc1bee22e409f96e93d7e117393172a";
immutable ubyte[16] ciphertext = cast(const
ubyte[])x"3ad77bb40d7a3660a89ecaf32466ef97";
immutable ubyte[16] userKey = cast(const
ubyte[])x"2b7e151628aed2a6abf7158809cf4f3c";
ubyte[16*11] keySchedule; // buffer for key schedule
AES_128_KEY_EXPANSION!"encrypt"(userKey.ptr,
keySchedule.ptr); // initialize encryption key schedule
ubyte[16] buffer;
AES_128_ENCRYPT(keySchedule.ptr, plaintext.ptr, buffer.ptr); //
encrypt one 128 bit block
assert(buffer == ciphertext, "aes encryption failed");
writeln("200 OK");
}
/// AES128 11 round encryption
/// Params:
/// key = 11*16 byte key schedule
/// plain = 16 bytes plaintext
/// ciphertext = at least 16 bytes output buffer
void AES_128_ENCRYPT(in ubyte* key, in ubyte* plain, ubyte*
ciphertext)
in {
//assert(key.length == 16*ROUNDS, "invalid key size");
//assert(plain.length == 16, "invalid input block size");
//assert(ciphertext.length >= 16, "output buffer too small");
}
body {
asm {
mov RDX, key; // pointer to key schedule
// load key into XMM0-XMM10
lddqu XMM0, [RDX+0x00];
lddqu XMM1, [RDX+0x10];
lddqu XMM2, [RDX+0x20];
lddqu XMM3, [RDX+0x30];
lddqu XMM4, [RDX+0x40];
lddqu XMM5, [RDX+0x50];
lddqu XMM6, [RDX+0x60];
lddqu XMM7, [RDX+0x70];
lddqu XMM8, [RDX+0x80];
lddqu XMM9, [RDX+0x90];
lddqu XMM10, [RDX+0xA0];
// load plaintext into XMM15
mov RDX, plain; // pointer to plaintext
movdqu XMM15, [RDX]; // read plaintext block
// AES-128 encryption sequence.
// The data block is in XMM15.
// Registers XMM0–XMM10 hold the round keys(from 0 to 10 in
this order).
// In the end, XMM15 holds the encryption result.
pxor XMM15, XMM0; // Whitening step (Round 0)
aesenc XMM15, XMM1; // Round 1
aesenc XMM15, XMM2; // Round 2
aesenc XMM15, XMM3; // Round 3
aesenc XMM15, XMM4; // Round 4
aesenc XMM15, XMM5; // Round 5
aesenc XMM15, XMM6; // Round 6
aesenc XMM15, XMM7; // Round 7
aesenc XMM15, XMM8; // Round 8
aesenc XMM15, XMM9; // Round 9
aesenclast XMM15, XMM10; // Round 10
mov RDX, ciphertext; // pointer to output buffer
movdqu [RDX], XMM15; // write processed data to buffer
}
}
///
/// Expand a 128 bit user key into 11 round keys
///
/// source:
http://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf,
Figure 19. AES-128 Key Expansion: Outlined Code Example
///
/// Params:
///
/// decrypt = generate decryption key if set to true. default:
false
///
/// userKey = the AES key as given by the user
/// key = 11 round keys
///
///
enum ROUNDS = 11;
@trusted
public void AES_128_KEY_EXPANSION(string mode = "encrypt")(in
ubyte* userKey, ubyte* key) nothrow @nogc
if(mode == "encrypt" || mode == "decrypt")
in {
//assertHardwareSupport();
//assert(userKey.length == 16, "invalid key size");
//assert(key.length == ROUNDS*16, "invalid key schedule size");
}
body {
asm {
mov RDX, userKey; // pointer to user key
movdqu XMM1, [RDX]; // read user key
xor RCX, RCX; // set index to 0
mov RDX, key; // pointer to working key
movdqu [RDX+RCX], XMM1;
add RCX, 0x10; // increment by 16 bytes
aeskeygenassist XMM2, XMM1, 0x01;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x02;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x04;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x08;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x10;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x20;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x40;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x80;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x1b;
call aes_128_assist;
aeskeygenassist XMM2, XMM1, 0x36;
call aes_128_assist;
}
static if(mode == "decrypt") {
asm {
// generate inverse key
call aesimc128;
}
} asm {
jmp END;
aes_128_assist:
pshufd XMM2, XMM2, 0xff;
//vpslldq XMM3, XMM1, 0x4; // vpslldq requires AVX, pslldq
requires only SSE2
movdqu XMM3, XMM1;
pslldq XMM3, 0x4;
pxor XMM1, XMM3;
//vpslldq XMM3, XMM1, 0x4;
movdqu XMM3, XMM1;
pslldq XMM3, 0x4;
pxor XMM1, XMM3;
//vpslldq XMM3, XMM1, 0x4;
movdqu XMM3, XMM1;
pslldq XMM3, 0x4;
pxor XMM1, XMM3;
pxor XMM1, XMM2;
mov RDX, key; // pointer to working key
movdqu [RDX+RCX], XMM1; // store result in keySchedule
add RCX, 0x10; // increment index by 16 bytes
ret; // end of key_expansion_128
//
// do aesimc for all except the first and the last round key
//
aesimc128:
mov RDX, key; // pointer to key output buffer
add RDX, 0x10; // dont modify first key
mov RCX, ROUNDS-2; // set counter to number of rounds - 2
LOOP:
movdqu XMM1, [RDX]; // load
aesimc XMM1, XMM1; // invert
movdqu [RDX], XMM1; // store
add RDX, 0x10; // increment pointer
loop LOOP; // loop rounds-2 times
ret;
// end aesimc128
END:
;
}
}
More information about the digitalmars-d-ldc
mailing list