struggling with inline assembler

salsa via digitalmars-d-ldc digitalmars-d-ldc at puremagic.com
Thu Apr 9 08:31:33 PDT 2015


I'm trying to use Intel's AES instruction set for AES encryption. 
The following piece of code works well with DMD2 but won't 
compile with LDC. ldc2 tells me this:

Basic Block in function 
'_D4main48__T21AES_128_KEY_EXPANSIONVAyaa7_656e6372797074Z21AES_128_KEY_EXPANSIONFNaNbNiNexPhPhZv' 
does not have terminator!
label %endentry
LLVM ERROR: Broken function found, compilation aborted!

Flow control in the asm block might be the problem.

By the way, how could I access arrays (ubyte[]) instead of 
pointers in inline assembly? Couldn't find a single piece of 
documentation...
I preferrably omit the use of 'naked' assembler functions. I 
tried to do it as in biguintx86.d but was confused with the 
calling conventions. Registers are used in reverse order compared 
to the C calling convention, aren't they?

******************

module main;

import std.stdio;
import core.cpuid;


void main(string[] args)
{

	assert(sse2 && aes, "hardware does not support sse2 and aes!");


	// test vectors
	immutable ubyte[16] plaintext = cast(const 
ubyte[])x"6bc1bee22e409f96e93d7e117393172a";
	immutable ubyte[16] ciphertext = cast(const 
ubyte[])x"3ad77bb40d7a3660a89ecaf32466ef97";
	immutable ubyte[16] userKey = cast(const 
ubyte[])x"2b7e151628aed2a6abf7158809cf4f3c";

	ubyte[16*11] keySchedule;	// buffer for key schedule
	AES_128_KEY_EXPANSION!"encrypt"(userKey.ptr, 
keySchedule.ptr);	// initialize encryption key schedule

	ubyte[16] buffer;
	AES_128_ENCRYPT(keySchedule.ptr, plaintext.ptr, buffer.ptr);	// 
encrypt one 128 bit block


	assert(buffer == ciphertext, "aes encryption failed");
	writeln("200 OK");

}

/// AES128 11 round encryption
/// Params:
/// key = 11*16 byte key schedule
/// plain = 16 bytes plaintext
/// ciphertext = at least 16 bytes output buffer
void AES_128_ENCRYPT(in ubyte* key, in ubyte* plain, ubyte* 
ciphertext)
in {
	//assert(key.length == 16*ROUNDS, "invalid key size");
	//assert(plain.length == 16, "invalid input block size");
	//assert(ciphertext.length >= 16, "output buffer too small");
}
body {

	asm {
		mov RDX, key;	// pointer to key schedule
		// load key into XMM0-XMM10
		lddqu XMM0, [RDX+0x00];
		lddqu XMM1, [RDX+0x10];
		lddqu XMM2, [RDX+0x20];
		lddqu XMM3, [RDX+0x30];
		lddqu XMM4, [RDX+0x40];
		lddqu XMM5, [RDX+0x50];
		lddqu XMM6, [RDX+0x60];
		lddqu XMM7, [RDX+0x70];
		lddqu XMM8, [RDX+0x80];
		lddqu XMM9, [RDX+0x90];
		lddqu XMM10, [RDX+0xA0];

		// load plaintext into XMM15

		mov RDX, plain;	// pointer to plaintext
		movdqu XMM15, [RDX];	// read plaintext block

		// AES-128 encryption sequence.
		// The data block is in XMM15.
		// Registers XMM0–XMM10 hold the round keys(from 0 to 10 in 
this order).
		// In the end, XMM15 holds the encryption result.
		pxor XMM15, XMM0; // Whitening step (Round 0)
		aesenc XMM15, XMM1; // Round 1
		aesenc XMM15, XMM2; // Round 2
		aesenc XMM15, XMM3; // Round 3
		aesenc XMM15, XMM4; // Round 4
		aesenc XMM15, XMM5; // Round 5
		aesenc XMM15, XMM6; // Round 6
		aesenc XMM15, XMM7; // Round 7
		aesenc XMM15, XMM8; // Round 8
		aesenc XMM15, XMM9; // Round 9
		aesenclast XMM15, XMM10; // Round 10

		mov RDX, ciphertext;	// pointer to output buffer
		movdqu [RDX], XMM15;	// write processed data to buffer
	}
}


///
/// Expand a 128 bit user key into 11 round keys
///
/// source: 
http://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf, 
Figure 19. AES-128 Key Expansion: Outlined Code Example
///
/// Params:
///
/// decrypt = generate decryption key if set to true. default: 
false
///
/// userKey = the AES key as given by the user
/// key = 11 round keys
///
///
enum ROUNDS = 11;
@trusted
public void AES_128_KEY_EXPANSION(string mode = "encrypt")(in 
ubyte* userKey, ubyte* key) nothrow @nogc
	if(mode == "encrypt" || mode == "decrypt")
	in {
		//assertHardwareSupport();
		//assert(userKey.length == 16, "invalid key size");
		//assert(key.length == ROUNDS*16, "invalid key schedule size");
	}
body {
	
	
	asm  {

		mov RDX, userKey; // pointer to user key
		movdqu XMM1, [RDX]; // read user key
		
		xor RCX, RCX; // set index to 0
		
		mov RDX, key;	// pointer to working key
		movdqu [RDX+RCX], XMM1;
		add	RCX, 0x10; // increment by 16 bytes
		
		aeskeygenassist XMM2, XMM1, 0x01;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x02;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x04;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x08;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x10;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x20;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x40;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x80;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x1b;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x36;
		call aes_128_assist;
		
	}
	static if(mode == "decrypt") {
		asm {
			// generate inverse key
			call aesimc128;
		}
	} asm {
		
		jmp END;
		
	aes_128_assist:
		pshufd XMM2, XMM2, 0xff;
		
		//vpslldq XMM3, XMM1, 0x4; // vpslldq requires AVX, pslldq 
requires only SSE2
		movdqu XMM3, XMM1;
		pslldq XMM3, 0x4;
		
		pxor XMM1, XMM3;
		
		//vpslldq XMM3, XMM1, 0x4;
		movdqu XMM3, XMM1;
		pslldq XMM3, 0x4;
		
		pxor XMM1, XMM3;
		
		//vpslldq XMM3, XMM1, 0x4;
		movdqu XMM3, XMM1;
		pslldq XMM3, 0x4;
		
		pxor XMM1, XMM3;
		pxor XMM1, XMM2;
		
		mov RDX, key;	// pointer to working key
		movdqu [RDX+RCX], XMM1; // store result in keySchedule
		add RCX, 0x10; // increment index by 16 bytes

		ret; // end of key_expansion_128
		
		//
		// do aesimc for all except the first and the last round key
		//
	aesimc128:
		
		mov RDX, key;			// pointer to key output buffer
		add RDX, 0x10;			// dont modify first key
		
		mov RCX, ROUNDS-2;		// set counter to number of rounds - 2
		
	LOOP:
		movdqu XMM1, [RDX];		// load
		aesimc XMM1, XMM1;		// invert
		movdqu [RDX], XMM1;		// store
		
		add RDX, 0x10;			// increment pointer
		
		loop LOOP;				// loop rounds-2 times
		
		ret;
		
		// end aesimc128
		
	END:
		;
	}
}


More information about the digitalmars-d-ldc mailing list