Best way in D2 to rotate a ubyte[4] array
bearophile
bearophileHUGS at lycos.com
Wed Mar 9 15:25:13 PST 2011
Tom:
> What is the most efficient way of implement a rotation of ubyte[4] array?
>
> By rotation I mean: rotateRight([1, 2, 3, 4]) -> [4, 1, 2, 3]
Here are two versions; I have not benchmarked either of them yet:
import std.c.stdio: printf;
/// Overlays a four-element byte array and a 32-bit unsigned integer on the
/// same storage, so the four bytes can be read or written either one at a
/// time (through `a`) or all at once (through `u`).
union Four
{
    ubyte[4] a; // byte-wise view of the storage
    uint u;     // the same four bytes seen as one 32-bit word
}
/// Prints both views of a `Four` to stdout: first the 32-bit word `u` as an
/// unsigned decimal, then the four bytes of `a` as a bracketed list.
///
/// Params:
///     f = the value to display (passed by value, not modified)
void showFour(Four f)
{
    // Widen each byte to int up front: printf's %d expects an int argument.
    const int b0 = f.a[0];
    const int b1 = f.a[1];
    const int b2 = f.a[2];
    const int b3 = f.a[3];

    printf("f.u: %u\n", f.u);
    printf("f.a: [%d, %d, %d, %d]\n", b0, b1, b2, b3);
}
/// Demonstrates two ways of rotating a ubyte[4] one position to the right
/// ([1, 2, 3, 4] -> [4, 1, 2, 3]) by treating the four bytes as one uint:
/// first with a shift/or expression, then with the x86 ROL instruction.
/// The state is printed before and after each rotation.
void main()
{
    Four f;

    // Version 1: shift/or on the 32-bit view.
    f.a[] = [1, 2, 3, 4];
    showFour(f);
    // Moving every byte to the next higher index is a left rotate by 8 bits
    // when a[0] is the low byte of u (little-endian), and a right rotate by
    // 8 bits when a[0] is the high byte (big-endian).
    version (LittleEndian)
        f.u = (f.u << 8) | (f.u >> 24);
    else
        f.u = (f.u >> 8) | (f.u << 24);
    showFour(f);
    printf("\n");

    // alternative
    // Version 2: the same rotation done with a single ROL on a local copy.
    f.a[] = [1, 2, 3, 4];
    uint u2 = f.u;
    showFour(f);
    printf("u2: %u\n", u2);
    // DMD-style inline assembly only exists on x86 targets; guard it so the
    // program still builds elsewhere, falling back to the shift/or form.
    version (D_InlineAsm_X86)
    {
        asm
        {
            rol u2, 8;
        }
    }
    else version (D_InlineAsm_X86_64)
    {
        asm
        {
            rol u2, 8;
        }
    }
    else version (LittleEndian)
    {
        u2 = (u2 << 8) | (u2 >> 24);
    }
    else
    {
        u2 = (u2 >> 8) | (u2 << 24);
    }
    f.u = u2;
    showFour(f);
}
/*
dmd -O -release test.d
__Dmain comdat
push EBP
mov EBP,ESP
sub ESP,8
push 4
mov EAX,offset FLAT:_D12TypeInfo_xAh6__initZ
push 4
push 3
push 2
push 1
push 4
mov dword ptr -8[EBP],0
push EAX
call near ptr __d_arrayliteralT
add ESP,018h
push EAX
lea EAX,-8[EBP]
push EAX
call near ptr _memcpy
mov EAX,-8[EBP]
call near ptr _D4test8showFourFS4test4FourZv
mov EAX,-8[EBP]
mov ECX,-8[EBP]
shl EAX,8 ; <=========
shr ECX,018h
or EAX,ECX
mov -8[EBP],EAX
mov EAX,-8[EBP]
call near ptr _D4test8showFourFS4test4FourZv
mov EAX,offset FLAT:_DATA[024h]
push EAX
call near ptr _printf
mov EAX,offset FLAT:_D12TypeInfo_xAh6__initZ
push 4
push 4
push 3
push 2
push 1
push 4
push EAX
call near ptr __d_arrayliteralT
add ESP,018h
push EAX
lea EAX,-8[EBP]
push EAX
call near ptr _memcpy
mov EAX,-8[EBP]
mov -4[EBP],EAX
mov EAX,-8[EBP]
call near ptr _D4test8showFourFS4test4FourZv
mov EAX,offset FLAT:_DATA[028h]
push dword ptr -4[EBP]
push EAX
call near ptr _printf
add ESP,024h
rol -4[EBP],8 ; <=========
mov EAX,-4[EBP]
mov -8[EBP],EAX
mov EAX,-4[EBP]
call near ptr _D4test8showFourFS4test4FourZv
mov ESP,EBP
pop EBP
ret
*/
In theory a C/C++/D compiler should compile an expression like (x << 8) | (x >> 24) into a single ROL instruction; in practice DMD doesn't do so (compare the two marked spots in the listing above). Months ago I asked for the two (four on x86) rotate instructions to be added to the Phobos core intrinsics module, but I am not sure what Walter answered.
Bye,
bearophile
More information about the Digitalmars-d-learn
mailing list