Best way in D2 to rotate a ubyte[4] array

Tom tom at nospam.com
Wed Mar 9 15:35:59 PST 2011


El 09/03/2011 20:25, bearophile escribió:
> Tom:
>
>> What is the most efficient way of implement a rotation of ubyte[4] array?
>>
>> By rotation I mean: rotateRight([1, 2, 3, 4]) ->  [4, 1, 2, 3]
>
> Two versions, I have done no benchmarks so far:
>
> import std.c.stdio: printf;
>
> union Four {
>      ubyte[4] a;
>      uint u;
> }
>
> void showFour(Four f) {
>      printf("f.u: %u\n", f.u);
>      printf("f.a: [%d, %d, %d, %d]\n",
>             cast(int)f.a[0], cast(int)f.a[1],
>             cast(int)f.a[2], cast(int)f.a[3]);
> }
>
> void main() {
>      Four f;
>      f.a[] = [1, 2, 3, 4];
>      showFour(f);
>      f.u = (f.u<<  8) | (f.u>>  24);
>      showFour(f);
>      printf("\n");
>
>      // alternative
>      f.a[] = [1, 2, 3, 4];
>      uint u2 = f.u;
>      showFour(f);
>      printf("u2: %u\n", u2);
>      asm {
>          rol  u2, 8;
>      }
>      f.u = u2;
>      showFour(f);
> }
>
> /*
>
> dmd -O -release test.d
>
> __Dmain comdat
>          push    EBP
>          mov EBP,ESP
>          sub ESP,8
>          push    4
>          mov EAX,offset FLAT:_D12TypeInfo_xAh6__initZ
>          push    4
>          push    3
>          push    2
>          push    1
>          push    4
>          mov dword ptr -8[EBP],0
>          push    EAX
>          call    near ptr __d_arrayliteralT
>          add ESP,018h
>          push    EAX
>          lea EAX,-8[EBP]
>          push    EAX
>          call    near ptr _memcpy
>          mov EAX,-8[EBP]
>          call    near ptr _D4test8showFourFS4test4FourZv
>          mov EAX,-8[EBP]
>          mov ECX,-8[EBP]
>          shl EAX,8        ;<=========
>          shr ECX,018h
>          or  EAX,ECX
>          mov -8[EBP],EAX
>          mov EAX,-8[EBP]
>          call    near ptr _D4test8showFourFS4test4FourZv
>          mov EAX,offset FLAT:_DATA[024h]
>          push    EAX
>          call    near ptr _printf
>          mov EAX,offset FLAT:_D12TypeInfo_xAh6__initZ
>          push    4
>          push    4
>          push    3
>          push    2
>          push    1
>          push    4
>          push    EAX
>          call    near ptr __d_arrayliteralT
>          add ESP,018h
>          push    EAX
>          lea EAX,-8[EBP]
>          push    EAX
>          call    near ptr _memcpy
>          mov EAX,-8[EBP]
>          mov -4[EBP],EAX
>          mov EAX,-8[EBP]
>          call    near ptr _D4test8showFourFS4test4FourZv
>          mov EAX,offset FLAT:_DATA[028h]
>          push    dword ptr -4[EBP]
>          push    EAX
>          call    near ptr _printf
>          add ESP,024h
>          rol -4[EBP],8   ;<=========
>          mov EAX,-4[EBP]
>          mov -8[EBP],EAX
>          mov EAX,-4[EBP]
>          call    near ptr _D4test8showFourFS4test4FourZv
>          mov ESP,EBP
>          pop EBP
>          ret
> */
>
> In theory a C/C++/D compiler has to compile an expression like (x<<  8)|(x>>24) with a ROL instruction, in practice DMD doesn't do it. Months ago I have asked the two (four in X86) roll instructions to be added to the Phobos core intrinsics module, but I am not sure what Walter answered me.
>
> Bye,
> bearophile

Wow, thanks b, didn't know of the rol instruction...

Tom;


More information about the Digitalmars-d-learn mailing list