Issues using the in-line assembler
solidstate1991
laszloszeremi at outlook.com
Thu Apr 5 20:35:58 UTC 2018
It seems I found a better solution hidden in the docs:
/**
 * Transforms the point `xy` with SSE2 integer instructions (32-bit x86 only).
 *
 * Computation as described by the inline comments:
 *   (([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0])) >> 16) + [x_0,y_0]
 * The `>> 16` suggests 16.16 fixed-point coefficients -- TODO confirm.
 *
 * Params:
 *   xy = input coordinate pair [x, y]
 *
 * Reads the members sX, sY, xy0, ac and bd through the `this` pointer.
 * The result is left in the low 64 bits of XMM0 as [x', y'].
 *
 * NOTE(review): the function is `naked`, so no prolog/epilog is generated.
 * The D inline-assembler spec says implicit EBP-relative addressing of stack
 * variables does not hold under `naked`; confirm that `this` and `xy` resolve
 * correctly here.
 * NOTE(review): confirm that returning int[2] in XMM0 and using a plain `ret`
 * match the target's return-value and stack-cleanup convention.
 */
@nogc protected int[2] transformFunc(int[2] xy){
    version(X86){
        asm @nogc{
            naked;
            // ECX is a scratch register. The original used EBX, which is
            // callee-saved in the D x86 ABI and is not preserved automatically
            // in a naked function.
            mov ECX, this;
            // Build [sX,sY,0,0]. A movss load from memory zeroes lanes 1-3, so
            // it cannot be used to merge sY into a register already holding sX.
            movd XMM1, sX[ECX];       // XMM1 = [sX,0,0,0]
            movd XMM5, sY[ECX];       // XMM5 = [sY,0,0,0]
            punpckldq XMM1, XMM5;     // XMM1 = [sX,sY,0,0]
            movq XMM0, xy;            // XMM0 = [x,y,0,0]
            paddd XMM0, XMM1;         // [x,y] + [sX,sY]
            movq XMM3, xy0[ECX];
            psubd XMM0, XMM3;         // ([x,y] + [sX,sY] - [x_0,y_0])
            movq XMM1, ac[ECX];
            movq XMM2, bd[ECX];
            // NOTE(review): pmuludq multiplies only dword lanes 0 and 2, and
            // the movq loads above leave lanes 2-3 zero. As written, the upper
            // 64-bit product of both multiplies is therefore 0. Confirm the
            // layout of ac/bd and whether a 128-bit load or shuffle is needed.
            // NOTE(review): pmuludq is unsigned and psrlq is a logical shift;
            // confirm behaviour for negative operands.
            pmuludq XMM1, XMM0;       // [A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0])
            psrlq XMM1, 16;           // ([A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
            // Move the second component of XMM0 into lanes 0 and 2:
            // XMM0 = [d0,d1,0,0]  ->  XMM4 = [d1,d0,d1,0]
            movups XMM4, XMM0;
            psrldq XMM4, 4;
            pslldq XMM0, 4;
            por XMM4, XMM0;
            pmuludq XMM2, XMM4;       // [0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0])
            psrlq XMM2, 16;           // ([0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
            paddq XMM1, XMM2;         // ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
            // XMM7 must be zero for the unpack to zero-extend; the original
            // never initialised it.
            pxor XMM7, XMM7;
            punpckldq XMM3, XMM7;     // XMM3 = [x_0,0,y_0,0]
            paddq XMM1, XMM3;         // ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16 + [x_0,y_0]
            // Convert 64 bit vectors into 32 bit ones: take dword lanes 0 and 2
            // of XMM1 into lanes 0 and 1 of XMM0. The original shift-and-OR
            // mixed the high dword of the first result into both outputs.
            pshufd XMM0, XMM1, 0x08;
            ret ;
        }
    }(...)
}
More information about the Digitalmars-d-learn
mailing list