A new calling convention in VS2013
bearophile
bearophileHUGS at lycos.com
Sat Jul 13 03:35:58 PDT 2013
Through Reddit I've found an article about the new vector calling
convention added to VS2013:
http://blogs.msdn.com/b/vcblog/archive/2013/07/12/introducing-vector-calling-convention.aspx
So I have written what I think is a similar D program:
import core.stdc.stdio, core.simd;
/// Two 4-lane single-precision SIMD vectors, passed around by value.
struct Particle {
    float4 x;
    float4 y;
}
/// Returns a new Particle whose `x` and `y` vectors are the lane-wise
/// sums of the corresponding vectors of `p1` and `p2`.
/// Both arguments are taken by value as read-only (`in`) structs.
Particle addParticles(in Particle p1, in Particle p2) pure nothrow {
    // Each `+` is a single 4-lane vector addition.
    float4 sumX = p1.x + p2.x;
    float4 sumY = p1.y + p2.y;
    return Particle(sumX, sumY);
}
// BUG 10627 and 10523
//alias Particle2 = float4[2];
//Particle2 addParticles(in Particle2 p1, in Particle2 p2) {
// return p1[] + p2[];
//}
// Builds two particles, prints the eight lanes of each, then prints the
// eight lanes of their sum as computed by addParticles.
void main() {
    auto first = Particle([1, 2, 3, 4], [10, 20, 30, 40]);
    // printf is a C variadic function, so every float lane is passed as a double.
    printf("%f %f %f %f %f %f %f %f\n",
           first.x.array[0], first.x.array[1], first.x.array[2], first.x.array[3],
           first.y.array[0], first.y.array[1], first.y.array[2], first.y.array[3]);

    auto second = Particle([100, 200, 300, 400], [1000, 2000, 3000, 4000]);
    printf("%f %f %f %f %f %f %f %f\n",
           second.x.array[0], second.x.array[1], second.x.array[2], second.x.array[3],
           second.y.array[0], second.y.array[1], second.y.array[2], second.y.array[3]);

    // This is the call whose argument passing the post is examining.
    auto total = addParticles(first, second);
    printf("%f %f %f %f %f %f %f %f\n",
           total.x.array[0], total.x.array[1], total.x.array[2], total.x.array[3],
           total.y.array[0], total.y.array[1], total.y.array[2], total.y.array[3]);
}
I have compiled with the latest ldc2 (Windows32):
ldc2 -O5 -disable-inlining -release -vectorize-slp
-vectorize-slp-aggressive -output-s test.d
The resulting X86 asm:
__D4test12addParticlesFNaNbxS4test8ParticlexS4test8ParticleZS4test8Particle:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $16, %esp
movaps 40(%ebp), %xmm0
movaps 56(%ebp), %xmm1
addps 8(%ebp), %xmm0
addps 24(%ebp), %xmm1
movups %xmm1, 16(%eax)
movups %xmm0, (%eax)
movl %ebp, %esp
popl %ebp
ret $64
__Dmain:
...
movaps 160(%esp), %xmm0
movaps 176(%esp), %xmm1
movaps %xmm1, 48(%esp)
movaps %xmm0, 32(%esp)
movaps 128(%esp), %xmm0
movaps 144(%esp), %xmm1
movaps %xmm1, 16(%esp)
movaps %xmm0, (%esp)
leal 96(%esp), %eax
calll __D4test12addParticlesFNaNbxS4test8ParticlexS4test8ParticleZS4test8Particle
subl $64, %esp
movss 96(%esp), %xmm0
movss 100(%esp), %xmm1
movss 104(%esp), %xmm2
movss 108(%esp), %xmm3
movss 112(%esp), %xmm4
movss 116(%esp), %xmm5
movss 120(%esp), %xmm6
movss 124(%esp), %xmm7
cvtss2sd %xmm7, %xmm7
movsd %xmm7, 60(%esp)
cvtss2sd %xmm6, %xmm6
movsd %xmm6, 52(%esp)
cvtss2sd %xmm5, %xmm5
movsd %xmm5, 44(%esp)
cvtss2sd %xmm4, %xmm4
movsd %xmm4, 36(%esp)
cvtss2sd %xmm3, %xmm3
movsd %xmm3, 28(%esp)
cvtss2sd %xmm2, %xmm2
movsd %xmm2, 20(%esp)
cvtss2sd %xmm1, %xmm1
movsd %xmm1, 12(%esp)
cvtss2sd %xmm0, %xmm0
movsd %xmm0, 4(%esp)
movl $_.str3, (%esp)
calll ___mingw_printf
xorl %eax, %eax
movl %ebp, %esp
popl %ebp
ret
Would such a vector calling convention be useful for D too?
Bye,
bearophile
More information about the Digitalmars-d
mailing list