A new calling convention in VS2013

bearophile bearophileHUGS at lycos.com
Sat Jul 13 03:35:58 PDT 2013


Through Reddit I've found an article about the vector calling 
convention added to VS2013:
http://blogs.msdn.com/b/vcblog/archive/2013/07/12/introducing-vector-calling-convention.aspx


So I have written what I think is a similar D program:


import core.stdc.stdio, core.simd;

// A pair of core.simd float4 vectors, named x and y.
struct Particle { float4 x, y; }

// Returns a new Particle whose x and y fields are the vector sums of
// the corresponding fields of p1 and p2. Both arguments are taken as
// `in` parameters and the result is returned by value.
Particle addParticles(in Particle p1, in Particle p2) pure 
nothrow {
     return Particle(p1.x + p2.x, p1.y + p2.y);
}

// BUG 10627 and 10523
//alias Particle2 = float4[2];
//Particle2 addParticles(in Particle2 p1, in Particle2 p2) {
//    return p1[] + p2[];
//}

// Test driver: builds two particles from literal values, prints each
// one, then prints the result of addParticles(p1, p2).
// Every printf call passes the four elements of x followed by the four
// elements of y as separate arguments, read through the vector's
// .array member.
void main() {
     // First operand.
     auto p1 = Particle([1, 2, 3, 4], [10, 20, 30, 40]);
     printf("%f %f %f %f %f %f %f %f\n",
            p1.x.array[0], p1.x.array[1], p1.x.array[2], 
p1.x.array[3],
            p1.y.array[0], p1.y.array[1], p1.y.array[2], 
p1.y.array[3]);

     // Second operand.
     auto p2 = Particle([100, 200, 300, 400], [1000, 2000, 3000, 
4000]);
     printf("%f %f %f %f %f %f %f %f\n",
            p2.x.array[0], p2.x.array[1], p2.x.array[2], 
p2.x.array[3],
            p2.y.array[0], p2.y.array[1], p2.y.array[2], 
p2.y.array[3]);

     // The call whose argument passing and return are under study.
     auto p3 = addParticles(p1, p2);
     printf("%f %f %f %f %f %f %f %f\n",
            p3.x.array[0], p3.x.array[1], p3.x.array[2], 
p3.x.array[3],
            p3.y.array[0], p3.y.array[1], p3.y.array[2], 
p3.y.array[3]);
}


I have compiled with the latest ldc2 (Windows32):

ldc2 -O5 -disable-inlining -release -vectorize-slp 
-vectorize-slp-aggressive -output-s test.d


The resulting X86 asm:

__D4test12addParticlesFNaNbxS4test8ParticlexS4test8ParticleZS4test8Particle:
	pushl	%ebp
	movl	%esp, %ebp
	andl	$-16, %esp
	subl	$16, %esp
	movaps	40(%ebp), %xmm0
	movaps	56(%ebp), %xmm1
	addps	8(%ebp), %xmm0
	addps	24(%ebp), %xmm1
	movups	%xmm1, 16(%eax)
	movups	%xmm0, (%eax)
	movl	%ebp, %esp
	popl	%ebp
	ret	$64

__Dmain:
...
	movaps	160(%esp), %xmm0
	movaps	176(%esp), %xmm1
	movaps	%xmm1, 48(%esp)
	movaps	%xmm0, 32(%esp)
	movaps	128(%esp), %xmm0
	movaps	144(%esp), %xmm1
	movaps	%xmm1, 16(%esp)
	movaps	%xmm0, (%esp)
	leal	96(%esp), %eax
	calll	__D4test12addParticlesFNaNbxS4test8ParticlexS4test8ParticleZS4test8Particle
	subl	$64, %esp
	movss	96(%esp), %xmm0
	movss	100(%esp), %xmm1
	movss	104(%esp), %xmm2
	movss	108(%esp), %xmm3
	movss	112(%esp), %xmm4
	movss	116(%esp), %xmm5
	movss	120(%esp), %xmm6
	movss	124(%esp), %xmm7
	cvtss2sd	%xmm7, %xmm7
	movsd	%xmm7, 60(%esp)
	cvtss2sd	%xmm6, %xmm6
	movsd	%xmm6, 52(%esp)
	cvtss2sd	%xmm5, %xmm5
	movsd	%xmm5, 44(%esp)
	cvtss2sd	%xmm4, %xmm4
	movsd	%xmm4, 36(%esp)
	cvtss2sd	%xmm3, %xmm3
	movsd	%xmm3, 28(%esp)
	cvtss2sd	%xmm2, %xmm2
	movsd	%xmm2, 20(%esp)
	cvtss2sd	%xmm1, %xmm1
	movsd	%xmm1, 12(%esp)
	cvtss2sd	%xmm0, %xmm0
	movsd	%xmm0, 4(%esp)
	movl	$_.str3, (%esp)
	calll	___mingw_printf
	xorl	%eax, %eax
	movl	%ebp, %esp
	popl	%ebp
	ret


Are those vector calling conventions useful for D too?

Bye,
bearophile


More information about the Digitalmars-d mailing list