[Issue 10286] New: Better optimization for struct constructors?
d-bugmail at puremagic.com
d-bugmail at puremagic.com
Thu Jun 6 17:35:34 PDT 2013
http://d.puremagic.com/issues/show_bug.cgi?id=10286
Summary: Better optimization for struct constructors?
Product: D
Version: D2
Platform: All
OS/Version: All
Status: NEW
Severity: enhancement
Priority: P2
Component: DMD
AssignedTo: nobody at puremagic.com
ReportedBy: bearophile_hugs at eml.cc
--- Comment #0 from bearophile_hugs at eml.cc 2013-06-06 17:35:31 PDT ---
Created an attachment (id=1220)
A small raytracer
This is a benchmark program that shows the difference in run-time between a
struct with an explicit constructor and one without:
import core.stdc.stdio: printf;
import core.stdc.stdlib: atoi;
// Three-component vector of doubles with an explicit, user-written
// constructor. This is the "with constructor" case of the benchmark.
struct V3a {
double x, y, z;
// Stores each argument in the field of the same name (x_ -> x, y_ -> y, z_ -> z).
this(in double x_, in double y_, in double z_)
pure nothrow {
this.x = x_;
this.y = y_;
this.z = z_;
}
}
// Three-component vector of doubles that declares no constructor of its own.
// This is the "without constructor" case of the benchmark.
struct V3b {
double x, y, z;
}
// Benchmark loop for V3a: for each i in [0, N) builds V3a(i, i, i) through the
// explicit constructor and adds its y field to a running sum.
// Returns the accumulated sum (0.0 when N is 0).
double spam1(in uint N) pure nothrow {
double total = 0.0;
for (uint i = 0; i < N; i++) {
// The loop index is used for all three components; only y is read afterwards.
immutable v = V3a(i, i, i);
total += v.y;
}
return total;
}
// Benchmark loop for V3b: for each i in [0, N) builds V3b(i, i, i) (the struct
// has no user-written constructor) and adds its y field to a running sum.
// Returns the accumulated sum (0.0 when N is 0).
double spam2(in uint N) pure nothrow {
double total = 0.0;
for (uint i = 0; i < N; i++) {
// The loop index is used for all three components; only y is read afterwards.
immutable v = V3b(i, i, i);
total += v.y;
}
return total;
}
// Entry point of the benchmark.
// args[1] (optional): iteration count, parsed with atoi; defaults to 1_000.
// args[2] (optional): when equal to "1", spam1 (V3a) is run; in every other
// case spam2 (V3b) is run. The resulting sum is printed with printf.
void main(in string[] args) {
// A '\0' is appended before taking .ptr so atoi receives a NUL-terminated string.
immutable uint N = (args.length >= 2) ?
atoi((args[1] ~ '\0').ptr) :
1_000;
if (args.length >= 3 && args[2] == "1")
printf("%f\n", spam1(N));
else
printf("%f\n", spam2(N));
}
If you run it you see a performance difference between creating V3a and V3b.
This is the asm generated by dmd (dmd 2.064alpha, -O -release -inline
-noboundscheck):
_D5test25spam1FNaNbxkZd:
sub ESP,030h
mov EDX,EAX
xor ECX,ECX
push EBX
test EDX,EDX
push ESI
push EDI
mov dword ptr 034h[ESP],0
mov dword ptr 038h[ESP],0
je L52
L1E: mov ESI,offset FLAT:_D5test23V3a6__initZ
lea EDI,01Ch[ESP]
movsd
movsd
movsd
movsd
movsd
movsd
xor EBX,EBX
mov EAX,ECX
mov 0Ch[ESP],ECX
inc ECX
cmp ECX,EDX
mov 010h[ESP],EBX
fild long64 ptr 0Ch[ESP]
fstp qword ptr 024h[ESP]
fld qword ptr 024h[ESP]
fadd qword ptr 034h[ESP]
fstp qword ptr 034h[ESP]
jb L1E
L52: fld qword ptr 034h[ESP]
pop EDI
pop ESI
pop EBX
add ESP,030h
ret
_D5test25spam2FNaNbxkZd:
sub ESP,030h
mov EDX,EAX
xor ECX,ECX
push EBX
test EDX,EDX
mov dword ptr 02Ch[ESP],0
mov dword ptr 030h[ESP],0
je L63
L1C: mov 4[ESP],ECX
xor EBX,EBX
mov EAX,ECX
mov 8[ESP],EBX
inc ECX
cmp ECX,EDX
fild long64 ptr 4[ESP]
fstp qword ptr 014h[ESP]
mov 4[ESP],EAX
mov 8[ESP],EBX
fild long64 ptr 4[ESP]
fstp qword ptr 01Ch[ESP]
mov 4[ESP],EAX
mov 8[ESP],EBX
fild long64 ptr 4[ESP]
fld qword ptr 01Ch[ESP]
fxch ST1
fstp qword ptr 024h[ESP]
fadd qword ptr 02Ch[ESP]
fstp qword ptr 02Ch[ESP]
jb L1C
L63: fld qword ptr 02Ch[ESP]
pop EBX
add ESP,030h
ret
One visible difference is that block of movsd:
movsd
movsd
movsd
movsd
movsd
movsd
Compiling with ldc2 (V. 0.11.0, based on DMD v2.062 and LLVM 3.3svn, -O5
-release -profile-verifier-noassert):
__D5test25spam1FNaNbxkZd:
pushl %ebp
movl %esp, %ebp
andl $-8, %esp
subl $8, %esp
xorps %xmm0, %xmm0
testl %eax, %eax
je LBB1_1
movsd LCPI1_0, %xmm2
xorps %xmm1, %xmm1
.align 16, 0x90
LBB1_3:
addsd %xmm0, %xmm1
addsd %xmm2, %xmm0
decl %eax
jne LBB1_3
jmp LBB1_4
LBB1_1:
xorps %xmm1, %xmm1
LBB1_4:
movsd %xmm1, (%esp)
fldl (%esp)
movl %ebp, %esp
popl %ebp
ret
__D5test25spam2FNaNbxkZd:
pushl %ebp
movl %esp, %ebp
andl $-8, %esp
subl $8, %esp
xorps %xmm0, %xmm0
testl %eax, %eax
je LBB2_3
movsd LCPI2_0, %xmm1
xorps %xmm2, %xmm2
.align 16, 0x90
LBB2_2:
addsd %xmm2, %xmm0
addsd %xmm1, %xmm2
decl %eax
jne LBB2_2
LBB2_3:
movsd %xmm0, (%esp)
fldl (%esp)
movl %ebp, %esp
popl %ebp
ret
You see that ldc2 compiles the two functions in the same way, and indeed their
run-time is the same.
But the problem is not limited to DMD. As soon as your program becomes a little
longer than such a tiny benchmark, the performance difference between structs
with constructors and structs without constructors becomes clearly visible even
with ldc2.
In the attachment you will find a small single-module ray tracer that contains
a V3 struct. The struct has a basic constructor like this:
// Constructor of the ray tracer's V3 struct: stores each argument in the
// field of the same name (x_ -> x, y_ -> y, z_ -> z).
this(in double x_, in double y_, in double z_) pure nothrow {
this.x = x_;
this.y = y_;
this.z = z_;
}
If you run the program with or without that constructor, using both ldc2 and
dmd, you see a significant performance difference.
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
More information about the Digitalmars-d-bugs
mailing list