DMD floating point performance.
Dave
Dave_member at pathlink.com
Sun Nov 12 15:13:49 PST 2006
Walter Bright wrote:
> Dave wrote:
>> If you look at the DMD asm, the problem is that each operation is
>> wrapped by a load/store.. Why wouldn't val and sum be kept in fp
>> registers inside the loop?
>
> The issue isn't with D, it's with the back end code generator. You'll
> see the same thing with the C and C++ compiler.
>
> Because the FPU is a 'stack' machine rather than a register machine,
> it's hard to write a code generator that enregisters floating point
I know this is simplistic, but could something like fxch be used to 'mimic' a register machine
once vars are stored in FPU registers? fxch is supposed to be very efficient.
> variables. It's also problematical because every function call must
> empty the FPU stack anyway, and so a lot of register spill logic must be
> in place for it to be very useful.
Could ffree greatly simplify things here?
> Not impossible, just a lot of tricky work. How does GDC do with this?
The original post was comparing DMD vs. GDC.
Here's some code showing relative asm. output:
dmd = 1: 0.673 secs
dmd asm = 1: 0.682 secs
opt asm = 1: 0.272 secs
;---
import std.stdio, std.date;
/// Benchmark driver: runs each of the three kernels once, timing it with
/// getUTCtime, and prints the returned value together with the elapsed
/// wall-clock time in seconds.
void main()
{
    // Kernel written in plain D (reported under the "dmd" label).
    d_time began = getUTCtime();
    double result = fp();
    d_time ended = getUTCtime();
    real secs = (ended - began) / cast(real)TicksPerSecond;
    writefln("dmd = ", result, ": ", secs, " secs");

    // Inline-asm kernel that goes through memory on every operation.
    began = getUTCtime();
    result = fp_dmd_asm();
    ended = getUTCtime();
    secs = (ended - began) / cast(real)TicksPerSecond;
    writefln("dmd asm = ", result, ": ", secs, " secs");

    // Inline-asm kernel that keeps its operands on the FPU stack.
    began = getUTCtime();
    result = fp_dmd_opt();
    ended = getUTCtime();
    secs = (ended - began) / cast(real)TicksPerSecond;
    writefln("opt asm = ", result, ": ", secs, " secs");
}
/**
 * Reference floating-point kernel in plain D.
 *
 * Starting from 1.0, applies "+ val, - val, * val, / val" (val = 0.000001)
 * ten million times to a running total.
 *
 * Returns: the running total after the last iteration.
 */
double fp()
{
    double val = 0.000001;
    double sum = 1.0;

    size_t pass = 0;
    while (pass < 10_000_000)
    {
        sum = sum + val;
        sum = sum - val;
        sum = sum * val;
        sum = sum / val;
        ++pass;
    }

    return sum;
}
// Module-level operands read (and, for _sum, written back) by fp_dmd_asm.
double _sum = 1.0, _val = 0.000001;

/**
 * Inline-asm rendition of the load/operate/store sequence discussed in the
 * post ("more or less" what DMD emits for fp()): every one of the four
 * operations loads _val, combines it with the running total held in memory,
 * and stores the result straight back to memory.
 *
 * The running total lives in the declared local `sum`, so the scratch slot
 * is addressed by name rather than through a hard-coded -8[EBP] offset that
 * no declaration in this function reserves.
 *
 * Side effect: the final total is written back to the module-level _sum.
 *
 * Returns: the value of _sum after the loop.
 */
double fp_dmd_asm() // more or less
{
    double sum; // stack slot for the running total; first written by the asm below
    asm
    {
        fld     qword ptr _sum[0];
        fstp    qword ptr sum[0];       // sum = _sum
        xor     EAX,EAX;                // loop counter = 0
L11:    fld     qword ptr _val[0];
        fadd    qword ptr sum[0];       // ST(0) = val + sum
        fstp    qword ptr sum[0];
        fld     qword ptr _val[0];
        fsubr   qword ptr sum[0];       // ST(0) = sum - val (reversed operands)
        fstp    qword ptr sum[0];
        fld     qword ptr _val[0];
        fmul    qword ptr sum[0];       // ST(0) = val * sum
        fstp    qword ptr sum[0];
        fld     qword ptr _val[0];
        fdivr   qword ptr sum[0];       // ST(0) = sum / val (reversed operands)
        fstp    qword ptr sum[0];
        inc     EAX;
        cmp     EAX,10_000_000;
        jb      L11;
        fld     qword ptr sum[0];
        fstp    qword ptr _sum[0];      // publish the result to the global
    }
    return _sum;
}
/**
 * Hand-optimized variant of fp(): the running total and val are loaded onto
 * the x87 register stack once and stay there for the whole loop, so no
 * memory traffic happens inside the loop.
 *
 * Register layout inside the loop: ST(0) = running total, ST(1) = val.
 *
 * Returns: the running total after ten million iterations.
 */
double fp_dmd_opt()
{
    double sum = 1.0, val = 0.000001;
    asm
    {
        fld     qword ptr sum[0];   // ST(0) = sum
        fld     qword ptr val[0];   // ST(0) = val, ST(1) = sum
        fxch    ST(1);              // ST(0) = sum, ST(1) = val
        xor     EAX,EAX;            // loop counter = 0
L1:     fadd    ST, ST(1);          // sum += val
        // With ST(0) as destination the non-reversed forms are required:
        // fsubr/fdivr would compute val - sum and val / sum instead.
        fsub    ST, ST(1);          // sum -= val
        fmul    ST, ST(1);          // sum *= val
        fdiv    ST, ST(1);          // sum /= val
        inc     EAX;
        cmp     EAX,10_000_000;
        jb      L1;
        fstp    qword ptr sum[0];   // store the total and pop; val is now ST(0)
        fstp    ST(0);              // pop val so the FPU stack is left balanced
    }
    return sum;
}
More information about the Digitalmars-d
mailing list