Performance
bearophile via Digitalmars-d
digitalmars-d at puremagic.com
Fri May 30 07:43:04 PDT 2014
This C++ code:
// Starts from 1.346346 and performs nSteps additions, alternating between
// the increments p0 and p1 (p0 first), then returns the accumulated sum.
// With nSteps == 0 the loop body never runs and 1.346346 is returned.
double plus(const unsigned int nSteps) {
const double p0 = 0.0045;
const double p1 = 1.00045452-p0; // so p0 + p1 is 1.00045452, up to rounding
double tot = 1.346346;
bool b = true; // selects the increment for the current step; flipped each iteration
for (unsigned int i = 0; i < nSteps; i++) {
switch (b) { // switch on a bool: the two cases below cover every value
case true:
tot += p0;
break;
case false:
tot += p1;
break;
}
b = !b; // alternate: p0, p1, p0, p1, ...
}
return tot;
}
G++ 4.8.0 compiles it to the following asm (using -Ofast, which implies
unsafe FP optimizations):
# -----------------------------------------------------------------------
# double plus(unsigned int nSteps)  -- G++ output, 32-bit x86, AT&T syntax
# (_Z4plusj is the mangled name; the extra leading underscore is presumably
# the platform's symbol prefix -- confirm against the target.)
# In:    nSteps is read from the stack at 4(%esp)
# Out:   every return path leaves exactly one value on the x87 stack, st(0)
# Uses:  ecx = nSteps, edx = loop counter i, eax/al = b, x87 st(0)/st(1)
# Data:  LC0, LC1, LC2 are defined outside this listing.
#        NOTE(review): presumably LC0 = 1.346346 (initial tot), LC1 = p0
#        (added when b is true), LC2 = p1 (added when b is false) -- confirm.
# Loop invariant at L6 and at entry to L11: st(0) = LC2, st(1) = running total.
# -----------------------------------------------------------------------
__Z4plusj:
movl 4(%esp), %ecx  # ecx = nSteps
testl %ecx, %ecx
je L7  # nSteps == 0: skip the loop and return LC0 directly
fldl LC0  # st(0) = LC0 (running total)
xorl %edx, %edx  # i = 0
movl $1, %eax  # b = true
fldl LC2  # st(0) = LC2, st(1) = running total
jmp L6
.p2align 4,,7  # align to 16 bytes unless that needs more than 7 padding bytes
# b == true step: add LC1 to the running total.
L11:
fxch %st(1)  # bring the total to st(0) so the memory operand can be added to it
addl $1, %edx  # i++
xorl $1, %eax  # b = !b
cmpl %ecx, %edx  # i == nSteps ?
faddl LC1  # total += LC1; x87 arithmetic leaves EFLAGS from cmpl intact
je L12  # done: exit with total in st(0), LC2 in st(1)
fxch %st(1)  # restore invariant: st(0) = LC2, st(1) = total
# Loop head: dispatch on b.
L6:
cmpb $1, %al
je L11  # b == true
# b == false step: add LC2 (kept in st(0)) to the running total in st(1).
addl $1, %edx  # i++
xorl $1, %eax  # b = !b
cmpl %ecx, %edx  # i == nSteps ?
fadd %st, %st(1)  # st(1) += st(0), i.e. total += LC2
jne L6  # more steps remain
fstp %st(0)  # pop LC2; total becomes st(0)
jmp L10
.p2align 4,,7
# Exit from the b == true step: st(0) = total, st(1) = LC2.
L12:
fstp %st(1)  # overwrite LC2 with total and pop, leaving only total in st(0)
L10:
rep ret  # NOTE(review): presumably the two-byte return form used for older AMD branch predictors -- confirm
# nSteps == 0: no additions were performed.
L7:
fldl LC0  # return value = LC0
ret
Bye,
bearophile
More information about the Digitalmars-d
mailing list