Performance

bearophile via Digitalmars-d digitalmars-d at puremagic.com
Fri May 30 07:43:04 PDT 2014


This C++ code:

// Starts from 1.346346 and, for each of nSteps iterations, adds p0 or p1
// in alternation (p0 first, because b starts out true). Returns the
// accumulated total; when nSteps is 0 the loop body never runs and the
// starting value is returned unchanged.
double plus(const unsigned int nSteps) {
     const double p0 = 0.0045;
     const double p1 = 1.00045452-p0;  // the other increment, derived from p0

     double tot = 1.346346;  // running total
     bool b = true;          // selects which increment the current iteration adds

     for (unsigned int i = 0; i < nSteps; i++) {
         // Switch on a bool: both possible values have a case, so exactly
         // one of the two additions executes per iteration.
         switch (b) {
             case true:
                 tot += p0;
                 break;
             case false:
                 tot += p1;
                 break;
         }

         b = !b;  // flip so the next iteration takes the other case
     }

     return tot;
}


For the C++ code above, G++ 4.8.0 (with -Ofast, which implies unsafe
floating-point optimizations) generates this 32-bit x86 assembly:

# plus(unsigned int) as emitted by the compiler: 32-bit x86, AT&T syntax,
# x87 FPU arithmetic.
# C signature: double plus(const unsigned int nSteps)
# In:    4(%esp) = nSteps (the only argument, read from the stack)
# Out:   %st(0)  = result
# Regs:  %ecx = nSteps, %edx = iteration counter, %eax = toggle (starts at 1)
# x87:   inside the loop the stack holds the running total and a copy of
#        LC2; at label L6 the layout is st(0) = LC2, st(1) = total.
# LC0, LC1 and LC2 are double constants defined outside this listing. LC0 is
# the value returned when nSteps == 0, so it is presumably the initial
# total; LC1 and LC2 are presumably the two increments -- confirm against
# the constant definitions.
__Z4plusj:
	movl	4(%esp), %ecx		# ecx = nSteps
	testl	%ecx, %ecx
	je	L7			# nSteps == 0: skip the loop entirely
	fldl	LC0			# st(0) = LC0, the starting total
	xorl	%edx, %edx		# edx = 0 (iteration counter)
	movl	$1, %eax		# eax = 1 (toggle starts set)
	fldl	LC2			# st(0) = LC2, st(1) = total
	jmp	L6			# enter the loop at its toggle test
	.p2align 4,,7			# align L11 to 16 bytes if that costs at most 7 bytes of padding
L11:					# toggle == 1: add LC1 to the total
	fxch	%st(1)			# bring the total to st(0) so a memory operand can be added to it
	addl	$1, %edx		# counter++
	xorl	$1, %eax		# flip the toggle
	cmpl	%ecx, %edx		# counter == nSteps?  (flags consumed by the je below)
	faddl	LC1			# total += LC1; x87 adds leave EFLAGS alone, so the cmpl result survives
	je	L12			# last iteration done; here st(0) = total, st(1) = LC2
	fxch	%st(1)			# restore layout: st(0) = LC2, st(1) = total
L6:
	cmpb	$1, %al			# is the toggle set?
	je	L11			# yes: take the LC1 path
	addl	$1, %edx		# toggle == 0 path: counter++
	xorl	$1, %eax		# flip the toggle
	cmpl	%ecx, %edx		# counter == nSteps?  (flags consumed by the jne below)
	fadd	%st, %st(1)		# st(1) += st(0), i.e. total += LC2
	jne	L6			# more iterations remain
	fstp	%st(0)			# pop the LC2 copy; total is now in st(0)
	jmp	L10
	.p2align 4,,7			# align L12 to 16 bytes if that costs at most 7 bytes of padding
L12:
	fstp	%st(1)			# copy total over the LC2 slot and pop: st(0) = total, stack depth 1
L10:
	rep ret				# return total in st(0); the rep prefix is GCC's two-byte form for a ret that is a jump target
L7:
	fldl	LC0			# nSteps == 0: the result is just LC0
	ret

Bye,
bearophile


More information about the Digitalmars-d mailing list