Is there a way to get a list of functions that get inlined by
bearophile
bearophileHUGS at lycos.com
Tue Feb 9 14:18:34 PST 2010
Scorn:
> double min(double a, double b, double c)
> {
> return a < b && a < c ? a : b < c ? b : c;
> }
Don't write code like that; add some parentheses, like this:
return (a < b && a < c) ? a : (b < c ? b : c);
because the compiler can sort out the operator precedence, but the programmer who comes after you and reads that code will have trouble.
The compiler turns that code into three floating-point comparisons, while I think two suffice, so there are better ways to write it (see the min3 function below, which performs only two comparisons per call).
> This is (and a little bit more) is running in a tight loop which runs
> about 10000000 times.
> With these "optimizations" i get a speed increase about 20% percent.
---------------------
I have created a module named "mo" and a main module named "temp":
module mo;
// Returns x multiplied by itself (the square of x).
// Deliberately tiny: it lives in module "mo" and is called from module "temp",
// so it serves as a candidate for cross-module inlining.
int foo(int x) {
return x * x;
}
// Returns the smallest of a, b and c.
// Every path through the nested conditional evaluates exactly two
// comparisons: first a <= b, then either a <= c or b <= c.
// NOTE(review): behaviour with NaN arguments is not considered here -- confirm if it matters.
double min3(double a, double b, double c) {
return (a <= b) ? (a <= c ? a : c) : (b <= c ? b : c);
}
---------------------
module temp; // main module
version (Tango) {
import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi, atof;
} else {
import std.c.stdio: printf;
import std.c.stdlib: atoi, atof;
}
import mo: foo, min3;
// Test driver: calls foo and min3 from module "mo" and prints their results.
// The operands are parsed from strings with atoi/atof instead of being written
// as literals -- presumably so the compiler cannot constant-fold the calls away
// and the generated code shows whether they were inlined.
void main() {
int x = atoi("12");
// Prints the value returned by foo(x) as a decimal integer.
printf("%d\n", foo(x));
double x1 = atof("10");
double x2 = atof("20");
double x3 = atof("30");
// Prints the value returned by min3 for the three parsed doubles.
printf("%f\n", min3(x1, x2, x3));
}
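---------------------
From my tests it seems LDC isn't able to inline those functions, while DMD is able to inline them :-) (In the LDC output below, note the explicit calls to _D2mo3fooFiZi and _D2mo4min3FdddZd; in the DMD output there are no such calls, only an inline imul and inline fcomp sequences.)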
ldc -O5 -release -output-s -inline temp.d mo.d
08049600 <_Dmain>:
8049600: 83 ec 34 sub $0x34,%esp
8049603: c7 04 24 e8 8c 05 08 movl $0x8058ce8,(%esp)
804960a: e8 99 fd ff ff call 80493a8 <atoi@plt>
804960f: e8 9c 00 00 00 call 80496b0 <_D2mo3fooFiZi>
8049614: 89 44 24 04 mov %eax,0x4(%esp)
8049618: c7 04 24 eb 8c 05 08 movl $0x8058ceb,(%esp)
804961f: e8 64 fd ff ff call 8049388 <printf@plt>
8049624: c7 04 24 ef 8c 05 08 movl $0x8058cef,(%esp)
804962b: e8 98 fd ff ff call 80493c8 <atof@plt>
8049630: db 7c 24 28 fstpt 0x28(%esp)
8049634: c7 04 24 f2 8c 05 08 movl $0x8058cf2,(%esp)
804963b: e8 88 fd ff ff call 80493c8 <atof@plt>
8049640: db 7c 24 1c fstpt 0x1c(%esp)
8049644: c7 04 24 f5 8c 05 08 movl $0x8058cf5,(%esp)
804964b: e8 78 fd ff ff call 80493c8 <atof@plt>
8049650: db 6c 24 28 fldt 0x28(%esp)
8049654: dd 5c 24 10 fstpl 0x10(%esp)
8049658: db 6c 24 1c fldt 0x1c(%esp)
804965c: dd 5c 24 08 fstpl 0x8(%esp)
8049660: dd 1c 24 fstpl (%esp)
8049663: e8 58 00 00 00 call 80496c0 <_D2mo4min3FdddZd>
8049668: 83 ec 18 sub $0x18,%esp
804966b: dd 5c 24 04 fstpl 0x4(%esp)
804966f: c7 04 24 f8 8c 05 08 movl $0x8058cf8,(%esp)
8049676: e8 0d fd ff ff call 8049388 <printf@plt>
804967b: 31 c0 xor %eax,%eax
804967d: 83 c4 34 add $0x34,%esp
8049680: c2 08 00 ret $0x8
8049683: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
8049689: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi
-----------------
dmd -O -release -inline temp.d mo.d
__Dmain comdat
L0: sub ESP,038h
mov EAX,offset FLAT:_DATA
push EBX
push ESI
push EDI
push EAX
call near ptr _atoi
add ESP,4
mov EBX,EAX
mov ECX,EAX
imul ECX,ECX
mov EDX,offset FLAT:_DATA[4]
push ECX
push EDX
call near ptr _printf
mov ESI,offset FLAT:_DATA[8]
push ESI
call near ptr _atof
mov EDI,offset FLAT:_DATA[0Ch]
fstp qword ptr 018h[ESP]
push EDI
call near ptr _atof
mov EAX,offset FLAT:_DATA[010h]
fstp qword ptr 024h[ESP]
push EAX
call near ptr _atof
add ESP,4
fld qword ptr 01Ch[ESP]
fxch ST1
fstp qword ptr 02Ch[ESP]
fcomp qword ptr 024h[ESP]
fstsw AX
sahf
ja L83
jp L83
fld qword ptr 01Ch[ESP]
fcomp qword ptr 02Ch[ESP]
fstsw AX
sahf
ja L7D
jp L7D
fld qword ptr 01Ch[ESP]
jmp short L9C
L7D: fld qword ptr 02Ch[ESP]
jmp short L9C
L83: fld qword ptr 024h[ESP]
fcomp qword ptr 02Ch[ESP]
fstsw AX
sahf
ja L98
jp L98
fld qword ptr 024h[ESP]
jmp short L9C
L98: fld qword ptr 02Ch[ESP]
L9C: sub ESP,8
mov ECX,offset FLAT:_DATA[014h]
fstp qword ptr [ESP]
push ECX
call near ptr _printf
add ESP,01Ch
xor EAX,EAX
pop EDI
pop ESI
pop EBX
add ESP,038h
ret
-----------------
When link-time optimization is used, LDC is able to inline those functions.
But with the plain command line shown above it does not, so here it seems LDC is worse than DMD :-(
Bye,
bearophile
More information about the Digitalmars-d-learn
mailing list