Is there a way to get a list of functions that get inlined by

bearophile bearophileHUGS at lycos.com
Tue Feb 9 14:18:34 PST 2010


Scorn:

> double min(double a, double b, double c)
> {
>     return a < b && a < c ? a : b < c ? b : c;
> }

Don't write code like that; add some parentheses, like this:

return (a < b && a < c) ? a : (b < c ? b : c);

because the compiler is able to sort out those operator precedences, but the programmer who comes after you and reads that code will have problems.
A compiler compiles that code with three FP tests, while I think two suffice (see min3 below, which evaluates only two comparisons per call), so there are better ways to write it.


> This is (and a little bit more) is running in a tight loop which runs
> about 10000000 times.
> With these "optimizations" i get a speed increase about 20% percent. 

---------------------

I have created a module named "mo" and a main module named "temp":

module mo;
// Returns x squared. The main module calls this across a module boundary
// to check whether the compiler inlines the call.
int foo(int x) {
    return x * x;
}

// Returns the smallest of a, b and c (for ordinary, non-NaN values).
// Structured as a two-level decision: first a against b, then the smaller
// of those against c, so only two comparisons are evaluated on any call.
double min3(double a, double b, double c) {
    return (a <= b) ? (a <= c ? a : c) : (b <= c ? b : c);
}

---------------------

module temp; // main module
version (Tango) {
    import tango.stdc.stdio: printf;
    import tango.stdc.stdlib: atoi, atof;
} else {
    import std.c.stdio: printf;
    import std.c.stdlib: atoi, atof;
}
import mo: foo, min3;

// Test driver: calls foo and min3 from module mo and prints their results,
// so the generated code can be inspected for inlining.
void main() {
    // Inputs are parsed from strings at run time -- presumably so the
    // compiler cannot constant-fold the calls away.
    int x = atoi("12");
    printf("%d\n", foo(x));

    double x1 = atof("10");
    double x2 = atof("20");
    double x3 = atof("30");
    printf("%f\n", min3(x1, x2, x3));
}

---------------------

From my tests it seems LDC isn't able to inline those functions, while DMD is able to inline them :-)

ldc -O5 -release -output-s -inline temp.d mo.d

08049600 <_Dmain>:
 8049600:	83 ec 34             	sub    $0x34,%esp
 8049603:	c7 04 24 e8 8c 05 08 	movl   $0x8058ce8,(%esp)
 804960a:	e8 99 fd ff ff       	call   80493a8 <atoi@plt>
 804960f:	e8 9c 00 00 00       	call   80496b0 <_D2mo3fooFiZi>
 8049614:	89 44 24 04          	mov    %eax,0x4(%esp)
 8049618:	c7 04 24 eb 8c 05 08 	movl   $0x8058ceb,(%esp)
 804961f:	e8 64 fd ff ff       	call   8049388 <printf@plt>
 8049624:	c7 04 24 ef 8c 05 08 	movl   $0x8058cef,(%esp)
 804962b:	e8 98 fd ff ff       	call   80493c8 <atof@plt>
 8049630:	db 7c 24 28          	fstpt  0x28(%esp)
 8049634:	c7 04 24 f2 8c 05 08 	movl   $0x8058cf2,(%esp)
 804963b:	e8 88 fd ff ff       	call   80493c8 <atof@plt>
 8049640:	db 7c 24 1c          	fstpt  0x1c(%esp)
 8049644:	c7 04 24 f5 8c 05 08 	movl   $0x8058cf5,(%esp)
 804964b:	e8 78 fd ff ff       	call   80493c8 <atof@plt>
 8049650:	db 6c 24 28          	fldt   0x28(%esp)
 8049654:	dd 5c 24 10          	fstpl  0x10(%esp)
 8049658:	db 6c 24 1c          	fldt   0x1c(%esp)
 804965c:	dd 5c 24 08          	fstpl  0x8(%esp)
 8049660:	dd 1c 24             	fstpl  (%esp)
 8049663:	e8 58 00 00 00       	call   80496c0 <_D2mo4min3FdddZd>
 8049668:	83 ec 18             	sub    $0x18,%esp
 804966b:	dd 5c 24 04          	fstpl  0x4(%esp)
 804966f:	c7 04 24 f8 8c 05 08 	movl   $0x8058cf8,(%esp)
 8049676:	e8 0d fd ff ff       	call   8049388 <printf@plt>
 804967b:	31 c0                	xor    %eax,%eax
 804967d:	83 c4 34             	add    $0x34,%esp
 8049680:	c2 08 00             	ret    $0x8
 8049683:	8d b6 00 00 00 00    	lea    0x0(%esi),%esi
 8049689:	8d bc 27 00 00 00 00 	lea    0x0(%edi,%eiz,1),%edi

-----------------

dmd -O -release -inline temp.d mo.d

; DMD output for main() of module temp (built with -O -release -inline).
; There is no call to foo or min3 anywhere in this listing: both bodies
; appear inline below. Only atoi, atof and printf are called.
__Dmain comdat
L0:     sub ESP,038h
        mov EAX,offset FLAT:_DATA
        push    EBX
        push    ESI
        push    EDI
        push    EAX
        call    near ptr _atoi
        add ESP,4
        mov EBX,EAX
        ; foo(x) inlined: ECX = x * x, passed straight to printf.
        mov ECX,EAX
        imul    ECX,ECX
        mov EDX,offset FLAT:_DATA[4]
        push    ECX
        push    EDX
        call    near ptr _printf
        ; Three atof calls; the first two results (x1, x2) are spilled to the
        ; stack as doubles, the third (x3) is left in ST0.
        mov ESI,offset FLAT:_DATA[8]
        push    ESI
        call    near ptr _atof
        mov EDI,offset FLAT:_DATA[0Ch]
        fstp    qword ptr 018h[ESP]
        push    EDI
        call    near ptr _atof
        mov EAX,offset FLAT:_DATA[010h]
        fstp    qword ptr 024h[ESP]
        push    EAX
        call    near ptr _atof
        add ESP,4
        ; min3(x1, x2, x3) inlined. At this ESP: 01Ch = x1, 024h = x2, 02Ch = x3.
        ; Reload x1, spill x3, then compare x1 against x2.
        fld qword ptr 01Ch[ESP]
        fxch    ST1
        fstp    qword ptr 02Ch[ESP]
        fcomp   qword ptr 024h[ESP]
        ; fstsw + sahf copy the x87 compare result into the CPU flags;
        ; ja = greater, jp = unordered (NaN operand).
        fstsw   AX
        sahf
        ; x1 > x2 (or unordered): the answer is x2 or x3.
        ja  L83
        jp  L83
        ; x1 <= x2: compare x1 against x3.
        fld qword ptr 01Ch[ESP]
        fcomp   qword ptr 02Ch[ESP]
        fstsw   AX
        sahf
        ja  L7D
        jp  L7D
        ; result = x1
        fld qword ptr 01Ch[ESP]
        jmp short   L9C
        ; result = x3
L7D:        fld qword ptr 02Ch[ESP]
        jmp short   L9C
        ; Compare x2 against x3.
L83:        fld qword ptr 024h[ESP]
        fcomp   qword ptr 02Ch[ESP]
        fstsw   AX
        sahf
        ja  L98
        jp  L98
        ; result = x2
        fld qword ptr 024h[ESP]
        jmp short   L9C
        ; result = x3
L98:        fld qword ptr 02Ch[ESP]
        ; Store the result (in ST0) as the double argument of the second printf.
L9C:        sub ESP,8
        mov ECX,offset FLAT:_DATA[014h]
        fstp    qword ptr [ESP]
        push    ECX
        call    near ptr _printf
        ; Single deferred cleanup: 01Ch = 28 bytes of arguments still on the
        ; stack (both printf calls plus the first two atof calls).
        add ESP,01Ch
        xor EAX,EAX
        pop EDI
        pop ESI
        pop EBX
        add ESP,038h
        ret

-----------------

Using link-time optimization, LDC is able to inline those functions.
So here it seems LDC is worse :-(

Bye,
bearophile


More information about the Digitalmars-d-learn mailing list