[Issue 4438] New: A missed function inlining

d-bugmail at puremagic.com d-bugmail at puremagic.com
Thu Jul 8 05:17:48 PDT 2010


http://d.puremagic.com/issues/show_bug.cgi?id=4438

           Summary: A missed function inlining
           Product: D
           Version: D1 & D2
          Platform: x86
        OS/Version: Windows
            Status: NEW
          Severity: enhancement
          Priority: P2
         Component: DMD
        AssignedTo: nobody at puremagic.com
        ReportedBy: bearophile_hugs at eml.cc


--- Comment #0 from bearophile_hugs at eml.cc 2010-07-08 05:17:43 PDT ---
A test program that can be compiled with DMD2+Phobos2 and LDC1+Tango1:


version (Tango)
    import tango.stdc.stdio: printf;
else
    import std.c.stdio: printf;

/**
 * Dot product of a1 and a2 restricted to the positions where mask is non-zero.
 *
 * Params:
 *     a1   = first operand
 *     a2   = second operand
 *     mask = one byte per element; element i contributes only if mask[i] != 0
 *
 * Returns: the sum of a1[i] * a2[i] over all i with mask[i] != 0
 *          (0.0 if mask is empty or all zero).
 *
 * The in-contract requires all three slices to have the same length.
 */
double masked_dot(double[] a1, double[] a2, ubyte[] mask)
    in {
        assert(a1.length == a2.length);
        assert(a1.length == mask.length);
    } body {
        double sum = 0.0;
        // The loop is driven by mask; i indexes a1 and a2 in lock-step.
        foreach (i, m; mask)
            if (m)
                sum += a1[i] * a2[i];
        return sum;
    }

// Test driver: builds two N x N matrices (filled with 2.0 and 0.5) and an
// all-ones mask, then prints the sum of the masked dot products of matching
// rows. masked_dot() is called once per row from the loop below; that call
// is the inlining candidate this report is about.
void main() {
    int N = 1000;

    // m1: N rows of N doubles, every element set to 2.0.
    auto m1 = new double[][](N, N);
    foreach (ref row; m1)
        row[] = 2.0;

    // m2: same shape, every element set to 0.5.
    auto m2 = new double[][](N, N);
    foreach (ref row; m2)
        row[] = 0.5;

    // Every mask byte is 1, so no element is skipped.
    auto mask = new ubyte[N];
    mask[] = 1;

    // Accumulate the masked dot product of row r of m1 with row r of m2.
    double sum = 0.0;
    for (int r; r < m1.length; r++)
        sum += masked_dot(m1[r], m2[r], mask);

    printf("%f\n", sum);
}


Compiled with:
dmd v2.047 and ldc based on DMD v1.057 and llvm 2.6

ldc -O3 -release -inline -output-s test
dmd -O -release -inline test.d


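I'd like dmd to inline this masked_dot() function too, as ldc does: in the ldc
listing below, _Dmain contains the masked_dot loop inline (.LBB2_6), whereas in
the dmd listing __Dmain still calls _D4test10masked_dotFAdAdAhZd once per row.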

-------------------------------

The cleaned up asm produced by dmd:

; dmd output for masked_dot(double[] a1, double[] a2, ubyte[] mask).
; All three slices arrive on the stack (24 bytes, removed by "ret 018h");
; the double result is returned on the x87 stack.
; Argument slots, as implied by the pushes at the call site in __Dmain
; (each slice is a length dword followed by a pointer dword):
;   038h[ESP] mask.length   03Ch[ESP] mask.ptr
;   040h[ESP] a2.length     044h[ESP] a2.ptr
;   048h[ESP] a1.length     04Ch[ESP] a1.ptr
; ESI is the loop index i; sum is kept in memory at 4[ESP]..0Bh[ESP].
_D4test10masked_dotFAdAdAhZd    comdat
        ; 030h bytes of locals plus the saved ESI: the return address is at
        ; 034h[ESP] and the arguments start at 038h[ESP].
        sub    ESP,030h
        push    ESI
        ; i = 0
        xor    ESI,ESI
        ; sum = 0.0 (both dwords of the 8-byte slot cleared)
        mov    dword ptr 4[ESP],0
        mov    dword ptr 8[ESP],0
        ; Skip the loop entirely when mask.length == 0.
        cmp    038h[ESP],ESI
        je    L51
        ; ECX = mask.ptr. The value loaded into EAX is not read afterwards.
        mov    EDX,03Ch[ESP]
        mov    EAX,038h[ESP]
        mov    ECX,EDX
        ; Loop head: if (mask[i] == 0) skip the accumulation.
L26:        cmp    [ECX][ESI],0
        je    L4A
        ; a1.ptr and a2.ptr are reloaded from the stack on every iteration;
        ; the two loads into EAX are never used.
        mov    EDX,04Ch[ESP]
        mov    EAX,048h[ESP]
        mov    EAX,040h[ESP]
        ; ST0 = a1[i]
        fld    qword ptr [ESI*8][EDX]
        mov    EDX,044h[ESP]
        ; ST0 = a1[i] * a2[i]
        fmul    qword ptr [ESI*8][EDX]
        ; sum += ST0, with sum read from and written back to memory each time.
        fadd    qword ptr 4[ESP]
        fstp    qword ptr 4[ESP]
        ; i++ and loop while i < mask.length (unsigned compare against memory).
L4A:        inc    ESI
        cmp    ESI,038h[ESP]
        jb    L26
        ; Return sum in ST0, restore ESI, release locals, pop the 24 argument bytes.
L51:        fld    qword ptr 4[ESP]
        pop    ESI
        add    ESP,030h
        ret    018h

; dmd output for main(). The point of interest for this report is the loop at
; L104: masked_dot is reached through a real "call", it is not inlined.
; Stack slots once the three register pushes are done and no call arguments
; are pending (each slice is a length dword followed by a pointer dword):
;   010h/014h[ESP] m1      018h/01Ch[ESP] m2      020h/024h[ESP] mask
;   028h..02Fh[ESP] sum    030h[ESP] r
; While call arguments are still on the stack the same slots appear at
; correspondingly larger offsets.
__Dmain    comdat
        ; Reserve 028h bytes of locals and save EBX/ESI/EDI.
L0:        sub    ESP,028h
        mov    EAX,offset FLAT:_D12TypeInfo_AAd6__initZ
        push    EBX
        push    ESI
        push    EDI
        ; m1 = new double[][](1000, 1000): pushes 1000, 1000, the dimension
        ; count 2 and the TypeInfo, then calls the runtime allocator.
        push    03E8h
        push    03E8h
        push    2
        push    EAX
        call    near ptr __d_newarraymiT
        ; EBX = 0 is the row index of the fill loop.
        xor    EBX,EBX
        ; Store the returned slice (EAX, EDX) as m1; the offsets are 010h larger
        ; here because the four arguments are only removed by the add below.
        mov    020h[ESP],EAX
        mov    024h[ESP],EDX
        add    ESP,010h
        ; Skip the fill loop when m1.length == 0.
        cmp    010h[ESP],EBX
        je    L58
        ; ESI = m1.ptr
        mov    ESI,EDX
        ; Fill loop for m1: EDI = &m1[EBX]; pushes the row length, the two
        ; dwords at _DATA[00h..07h] (presumably the double 2.0 - confirm) and
        ; the row pointer, then calls __memset64. The value loaded into EAX
        ; is not used.
L32:        lea    EDI,[EBX*8][ESI]
        mov    EDX,4[EDI]
        mov    EAX,[EDI]
        push    dword ptr [EDI]
        push    dword ptr FLAT:_DATA[04h]
        push    dword ptr FLAT:_DATA[00h]
        push    EDX
        call    near ptr __memset64
        add    ESP,010h
        inc    EBX
        cmp    EBX,010h[ESP]
        jb    L32
        ; m2 = new double[][](1000, 1000), same sequence as for m1.
L58:        push    03E8h
        mov    ECX,offset FLAT:_D12TypeInfo_AAd6__initZ
        push    03E8h
        push    2
        push    ECX
        call    near ptr __d_newarraymiT
        xor    EBX,EBX
        ; Store the returned slice as m2 (018h/01Ch[ESP] after the add).
        mov    028h[ESP],EAX
        mov    02Ch[ESP],EDX
        add    ESP,010h
        ; Skip the fill loop when m2.length == 0.
        cmp    018h[ESP],EBX
        je    LAA
        ; ESI = m2.ptr
        mov    ESI,EDX
        ; Fill loop for m2: identical to L32 but with the 8-byte value at
        ; _DATA[08h..0Fh] (presumably the double 0.5 - confirm).
L84:        lea    EDI,[EBX*8][ESI]
        mov    EDX,4[EDI]
        mov    EAX,[EDI]
        push    dword ptr [EDI]
        push    dword ptr FLAT:_DATA[0Ch]
        push    dword ptr FLAT:_DATA[08h]
        push    EDX
        call    near ptr __memset64
        add    ESP,010h
        inc    EBX
        cmp    EBX,018h[ESP]
        jb    L84
        ; mask = new ubyte[1000]
LAA:        push    03E8h
        mov    EBX,offset FLAT:_D11TypeInfo_Ah6__initZ
        push    EBX
        call    near ptr __d_newarrayT
        ; Store the returned slice as mask (020h/024h[ESP] after the add below),
        ; then set up the fill: ECX = mask.length, AL = 1, EDI = mask.ptr.
        mov    028h[ESP],EAX
        mov    ECX,028h[ESP]
        mov    EAX,01010101h
        mov    02Ch[ESP],EDX
        mov    EDX,02Ch[ESP]
        mov    EBX,028h[ESP]
        mov    EDI,EDX
        ; mask[] = 1: store AL into ECX consecutive bytes; ECX is 0 afterwards.
        rep
        stosb
        ; Reuse the zero in ECX to clear sum (two dwords) and r.
        mov    030h[ESP],ECX
        mov    034h[ESP],ECX
        mov    038h[ESP],ECX
        ; Drop the two __d_newarrayT arguments.
        add    ESP,8
        ; Skip the main loop when m1.length == 0.
        cmp    010h[ESP],ECX
        je    L12E
        ; EDI = m1.ptr, ESI = m2.ptr, EBX = r. The two loads into EAX are
        ; never used.
        mov    EDX,014h[ESP]
        mov    EDI,EDX
        mov    EAX,010h[ESP]
        mov    EDX,01Ch[ESP]
        mov    EBX,030h[ESP]
        mov    EAX,018h[ESP]
        mov    ESI,EDX
        ; Main loop: push m1[r] (pointer, then length), m2[r] (pointer, then
        ; length) and mask, then call masked_dot. Both 034h[ESP] pushes refer
        ; to mask: the first reads mask.ptr, and because that push moves ESP
        ; the second reads mask.length.
L104:        push    dword ptr 4[EBX*8][EDI]
        push    [EBX*8][EDI]
        push    dword ptr 4[EBX*8][ESI]
        push    [EBX*8][ESI]
        push    dword ptr 034h[ESP]
        push    dword ptr 034h[ESP]
        call    near ptr _D4test10masked_dotFAdAdAhZd
        ; The callee pops its own arguments (ret 018h) and returns in ST0.
        ; r++; sum += ST0; loop while r < m1.length. fstp does not modify
        ; EFLAGS, so jb still tests the result of the cmp.
        inc    EBX
        fadd    qword ptr 028h[ESP]
        cmp    EBX,010h[ESP]
        fstp    qword ptr 028h[ESP]
        jb    L104
        ; printf(format at _DATA[010h], sum): the two 02Ch[ESP] pushes pass the
        ; high and then the low dword of sum, since the first push moves ESP.
L12E:        push    dword ptr 02Ch[ESP]
        mov    ECX,offset FLAT:_DATA[010h]
        push    dword ptr 02Ch[ESP]
        push    ECX
        call    near ptr _printf
        add    ESP,0Ch
        ; Return 0, restore the saved registers and release the locals.
        xor    EAX,EAX
        pop    EDI
        pop    ESI
        pop    EBX
        add    ESP,028h
        ret

-------------------------------

The cleaned up asm produced by ldc:

# ldc output (AT&T syntax) for the stand-alone copy of
# masked_dot(double[] a1, double[] a2, ubyte[] mask).
# After the two pushes and the 12-byte frame the return address is at
# 20(%esp) and the stack arguments start at 24(%esp). The slot meanings below
# follow from how the loop uses them: 24 is the trip count, 28 is the
# byte-indexed array, 36 and 44 are the two 8-byte-indexed arrays.
# Which of 36/44 is a1 and which is a2 is not visible here; both are only
# multiplied together.
# Register roles: %eax = mask.length, %ecx = mask.ptr, %edx and %esi = the two
# double arrays, %edi = i, %xmm0 = sum.
_D4test10masked_dotFAdAdAhZd:
    pushl   %edi
    pushl   %esi
    subl    $12, %esp
    # Empty mask: jump to the block that produces a 0.0 result.
    movl    24(%esp), %eax
    testl   %eax, %eax
    je  .LBB1_6
    # All pointers and the length are loaded once, before the loop.
    movl    28(%esp), %ecx
    movl    36(%esp), %edx
    movl    44(%esp), %esi
    # sum = 0.0, i = 0
    pxor    %xmm0, %xmm0
    xorl    %edi, %edi
    .align  16
.LBB1_2:
    # if (mask[i] == 0) skip the accumulation
    cmpb    $0, (%ecx,%edi)
    je  .LBB1_4
    # sum += product of the two doubles at index i; sum stays in %xmm0.
    movsd   (%esi,%edi,8), %xmm1
    mulsd   (%edx,%edi,8), %xmm1
    addsd   %xmm1, %xmm0
.LBB1_4:
    # i++ and loop until i == mask.length
    incl    %edi
    cmpl    %eax, %edi
    jne .LBB1_2
.LBB1_5:
    # Move the SSE result through the stack so it is returned in x87 ST0.
    movsd   %xmm0, (%esp)
    fldl    (%esp)
    addl    $12, %esp
    popl    %esi
    popl    %edi
    # Pop the 24 bytes of arguments on return.
    ret $24
.LBB1_6:
    # Empty-mask path: result is 0.0.
    pxor    %xmm0, %xmm0
    jmp .LBB1_5

# ldc output (AT&T syntax) for main(). There is no call to masked_dot in this
# function: its loop appears inline at .LBB2_6, and every loop bound is the
# constant 1000.
# Call arguments are written into the slots at the bottom of the 36-byte frame
# with movl, so %esp is not adjusted around the calls.
# Register roles after setup: %edi = m1 rows, %ebx = m2 rows, %esi = mask data.
_Dmain:
    pushl   %ebp
    pushl   %ebx
    pushl   %edi
    pushl   %esi
    subl    $36, %esp
    # m1: a two-entry dimension list {1000, 1000} is built at 28(%esp), then
    # _d_newarraymiT is called with (TypeInfo, 2, address of that list).
    movl    $1000, 28(%esp)
    movl    $1000, 32(%esp)
    leal    28(%esp), %eax
    movl    %eax, 8(%esp)
    movl    $2, 4(%esp)
    movl    $_D12TypeInfo_AAd6__initZ, (%esp)
    xorl    %esi, %esi
    call    _d_newarraymiT
    # %edi = value returned in %eax, used below as the base of m1's row slices.
    movl    %eax, %edi
    .align  16
.LBB2_1:
    # Fill row %esi of m1: the dword at offset 4 of the row slice goes to
    # (%esp) and the dword at offset 0 goes to 4(%esp). 8/12(%esp) hold a
    # double with low dword 0 and high dword 1073741824 (0x40000000), i.e. 2.0.
    movl    4(%edi,%esi,8), %eax
    movl    (%edi,%esi,8), %ecx
    movl    %ecx, 4(%esp)
    movl    %eax, (%esp)
    movl    $1073741824, 12(%esp)
    movl    $0, 8(%esp)
    call    _d_array_init_double
    incl    %esi
    cmpl    $1000, %esi
    jne .LBB2_1
    # m2: same allocation sequence, with the dimension list at 20(%esp).
    movl    $1000, 20(%esp)
    movl    $1000, 24(%esp)
    leal    20(%esp), %eax
    movl    %eax, 8(%esp)
    movl    $2, 4(%esp)
    movl    $_D12TypeInfo_AAd6__initZ, (%esp)
    xorl    %esi, %esi
    call    _d_newarraymiT
    # %ebx = base of m2's row slices.
    movl    %eax, %ebx
    .align  16
.LBB2_3:
    # Fill row %esi of m2; high dword 1071644672 (0x3FE00000) with low dword 0
    # is the double 0.5.
    movl    4(%ebx,%esi,8), %eax
    movl    (%ebx,%esi,8), %ecx
    movl    %ecx, 4(%esp)
    movl    %eax, (%esp)
    movl    $1071644672, 12(%esp)
    movl    $0, 8(%esp)
    call    _d_array_init_double
    incl    %esi
    cmpl    $1000, %esi
    jne .LBB2_3
    # mask = new ubyte[1000]; %esi = returned value, used as the data pointer.
    movl    $1000, 4(%esp)
    movl    $_D11TypeInfo_Ah6__initZ, (%esp)
    call    _d_newarrayT
    movl    %eax, %esi
    # mask[] = 1, done as memset(mask, 1, 1000).
    movl    %esi, (%esp)
    movl    $1000, 8(%esp)
    movl    $1, 4(%esp)
    call    memset
    # %xmm0 = total sum = 0.0, %eax = r = 0
    pxor    %xmm0, %xmm0
    xorl    %eax, %eax
.LBB2_5:
    # Outer loop over rows: %ecx = m2[r].ptr, %edx = m1[r].ptr,
    # %xmm1 = per-row sum = 0.0, %ebp = i = 0.
    movl    4(%ebx,%eax,8), %ecx
    movl    4(%edi,%eax,8), %edx
    pxor    %xmm1, %xmm1
    xorl    %ebp, %ebp
    .align  16
.LBB2_6:
    # Inlined masked_dot body: if (mask[i]) rowsum += m1[r][i] * m2[r][i].
    cmpb    $0, (%esi,%ebp)
    je  .LBB2_8
    movsd   (%edx,%ebp,8), %xmm2
    mulsd   (%ecx,%ebp,8), %xmm2
    addsd   %xmm2, %xmm1
.LBB2_8:
    # i++ and loop until i == 1000
    incl    %ebp
    cmpl    $1000, %ebp
    jne .LBB2_6
    # total += rowsum; r++ and loop until r == 1000
    addsd   %xmm1, %xmm0
    incl    %eax
    cmpl    $1000, %eax
    jne .LBB2_5
    # printf(.str, total): the double occupies 4..11(%esp).
    movsd   %xmm0, 4(%esp)
    movl    $.str, (%esp)
    call    printf
    # Return 0, release the frame and restore the saved registers.
    xorl    %eax, %eax
    addl    $36, %esp
    popl    %esi
    popl    %edi
    popl    %ebx
    popl    %ebp
    # Pops 8 bytes of arguments on return (presumably main's argument slice -
    # confirm against the LDC calling convention).
    ret $8

-- 
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------


More information about the Digitalmars-d-bugs mailing list