LDC 0.12.0 beta 1 released, please help test!

bearophile bearophileHUGS at lycos.com
Tue Oct 15 14:16:30 PDT 2013


David Nadlinger:

> (Almost) all the issues reported against the first 0.12.0 alpha
> release have been fixed, so it is time for the next round of 
> testing.

I am now testing this ldc2 on Windows.

I have also seen this thread:

http://forum.dlang.org/thread/CAP9J_HXfC736D452phSFBuOsoWtiRZ=Qsx-4eiGtwG-E76EMnA@mail.gmail.com


This is a small test program:

import std.algorithm: count;
int foo(int[] data) {
     return data.count([0, 1]);
}
void main() {}



This ldc2 compiles it to (32 bit code):



__D4test3fooFAiZi:
     pushl   %ebp
     pushl   %ebx
     pushl   %edi
     pushl   %esi
     subl    $24, %esp
     movl    $2, 4(%esp)
     movl    $__D11TypeInfo_Ai6__initZ, (%esp)
     calll   __d_newarrayvT
     movl    %eax, %esi
     movl    %edx, %ebx
     movl    $1, 4(%ebx)
     movl    $0, (%ebx)
     movl    48(%esp), %ebp
     movl    44(%esp), %edi
     testl   %esi, %esi
     jne LBB0_2
     movl    $_.str5, 16(%esp)
     movl    $58, 12(%esp)
     movl    $5699, 8(%esp)
     movl    $_.str4, 4(%esp)
     movl    $42, (%esp)
     calll   __D3std9exception7bailOutFNaNfAyakxAaZv
     subl    $20, %esp
LBB0_2:
     movl    %ebp, 12(%esp)
     movl    %edi, 8(%esp)
     movl    %ebx, 4(%esp)
     movl    %ebx, 20(%esp)
     movl    %esi, (%esp)
     xorl    %ebx, %ebx
     calll   __D3std9algorithm34__T4findVAyaa6_61203d3d2062TAiTAiZ4findFNaNbNfAiAiZAi
     subl    $16, %esp
     testl   %eax, %eax
     je  LBB0_5
     testl   %esi, %esi
     je  LBB0_5
     .align  16, 0x90
LBB0_4:
     movl    20(%esp), %ecx
     movl    %ecx, 4(%esp)
     movl    %esi, (%esp)
     movl    %edi, %ecx
     subl    %eax, %ecx
     addl    %esi, %ecx
     leal    (%ebp,%ecx,4), %ebp
     movl    %ebp, 12(%esp)
     subl    %ecx, %edi
     movl    %edi, 8(%esp)
     incl    %ebx
     calll   __D3std9algorithm34__T4findVAyaa6_61203d3d2062TAiTAiZ4findFNaNbNfAiAiZAi
     subl    $16, %esp
     testl   %eax, %eax
     jne LBB0_4
LBB0_5:
     movl    %ebx, %eax
     addl    $24, %esp
     popl    %esi
     popl    %edi
     popl    %ebx
     popl    %ebp
     ret $8


As you see there's a call to __d_newarrayvT. But perhaps that 
call can be optimized away by a good compiler, allocating the [0, 
1] array statically, something like this:


import std.algorithm: count;
int foo(int[] data) {
     return data.count([0, 1]);
}
void main() {}



import std.algorithm: count;
int foo(int[] data) {
     static int[2] tmp = [0, 1];
     return data.count(tmp[]);
}
void main() {}


ldc2 compiles this second version to better code:

__D5test23fooFAiZi:
     pushl   %ebp
     pushl   %ebx
     pushl   %edi
     pushl   %esi
     subl    $40, %esp
     movl    64(%esp), %esi
     movl    60(%esp), %edx
     cmpl    $2, %edx
     jb  LBB0_4
     movl    __tls_index, %eax
     movl    %fs:44, %ecx
     movl    (%ecx,%eax,4), %ecx
     movl    __D5test23fooFAiZi3tmpG2i@SECREL32+4(%ecx), %edi
     movl    %edi, 36(%esp)
     movl    $1, %eax
     cmpl    %edi, __D5test23fooFAiZi3tmpG2i@SECREL32(%ecx)
     movl    $1, %ebx
     je  LBB0_3
     movl    $2, %ebx
LBB0_3:
     movl    %ebx, 32(%esp)
     cmpl    $1, %edx
     jbe LBB0_4
     .align  16, 0x90
LBB0_9:
     movl    %eax, %ebp
     movl    $1, %eax
     movl    36(%esp), %ecx
     cmpl    %ecx, (%esi,%ebp,4)
     jne LBB0_12
     leal    -1(%ebp), %eax
     movl    %edx, %edi
     subl    %eax, %edi
     movl    %ebx, %eax
     je  LBB0_12
     leal    -4(%esi,%ebp,4), %eax
     movl    %eax, 4(%esp)
     movl    __tls_index, %eax
     movl    %fs:44, %ecx
     movl    (%ecx,%eax,4), %eax
     leal    __D5test23fooFAiZi3tmpG2i@SECREL32(%eax), %eax
     movl    %eax, 12(%esp)
     movl    $__D11TypeInfo_Ai6__initZ, 16(%esp)
     movl    $1, 8(%esp)
     movl    $1, (%esp)
     movl    %esi, %ebx
     movl    %edx, %esi
     calll   __adEq2
     movl    %esi, %edx
     movl    %ebx, %esi
     movl    32(%esp), %ebx
     testl   %eax, %eax
     movl    %ebx, %eax
     jne LBB0_5
LBB0_12:
     addl    %ebp, %eax
     cmpl    %edx, %eax
     jb  LBB0_9
LBB0_4:
     xorl    %edi, %edi
LBB0_5:
     movl    $0, 20(%esp)
     testl   %edi, %edi
     je  LBB0_23
     movl    __tls_index, %eax
     movl    %fs:44, %ecx
     movl    (%ecx,%eax,4), %eax
     movl    __D5test23fooFAiZi3tmpG2i@SECREL32+4(%eax), %eax
     movl    %eax, 32(%esp)
     movl    $0, 20(%esp)
     jmp LBB0_7
LBB0_21:
     movl    28(%esp), %edi
     jmp LBB0_22
     .align  16, 0x90
LBB0_7:
     movl    %esi, %ebp
     movl    %edx, %esi
     subl    %edi, %esi
     leal    8(%ebp,%esi,4), %eax
     movl    %eax, 24(%esp)
     addl    $2, %esi
     subl    %esi, %edx
     incl    20(%esp)
     cmpl    $2, %edx
     jae LBB0_13
     xorl    %edi, %edi
     jmp LBB0_22
     .align  16, 0x90
LBB0_13:
     movl    __tls_index, %eax
     movl    %fs:44, %ecx
     movl    (%ecx,%eax,4), %ecx
     movl    $1, %eax
     movl    32(%esp), %edi
     cmpl    %edi, __D5test23fooFAiZi3tmpG2i@SECREL32(%ecx)
     movl    $1, 36(%esp)
     je  LBB0_15
     movl    $2, 36(%esp)
LBB0_15:
     cmpl    $1, %edx
     jbe LBB0_16
     .align  16, 0x90
LBB0_17:
     movl    %eax, %ebx
     leal    (%ebx,%esi), %ecx
     movl    $1, %eax
     movl    32(%esp), %edi
     cmpl    %edi, (%ebp,%ecx,4)
     jne LBB0_20
     leal    -1(%ebx), %eax
     movl    %edx, %ecx
     subl    %eax, %ecx
     movl    %ecx, 28(%esp)
     movl    36(%esp), %eax
     je  LBB0_20
     leal    -1(%ebx,%esi), %eax
     leal    (%ebp,%eax,4), %eax
     movl    %eax, 4(%esp)
     movl    __tls_index, %eax
     movl    %fs:44, %ecx
     movl    (%ecx,%eax,4), %eax
     leal    __D5test23fooFAiZi3tmpG2i@SECREL32(%eax), %eax
     movl    %eax, 12(%esp)
     movl    $__D11TypeInfo_Ai6__initZ, 16(%esp)
     movl    $1, 8(%esp)
     movl    $1, (%esp)
     movl    %esi, %edi
     movl    %ebp, %esi
     movl    %edx, %ebp
     calll   __adEq2
     movl    %ebp, %edx
     movl    %esi, %ebp
     movl    %edi, %esi
     testl   %eax, %eax
     movl    36(%esp), %eax
     jne LBB0_21
LBB0_20:
     addl    %ebx, %eax
     cmpl    %edx, %eax
     jb  LBB0_17
LBB0_16:
     xorl    %edi, %edi
LBB0_22:
     testl   %edi, %edi
     movl    24(%esp), %esi
     jne LBB0_7
LBB0_23:
     movl    20(%esp), %eax
     addl    $40, %esp
     popl    %esi
     popl    %edi
     popl    %ebx
     popl    %ebp
     ret $8


(I don't know what __D5test23fooFAiZi3tmpG2i@SECREL32 is).

Now what's left to remove are the two calls to __adEq2 (because I 
think they are used only on length-2 arrays and are much slower than 
an inlined test of the length and of the equality of two ints).

So can ldc2 remove this call to __d_newarrayvT and later even to 
__adEq2?

Bye,
bearophile


More information about the digitalmars-d-ldc mailing list