LDC 0.12.0 beta 1 released, please help test!
bearophile
bearophileHUGS at lycos.com
Tue Oct 15 14:16:30 PDT 2013
David Nadlinger:
> (Almost) all the issues reported against the first 0.12.0 alpha
> release have been fixed, so it is time for the next round of
> testing.
I am now testing this ldc2 on Windows.
I have also seen this thread:
http://forum.dlang.org/thread/CAP9J_HXfC736D452phSFBuOsoWtiRZ=Qsx-4eiGtwG-E76EMnA@mail.gmail.com
This is a small test program:
import std.algorithm: count;
int foo(int[] data) {
return data.count([0, 1]);
}
void main() {}
This ldc2 compiles it to (32 bit code):
__D4test3fooFAiZi:
pushl %ebp
pushl %ebx
pushl %edi
pushl %esi
subl $24, %esp
movl $2, 4(%esp)
movl $__D11TypeInfo_Ai6__initZ, (%esp)
calll __d_newarrayvT
movl %eax, %esi
movl %edx, %ebx
movl $1, 4(%ebx)
movl $0, (%ebx)
movl 48(%esp), %ebp
movl 44(%esp), %edi
testl %esi, %esi
jne LBB0_2
movl $_.str5, 16(%esp)
movl $58, 12(%esp)
movl $5699, 8(%esp)
movl $_.str4, 4(%esp)
movl $42, (%esp)
calll __D3std9exception7bailOutFNaNfAyakxAaZv
subl $20, %esp
LBB0_2:
movl %ebp, 12(%esp)
movl %edi, 8(%esp)
movl %ebx, 4(%esp)
movl %ebx, 20(%esp)
movl %esi, (%esp)
xorl %ebx, %ebx
calll __D3std9algorithm34__T4findVAyaa6_61203d3d2062TAiTAiZ4findFNaNbNfAiAiZAi
subl $16, %esp
testl %eax, %eax
je LBB0_5
testl %esi, %esi
je LBB0_5
.align 16, 0x90
LBB0_4:
movl 20(%esp), %ecx
movl %ecx, 4(%esp)
movl %esi, (%esp)
movl %edi, %ecx
subl %eax, %ecx
addl %esi, %ecx
leal (%ebp,%ecx,4), %ebp
movl %ebp, 12(%esp)
subl %ecx, %edi
movl %edi, 8(%esp)
incl %ebx
calll __D3std9algorithm34__T4findVAyaa6_61203d3d2062TAiTAiZ4findFNaNbNfAiAiZAi
subl $16, %esp
testl %eax, %eax
jne LBB0_4
LBB0_5:
movl %ebx, %eax
addl $24, %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
ret $8
As you see there's a call to __d_newarrayvT. But perhaps that
call can be optimized away by a good compiler, allocating the [0,
1] array statically, something like this:
import std.algorithm: count;
int foo(int[] data) {
return data.count([0, 1]);
}
void main() {}
import std.algorithm: count;
int foo(int[] data) {
static int[2] tmp = [0, 1];
return data.count(tmp[]);
}
void main() {}
That ldc2 compiles this better, to:
__D5test23fooFAiZi:
pushl %ebp
pushl %ebx
pushl %edi
pushl %esi
subl $40, %esp
movl 64(%esp), %esi
movl 60(%esp), %edx
cmpl $2, %edx
jb LBB0_4
movl __tls_index, %eax
movl %fs:44, %ecx
movl (%ecx,%eax,4), %ecx
movl __D5test23fooFAiZi3tmpG2i@SECREL32+4(%ecx), %edi
movl %edi, 36(%esp)
movl $1, %eax
cmpl %edi, __D5test23fooFAiZi3tmpG2i@SECREL32(%ecx)
movl $1, %ebx
je LBB0_3
movl $2, %ebx
LBB0_3:
movl %ebx, 32(%esp)
cmpl $1, %edx
jbe LBB0_4
.align 16, 0x90
LBB0_9:
movl %eax, %ebp
movl $1, %eax
movl 36(%esp), %ecx
cmpl %ecx, (%esi,%ebp,4)
jne LBB0_12
leal -1(%ebp), %eax
movl %edx, %edi
subl %eax, %edi
movl %ebx, %eax
je LBB0_12
leal -4(%esi,%ebp,4), %eax
movl %eax, 4(%esp)
movl __tls_index, %eax
movl %fs:44, %ecx
movl (%ecx,%eax,4), %eax
leal __D5test23fooFAiZi3tmpG2i@SECREL32(%eax), %eax
movl %eax, 12(%esp)
movl $__D11TypeInfo_Ai6__initZ, 16(%esp)
movl $1, 8(%esp)
movl $1, (%esp)
movl %esi, %ebx
movl %edx, %esi
calll __adEq2
movl %esi, %edx
movl %ebx, %esi
movl 32(%esp), %ebx
testl %eax, %eax
movl %ebx, %eax
jne LBB0_5
LBB0_12:
addl %ebp, %eax
cmpl %edx, %eax
jb LBB0_9
LBB0_4:
xorl %edi, %edi
LBB0_5:
movl $0, 20(%esp)
testl %edi, %edi
je LBB0_23
movl __tls_index, %eax
movl %fs:44, %ecx
movl (%ecx,%eax,4), %eax
movl __D5test23fooFAiZi3tmpG2i@SECREL32+4(%eax), %eax
movl %eax, 32(%esp)
movl $0, 20(%esp)
jmp LBB0_7
LBB0_21:
movl 28(%esp), %edi
jmp LBB0_22
.align 16, 0x90
LBB0_7:
movl %esi, %ebp
movl %edx, %esi
subl %edi, %esi
leal 8(%ebp,%esi,4), %eax
movl %eax, 24(%esp)
addl $2, %esi
subl %esi, %edx
incl 20(%esp)
cmpl $2, %edx
jae LBB0_13
xorl %edi, %edi
jmp LBB0_22
.align 16, 0x90
LBB0_13:
movl __tls_index, %eax
movl %fs:44, %ecx
movl (%ecx,%eax,4), %ecx
movl $1, %eax
movl 32(%esp), %edi
cmpl %edi, __D5test23fooFAiZi3tmpG2i@SECREL32(%ecx)
movl $1, 36(%esp)
je LBB0_15
movl $2, 36(%esp)
LBB0_15:
cmpl $1, %edx
jbe LBB0_16
.align 16, 0x90
LBB0_17:
movl %eax, %ebx
leal (%ebx,%esi), %ecx
movl $1, %eax
movl 32(%esp), %edi
cmpl %edi, (%ebp,%ecx,4)
jne LBB0_20
leal -1(%ebx), %eax
movl %edx, %ecx
subl %eax, %ecx
movl %ecx, 28(%esp)
movl 36(%esp), %eax
je LBB0_20
leal -1(%ebx,%esi), %eax
leal (%ebp,%eax,4), %eax
movl %eax, 4(%esp)
movl __tls_index, %eax
movl %fs:44, %ecx
movl (%ecx,%eax,4), %eax
leal __D5test23fooFAiZi3tmpG2i@SECREL32(%eax), %eax
movl %eax, 12(%esp)
movl $__D11TypeInfo_Ai6__initZ, 16(%esp)
movl $1, 8(%esp)
movl $1, (%esp)
movl %esi, %edi
movl %ebp, %esi
movl %edx, %ebp
calll __adEq2
movl %ebp, %edx
movl %esi, %ebp
movl %edi, %esi
testl %eax, %eax
movl 36(%esp), %eax
jne LBB0_21
LBB0_20:
addl %ebx, %eax
cmpl %edx, %eax
jb LBB0_17
LBB0_16:
xorl %edi, %edi
LBB0_22:
testl %edi, %edi
movl 24(%esp), %esi
jne LBB0_7
LBB0_23:
movl 20(%esp), %eax
addl $40, %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
ret $8
(I don't know what __D5test23fooFAiZi3tmpG2i@SECREL32 is).
Now what's left to remove are two calls to __adEq2 (because I
think they are used only on length-2 arrays and are much slower than
an inlined test of length and two ints equality).
So can ldc2 remove this call to __d_newarrayvT and later even the
calls to __adEq2?
Bye,
bearophile
More information about the digitalmars-d-ldc
mailing list