One document about Go
bearophile
bearophileHUGS at lycos.com
Tue Jun 8 12:05:40 PDT 2010
I have tested dmd v2.047beta and it works. I have seen that this bug fix improves the performance of my code:
http://d.puremagic.com/issues/show_bug.cgi?id=2008
------------------
Walter Bright:
>it would be fair to provide examples when saying things like LLVM does a better job on X.<
This is a D2 program. I compile it with v2.047beta, and the total() function doesn't get inlined:
import std.c.stdio: printf;
// Sums the elements of `data` and returns the result.
// For an empty slice the loop body never runs and the initial value of `res` is returned.
int total(int[] data) {
// No explicit initializer: D default-initializes an int to 0.
int res;
foreach (x; data)
res += x;
return res;
}
// Test driver: prints total() of a ten-element array literal (the elements add up to 45).
void main() {
// Declared with `enum`, i.e. as a manifest constant rather than a variable.
enum int[] data = [7, 6, 5, 9, 8, 4, 3, 1, 2, 0];
printf("%d\n", total(data));
}
asm generated by DMD, -O -release -inline (cleaned up a little):
; total(int[] data) as emitted by DMD (32-bit x86, Intel-style operand order).
; Stack layout after the two pushes below:
;   [ESP]      saved EBX
;   4[ESP]     scratch slot created by `push EAX`
;   8[ESP]     return address
;   0Ch[ESP]   first argument dword  - compared against the index, i.e. the length
;   010h[ESP]  second argument dword - used as the base address, i.e. the pointer
; Register roles: ECX = running sum, EDX = element index, EBX = base pointer.
; Result is returned in EAX.
_D4test5totalFAiZi comdat
push EAX                        ; reserve a 4-byte scratch slot (pushed value is never read)
xor ECX,ECX                     ; sum = 0
xor EDX,EDX                     ; index = 0
push EBX                        ; EBX is used below, so save the caller's value
cmp 0Ch[ESP],ECX                ; length == 0 ?
je L28                          ; empty slice: skip the loop, return 0
mov 4[ESP],EDX                  ; spill index (0) because EDX is borrowed next
mov EDX,010h[ESP]               ; load the data pointer ...
mov EBX,EDX                     ; ... and move it into EBX
mov EAX,0Ch[ESP]                ; loads the length, but EAX is overwritten at L28 before any use
mov EDX,4[ESP]                  ; reload index from the scratch slot
L1E: add ECX,[EDX*4][EBX]       ; sum += base[index] (4-byte elements)
inc EDX                         ; index++
cmp EDX,0Ch[ESP]                ; length is re-read from the stack on every iteration
jb L1E                          ; loop while index < length (unsigned compare)
L28: pop EBX                    ; restore the caller's EBX
mov EAX,ECX                     ; return value = sum
pop ECX                         ; discard the scratch slot
ret 8                           ; return and pop the 8 bytes of arguments
__Dmain comdat
L0: push EAX
push EAX
mov EAX,offset FLAT:_D11TypeInfo_Ai6__initZ
push EBX
push 0
push 2
push 1
push 3
push 4
push 8
push 9
push 5
push 6
push 7
push 0Ah
push EAX
call near ptr __d_arrayliteralT
add ESP,030h
mov ECX,EAX
push ECX
mov EBX,0Ah
push EBX
call near ptr _D4test5totalFAiZi
mov EDX,offset FLAT:_DATA
push EAX
push EDX
call near ptr _printf
add ESP,8
xor EAX,EAX
pop EBX
add ESP,8
ret
------------------
This is the same program translated to D1 for Tango:
import tango.stdc.stdio: printf;
// D1/Tango version: adds up every element of `data` and returns the sum.
// With an empty slice the loop does not execute, so the initial `res` is returned.
int total(int[] data) {
// Left to D's default initialization, which is 0 for int.
int res;
foreach (x; data)
res += x;
return res;
}
// Test driver for the D1/Tango version: prints total() of the same ten values (sum 45).
void main() {
// Declared `const` here, where the D2 version of this program uses `enum`.
const int[] data = [7, 6, 5, 9, 8, 4, 3, 1, 2, 0];
printf("%d\n", total(data));
}
The asm generated by LDC, -O3 -release -inline:
# total(int[] data) as emitted by LDC (32-bit x86, AT&T operand order: src, dst).
# Stack layout after `pushl %esi`:
#   (%esp)    saved %esi
#   4(%esp)   return address
#   8(%esp)   first argument dword  - tested for zero and used as loop bound, i.e. the length
#   12(%esp)  second argument dword - used as the base address, i.e. the pointer
# Register roles: %eax = running sum (and return value), %esi = index,
# %ecx = length, %edx = base pointer.
_D4temp5totalFAiZi:
pushl %esi                      # %esi is used as the index, so save the caller's value
movl 8(%esp), %ecx              # %ecx = length
testl %ecx, %ecx                # length == 0 ?
je .LBB1_4                      # empty slice: go return 0
movl 12(%esp), %edx             # %edx = data pointer
xorl %eax, %eax                 # sum = 0
movl %eax, %esi                 # index = 0
.align 16
.LBB1_2:
addl (%edx,%esi,4), %eax        # sum += base[index] (4-byte elements)
incl %esi                       # index++
cmpl %ecx, %esi                 # bound is kept in a register, unlike the DMD listing
jne .LBB1_2                     # loop until index == length
.LBB1_3:
popl %esi                       # restore the caller's %esi
ret $8                          # return and pop the 8 bytes of arguments
.LBB1_4:
xorl %eax, %eax                 # empty-slice path: result = 0
jmp .LBB1_3                     # share the common epilogue
.type .constarray, @object
# Static image of the array literal: ten 4-byte elements, 40 bytes in total.
.data
.align 16
.constarray:
.long 7
.long 6
.long 5
.long 9
.long 8
.long 4
.long 3
.long 1
.long 2
# The tenth element (value 0) is emitted as four zero bytes rather than `.long 0`.
.zero 4
.size .constarray, 40
# main() as emitted by LDC. There is no call to total(): the loop is replaced by
# ten straight-line loads/adds from .constarray, still executed at run time.
_Dmain:
subl $12, %esp                  # reserve 12 bytes; the two printf arguments are stored at (%esp) and 4(%esp)
movl .constarray+4, %eax        # start the sum with element 1 ...
addl .constarray, %eax          # ... then add element 0
addl .constarray+8, %eax        # elements 2..9 follow in order
addl .constarray+12, %eax
addl .constarray+16, %eax
addl .constarray+20, %eax
addl .constarray+24, %eax
addl .constarray+28, %eax
addl .constarray+32, %eax
addl .constarray+36, %eax
movl %eax, 4(%esp)              # printf argument: the sum
movl $.str, (%esp)              # printf argument: address of .str (presumably the "%d\n" format; not shown)
call printf
xorl %eax, %eax                 # return value 0
addl $12, %esp                  # release the 12 bytes reserved at entry
ret $8                          # return and pop 8 more bytes from the stack
You can see that LDC inlined total() and, because the array is known at compile time, fully unrolled the loop too. But it doesn't perform the last optimization: folding the ten additions into a single constant at compile time.
---------------------------
If I use link-time optimization with LDC, it optimizes the code further. This is the disassembly of main:
08049620 <_Dmain>:
8049620: 83 ec 0c sub $0xc,%esp
8049623: c7 44 24 04 2d 00 00 movl $0x2d,0x4(%esp)
804962a: 00
804962b: c7 04 24 48 35 06 08 movl $0x8063548,(%esp)
8049632: e8 85 fd ff ff call 80493bc <printf@plt>
8049637: 31 c0 xor %eax,%eax
8049639: 83 c4 0c add $0xc,%esp
804963c: c2 08 00 ret $0x8
804963f: 90 nop
That movl $0x2d,0x4(%esp) is the result, 45 in base 10, fully computed.
Bye,
bearophile
More information about the Digitalmars-d
mailing list