[Issue 4438] New: A missed function inlining
d-bugmail at puremagic.com
d-bugmail at puremagic.com
Thu Jul 8 05:17:48 PDT 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438
Summary: A missed function inlining
Product: D
Version: D1 & D2
Platform: x86
OS/Version: Windows
Status: NEW
Severity: enhancement
Priority: P2
Component: DMD
AssignedTo: nobody at puremagic.com
ReportedBy: bearophile_hugs at eml.cc
--- Comment #0 from bearophile_hugs at eml.cc 2010-07-08 05:17:43 PDT ---
A test program that can be compiled with DMD2+Phobos2 and LDC1+Tango1:
version (Tango)
import tango.stdc.stdio: printf;
else
import std.c.stdio: printf;
/// Masked dot product: returns the sum of a1[i] * a2[i] over every index i
/// for which mask[i] is nonzero.
/// The in-contract requires a1, a2 and mask to all have the same length.
/// This is the function the report asks dmd to inline into main().
double masked_dot(double[] a1, double[] a2, ubyte[] mask)
in {
assert(a1.length == a2.length);
assert(a1.length == mask.length);
} body {
double sum = 0.0;
// i is the index and m the mask byte at that index; only elements selected
// by a nonzero m contribute to the sum.
foreach (i, m; mask)
if (m)
sum += a1[i] * a2[i];
return sum;
}
// Test driver: builds two N x N matrices (every element 2.0 and 0.5
// respectively) and an N-byte mask of all ones, then adds up masked_dot()
// of each pair of corresponding rows and prints the total.
// With every mask byte set, each row contributes N * (2.0 * 0.5) = N, so the
// printed total is N * N (1000000 for N = 1000).
void main() {
int N = 1000;
// m1: N rows of N doubles, every element set to 2.0.
auto m1 = new double[][](N, N);
foreach (ref row; m1)
row[] = 2.0;
// m2: N rows of N doubles, every element set to 0.5.
auto m2 = new double[][](N, N);
foreach (ref row; m2)
row[] = 0.5;
// mask: N bytes, all nonzero, so no element is skipped by masked_dot().
auto mask = new ubyte[N];
mask[] = 1;
double sum = 0.0;
// r has no explicit initializer; it starts at D's default int value, 0.
// The masked_dot() call in this loop is the inlining candidate of the report.
for (int r; r < m1.length; r++)
sum += masked_dot(m1[r], m2[r], mask);
printf("%f\n", sum);
}
Compiled with:
dmd v2.047 and ldc based on DMD v1.057 and llvm 2.6
ldc -O3 -release -inline -output-s test
dmd -O -release -inline test.d
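I'd like dmd to inline this masked_dot() function too: in the listings below, ldc's _Dmain contains the masked_dot loop inline, while dmd's __Dmain still calls _D4test10masked_dotFAdAdAhZd.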
-------------------------------
The cleaned up asm produced by dmd:
; dmd code for masked_dot(); this out-of-line copy is what __Dmain calls.
; The arguments are read from 038h..04Ch[ESP] and the callee pops their
; 24 bytes itself ("ret 018h"). The running sum lives in memory at 4[ESP],
; the loop index in ESI, and the result is left on the x87 stack by the
; final fld. EAX is written three times but never read in this listing.
_D4test10masked_dotFAdAdAhZd comdat
sub ESP,030h
push ESI
; ESI = index = 0
xor ESI,ESI
; sum = 0.0 (two zero dwords at 4[ESP] and 8[ESP])
mov dword ptr 4[ESP],0
mov dword ptr 8[ESP],0
; skip the loop when the dword at 038h[ESP] (the loop bound, presumably
; mask.length) is 0
cmp 038h[ESP],ESI
je L51
; ECX = base of the byte array tested in the loop (the mask data)
mov EDX,03Ch[ESP]
mov EAX,038h[ESP]
mov ECX,EDX
; loop top: skip the multiply-add when the mask byte at index ESI is 0
L26: cmp [ECX][ESI],0
je L4A
; both double-array pointers are reloaded from the stack on every iteration
; (04Ch[ESP] and 044h[ESP]; presumably a1 and a2 -- the listing alone does
; not show which is which)
mov EDX,04Ch[ESP]
mov EAX,048h[ESP]
mov EAX,040h[ESP]
fld qword ptr [ESI*8][EDX]
mov EDX,044h[ESP]
fmul qword ptr [ESI*8][EDX]
; sum += product, with the sum loaded from and stored back to 4[ESP]
fadd qword ptr 4[ESP]
fstp qword ptr 4[ESP]
; next index; unsigned compare against the bound, re-read from the stack
L4A: inc ESI
cmp ESI,038h[ESP]
jb L26
; return value: load the sum onto the x87 stack
L51: fld qword ptr 4[ESP]
pop ESI
add ESP,030h
ret 018h
; dmd code for main(). The call to masked_dot inside loop L104 is not
; inlined, which is the subject of this report.
__Dmain comdat
L0: sub ESP,028h
mov EAX,offset FLAT:_D12TypeInfo_AAd6__initZ
push EBX
push ESI
push EDI
; first matrix: __d_newarraymiT is called with the TypeInfo, 2, and the two
; dimensions 03E8h (= 1000)
push 03E8h
push 03E8h
push 2
push EAX
call near ptr __d_newarraymiT
xor EBX,EBX
; the EAX:EDX result is saved on the stack; after the "add ESP,010h" below
; these two slots are 010h[ESP] and 014h[ESP]
mov 020h[ESP],EAX
mov 024h[ESP],EDX
add ESP,010h
; skip the fill loop when the saved EAX value (used as the row count) is 0
cmp 010h[ESP],EBX
je L58
mov ESI,EDX
; fill loop: for each 8-byte row descriptor at [ESI + EBX*8], call
; __memset64 with the descriptor's two dwords and the 8-byte constant at
; _DATA[00h..04h] (presumably the double 2.0 from "row[] = 2.0")
L32: lea EDI,[EBX*8][ESI]
mov EDX,4[EDI]
mov EAX,[EDI]
push dword ptr [EDI]
push dword ptr FLAT:_DATA[04h]
push dword ptr FLAT:_DATA[00h]
push EDX
call near ptr __memset64
add ESP,010h
inc EBX
cmp EBX,010h[ESP]
jb L32
; second matrix: same allocation call; its result ends up in 018h[ESP] and
; 01Ch[ESP] once the "add ESP,010h" below has run
L58: push 03E8h
mov ECX,offset FLAT:_D12TypeInfo_AAd6__initZ
push 03E8h
push 2
push ECX
call near ptr __d_newarraymiT
xor EBX,EBX
mov 028h[ESP],EAX
mov 02Ch[ESP],EDX
add ESP,010h
cmp 018h[ESP],EBX
je LAA
mov ESI,EDX
; fill loop for the second matrix, using the 8-byte constant at
; _DATA[08h..0Ch] (presumably the double 0.5 from "row[] = 0.5")
L84: lea EDI,[EBX*8][ESI]
mov EDX,4[EDI]
mov EAX,[EDI]
push dword ptr [EDI]
push dword ptr FLAT:_DATA[0Ch]
push dword ptr FLAT:_DATA[08h]
push EDX
call near ptr __memset64
add ESP,010h
inc EBX
cmp EBX,018h[ESP]
jb L84
; mask: __d_newarrayT is called with the TypeInfo and 03E8h (= 1000)
LAA: push 03E8h
mov EBX,offset FLAT:_D11TypeInfo_Ah6__initZ
push EBX
call near ptr __d_newarrayT
; the result is saved (020h/024h[ESP] after the "add ESP,8" below), then
; ECX = saved EAX (count), EDI = saved EDX (destination) and AL = 01h, so
; "rep stosb" stores ECX bytes of value 1 ("mask[] = 1")
mov 028h[ESP],EAX
mov ECX,028h[ESP]
mov EAX,01010101h
mov 02Ch[ESP],EDX
mov EDX,02Ch[ESP]
mov EBX,028h[ESP]
mov EDI,EDX
rep
stosb
; rep stosb leaves ECX = 0, so these three stores write zeros. After the
; "add ESP,8" below they are 028h/02Ch[ESP] (the double accumulated by
; fadd/fstp in loop L104) and 030h[ESP] (loaded into EBX as that loop's index).
mov 030h[ESP],ECX
mov 034h[ESP],ECX
mov 038h[ESP],ECX
add ESP,8
; skip the summation loop when the first matrix's saved row count is 0
; (ECX is still 0 here)
cmp 010h[ESP],ECX
je L12E
; EDI / ESI = row-descriptor base of the first / second matrix, EBX = loop
; index; the two values loaded into EAX are not read by any later
; instruction in this listing
mov EDX,014h[ESP]
mov EDI,EDX
mov EAX,010h[ESP]
mov EDX,01Ch[ESP]
mov EBX,030h[ESP]
mov EAX,018h[ESP]
mov ESI,EDX
; per-row call: push both dwords of row EBX's descriptor from the first
; matrix, then from the second matrix, then the two saved mask dwords.
; Because ESP drops by 4 per push, the two "034h[ESP]" operands refer to the
; pre-push slots 024h[ESP] and 020h[ESP], where the __d_newarrayT result
; was saved.
L104: push dword ptr 4[EBX*8][EDI]
push [EBX*8][EDI]
push dword ptr 4[EBX*8][ESI]
push [EBX*8][ESI]
push dword ptr 034h[ESP]
push dword ptr 034h[ESP]
; the non-inlined call; the callee pops its own arguments (it ends in "ret 018h")
call near ptr _D4test10masked_dotFAdAdAhZd
inc EBX
; sum += returned value (on the x87 stack); the loop compare is interleaved
; with the store back to 028h[ESP]
fadd qword ptr 028h[ESP]
cmp EBX,010h[ESP]
fstp qword ptr 028h[ESP]
jb L104
; printf: push the two dwords of the sum (the second "02Ch[ESP]" is the
; pre-push 028h[ESP]) and the address _DATA[010h] (presumably the "%f\n"
; format string); the caller removes the 0Ch bytes afterwards
L12E: push dword ptr 02Ch[ESP]
mov ECX,offset FLAT:_DATA[010h]
push dword ptr 02Ch[ESP]
push ECX
call near ptr _printf
add ESP,0Ch
; return 0 in EAX, restore the saved registers and release the frame
xor EAX,EAX
pop EDI
pop ESI
pop EBX
add ESP,028h
ret
-------------------------------
The cleaned up asm produced by ldc:
# ldc code for masked_dot(). The sum is kept in %xmm0 and the loop bound and
# the three array pointers stay in registers for the whole loop.
_D4test10masked_dotFAdAdAhZd:
pushl %edi
pushl %esi
subl $12, %esp
# %eax = loop bound read from 24(%esp); take the zero-result path when it is 0
movl 24(%esp), %eax
testl %eax, %eax
je .LBB1_6
# %ecx = base of the byte array tested in the loop (the mask data);
# %edx and %esi = bases of the two double arrays
movl 28(%esp), %ecx
movl 36(%esp), %edx
movl 44(%esp), %esi
# %xmm0 = sum = 0.0, %edi = index = 0
pxor %xmm0, %xmm0
xorl %edi, %edi
.align 16
.LBB1_2:
# skip the multiply-add when the mask byte at index %edi is 0
cmpb $0, (%ecx,%edi)
je .LBB1_4
movsd (%esi,%edi,8), %xmm1
mulsd (%edx,%edi,8), %xmm1
addsd %xmm1, %xmm0
.LBB1_4:
incl %edi
cmpl %eax, %edi
jne .LBB1_2
.LBB1_5:
# the result is handed back on the x87 stack: spill %xmm0 to the stack and
# reload it with fldl
movsd %xmm0, (%esp)
fldl (%esp)
addl $12, %esp
popl %esi
popl %edi
# the callee pops the 24 bytes of arguments
ret $24
.LBB1_6:
# zero-bound path: the result is 0.0
pxor %xmm0, %xmm0
jmp .LBB1_5
# ldc code for main(). There is no call to masked_dot in this listing: its
# loop appears inline as .LBB2_6, nested inside the row loop .LBB2_5.
_Dmain:
pushl %ebp
pushl %ebx
pushl %edi
pushl %esi
subl $36, %esp
# first matrix: the two dimensions (1000, 1000) are stored at 28(%esp) and
# 32(%esp); their address, the count 2 and the TypeInfo are the arguments
# of _d_newarraymiT
movl $1000, 28(%esp)
movl $1000, 32(%esp)
leal 28(%esp), %eax
movl %eax, 8(%esp)
movl $2, 4(%esp)
movl $_D12TypeInfo_AAd6__initZ, (%esp)
xorl %esi, %esi
call _d_newarraymiT
# %edi = base of the first matrix's 8-byte row descriptors
movl %eax, %edi
.align 16
.LBB2_1:
# for each of the 1000 row descriptors: call _d_array_init_double with the
# descriptor's two dwords and the 8-byte value whose high dword is
# 1073741824 (0x40000000) and low dword is 0, i.e. the IEEE-754 double 2.0
movl 4(%edi,%esi,8), %eax
movl (%edi,%esi,8), %ecx
movl %ecx, 4(%esp)
movl %eax, (%esp)
movl $1073741824, 12(%esp)
movl $0, 8(%esp)
call _d_array_init_double
incl %esi
cmpl $1000, %esi
jne .LBB2_1
# second matrix: same allocation, with the dimensions stored at 20(%esp)
# and 24(%esp)
movl $1000, 20(%esp)
movl $1000, 24(%esp)
leal 20(%esp), %eax
movl %eax, 8(%esp)
movl $2, 4(%esp)
movl $_D12TypeInfo_AAd6__initZ, (%esp)
xorl %esi, %esi
call _d_newarraymiT
# %ebx = base of the second matrix's 8-byte row descriptors
movl %eax, %ebx
.align 16
.LBB2_3:
# same fill loop with high dword 1071644672 (0x3FE00000) and low dword 0,
# i.e. the IEEE-754 double 0.5
movl 4(%ebx,%esi,8), %eax
movl (%ebx,%esi,8), %ecx
movl %ecx, 4(%esp)
movl %eax, (%esp)
movl $1071644672, 12(%esp)
movl $0, 8(%esp)
call _d_array_init_double
incl %esi
cmpl $1000, %esi
jne .LBB2_3
# mask: _d_newarrayT is called with the TypeInfo and 1000; the value it
# returns in %eax is kept in %esi and passed to memset with fill value 1
# and size 1000
movl $1000, 4(%esp)
movl $_D11TypeInfo_Ah6__initZ, (%esp)
call _d_newarrayT
movl %eax, %esi
movl %esi, (%esp)
movl $1000, 8(%esp)
movl $1, 4(%esp)
call memset
# %xmm0 = total = 0.0, %eax = row index = 0
pxor %xmm0, %xmm0
xorl %eax, %eax
.LBB2_5:
# row loop: %ecx / %edx = the dword at offset 4 of row %eax's descriptor in
# the second / first matrix (used below as the row's data pointer);
# %xmm1 = per-row sum = 0.0, %ebp = element index = 0
movl 4(%ebx,%eax,8), %ecx
movl 4(%edi,%eax,8), %edx
pxor %xmm1, %xmm1
xorl %ebp, %ebp
.align 16
.LBB2_6:
# inlined masked_dot body: skip the multiply-add when the mask byte at
# index %ebp is 0; the bound is the constant 1000
cmpb $0, (%esi,%ebp)
je .LBB2_8
movsd (%edx,%ebp,8), %xmm2
mulsd (%ecx,%ebp,8), %xmm2
addsd %xmm2, %xmm1
.LBB2_8:
incl %ebp
cmpl $1000, %ebp
jne .LBB2_6
# total += per-row sum, then advance to the next of the 1000 rows
addsd %xmm1, %xmm0
incl %eax
cmpl $1000, %eax
jne .LBB2_5
# printf: the total is stored at 4(%esp) and the address of .str at (%esp)
movsd %xmm0, 4(%esp)
movl $.str, (%esp)
call printf
# return 0 in %eax, release the frame and restore the saved registers;
# "ret $8" also pops 8 bytes of arguments
xorl %eax, %eax
addl $36, %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
ret $8
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
More information about the Digitalmars-d-bugs
mailing list