Few ideas to reduce template bloat
bearophile
bearophileHUGS at lycos.com
Fri Mar 26 07:03:21 PDT 2010
I have asked to llvm devs, it seems llvm already has an optimization pass that removes duplicated functions. But it's disabled on default, you can use it with LDC with "-mergefunc".
Its implementation, probably written by nlewycky:
https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_25/lib/Transforms/IPO/MergeFunctions.cpp
--------------------
I have done few tests in D1 with LDC, the first one, here -mergefunc (and no other optimizations) leaves a single function:
import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;
int factorial1(int X) {
if (X == 0) return 1;
return X*factorial1(X-1);
}
int factorial2(int X) {
if (X == 0) return 1;
return X*factorial2(X-1);
}
void main(char[][] args) {
printf("%d\n", factorial1(atoi(args[1].ptr)));
printf("%d\n", factorial2(atoi(args[1].ptr)));
}
--------------------
Second test I've done:
import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;
T factorial(T)(T x) {
if (x == 0)
return 1;
else
return x * factorial(x - 1);
}
void main(char[][] args) {
printf("%d\n", factorial(atoi(args[1].ptr)));
printf("%d\n", factorial(cast(uint)atoi(args[1].ptr)));
}
The resulting asm with -mergefunc shows some duplication left (a person says this is a bug that can be fixed, I have to talk with another developer):
__unnamed_1:
subl $4, %esp
movl %eax, (%esp)
testl %eax, %eax
jne .LBB2_2
movl $1, %eax
addl $4, %esp
ret
.LBB2_2:
movl (%esp), %eax
decl %eax
call factorial!uint
imull (%esp), %eax
addl $4, %esp
ret
factorial!int:
subl $4, %esp
call __unnamed_1
addl $4, %esp
ret
factorial!uint:
subl $4, %esp
call __unnamed_1
addl $4, %esp
ret
_Dmain:
subl $28, %esp
movl 36(%esp), %eax
movl %eax, 20(%esp)
movl 32(%esp), %eax
movl %eax, 16(%esp)
cmpl $1, 16(%esp)
jbe .LBB1_3
movl 20(%esp), %eax
movl 12(%eax), %eax
movl %eax, (%esp)
call atoi
call factorial!int
movl %eax, 4(%esp)
movl $.str, (%esp)
call printf
cmpl $1, 16(%esp)
jbe .LBB1_4
movl 20(%esp), %eax
movl 12(%eax), %eax
movl %eax, (%esp)
call atoi
call factorial!uint
movl %eax, 4(%esp)
movl $.str2, (%esp)
call printf
xorl %eax, %eax
addl $28, %esp
ret $8
.LBB1_3:
movl .modulefilename+4, %eax
movl %eax, 4(%esp)
movl .modulefilename, %eax
movl %eax, (%esp)
movl $12, 8(%esp)
call _d_array_bounds
.LBB1_4:
movl .modulefilename+4, %eax
movl %eax, 4(%esp)
movl .modulefilename, %eax
movl %eax, (%esp)
movl $13, 8(%esp)
call _d_array_bounds
----------------------------
My third experiment:
import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;
int factorial1(int X) {
if (X == 0) return 1;
return X*factorial1(X-1);
}
int factorial2(int X) {
if (X == 0) return 1;
return X*factorial2(X-1);
}
int caller(int function(int) func, int x) {
return func(x);
}
void main() {
printf("%d\n", caller(&factorial1, atoi("10")));
printf("%d\n", caller(&factorial2, atoi("10")));
}
Its asm without mergefunc:
_D5temp310factorial1FiZi:
subl $4, %esp
movl %eax, (%esp)
movl (%esp), %eax
cmpl $0, %eax
jne .LBB1_2
movl $1, %eax
addl $4, %esp
ret
.LBB1_2:
movl (%esp), %eax
subl $1, %eax
call _D5temp310factorial1FiZi
movl %eax, %ecx
movl (%esp), %eax
imull %ecx, %eax
addl $4, %esp
ret
_D5temp310factorial2FiZi:
subl $4, %esp
movl %eax, (%esp)
movl (%esp), %eax
cmpl $0, %eax
jne .LBB2_2
movl $1, %eax
addl $4, %esp
ret
.LBB2_2:
movl (%esp), %eax
subl $1, %eax
call _D5temp310factorial2FiZi
movl %eax, %ecx
movl (%esp), %eax
imull %ecx, %eax
addl $4, %esp
ret
_D5temp36callerFPFiZiiZi:
subl $12, %esp
movl 16(%esp), %ecx
movl %ecx, 8(%esp)
movl %eax, 4(%esp)
movl 8(%esp), %ecx
movl 4(%esp), %eax
call *%ecx
addl $12, %esp
ret $4
_Dmain:
subl $12, %esp
leal .str1, %eax
movl %eax, (%esp)
call atoi
movl $_D5temp310factorial1FiZi, (%esp)
call _D5temp36callerFPFiZiiZi
subl $4, %esp
movl $.str, (%esp)
movl %eax, 4(%esp)
call printf
leal .str3, %eax
movl %eax, (%esp)
call atoi
movl $_D5temp310factorial2FiZi, (%esp)
call _D5temp36callerFPFiZiiZi
subl $4, %esp
movl $.str2, (%esp)
movl %eax, 4(%esp)
call printf
xorl %eax, %eax
addl $12, %esp
ret $8
Its asm with mergefunc:
_D5temp310factorial1FiZi:
subl $4, %esp
movl %eax, (%esp)
testl %eax, %eax
jne .LBB1_2
movl $1, %eax
addl $4, %esp
ret
.LBB1_2:
movl (%esp), %eax
decl %eax
call _D5temp310factorial1FiZi
imull (%esp), %eax
addl $4, %esp
ret
_D5temp310factorial2FiZi:
subl $4, %esp
call _D5temp310factorial1FiZi
addl $4, %esp
ret
_D5temp36callerFPFiZiiZi:
subl $12, %esp
movl 16(%esp), %ecx
movl %ecx, 8(%esp)
movl %eax, 4(%esp)
call *8(%esp)
addl $12, %esp
ret $4
.size _D5temp36callerFPFiZiiZi, .-_D5temp36callerFPFiZiiZi
_Dmain:
subl $12, %esp
movl $.str1, (%esp)
call atoi
movl $_D5temp310factorial1FiZi, (%esp)
call _D5temp36callerFPFiZiiZi
subl $4, %esp
movl %eax, 4(%esp)
movl $.str, (%esp)
call printf
movl $.str3, (%esp)
call atoi
movl $_D5temp310factorial2FiZi, (%esp)
call _D5temp36callerFPFiZiiZi
subl $4, %esp
movl %eax, 4(%esp)
movl $.str2, (%esp)
call printf
xorl %eax, %eax
addl $12, %esp
ret $8
llvm is not able to replace all function pointers factorial2 with the ones to factorial1, so turns factorial2 into a stump that just calls the factorial1. In C/D you can compare function pointers so in this case LLVM has done what it can. But can't here llvm replace factorial2 with just a jump instruction? I am not expert enough yet to answer this. If someone has an answer for me I'd like to hear it :-)
Bye,
bearophile
More information about the Digitalmars-d
mailing list