Few ideas to reduce template bloat

bearophile bearophileHUGS at lycos.com
Fri Mar 26 07:03:21 PDT 2010


I have asked to llvm devs, it seems llvm already has an optimization pass that removes duplicated functions. But it's disabled on default, you can use it with LDC with "-mergefunc".

Its implementation, probably written by nlewycky:
https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_25/lib/Transforms/IPO/MergeFunctions.cpp

--------------------

I have done few tests in D1 with LDC, the first one, here -mergefunc (and no other optimizations) leaves a single function:

import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;

int factorial1(int X) {
  if (X == 0) return 1;
  return X*factorial1(X-1);
}
int factorial2(int X) {
  if (X == 0) return 1;
  return X*factorial2(X-1);
}

void main(char[][] args) {
  printf("%d\n", factorial1(atoi(args[1].ptr)));
  printf("%d\n", factorial2(atoi(args[1].ptr)));
}

--------------------

Second test I've done:

import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;

T factorial(T)(T x) {
  if (x == 0)
    return 1;
  else
    return x * factorial(x - 1);
}

void main(char[][] args) {
  printf("%d\n", factorial(atoi(args[1].ptr)));
  printf("%d\n", factorial(cast(uint)atoi(args[1].ptr)));
}


The resulting asm with -mergefunc shows some duplication left (a person says this is a bug that can be fixed, I have to talk with another developer):

__unnamed_1:
	subl	$4, %esp
	movl	%eax, (%esp)
	testl	%eax, %eax
	jne	.LBB2_2
	movl	$1, %eax
	addl	$4, %esp
	ret
.LBB2_2:
	movl	(%esp), %eax
	decl	%eax
	call	factorial!uint
	imull	(%esp), %eax
	addl	$4, %esp
	ret

factorial!int:
	subl	$4, %esp
	call	__unnamed_1
	addl	$4, %esp
	ret

factorial!uint:
	subl	$4, %esp
	call	__unnamed_1
	addl	$4, %esp
	ret

_Dmain:
	subl	$28, %esp
	movl	36(%esp), %eax
	movl	%eax, 20(%esp)
	movl	32(%esp), %eax
	movl	%eax, 16(%esp)
	cmpl	$1, 16(%esp)
	jbe	.LBB1_3
	movl	20(%esp), %eax
	movl	12(%eax), %eax
	movl	%eax, (%esp)
	call	atoi
	call	factorial!int
	movl	%eax, 4(%esp)
	movl	$.str, (%esp)
	call	printf
	cmpl	$1, 16(%esp)
	jbe	.LBB1_4
	movl	20(%esp), %eax
	movl	12(%eax), %eax
	movl	%eax, (%esp)
	call	atoi
	call	factorial!uint
	movl	%eax, 4(%esp)
	movl	$.str2, (%esp)
	call	printf
	xorl	%eax, %eax
	addl	$28, %esp
	ret	$8
.LBB1_3:
	movl	.modulefilename+4, %eax
	movl	%eax, 4(%esp)
	movl	.modulefilename, %eax
	movl	%eax, (%esp)
	movl	$12, 8(%esp)
	call	_d_array_bounds
.LBB1_4:
	movl	.modulefilename+4, %eax
	movl	%eax, 4(%esp)
	movl	.modulefilename, %eax
	movl	%eax, (%esp)
	movl	$13, 8(%esp)
	call	_d_array_bounds

----------------------------

My third experiment:

import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;

int factorial1(int X) {
  if (X == 0) return 1;
  return X*factorial1(X-1);
}
int factorial2(int X) {
  if (X == 0) return 1;
  return X*factorial2(X-1);
}

int caller(int function(int) func, int x) {
    return func(x);
}

void main() {
  printf("%d\n", caller(&factorial1, atoi("10")));
  printf("%d\n", caller(&factorial2, atoi("10")));
}


Its asm without mergefunc:

_D5temp310factorial1FiZi:
	subl	$4, %esp
	movl	%eax, (%esp)
	movl	(%esp), %eax
	cmpl	$0, %eax
	jne	.LBB1_2
	movl	$1, %eax
	addl	$4, %esp
	ret
.LBB1_2:
	movl	(%esp), %eax
	subl	$1, %eax
	call	_D5temp310factorial1FiZi
	movl	%eax, %ecx
	movl	(%esp), %eax
	imull	%ecx, %eax
	addl	$4, %esp
	ret

_D5temp310factorial2FiZi:
	subl	$4, %esp
	movl	%eax, (%esp)
	movl	(%esp), %eax
	cmpl	$0, %eax
	jne	.LBB2_2
	movl	$1, %eax
	addl	$4, %esp
	ret
.LBB2_2:
	movl	(%esp), %eax
	subl	$1, %eax
	call	_D5temp310factorial2FiZi
	movl	%eax, %ecx
	movl	(%esp), %eax
	imull	%ecx, %eax
	addl	$4, %esp
	ret

_D5temp36callerFPFiZiiZi:
	subl	$12, %esp
	movl	16(%esp), %ecx
	movl	%ecx, 8(%esp)
	movl	%eax, 4(%esp)
	movl	8(%esp), %ecx
	movl	4(%esp), %eax
	call	*%ecx
	addl	$12, %esp
	ret	$4

_Dmain:
	subl	$12, %esp
	leal	.str1, %eax
	movl	%eax, (%esp)
	call	atoi
	movl	$_D5temp310factorial1FiZi, (%esp)
	call	_D5temp36callerFPFiZiiZi
	subl	$4, %esp
	movl	$.str, (%esp)
	movl	%eax, 4(%esp)
	call	printf
	leal	.str3, %eax
	movl	%eax, (%esp)
	call	atoi
	movl	$_D5temp310factorial2FiZi, (%esp)
	call	_D5temp36callerFPFiZiiZi
	subl	$4, %esp
	movl	$.str2, (%esp)
	movl	%eax, 4(%esp)
	call	printf
	xorl	%eax, %eax
	addl	$12, %esp
	ret	$8


Its asm with mergefunc:

_D5temp310factorial1FiZi:
	subl	$4, %esp
	movl	%eax, (%esp)
	testl	%eax, %eax
	jne	.LBB1_2
	movl	$1, %eax
	addl	$4, %esp
	ret
.LBB1_2:
	movl	(%esp), %eax
	decl	%eax
	call	_D5temp310factorial1FiZi
	imull	(%esp), %eax
	addl	$4, %esp
	ret

_D5temp310factorial2FiZi:
	subl	$4, %esp
	call	_D5temp310factorial1FiZi
	addl	$4, %esp
	ret

_D5temp36callerFPFiZiiZi:
	subl	$12, %esp
	movl	16(%esp), %ecx
	movl	%ecx, 8(%esp)
	movl	%eax, 4(%esp)
	call	*8(%esp)
	addl	$12, %esp
	ret	$4
	.size	_D5temp36callerFPFiZiiZi, .-_D5temp36callerFPFiZiiZi

_Dmain:
	subl	$12, %esp
	movl	$.str1, (%esp)
	call	atoi
	movl	$_D5temp310factorial1FiZi, (%esp)
	call	_D5temp36callerFPFiZiiZi
	subl	$4, %esp
	movl	%eax, 4(%esp)
	movl	$.str, (%esp)
	call	printf
	movl	$.str3, (%esp)
	call	atoi
	movl	$_D5temp310factorial2FiZi, (%esp)
	call	_D5temp36callerFPFiZiiZi
	subl	$4, %esp
	movl	%eax, 4(%esp)
	movl	$.str2, (%esp)
	call	printf
	xorl	%eax, %eax
	addl	$12, %esp
	ret	$8


llvm is not able to replace all function pointers factorial2 with the ones to factorial1, so turns factorial2 into a stump that just calls the factorial1. In C/D you can compare function pointers so in this case LLVM has done what it can. But can't here llvm replace factorial2 with just a jump instruction? I am not expert enough yet to answer this. If someone has an answer for me I'd like to hear it :-)

Bye,
bearophile



More information about the Digitalmars-d mailing list