Inlining Ref Functions
bearophile
bearophileHUGS at lycos.com
Sat May 16 02:22:15 PDT 2009
I have compiled a small variant of your code with a very new LDC compiler (May 9 revision; it doesn't print the revision number). The code:
import tango.stdc.stdio: printf;
import Integer = tango.text.convert.Integer;
// Exchanges the values of a and b.
// Both parameters are `ref`, so the caller's variables are modified in place.
void swap(T)(ref T a, ref T b) {
// keep the old value of a while it is overwritten
T temp = a;
a = b;
b = temp;
}
// Pointer-based counterpart of swap: exchanges the values that a and b point to.
// Both pointers are dereferenced unconditionally.
void ptrSwap(T)(T* a, T* b) {
// keep the old value of *a while it is overwritten
T temp = *a;
*a = *b;
*b = temp;
}
// Test driver: parses two integers from args[1] and args[2], then prints a
// three times: before any swap, after swap(a, b), and after ptrSwap(&a, &b).
// args[1] and args[2] are indexed without checking the length of args.
void main(char[][] args) {
int a = Integer.parse(args[1]);
int b = Integer.parse(args[2]);
printf("%d\n", a);
// swap through ref parameters
swap(a, b);
printf("%d\n", a);
// swap again through explicit pointers
ptrSwap(&a, &b);
printf("%d\n", a);
}
Generated asm with various compiler arguments:
ldc -output-s -release inline_test.d
# swap, out-of-line and unoptimized: `temp` lives in a 4-byte stack slot.
# One pointer is read from the stack and the other from %eax; at the call
# site in main these are &a (stack) and &b (%eax).
swap:
# reserve 4 bytes for temp
subl $4, %esp
# %ecx = the stack argument (8(%esp) now that %esp has moved down by 4)
movl 8(%esp), %ecx
# temp = *%ecx
movl (%ecx), %edx
movl %edx, (%esp)
# *%ecx = *%eax
movl (%eax), %edx
movl %edx, (%ecx)
# *%eax = temp
movl (%esp), %ecx
movl %ecx, (%eax)
# release temp
addl $4, %esp
# return and pop the 4-byte stack argument
ret $4
# ptrSwap, out-of-line and unoptimized. Unlike swap above, both incoming
# pointers are first spilled to stack slots and reloaded for every use.
# Slots: 8(%esp) = pointer from the stack argument, 4(%esp) = pointer from %eax,
# (%esp) = temp.
ptrSwap:
# reserve 12 bytes: two pointer copies plus temp
subl $12, %esp
# spill the stack argument to 8(%esp) and the %eax argument to 4(%esp)
movl 16(%esp), %ecx
movl %ecx, 8(%esp)
movl %eax, 4(%esp)
# temp = value pointed to by the first spilled pointer
movl 8(%esp), %eax
movl (%eax), %eax
movl %eax, (%esp)
# store the value behind the second pointer through the first pointer
movl 4(%esp), %eax
movl (%eax), %eax
movl 8(%esp), %ecx
movl %eax, (%ecx)
# store temp through the second pointer
movl (%esp), %eax
movl 4(%esp), %ecx
movl %eax, (%ecx)
# release the locals
addl $12, %esp
# return and pop the 4-byte stack argument
ret $4
# main without -inline: swap and ptrSwap are both real calls.
# Stack slots used below: 28(%esp) and 24(%esp) hold copies of the two incoming
# argument words, 20(%esp) is a, 16(%esp) is b.
main:
# save three registers and reserve a 32-byte frame (44 bytes in total)
pushl %ebx
pushl %edi
pushl %esi
subl $32, %esp
# copy the two incoming argument words (just above the return address at
# 44(%esp)) into locals; the copy at 24(%esp) is not read again in this excerpt
movl 52(%esp), %eax
movl %eax, 28(%esp)
movl 48(%esp), %eax
movl %eax, 24(%esp)
# load the two words at offsets 8 and 12 from the pointer saved at 28(%esp);
# presumably this is args[1], assuming 8-byte array elements (TODO confirm)
movl 28(%esp), %eax
movl 12(%eax), %ecx
movl 8(%eax), %eax
# outgoing arguments for Integer.parse: those two words, a zero word on the
# stack, and zero in %eax (presumably defaulted parameters; not shown in the D code)
movl %ecx, 8(%esp)
movl %eax, 4(%esp)
movl $0, (%esp)
# %esi = 0, kept for reuse at the second Integer.parse call
xorl %esi, %esi
movl %esi, %eax
call Integer.parse
# re-extend the frame by 12 bytes; presumably the callee popped its 12 bytes
# of stack arguments, in the same way swap returns with `ret $4`
subl $12, %esp
# a = result of the first parse
movl %eax, 20(%esp)
# same sequence for the words at offsets 16 and 20 (presumably args[2])
movl 28(%esp), %eax
movl 20(%eax), %ecx
movl 16(%eax), %eax
movl %ecx, 8(%esp)
movl %eax, 4(%esp)
movl $0, (%esp)
movl %esi, %eax
call Integer.parse
subl $12, %esp
# b = result of the second parse
movl %eax, 16(%esp)
# printf(.str1, a); no stack adjustment follows the call
movl 20(%esp), %eax
movl %eax, 4(%esp)
movl $.str1, (%esp)
call printf
# swap: &a goes on the stack, &b in %eax; both addresses are kept in
# %edi and %ebx for the ptrSwap call below
leal 20(%esp), %edi
movl %edi, (%esp)
leal 16(%esp), %ebx
movl %ebx, %eax
call swap
# swap returned with `ret $4`, so re-extend the frame by 4 bytes
subl $4, %esp
# printf(.str2, a)
movl 20(%esp), %eax
movl %eax, 4(%esp)
movl $.str2, (%esp)
call printf
# ptrSwap: same argument placement, reusing the saved addresses
movl %edi, (%esp)
movl %ebx, %eax
call ptrSwap
# ptrSwap also returned with `ret $4`
subl $4, %esp
# printf(.str3, a)
movl 20(%esp), %eax
movl %eax, 4(%esp)
movl $.str3, (%esp)
call printf
[...]
-------------------------
ldc -inline -release -output-s inline_test.d
# main with -inline but no optimization level: there are no calls to swap or
# ptrSwap, but their bodies are reproduced through stack slots.
# Slots: 20(%esp) is a, 16(%esp) is b, 32(%esp) is the temp of the inlined swap,
# 44/40/36(%esp) are the two pointers and the temp of the inlined ptrSwap.
main:
# save %esi and reserve a 48-byte frame (52 bytes in total)
pushl %esi
subl $48, %esp
# copy the two incoming argument words (just above the return address at
# 52(%esp)) into locals; the copy at 24(%esp) is not read again in this excerpt
movl 60(%esp), %eax
movl %eax, 28(%esp)
movl 56(%esp), %eax
movl %eax, 24(%esp)
# load the two words at offsets 8 and 12 from the pointer saved at 28(%esp);
# presumably this is args[1], assuming 8-byte array elements (TODO confirm)
movl 28(%esp), %eax
movl 12(%eax), %ecx
movl 8(%eax), %eax
# outgoing arguments for Integer.parse: those two words, a zero word on the
# stack, and zero in %eax (presumably defaulted parameters; not shown in the D code)
movl %ecx, 8(%esp)
movl %eax, 4(%esp)
movl $0, (%esp)
# %esi = 0, kept for reuse at the second Integer.parse call
xorl %esi, %esi
movl %esi, %eax
call Integer.parse
# re-extend the frame by 12 bytes (presumably the callee popped its stack arguments)
subl $12, %esp
# a = result of the first parse
movl %eax, 20(%esp)
# same sequence for the words at offsets 16 and 20 (presumably args[2])
movl 28(%esp), %eax
movl 20(%eax), %ecx
movl 16(%eax), %eax
movl %ecx, 8(%esp)
movl %eax, 4(%esp)
movl $0, (%esp)
movl %esi, %eax
call Integer.parse
subl $12, %esp
# b = result of the second parse
movl %eax, 16(%esp)
# printf(.str1, a)
movl 20(%esp), %eax
movl %eax, 4(%esp)
movl $.str1, (%esp)
call printf
# --- inlined swap(a, b) ---
# temp = a
movl 20(%esp), %eax
movl %eax, 32(%esp)
# a = b
movl 16(%esp), %eax
movl %eax, 20(%esp)
# b = temp
movl 32(%esp), %eax
movl %eax, 16(%esp)
# printf(.str2, a)
movl 20(%esp), %eax
movl %eax, 4(%esp)
movl $.str2, (%esp)
call printf
# --- inlined ptrSwap(&a, &b) ---
# materialize the two pointer arguments as locals: 44(%esp) = &a, 40(%esp) = &b
leal 20(%esp), %eax
movl %eax, 44(%esp)
leal 16(%esp), %eax
movl %eax, 40(%esp)
# temp = *(&a)
movl 44(%esp), %eax
movl (%eax), %eax
movl %eax, 36(%esp)
# *(&a) = *(&b)
movl 40(%esp), %eax
movl (%eax), %eax
movl 44(%esp), %ecx
movl %eax, (%ecx)
# *(&b) = temp
movl 36(%esp), %eax
movl 40(%esp), %ecx
movl %eax, (%ecx)
# printf(.str3, a)
movl 20(%esp), %eax
movl %eax, 4(%esp)
movl $.str3, (%esp)
call printf
[...]
-------------------------
ldc -inline -release -O5 -output-s inline_test.d
# main with -inline and -O5: no swap code remains at all.
# a stays in %ebx and b in %esi, and the three printf calls pass
# %ebx, %esi, %ebx, which are the values a has before, between and after the two swaps.
main:
# save three registers and reserve a 16-byte frame (28 bytes in total)
pushl %ebx
pushl %edi
pushl %esi
subl $16, %esp
# %esi = second incoming argument word (two words above the return address at
# 28(%esp)); used below as the base pointer for the args[1] and args[2] loads
movl 36(%esp), %esi
# load the two words at offsets 8 and 12 (presumably args[1]) and pass them,
# plus a zero word on the stack and zero in %eax, to Integer.parse
movl 12(%esi), %eax
movl 8(%esi), %ecx
movl %eax, 8(%esp)
movl %ecx, 4(%esp)
movl $0, (%esp)
# %edi = 0, kept for reuse at the second Integer.parse call
xorl %edi, %edi
xorl %eax, %eax
call Integer.parse
# re-extend the frame by 12 bytes (presumably the callee popped its stack arguments)
subl $12, %esp
# a = result of the first parse, kept in %ebx
movl %eax, %ebx
# same sequence for the words at offsets 16 and 20 (presumably args[2])
movl 20(%esi), %eax
movl 16(%esi), %ecx
movl %eax, 8(%esp)
movl %ecx, 4(%esp)
movl $0, (%esp)
movl %edi, %eax
call Integer.parse
subl $12, %esp
# b = result of the second parse, kept in %esi (the base pointer is no longer needed)
movl %eax, %esi
# first printf: original a
movl %ebx, 4(%esp)
movl $.str1, (%esp)
call printf
# second printf: original b, which is what a holds after swap(a, b)
movl %esi, 4(%esp)
movl $.str1, (%esp)
call printf
# third printf: original a again, which is what a holds after ptrSwap(&a, &b);
# all three calls pass the same $.str1 here, where the other listings use .str1/.str2/.str3
movl %ebx, 4(%esp)
movl $.str1, (%esp)
call printf
[...]
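You can see that -inline alone is enough to get both swap and ptrSwap inlined, and that with -O5 the swaps disappear completely: the two values simply stay in registers.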
The performance of D isn't something to ignore: I have translated a small ray tracing program from C++, and with DMD I have seen it run up to about 3-3.5 times slower, mostly because of missing inlining. Some benchmarks:
http://www.fantascienza.net/leonardo/js/smallpt.zip
http://www.fantascienza.net/leonardo/js/ao_bench.zip
Bye,
bearophile
More information about the Digitalmars-d
mailing list