Inlining Ref Functions

Sat May 16 02:22:15 PDT 2009

I have compiled a small variant of your code with a very new LDC compiler (May  9 revision; it doesn't print the revision number). The code:

import tango.stdc.stdio: printf;
import Integer = tango.text.convert.Integer;

// Exchanges the values of `a` and `b`. Both are `ref` parameters, so the
// caller's variables are modified directly.
void swap(T)(ref T a, ref T b) {
    T temp = a;   // keep a's original value while a is overwritten
    a = b;
    b = temp;
}

// Pointer-based counterpart of swap(): exchanges the values that `a` and `b`
// point to. Both pointers are dereferenced unconditionally (no null check).
void ptrSwap(T)(T* a, T* b) {
    T temp = *a;  // keep *a's original value while *a is overwritten
    *a = *b;
    *b = temp;
}

// Test driver: parses two integers from args[1] and args[2], then prints `a`
// three times -- before any swap, after swap(), and after ptrSwap().
void main(char[][] args) {
    int a = Integer.parse(args[1]);
    int b = Integer.parse(args[2]);
    printf("%d\n", a);   // original a
    swap(a, b);          // by-reference version
    printf("%d\n", a);   // a now holds the original b
    ptrSwap(&a, &b);     // pointer version; swaps the two values back
    printf("%d\n", a);   // original a again
}

Generated asm with various compiler arguments:

ldc -output-s -release inline_test.d

# swap!(int) as emitted without optimisation flags (32-bit x86, AT&T syntax).
# One pointer arrives on the stack and the other in %eax; at the call site in
# main below, the stack slot receives &a and %eax receives &b.
swap:
    subl    $4, %esp            # reserve one 4-byte local: temp
    movl    8(%esp), %ecx       # %ecx = stack argument (above temp and the return address)
    movl    (%ecx), %edx
    movl    %edx, (%esp)        # temp = *%ecx
    movl    (%eax), %edx
    movl    %edx, (%ecx)        # *%ecx = *%eax
    movl    (%esp), %ecx
    movl    %ecx, (%eax)        # *%eax = temp
    addl    $4, %esp            # release temp
    ret $4                      # return and pop the 4-byte stack argument

# ptrSwap!(int) as emitted without optimisation flags (32-bit x86, AT&T syntax).
# Same argument passing as swap above (one pointer on the stack, one in %eax),
# but both pointers are first spilled to stack locals and reloaded before
# every use.
ptrSwap:
    subl    $12, %esp           # three 4-byte locals: temp and the two pointer copies
    movl    16(%esp), %ecx      # stack argument (above the locals and the return address)
    movl    %ecx, 8(%esp)       # 8(%esp) = copy of the stack-passed pointer
    movl    %eax, 4(%esp)       # 4(%esp) = copy of the pointer passed in %eax
    movl    8(%esp), %eax
    movl    (%eax), %eax
    movl    %eax, (%esp)        # temp = value behind the stack-passed pointer
    movl    4(%esp), %eax
    movl    (%eax), %eax        # value behind the %eax-passed pointer
    movl    8(%esp), %ecx
    movl    %eax, (%ecx)        # store it through the stack-passed pointer
    movl    (%esp), %eax
    movl    4(%esp), %ecx
    movl    %eax, (%ecx)        # store temp through the %eax-passed pointer
    addl    $12, %esp           # release the locals
    ret $4                      # return and pop the 4-byte stack argument

main:
    pushl   %ebx
    pushl   %edi
    pushl   %esi
    subl    $32, %esp
    movl    52(%esp), %eax
    movl    %eax, 28(%esp)
    movl    48(%esp), %eax
    movl    %eax, 24(%esp)
    movl    28(%esp), %eax
    movl    12(%eax), %ecx
    movl    8(%eax), %eax
    movl    %ecx, 8(%esp)
    movl    %eax, 4(%esp)
    movl    $0, (%esp)
    xorl    %esi, %esi
    movl    %esi, %eax
    call    Integer.parse
    subl    $12, %esp
    movl    %eax, 20(%esp)
    movl    28(%esp), %eax
    movl    20(%eax), %ecx
    movl    16(%eax), %eax
    movl    %ecx, 8(%esp)
    movl    %eax, 4(%esp)
    movl    $0, (%esp)
    movl    %esi, %eax
    call    Integer.parse
    subl    $12, %esp
    movl    %eax, 16(%esp)
    movl    20(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.str1, (%esp)
    call    printf
    leal    20(%esp), %edi
    movl    %edi, (%esp)
    leal    16(%esp), %ebx
    movl    %ebx, %eax
    call    swap
    subl    $4, %esp
    movl    20(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.str2, (%esp)
    call    printf
    movl    %edi, (%esp)
    movl    %ebx, %eax
    call    ptrSwap
    subl    $4, %esp
    movl    20(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.str3, (%esp)
    call    printf
    [...]

-------------------------

ldc -inline -release -output-s inline_test.d

main:
    pushl   %esi
    subl    $48, %esp
    movl    60(%esp), %eax
    movl    %eax, 28(%esp)
    movl    56(%esp), %eax
    movl    %eax, 24(%esp)
    movl    28(%esp), %eax
    movl    12(%eax), %ecx
    movl    8(%eax), %eax
    movl    %ecx, 8(%esp)
    movl    %eax, 4(%esp)
    movl    $0, (%esp)
    xorl    %esi, %esi
    movl    %esi, %eax
    call    Integer.parse
    subl    $12, %esp
    movl    %eax, 20(%esp)
    movl    28(%esp), %eax
    movl    20(%eax), %ecx
    movl    16(%eax), %eax
    movl    %ecx, 8(%esp)
    movl    %eax, 4(%esp)
    movl    $0, (%esp)
    movl    %esi, %eax
    call    Integer.parse
    subl    $12, %esp
    movl    %eax, 16(%esp)
    movl    20(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.str1, (%esp)
    call    printf
    movl    20(%esp), %eax
    movl    %eax, 32(%esp)
    movl    16(%esp), %eax
    movl    %eax, 20(%esp)
    movl    32(%esp), %eax
    movl    %eax, 16(%esp)
    movl    20(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.str2, (%esp)
    call    printf
    leal    20(%esp), %eax
    movl    %eax, 44(%esp)
    leal    16(%esp), %eax
    movl    %eax, 40(%esp)
    movl    44(%esp), %eax
    movl    (%eax), %eax
    movl    %eax, 36(%esp)
    movl    40(%esp), %eax
    movl    (%eax), %eax
    movl    44(%esp), %ecx
    movl    %eax, (%ecx)
    movl    36(%esp), %eax
    movl    40(%esp), %ecx
    movl    %eax, (%ecx)
    movl    20(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.str3, (%esp)
    call    printf
    [...]

-------------------------

ldc -inline -release -O5 -output-s inline_test.d
main:
    pushl   %ebx
    pushl   %edi
    pushl   %esi
    subl    $16, %esp
    movl    36(%esp), %esi
    movl    12(%esi), %eax
    movl    8(%esi), %ecx
    movl    %eax, 8(%esp)
    movl    %ecx, 4(%esp)
    movl    $0, (%esp)
    xorl    %edi, %edi
    xorl    %eax, %eax
    call    Integer.parse
    subl    $12, %esp
    movl    %eax, %ebx
    movl    20(%esi), %eax
    movl    16(%esi), %ecx
    movl    %eax, 8(%esp)
    movl    %ecx, 4(%esp)
    movl    $0, (%esp)
    movl    %edi, %eax
    call    Integer.parse
    subl    $12, %esp
    movl    %eax, %esi
    movl    %ebx, 4(%esp)
    movl    $.str1, (%esp)
    call    printf
    movl    %esi, 4(%esp)
    movl    $.str1, (%esp)
    call    printf
    movl    %ebx, 4(%esp)
    movl    $.str1, (%esp)
    call    printf
    [...]

You can see that -inline is enough to get both inlined.

The performance of D isn't something to ignore. I have translated a small ray-tracing program from C++ and have seen it run up to about 3-3.5 times slower with DMD, mostly because of missing inlining. Some benchmarks:
http://www.fantascienza.net/leonardo/js/smallpt.zip
http://www.fantascienza.net/leonardo/js/ao_bench.zip

Bye,
bearophile