Struct copies
bearophile
bearophileHUGS at lycos.com
Sun Jan 26 05:02:48 PST 2014
The following code is compiled with the ldc2 compiler based on
LLVM 3.3.1.
This swaps two values in-place:
void swap(T)(ref T x, ref T y) pure nothrow {
immutable aux = x;
x = y;
y = aux;
}
If I swap uint values I get the asm and IR:
__D5test611__T4swapTkZ4swapFNaNbNfKkKkZv:
pushl %esi
movl 8(%esp), %ecx
movl (%ecx), %edx
movl (%eax), %esi
movl %esi, (%ecx)
movl %edx, (%eax)
popl %esi
ret $4
; Function Attrs: nounwind
define x86_stdcallcc void @"\01__D5test65swap1FNaNbKkKkZv"(i32*
inreg nocapture %y_arg, i32* nocapture %x_arg) #0 {
entry:
%tmp = load i32* %x_arg, align 4
%tmp2 = load i32* %y_arg, align 4
store i32 %tmp2, i32* %x_arg, align 4
store i32 %tmp, i32* %y_arg, align 4
ret void
}
Often I have a simple struct like this, whose sizeof equals one or
two size_t (a size_t is a 32 bit unsigned integer on this
system):
struct Foo {
ushort a;
char b, c;
}
If I instantiate the swap function template on values of type Foo
I get the asm and IR:
__D5test621__T4swapTS5test63FooZ4swapFNaNbNfKS5test63FooKS5test63FooZv:
pushl %edi
pushl %esi
movl 12(%esp), %ecx
movw (%ecx), %dx
movw 2(%ecx), %si
movl (%eax), %edi
movl %edi, (%ecx)
movw %dx, (%eax)
movw %si, 2(%eax)
popl %esi
popl %edi
ret $4
; Function Attrs: nounwind
define x86_stdcallcc void
@"\01__D5test65swap2FNaNbKS5test63FooKS5test63FooZv"(%test6.Foo*
inreg nocapture %y_arg, %test6.Foo* nocapture %x_arg) #0 {
entry:
%0 = getelementptr inbounds %test6.Foo* %x_arg, i32 0, i32 0
%1 = load i16* %0, align 1
%2 = getelementptr inbounds %test6.Foo* %x_arg, i32 0, i32 1
%3 = load i8* %2, align 1
%4 = getelementptr inbounds %test6.Foo* %x_arg, i32 0, i32 2
%5 = load i8* %4, align 1
%6 = bitcast %test6.Foo* %y_arg to i32*
%7 = bitcast %test6.Foo* %x_arg to i32*
%8 = load i32* %6, align 1
store i32 %8, i32* %7, align 1
%9 = getelementptr inbounds %test6.Foo* %y_arg, i32 0, i32 0
store i16 %1, i16* %9, align 1
%10 = getelementptr inbounds %test6.Foo* %y_arg, i32 0, i32 1
store i8 %3, i8* %10, align 1
%11 = getelementptr inbounds %test6.Foo* %y_arg, i32 0, i32 2
store i8 %5, i8* %11, align 1
ret void
}
Suppose I create a new union Bar that overlays a 32 bit integer on
all three Foo fields:
union Bar {
uint all;
struct {
ushort a;
char b, c;
}
}
Now I can define a new swap function that works on values of type
Bar:
void swap2(ref Bar x, ref Bar y) pure nothrow {
immutable Bar aux = x;
x.all = y.all;
y.all = aux.all;
}
Its asm and IR are shorter:
__D5test65swap2FNaNbKS5test63BarKS5test63BarZv:
pushl %esi
movl 8(%esp), %ecx
movl (%ecx), %edx
movl (%eax), %esi
movl %esi, (%ecx)
movl %edx, (%eax)
popl %esi
ret $4
; Function Attrs: nounwind
define x86_stdcallcc void
@"\01__D5test65swap3FNaNbKS5test63BarKS5test63BarZv"(%test6.Bar*
inreg nocapture %y_arg, %test6.Bar* nocapture %x_arg) #0 {
entry:
%0 = getelementptr inbounds %test6.Bar* %x_arg, i32 0, i32 0
%1 = load i32* %0, align 1
%tmp4 = getelementptr %test6.Bar* %y_arg, i32 0, i32 0
%tmp5 = load i32* %tmp4, align 4
store i32 %tmp5, i32* %0, align 4
store i32 %1, i32* %tmp4, align 4
ret void
}
In the case of swapping Foos, why isn't LLVM optimizing the swap
function to a shorter asm like swap2? I have asked this on the
LLVM IRC channel, and aKor has told me that for similar C code Clang
swaps two Foo using a memcpy, so it uses a single 32 bit copy. So
perhaps ldc2 can do the same for this common case.
Bye,
bearophile
More information about the digitalmars-d-ldc
mailing list