Virtual value types during compile-time for static type safety, static optimizations and function overloading.

Sat Jul 18 03:06:05 PDT 2015

I made a thorough comparison using multiple compilers and wrote a 
summary of the findings. In short, there is a runtime overhead.

I reduced the code to cut out the imports and made two versions 
with equivalent semantic content.
positive0.d contains the hand written specializations of the abs 
function.
positive.d contains the solution with function templates / static 
type analysis.

///////

/* positive0.d:

Compile & execute:
$ dmd positive0.d; ./positive0; echo $?
$ ldc2 positive0.d; ./positive0; echo $?

generate ASM source:
$ dmd positive0.d; gobjdump -d positive0.o > positive0.dmd.s
$ ldc2 positive0.d -output-s

*/

/// Hand-written specialization of abs for values the caller already knows to
/// be non-negative: hands the argument back unchanged. Nothing here checks
/// that n >= 0; a negative n is returned as-is.
int absPositive(int n) { return n; }

/// General absolute value for a plain int.
/// Returns: n when n >= 0, otherwise -n. Because the result is an int,
/// int.min negates to itself (two's-complement wrap) and comes back negative.
int abs(int n) {
   if (n < 0) {
      return -n;
   }
   return n;
}

/// Returns x multiplied by itself, as a plain int.
int square(int x) {
   immutable int product = x * x;
   return product;
}

/**
 * Self-check for the hand-specialized variant.
 *
 * Plain ints go through the branching abs(); the results of square() go
 * through absPositive(), the hand-written identity specialization. This is
 * the manual counterpart of the overload selection that the template version
 * of this program performs at compile time.
 *
 * Returns: 0 when every check holds, 1 otherwise (the process exit status
 * printed by `echo $?`).
 */
int main() {
   return !((abs(-16) == 16)
     && (abs(3) == 3)
     // The squares checked here (25 and 16) are non-negative, so the
     // identity specialization is the right hand-picked overload.
     && (square(5).absPositive == 25)
     && (square(-4).absPositive == 16));
}

///////

/* positive.d:

Compile & execute:
$ dmd positive.d; ./positive; echo $?
$ ldc2 positive.d; ./positive; echo $?

generate ASM source:
$ dmd positive.d; gobjdump -d positive.o > positive.dmd.s
$ ldc2 positive.d -output-s

*/
/// Wrapper type used to tag an int as "already non-negative" in the type
/// system, so that overloads/templates can pick a cheaper code path for it.
/// The tag is by convention only: the struct has no constructor or invariant
/// that rejects a negative num.
struct Positive {
   int num;          // the wrapped value
   alias num this;   // lets a Positive be used wherever an int is expected
}

/// Absolute value that records its result as a Positive.
///
/// The branch is chosen at compile time per instantiation:
///  - T is Positive: the value is returned unchanged, no comparison emitted;
///  - any other T: the magnitude of n is computed and wrapped in a Positive.
Positive abs(T)(T n) {
   static if (!is(T == Positive)) {
      // Not yet tagged: do the sign test at run time, then tag the result.
      if (n >= 0) {
         return Positive(n);
      }
      return Positive(-n);
   } else {
      // Already tagged as non-negative: identity.
      return n;
   }
}

/// Returns x * x tagged as Positive, so a following abs() call resolves to
/// the identity instantiation.
Positive square(int x) {
   immutable int squared = x * x;
   return Positive(squared);
}

/// Self-check for the template-based variant.
/// Returns: 0 when every check holds, 1 otherwise (the process exit status
/// printed by `echo $?`).
int main() {
   // Same four checks, in the same order and with the same short-circuiting.
   immutable bool allHold =
        abs(-16) == 16
     && abs(3) == 3
     && square(5).abs == 25
     && square(-4).abs == 16;
   return allHold ? 0 : 1;
}

///////

I compared the generated assembly. The output was substantially 
longer for the template-based (non-hand-written) specializations 
of the abs function.

The 'optimized' versions of the abs function were equivalent, but 
the 'non-optimized' versions show a runtime overhead for both dmd 
and ldc2: two 'mov' instructions instead of a single one.

The assembly listing of the hand-written code was roughly half the 
size for both compilers:

File sizes:
ldc:
2678 positive0.s
4313 positive.s

dmd:
3442 positive0.dmd.s
8701 positive.dmd.s

You can see the abs functions below, and you can spot the double 
'mov' operations:

positive.dmd.s:
0000000000000230 <_D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive>:
  230:	55                   	push   %rbp
  231:	48 8b ec             	mov    %rsp,%rbp
  234:	48 83 ec 10          	sub    $0x10,%rsp
  238:	85 ff                	test   %edi,%edi
  23a:	78 02                	js     23e <_D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive+0xe>
  23c:	eb 02                	jmp    240 <_D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive+0x10>
  23e:	f7 df                	neg    %edi
  240:	89 7d f0             	mov    %edi,-0x10(%rbp)
  243:	48 89 f8             	mov    %rdi,%rax
  246:	c9                   	leaveq
  247:	c3                   	retq

0000000000000248 <_D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive>:
  248:	55                   	push   %rbp
  249:	48 8b ec             	mov    %rsp,%rbp
  24c:	48 83 ec 10          	sub    $0x10,%rsp
  250:	48 89 f8             	mov    %rdi,%rax
  253:	c9                   	leaveq
  254:	c3                   	retq
  255:	0f 1f 00             	nopl   (%rax)

positive0.dmd.s:
00000000000000a0 <_D9positive011absPositiveFiZi>:
   a0:	55                   	push   %rbp
   a1:	48 8b ec             	mov    %rsp,%rbp
   a4:	48 83 ec 10          	sub    $0x10,%rsp
   a8:	48 89 f8             	mov    %rdi,%rax
   ab:	c9                   	leaveq
   ac:	c3                   	retq
   ad:	0f 1f 00             	nopl   (%rax)

00000000000000b0 <_D9positive03absFiZi>:
   b0:	55                   	push   %rbp
   b1:	48 8b ec             	mov    %rsp,%rbp
   b4:	48 83 ec 10          	sub    $0x10,%rsp
   b8:	85 ff                	test   %edi,%edi
   ba:	78 05                	js     c1 <_D9positive03absFiZi+0x11>
   bc:	48 89 f8             	mov    %rdi,%rax
   bf:	eb 05                	jmp    c6 <_D9positive03absFiZi+0x16>
   c1:	48 89 f8             	mov    %rdi,%rax
   c4:	f7 d8                	neg    %eax
   c6:	c9                   	leaveq
   c7:	c3                   	retq

ldc2:
positive.s:

# abs template instantiated for int (positive.d), ldc2 output without -O.
# AT&T syntax, SysV AMD64.  In: %edi = n.  Out: %eax = magnitude of n.
# No frame is set up; all temporaries live at negative offsets from %rsp.
__D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive:
	.cfi_startproc
	movl	%edi, -4(%rsp)	# spill n
	cmpl	$0, -4(%rsp)
	jl	LBB2_2	# n < 0: take the negate path
	leaq	-4(%rsp), %rax	# n >= 0: select the slot that holds n
	movq	%rax, -16(%rsp)
	jmp	LBB2_3
LBB2_2:
	leaq	-20(%rsp), %rax	# select the temporary that will hold -n
	xorl	%ecx, %ecx
	subl	-4(%rsp), %ecx	# ecx = 0 - n
	movl	%ecx, -20(%rsp)
	movq	%rax, -16(%rsp)
LBB2_3:
	movq	-16(%rsp), %rax	# pointer to whichever value was selected
	movl	(%rax), %ecx
	movl	%ecx, -8(%rsp)	# extra store not present in positive0's abs (presumably the Positive temporary)
	movl	%ecx, %eax	# return value
	retq
	.cfi_endproc

# abs template instantiated for Positive (positive.d), ldc2 output without -O.
# AT&T syntax, SysV AMD64.  In: %edi = the Positive's int payload.  Out: %eax = same value.
# Emitted as a weak definition; contains no compare or branch.
	.globl	__D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive
	.weak_definition	__D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive
	.align	4, 0x90
__D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive:
	.cfi_startproc
	movl	%edi, -8(%rsp)	# spill the argument; this slot is never read back
	movl	%edi, %eax	# return the argument unchanged
	retq
	.cfi_endproc

	.section	__TEXT,__text,regular,pure_instructions
	.align	4, 0x90

positive0.s:
# absPositive(int) from positive0.d, ldc2 output without -O.
# AT&T syntax, SysV AMD64.  In: %edi = n.  Out: %eax = n.
__D9positive011absPositiveFiZi:
	.cfi_startproc
	movl	%edi, -4(%rsp)	# spill n
	movl	-4(%rsp), %eax	# reload it as the return value
	retq
	.cfi_endproc

# abs(int) from positive0.d, ldc2 output without -O.
# AT&T syntax, SysV AMD64.  In: %edi = n.  Out: %eax = magnitude of n.
# No frame is set up; all temporaries live at negative offsets from %rsp.
	.globl	__D9positive03absFiZi
	.align	4, 0x90
__D9positive03absFiZi:
	.cfi_startproc
	movl	%edi, -4(%rsp)	# spill n
	cmpl	$0, -4(%rsp)
	jl	LBB1_2	# n < 0: take the negate path
	leaq	-4(%rsp), %rax	# n >= 0: select the slot that holds n
	movq	%rax, -16(%rsp)
	jmp	LBB1_3
LBB1_2:
	leaq	-20(%rsp), %rax	# select the temporary that will hold -n
	xorl	%ecx, %ecx
	subl	-4(%rsp), %ecx	# ecx = 0 - n
	movl	%ecx, -20(%rsp)
	movq	%rax, -16(%rsp)
LBB1_3:
	movq	-16(%rsp), %rax	# pointer to whichever value was selected
	movl	(%rax), %eax	# load it straight into the return register
	retq
	.cfi_endproc

	.globl	__D9positive06squareFiZi
	.align	4, 0x90

my compilers:

$ ldc2 -version
LDC - the LLVM D compiler (6d3923):
   based on DMD v2.066.1 and LLVM 3.6.1
   Default target: x86_64-apple-darwin14.4.0
   Host CPU: core-avx2

$ dmd --version
DMD64 D Compiler v2.067