It seems like DMD doesn't optimize tail recursion very well

Andrei Khropov andkhropov at nospam_mtu-net.ru
Fri Jun 9 14:03:44 PDT 2006


pragma wrote:


> To my eye, the codegen in D is not only shorter, but more efficent. 

It is more efficient compared to DMC only.

My point was that DMD codegen is substantially less efficient _in this
particular case_ than widespread C++ compilers like MSVC++ or GCC while being
on a par with them (or superior) in the other cases. It's even slower than MS
C# compiler ( about 1500 ms vs 2200 ms for DMD ).

Here is the assembler output (ack and main) for my original program (output was
removed from the original program to make the listing clearer) in GCC 3.4.2 (in
MinGW)
 :

----------------------------------------------------------
__Z3Ackii:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$8, %esp
	movl	8(%ebp), %edx
	movl	12(%ebp), %eax
	.p2align 4,,15
L12:
	testl	%edx, %edx
	je	L7
L14:
	testl	%eax, %eax
	jne	L4
	decl	%edx
	movl	$1, %eax
	testl	%edx, %edx
	jne	L14
L7:
	addl	$8, %esp
	incl	%eax
	popl	%ebx
	popl	%ebp
	ret
	.p2align 4,,7
L4:
	movl	%edx, (%esp)
	leal	-1(%edx), %ebx
	decl	%eax
	movl	%eax, 4(%esp)
	call	__Z3Akkii
	movl	%ebx, %edx
	jmp	L12
	.def	___main;	.scl	2;	.type	32;	.endef
	.section .rdata,"dr"
LC0:
	.ascii " ms elapsed.\12\0"
	.text
	.align 2
	.p2align 4,,15
.globl _main
	.def	_main;	.scl	2;	.type	32;	.endef
_main:
	pushl	%ebp
	movl	$16, %eax
	movl	%esp, %ebp
	pushl	%ebx
	subl	$20, %esp
	andl	$-16, %esp
	call	__alloca
	call	___main
	call	_GetTickCount at 0
	movl	$3, (%esp)
	movl	$10, %ecx
	movl	%eax, %ebx
	movl	%ecx, 4(%esp)
	call	__Z3Ackii
	movl	%eax, 4(%esp)
	movl	$2, (%esp)
	call	__Z3Ackii
	call	_GetTickCount at 0
	movl	$__ZSt4cout, (%esp)
	subl	%ebx, %eax
	movl	%eax, 4(%esp)
	call	__ZNSolsEm
	movl	%eax, (%esp)
	movl	$LC0, %edx
	movl	%edx, 4(%esp)
	call	__ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
	movl	-4(%ebp), %ebx
	xorl	%eax, %eax
	leave
	ret 
----------------------------------------------------------

It seems like GCC was clever enough to somehow split the calculation in two
calls.

-- 
AKhropov



More information about the Digitalmars-d mailing list