It seems like DMD doesn't optimize tail recursion very well

pragma pragma_member at pathlink.com
Fri Jun 9 10:25:32 PDT 2006


Attached are the ASM dump of the Ack() function as outlined elsewhere in this
thread.  

I was actually suprised that the extern(C) D version of the Ack() function was
byte-for-byte identical to the plain C/CPP version of the same code.  The plain
extern(D) version was the only run that created a different routine, and hence,
the wildly different timings.

To my eye, the codegen in D is not only shorter, but more efficent.  The only
thing I'm not too sure on is the use of enter/leave instead of push/pop/mov for
maintaining up the function's stack frame.  While I have no solid evidence, I'd
be willing to bet that these two instructions are what's holding things up.

My timings are on an Intel chip.  Anyone out there with an AMD machine willing
to try this on?

- Eric

// dissasembler listing for ackd.exe 
// int Ack(int a, int b)

SUB_L00402010:
enter	0004h,00h
mov	[ebp-04h],eax
nop
nop
nop
cmp	dword ptr [ebp+08h],00000000h
jnz	L00402025
inc	eax
leave
retn	0004h
;------------------------------------------------------------------------------
L00402025:
cmp	dword ptr [ebp-04h],00000000h
jnz	L0040203E
mov	eax,[ebp+08h]
dec	eax
push	eax
mov	eax,00000001h
call	SUB_L00402010
leave
retn	0004h
;------------------------------------------------------------------------------
L0040203E:
mov	ecx,[ebp+08h]
lea	edx,[ecx-01h]
push	edx
push	ecx
mov	eax,[ebp-04h]
dec	eax
call	SUB_L00402010
call	SUB_L00402010
leave
retn	0004h

// dissasembler listing for ackd.exe 
// extern(C) int Ack(int a, int b)

SUB_L00402010:
push	ebp
mov	ebp,esp
push	ebx
nop
nop
nop
cmp	dword ptr [ebp+08h],00000000h
jnz	L00402024
mov	eax,[ebp+0Ch]
inc	eax
pop	ebx
pop	ebp
retn
;------------------------------------------------------------------------------
L00402024:
cmp	dword ptr [ebp+0Ch],00000000h
jnz	L0040203C
push	00000001h
mov	ecx,[ebp+08h]
dec	ecx
push	ecx
call	SUB_L00402010
add	esp,00000008h
pop	ebx
pop	ebp
retn
;------------------------------------------------------------------------------
L0040203C:
mov	edx,[ebp+0Ch]
dec	edx
push	edx
push	[ebp+08h]
call	SUB_L00402010
add	esp,00000008h
push	eax
mov	ebx,[ebp+08h]
dec	ebx
push	ebx
call	SUB_L00402010
add	esp,00000008h
pop	ebx
pop	ebp
retn

// dissasembler listing for ackcpp.exe 
// int Ack(int a, int b)

SUB_L00402010:
push	ebp
mov	ebp,esp
push	ebx
nop
nop
nop
cmp	dword ptr [ebp+08h],00000000h
jnz	L00402024
mov	eax,[ebp+0Ch]
inc	eax
pop	ebx
pop	ebp
retn
;------------------------------------------------------------------------------
L00402024:
cmp	dword ptr [ebp+0Ch],00000000h
jnz	L0040203C
push	00000001h
mov	ecx,[ebp+08h]
dec	ecx
push	ecx
call	SUB_L00402010
add	esp,00000008h
pop	ebx
pop	ebp
retn
;------------------------------------------------------------------------------
L0040203C:
mov	edx,[ebp+0Ch]
dec	edx
push	edx
push	[ebp+08h]
call	SUB_L00402010
add	esp,00000008h
push	eax
mov	ebx,[ebp+08h]
dec	ebx
push	ebx
call	SUB_L00402010
add	esp,00000008h
pop	ebx
pop	ebp
retn





More information about the Digitalmars-d mailing list