Regression - quality of generated x86-64 code between GDC v12.3 and v13.1

Cecil Ward cecil at cecilward.com
Thu Jun 8 04:13:30 UTC 2023


I wrote a very small procedure in D. The x86-64 asm code 
generated by GDC 12.3 was excellent, whereas the code from 13.1 is 
hugely bloated and completely different. Note: the problem is 
independent of the -On optimisation level (-O3 was used initially).

Here’s the D code and following it, two asm code snippets:

====



/**
 * Minimal wrapper around the x86 `cpuid` instruction for leaves that take
 * no ecx (sub-leaf) input.
 *
 * Params:
 *   eax = value placed in the eax register before `cpuid` executes
 *
 * Returns: a `cpuid_abcd_t` whose `eax`, `ebx`, `ecx` and `edx` fields hold
 *   the values that `cpuid` left in the corresponding registers.
 *
 * The caller must not pass a leaf for which `is_ecx_needed` returns true;
 * this is checked by an `assert` in builds where asserts are enabled.
 */
public
pragma( inline, true )
cpuid_abcd_t
cpuid_insn( in uint32_t eax ) pure nothrow @nogc @trusted
	{ /* ecx arg omitted; absolutely minimal variant wrapper */
	// Since we are not providing an ecx, we had better not be needing
	// to supply one.
	assert( ! is_ecx_needed( eax ) );

	static assert( eax.sizeof * 8 == 32 );	// optional, exact
	static assert( eax.sizeof * 8 >= 32 );	// essential min

	// Really just for type-checking, and constness-assertion.
	const uint32_t in_eax = eax;
	static assert( in_eax.sizeof * 8 == 32 );

	/* Undefined until the cpuid insn writes it. */
	cpuid_abcd_t ret = void;
	static assert(    ret.eax.sizeof * 8 == 32
	               && ret.ebx.sizeof * 8 == 32
	               && ret.ecx.sizeof * 8 == 32
	               && ret.edx.sizeof * 8 == 32 );

	asm pure nothrow @nogc
	    {
	    /* `cpuid` takes no explicit operands, so it is spelled the same
	       in AT&T and Intel syntax.  No .intel_syntax/.att_syntax
	       directives are emitted: an unconditional switch back to AT&T
	       would leave the assembler in the wrong mode when the unit is
	       compiled with -masm=intel. */
	    "cpuid"

	    : /* outputs : it is guaranteed that all bits 63…32 of
	         rax/rbx/rcx/rdx etc are zeroed in output.  Each output is
	         an lhs ref, write-only, and only bits 31…0 are significant. */
	    	"=a" ( ret.eax ),
	    	"=b" ( ret.ebx ),
	    	"=c" ( ret.ecx ),
	    	"=d" ( ret.edx )
	    : 	/* inputs : eax is read; there is no ecx input - this is the
	    	   variant with input ecx omitted */
	    	"a"  ( in_eax )
	    : 	/* no clobbers apart from the outputs already listed */
	        /* does cpuid set flags? - think not, so no "cc" clobber
	           reqd */
	    ;
	    }
	return ret;
	}

/* ======== */

GDC 12.3::  -O3 -frelease -march=native

push	rbx
mov	eax, edi
cpuid
mov	rsi, rdx
sal		rbx, 32
mov	eax, eax
mov	edx, ecx
sal		rsi, 32
or		rax, rbx
pop	rbx
or		rdx, rsi
ret

====
GDC 13.1 = v. bad, same switches:  -O3 -frelease -march=native

push		rbp
mov		eax, edi
mov		rbp, rsp
push		rbx
and		        rsp, -32
cpuid
vmovd		xmm3, eax
vmovd		xmm2, ecx
vpinsrd	        xmm1, xmm2, edx, 1
vpinsrd	        xmm0, xmm3, ebx, 1
vpunpcklqdq	  xmm4, xmm0, xmm1
vmovdqa	xmmword ptr [rsp-80], xmm4
mov		rax, qword ptr [rsp-80]
mov		rdx, qword ptr [rsp-72]
mov		rbx, qword ptr [rbp-8]	
leave
ret
/* ======== */


More information about the D.gnu mailing list