Regression - quality of generated x86-64 code between GDC v12.3 and v13.1
Cecil Ward
cecil at cecilward.com
Thu Jun 8 04:13:30 UTC 2023
I wrote a very small procedure in D and the x86-64 asm code
generated in GDC 12.3 was excellent whereas that from 13.1 was
insanely bloated, totally different. Note: the badness is
independent of the -On optimisation level (-O3 used initially.)
Here’s the D code and following it, two asm code snippets:
====
/**
 * Minimal wrapper around the x86 CPUID instruction for leaves that do not
 * take an ECX (sub-leaf) input.
 *
 * Params:
 *   eax = CPUID leaf number, placed in EAX before the instruction executes.
 *         Must be a leaf for which is_ecx_needed( eax ) is false, because
 *         ECX is deliberately left unspecified on entry.
 *
 * Returns: the EAX, EBX, ECX and EDX values written by CPUID.
 */
public
pragma( inline, true )
cpuid_abcd_t
cpuid_insn( in uint32_t eax ) pure nothrow @nogc @trusted
    { /* ecx arg omitted; absolutely minimal variant wrapper */

    // Since we are not providing an ecx, we had better not be needing to
    // supply one.
    assert( ! is_ecx_needed( eax ) );

    static assert( eax.sizeof * 8 == 32 );  // optional, exact
    static assert( eax.sizeof * 8 >= 32 );  // essential min

    // Really just for type-checking, and constness-assertion.
    const uint32_t in_eax = eax;
    static assert( in_eax.sizeof * 8 == 32 );

    cpuid_abcd_t ret = void;  /* undefined until the cpuid insn writes it */
    static assert(     ret.eax.sizeof * 8 == 32
                    && ret.ebx.sizeof * 8 == 32
                    && ret.ecx.sizeof * 8 == 32
                    && ret.edx.sizeof * 8 == 32 );

    asm pure nothrow @nogc
        {
        /* "cpuid" has no operands and is spelled identically in AT&T and
           Intel syntax, so no .intel_syntax/.att_syntax switching is used.
           Unconditionally restoring .att_syntax after the instruction would
           corrupt the rest of the compiler's output when building with
           -masm=intel. */
        "cpuid"
        : /* outputs : it is guaranteed that all bits 63…32 of
             rax/rbx/rcx/rdx etc are zeroed in output. */
          "=a" ( ret.eax ),  // an lhs ref, write-only; only bits 31…0 are significant
          "=b" ( ret.ebx ),  // .. ..
          "=c" ( ret.ecx ),
          "=d" ( ret.edx )
        : /* inputs : */
          "a" ( in_eax )     // read.
          /* no ecx input - this is the variant with input ecx omitted */
        : /* no clobbers apart from the outputs already listed */
          /* cpuid does not modify the flags, so no "cc" clobber is reqd */
        ;
        }

    return ret;
    }
/* ======== */
GDC 12.3: -O3 -frelease -march=native
# cpuid_insn as compiled by GDC 12.3 (Intel syntax).
# In:  edi     = CPUID leaf number
# Out: rdx:rax = the four 32-bit CPUID results packed as
#                rax = ebx:eax, rdx = edx:ecx
push rbx                # rbx is callee-saved and cpuid overwrites it
mov eax, edi            # eax = requested leaf
cpuid
mov rsi, rdx            # rsi = edx result
sal rbx, 32             # move ebx result into bits 63..32
mov eax, eax            # 32-bit write zero-extends: clears bits 63..32 of rax
mov edx, ecx            # rdx = zero-extended ecx result
sal rsi, 32             # move edx result into bits 63..32
or rax, rbx             # rax = (ebx << 32) | eax
pop rbx                 # restore caller's rbx
or rdx, rsi             # rdx = (edx << 32) | ecx
ret
====
GDC 13.1 = v. bad, same switches: -O3 -frelease -march=native
# cpuid_insn as compiled by GDC 13.1 (Intel syntax).
# In:  edi     = CPUID leaf number
# Out: rdx:rax = the four 32-bit CPUID results packed as
#                rax = ebx:eax, rdx = edx:ecx
# The results are assembled in an xmm register, spilled to the stack and
# reloaded into rax/rdx instead of being combined in integer registers.
push rbp                            # set up a frame pointer ...
mov eax, edi                        # eax = requested leaf
mov rbp, rsp
push rbx                            # rbx is callee-saved and cpuid overwrites it; saved at [rbp-8]
and rsp, -32                        # ... and realign the stack to 32 bytes
cpuid
vmovd xmm3, eax                     # xmm3 = { eax, 0, 0, 0 }
vmovd xmm2, ecx                     # xmm2 = { ecx, 0, 0, 0 }
vpinsrd xmm1, xmm2, edx, 1          # xmm1 = { ecx, edx, 0, 0 }
vpinsrd xmm0, xmm3, ebx, 1          # xmm0 = { eax, ebx, 0, 0 }
vpunpcklqdq xmm4, xmm0, xmm1        # xmm4 = { eax, ebx, ecx, edx }
vmovdqa xmmword ptr [rsp-80], xmm4  # spill the 16-byte result below rsp
mov rax, qword ptr [rsp-80]         # rax = ebx:eax
mov rdx, qword ptr [rsp-72]         # rdx = edx:ecx
mov rbx, qword ptr [rbp-8]          # restore caller's rbx
leave                               # mov rsp, rbp / pop rbp
ret
/* ======== */
More information about the D.gnu
mailing list