[Issue 13952] change in struct ctor lowering triggers codegen bug

Thu Feb 5 15:34:24 PST 2015

https://issues.dlang.org/show_bug.cgi?id=13952

--- Comment #4 from Martin Nowak <code at dawg.eu> ---
struct X86Imm
{
    ulong imm;
}

struct X86Opnd
{
    union
    {
        X86Reg reg;
        X86Imm imm;
    }

    ubyte tag;

    this(X86Reg r) { reg = r; }
}

struct X86Reg
{
    /// Register type
    ubyte type;

    /// Register index number
    ubyte regNo;

    /// Size in bits
    ushort size;
}

X86Opnd opnd(X86Reg reg)
{
    return X86Opnd(reg);
}

Here is the asm that for the opnd function with 2.066.1

_D4bug24opndFS4bug26X86RegZS4bug27X86Opnd PROC
        push    rbp                                     ; 0000 _ 55
        mov     rbp, rsp                                ; 0001 _ 48: 8B. EC
        sub     rsp, 32                                 ; 0004 _ 48: 83. EC, 20
        lea     rax, [rbp-20H]                          ; 0008 _ 48: 8D. 45, E0
        xor     rcx, rcx                                ; 000C _ 48: 31. C9
        mov     qword ptr [rax], rcx                    ; 000F _ 48: 89. 08
        mov     qword ptr [rax+8H], rcx                 ; 0012 _ 48: 89. 48, 08
        mov     rsi, rdi                                ; 0016 _ 48: 89. FE
        mov     rdi, rax                                ; 0019 _ 48: 89. C7
        call    _D4bug27X86Opnd6__ctorMFNcS4bug26X86RegZS4bug27X86Opnd; 001C _
E8, 00000000(rel)
        mov     rdx, qword ptr [rax+8H]                 ; 0021 _ 48: 8B. 50, 08
        mov     rax, qword ptr [rax]                    ; 0025 _ 48: 8B. 00
        mov     rsp, rbp                                ; 0028 _ 48: 8B. E5
        pop     rbp                                     ; 002B _ 5D
        ret                                             ; 002C _ C3
_D4bug24opndFS4bug26X86RegZS4bug27X86Opnd ENDP

That's the same function now.

_D4bug24opndFS4bug26X86RegZS4bug27X86Opnd PROC
        push    rbp                                     ; 0000 _ 55
        mov     rbp, rsp                                ; 0001 _ 48: 8B. EC
        sub     rsp, 32                                 ; 0004 _ 48: 83. EC, 20
        xor     eax, eax                                ; 0008 _ 31. C0
        mov     dword ptr [rbp-18H], eax                ; 000A _ 89. 45, E8
        mov     dword ptr [rbp-14H], eax                ; 000D _ 89. 45, EC
        xor     ecx, ecx                                ; 0010 _ 31. C9
        mov     byte ptr [rbp-10H], cl                  ; 0012 _ 88. 4D, F0
        mov     byte ptr [rbp-0FH], cl                  ; 0015 _ 88. 4D, F1
        mov     word ptr [rbp-0EH], ax                  ; 0018 _ 66: 89. 45, F2
        mov     edx, dword ptr [rbp-10H]                ; 001C _ 8B. 55, F0
        mov     dword ptr [rbp-20H], edx                ; 001F _ 89. 55, E0
        mov     byte ptr [rbp-18H], cl                  ; 0022 _ 88. 4D, E8
        mov     rsi, rdi                                ; 0025 _ 48: 89. FE
        lea     rdi, [rbp-20H]                          ; 0028 _ 48: 8D. 7D, E0
        call    _D4bug27X86Opnd6__ctorMFNcS4bug26X86RegZS4bug27X86Opnd; 002C _
E8, 00000000(rel)
        mov     rdx, qword ptr [rax+8H]                 ; 0031 _ 48: 8B. 50, 08
        mov     rax, qword ptr [rax]                    ; 0035 _ 48: 8B. 00
        mov     rsp, rbp                                ; 0038 _ 48: 8B. E5
        pop     rbp                                     ; 003B _ 5D
        ret                                             ; 003C _ C3
_D4bug24opndFS4bug26X86RegZS4bug27X86Opnd ENDP

Just focusing on the X86Opnd init code

        lea     rax, [rbp-20H]
        xor     rcx, rcx
        mov     qword ptr [rax], rcx
        mov     qword ptr [rax+8H], rcx

In the new code the 4 bytes at [rbp-1CH] are never initialized.

        xor     eax, eax
        mov     dword ptr [rbp-18H], eax
        mov     dword ptr [rbp-14H], eax
        xor     ecx, ecx
        mov     byte ptr [rbp-10H], cl
        mov     byte ptr [rbp-0FH], cl
        mov     word ptr [rbp-0EH], ax
        mov     edx, dword ptr [rbp-10H]
        mov     dword ptr [rbp-20H], edx
        mov     byte ptr [rbp-18H], cl
        mov     rsi, rdi
        lea     rdi, [rbp-20H]

Without a constructor it's the same for both dmd versions and also only 
initializes the first union field of X86Opnd.

        xor     eax, eax
        mov     dword ptr [rbp-18H], eax
        mov     dword ptr [rbp-14H], eax
        mov     dword ptr [rbp-20H], edi

So this isn't really a regression and it was a bug in Higgs to rely on memory
compare.
I filed https://issues.dlang.org/show_bug.cgi?id=14107 to disallow default
comparison for structs with unions.

The amount of code generated for the constructor is still worrisome.
Especially the part that initializes X86Reg, because it initializes every field
individually, it constructs a temporary on the stack only to end up loading
zero into edx.

        xor     ecx, ecx
        mov     byte ptr [rbp-10H], cl
        mov     byte ptr [rbp-0FH], cl
        mov     word ptr [rbp-0EH], ax
        mov     edx, dword ptr [rbp-10H]

So we should still consider to revert that part of the change.

--