division of objects into classes and structures is bad

Sun Dec 28 14:05:15 PST 2008

(Forgive me, I am once again going to bring up a sore subject of mine.)

This is about why I consider the division of objects into classes and
structures to be bad.

If the object is a class, then there is additional overhead when
evaluating expressions involving that class, because it is impossible
to allocate a class instance on the stack and pass it as a result to
the next function.

An example using the overloaded operator function opAdd:

//===================
import std.stdio;

// Class (reference type) with a single integer field; instances are
// created with `new`.
class C {
    int i;
    // Overloads `+`: allocates a fresh C with `new`, stores the sum of both
    // operands' `i` in it and returns it; neither operand is modified.
    C opAdd( C src )    {
        auto ret = new C;
        ret.i = i + src.i;
        return ret;
    }
}

// Creates one C with i = 1, adds it to itself twice and prints the
// resulting i.
void main() {
    auto c1 = new C;
    c1.i = 1;

    // Two chained `+` operations, so opAdd runs twice; the result of the
    // first call is only an intermediate value.
    auto c2 = c1 + c1 + c1;
    writeln( c2.i );
}
//===================

auto c2 = c1 + c1 + c1; // calculated as:
auto c2 = c1.opAdd( c1 ).opAdd( c1 );

The temporary result of the first opAdd in this expression is allocated
on the heap (i.e. with overhead), and only then is a reference to it
passed to the second opAdd call:

        assume  CS:_D4test1C5opAddMFC4test1CZC4test1C
L0:             push    EAX
                push    EBX
                push    offset FLAT:_D4test1C7__ClassZ
                call    near ptr __d_newclass
                mov     EBX,EAX
                mov     EAX,8[ESP]
                mov     ECX,8[EAX]
                mov     EDX,010h[ESP]
                add     ECX,8[EDX]
                mov     8[EBX],ECX
                add     ESP,4
                mov     EAX,EBX
                pop     EBX
                pop     ECX
                ret     4
_D4test1C5opAddMFC4test1CZC4test1C      ends
__Dmain comdat
       assume  CS:__Dmain
L0:             push    EBX
                push    offset FLAT:_D4test1C7__ClassZ
                call    near ptr __d_newclass
                mov     EBX,EAX
                mov     dword ptr 8[EBX],1
                add     ESP,4
                push    EBX
                push    EBX
                mov     EAX,EBX
                mov     ECX,[EBX]
                call    dword ptr 014h[ECX]
                mov     EDX,[EAX]
                call    dword ptr 014h[EDX]
                mov     EAX,8[EAX]
                pop     EBX
                ret
__Dmain ends

opAdd is called twice (call dword ptr 014h[ECX] and call dword ptr
014h[EDX]). The objects are created on the heap (call near ptr
__d_newclass). Two objects are created, and the first of them is a
temporary.

Now let us consider the same example using a struct, which allows
allocation on the stack:

//===================
// Struct (value type) counterpart of the class example: the same `i`
// field plus a 100-element int array.
struct C {
    int i;
    int[100] j; // to prevent returning this struct in registers

    // Overloads `+`: builds the result in a local C, sets its `i` to the
    // sum of both operands' `i`, and returns it by value.
    C opAdd( C src )    {
        C ret;
        ret.i = i + src.i;
        return ret;
    }
}

// Evaluates c1 + c1 + c1 on struct values and returns the resulting `i`
// from main.
int main() {
    C c1;

    // initialise i by "random" value to prevent compile-time calculation
    c1.i = cast(int)&c1;

    // Two chained `+` operations on values; no `new` is involved.
    auto c2 = c1 + c1 + c1;
    return c2.i;
}
//===================

In this case the compiler easily detects that the returned value is
allocated on the stack, so nothing needs to be done to pass it to the
following function - it is enough to leave it on the stack
(linux objdump output):

struct C {
    int i;
    int[100] j; // to prevent returning this struct in register

    C opAdd( C src )    {
...
        C ret;
 8049075:       b9 65 00 00 00          mov    $0x65,%ecx
 804907a:       31 c0                   xor    %eax,%eax
 804907c:       8b 7d 08                mov    0x8(%ebp),%edi
 804907f:       f3 ab                   rep stos %eax,%es:(%edi)
        ret.i = i + src.i;
 8049081:       8b 8d 5c fe ff ff       mov    -0x1a4(%ebp),%ecx
 8049087:       8b 11                   mov    (%ecx),%edx
 8049089:       03 55 0c                add    0xc(%ebp),%edx
 804908c:       8b 5d 08                mov    0x8(%ebp),%ebx
 804908f:       89 13                   mov    %edx,(%ebx)
 8049091:       8b 45 08                mov    0x8(%ebp),%eax
 8049094:       5f                      pop    %edi
 8049095:       5b                      pop    %ebx
 8049096:       c9                      leave
 8049097:       c2 98 01                ret    $0x198
 804909a:       90                      nop
 804909b:       90                      nop
    }
}

int main() {
...
    auto c2 = c1 + c1 + c1;
 80490c3:       8d 9d bc fc ff ff       lea    -0x344(%ebp),%ebx
 80490c9:       b9 65 00 00 00          mov    $0x65,%ecx
 80490ce:       ff 33                   pushl  (%ebx)
 80490d0:       83 eb 04                sub    $0x4,%ebx
 80490d3:       e2 f9                   loop   80490ce <_Dmain+0x32>
 80490d5:       8d 95 cc fc ff ff       lea    -0x334(%ebp),%edx
 80490db:       52                      push   %edx
 80490dc:       8d b5 bc fc ff ff       lea    -0x344(%ebp),%esi
 80490e2:       b1 65                   mov    $0x65,%cl
 80490e4:       ff 36                   pushl  (%esi)
 80490e6:       83 ee 04                sub    $0x4,%esi
 80490e9:       e2 f9                   loop   80490e4 <_Dmain+0x48>
 80490eb:       8d 85 6c fe ff ff       lea    -0x194(%ebp),%eax
 80490f1:       50                      push   %eax
 80490f2:       8d 85 2c fb ff ff       lea    -0x4d4(%ebp),%eax
 80490f8:       e8 67 ff ff ff          *call   8049064*
 80490fd:       e8 62 ff ff ff          *call   8049064*
    return c2.i;
 8049102:       8b 85 cc fc ff ff       mov    -0x334(%ebp),%eax
...

(at 80490f8 and 80490fd there are simply two calls in succession)

If structures and classes were the same, this excellent optimization
would be available in every case.