std.range.zip performance

bearophile bearophileHUGS at lycos.com
Tue Feb 15 16:24:41 PST 2011


While doing some benchmarks on the Clay language I've found something interesting.

I have timed a zip() done on two short arrays, and I have found this D version too much slow compared to normal array code. Higher order functions like zip, map, filter, etc are going to stay, they aren't toys, so I think the D compiler has to digest them better.

(There is lot of of asm at the tail of this post, I don't know if some of t will get cut away.)

Bye,
bearophile


Timings, seconds, best of 3:
  #1:  1.95
  #2: 61.50
  #3:  2.25
  #4:  1.52

----------------------

Binary sizes, bytes:
  #1:  33_016
  #2: 284_188
  #3: 103_964
  #4: 103_964

----------------------

Compilers and command line used:

DMD 2.051
clay compiler (hg id dec41ba07d58 tip, llvm r122866, Jan  5 2011)

clay -inline test1.clay -o test1.exe
dmd -O -release -inline test2.d
dmd -O -release -inline test3.d

To produce asm code with Clay:
clay -inline -asm test1.clay -o test1.s

Output: -1149672960 for all versions.

----------------------

Code used:


// program #1 (Clay)
main() {
    var a = Vector([18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28]);
    var b = Vector([9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25]);
    var tot = 0; // 32 bit signed int
    for (i in range(0, 50*1000*1000))
        for (x, y in zipped(a, b))
            tot += x + y;
    println(tot);
}

----------------------

// program #2 (D2)
import std.c.stdio: printf;
import std.range: zip;
void main() {
    auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
    auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
    int tot = 0;
    foreach (i; 0 .. 50_000_000)
        foreach (xy; zip(a, b))
            tot += xy[0] + xy[1];
    printf("%d\n", tot);
}

----------------------

// program #3 (D2)
import std.c.stdio: printf;
void main() {
    auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
    auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
    int tot = 0;
    foreach (i; 0 .. 50_000_000)
        foreach (j; 0 .. a.length)
            tot += a[j] + b[j];
    printf("%d\n", tot);
}

----------------------

// program #4 (D2)
import std.c.stdio: printf;
void main() {
    auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
    auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
    int tot = 0;
    foreach (i; 0 .. 50_000_000)
        foreach (j; 0 .. 30)
            tot += a[j] + b[j];
    printf("%d\n", tot);
}

----------------------

Just loops program #1:

    movl    $50000000, %eax         # imm = 0x2FAF080
    .align    16, 0x90
LBB2_4:                                 # %bb.nph.i.i
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_5 Depth 2
    xorl    %edx, %edx
    .align    16, 0x90
LBB2_5:                                 # %return410.i.i
                                        #   Parent Loop BB2_4 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
    addl    (%esi,%edx,4), %ecx
    addl    (%edi,%edx,4), %ecx
    incl    %edx
    cmpl    $30, %edx
    jne    LBB2_5
# BB#3:                                 # %return272.loopexit.i.i
                                        #   in Loop: Header=BB2_4 Depth=1
    decl    %eax
    jne    LBB2_4

----------------------

Just loops program #2:

LCE:        push    dword ptr 018h[ESP]
        mov    ESI,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip6__initZ
        push    dword ptr 018h[ESP]
        push    dword ptr 028h[ESP]
        push    dword ptr 028h[ESP]
        push    0
        lea    EDI,078h[ESP]
        movsd
        movsd
        movsd
        movsd
        movsd
        lea    EAX,078h[ESP]
        call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip
        mov    ESI,EAX
        lea    EDI,044h[ESP]
        movsd
        movsd
        movsd
        movsd
        movsd
        lea    ESI,044h[ESP]
        lea    EDI,024h[ESP]
        movsd
        movsd
        movsd
        movsd
        movsd
        lea    EAX,024h[ESP]
        call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
        xor    AL,1
        je    L153
L11C:        lea    EAX,024h[ESP]
        call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
        mov    07Ch[ESP],EAX
        mov    ECX,07Ch[ESP]
        lea    EAX,024h[ESP]
        mov    080h[ESP],EDX
        add    ECX,080h[ESP]
        add    EBX,ECX
        call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv
        lea    EAX,024h[ESP]
        call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
        xor    AL,1
        jne    L11C
L153:        inc    EBP
        cmp    EBP,02FAF080h
        jb    LCE


_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip    comdat
        push    EBX
        mov    ECX,8[ESP]
        mov    EBX,014h[ESP]
        push    ESI
        mov    ESI,EAX
        mov    EDX,01Ch[ESP]
        mov    4[ESI],EDX
        mov    EDX,014h[ESP]
        mov    [ESI],EBX
        mov    EBX,010h[ESP]
        mov    010h[ESI],ECX
        mov    8[ESI],EBX
        mov    0Ch[ESI],EDX
        pop    ESI
        pop    EBX
        ret    014h


_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb    comdat
L0:        sub    ESP,05Ch
        mov    ECX,010h[EAX]
        test    ECX,ECX
        push    EBX
        push    ESI
        mov    ESI,EAX
        je    L21
        cmp    ECX,1
        je    LC3
        cmp    ECX,2
        je    L55
        jmp    near ptr LC3
L21:        mov    EBX,[ESI]
        mov    EDX,4[ESI]
        mov    0Ch[ESP],EBX
        mov    010h[ESP],EDX
        cmp    dword ptr 0Ch[ESP],0
        je    L4A
        mov    EBX,8[ESI]
        mov    EDX,0Ch[ESI]
        mov    014h[ESP],EBX
        mov    018h[ESP],EDX
        cmp    dword ptr 014h[ESP],0
        jne    LC3
L4A:        pop    ESI
        mov    EAX,1
        pop    EBX
        add    ESP,05Ch
        ret
L55:        mov    EBX,[ESI]
        mov    EDX,4[ESI]
        mov    ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
        mov    01Ch[ESP],EBX
        mov    020h[ESP],EDX
        mov    02Ch[ESP],EAX
        mov    030h[ESP],ECX
        cmp    dword ptr 01Ch[ESP],0
        setz    DL
        mov    EBX,8[ESI]
        mov    024h[ESP],EBX
        mov    ECX,0Ch[ESI]
        mov    028h[ESP],ECX
        cmp    dword ptr 024h[ESP],0
        setz    CL
        cmp    DL,CL
        mov    EDX,1
        je    L98
        xor    EDX,EDX
L98:        xor    DL,1
        je    LC3
        push    dword ptr FLAT:_DATA[054h]
        push    dword ptr FLAT:_DATA[050h]
        push    0ADEh
        mov    EAX,038h[ESP]
        mov    EDX,03Ch[ESP]
        mov    EBX,038h[ESP]
        call    EDX
        push    EDX
        push    EAX
        call    near ptr _D3std9exception7bailOutFAyaixAaZv
LC3:        pop    ESI
        xor    EAX,EAX
        pop    EBX
        add    ESP,05Ch
        ret


_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple    comdat
    assume    CS:_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
L0:        sub    ESP,028h
        mov    EDX,4[EAX]
        push    EBX
        push    ESI
        mov    ESI,EAX
        mov    EBX,[ESI]
        push    EDI
        mov    014h[ESP],EBX
        mov    018h[ESP],EDX
        cmp    dword ptr 014h[ESP],0
        je    L31
        lea    EDX,0Ch[ESP]
        mov    EBX,[ESI]
        push    EDX
        mov    EDX,4[ESI]
        mov    EAX,[EDX]
        push    4
        call    near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
        jmp short    L41
L31:        lea    ECX,0Ch[ESP]
        mov    EDI,4
        push    ECX
        push    EDI
        call    near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L41:        mov    EBX,8[ESI]
        mov    ECX,0Ch[ESI]
        test    EBX,EBX
        je    L61
        lea    EDI,010h[ESP]
        mov    EDX,0Ch[ESI]
        mov    EAX,8[ESI]
        push    EDI
        mov    EAX,[EDX]
        push    4
        call    near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
        jmp short    L71
L61:        lea    ECX,010h[ESP]
        mov    ESI,4
        push    ECX
        push    ESI
        call    near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L71:        mov    EDX,010h[ESP]
        mov    EAX,0Ch[ESP]
        pop    EDI
        pop    ESI
        pop    EBX
        add    ESP,028h
        ret


_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv    comdat
L0:        sub    ESP,04Ch
        mov    ECX,010h[EAX]
        test    ECX,ECX
        push    EBX
        push    EBP
        push    ESI
        push    EDI
        mov    EDI,EAX
        je    L1F
        cmp    ECX,1
        je    L4A
        cmp    ECX,2
        je    L8C
        jmp    near ptr L142
L1F:        mov    ESI,[EDI]
        mov    EDX,4[EDI]
        dec    ESI
        mov    EBX,[EDI]
        add    EDX,4
        lea    EBP,8[EDI]
        mov    [EDI],ESI
        mov    4[EDI],EDX
        mov    ESI,0[EBP]
        mov    EDX,4[EBP]
        dec    ESI
        mov    EBX,0[EBP]
        add    EDX,4
        mov    0[EBP],ESI
        mov    4[EBP],EDX
        jmp    near ptr L142
L4A:        mov    ESI,[EDI]
        mov    ECX,4[EDI]
        test    ESI,ESI
        je    L63
        mov    ESI,[EDI]
        mov    EDX,4[EDI]
        dec    ESI
        mov    EBX,[EDI]
        add    EDX,4
        mov    [EDI],ESI
        mov    4[EDI],EDX
L63:        mov    ESI,8[EDI]
        mov    ECX,0Ch[EDI]
        test    ESI,ESI
        je    L142
        lea    EBP,8[EDI]
        mov    ESI,0[EBP]
        mov    EDX,4[EBP]
        dec    ESI
        mov    EBX,0[EBP]
        add    EDX,4
        mov    0[EBP],ESI
        mov    4[EBP],EDX
        jmp    near ptr L142
L8C:        mov    EBX,[EDI]
        mov    EDX,4[EDI]
        mov    ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral831MFZAxa
        mov    014h[ESP],EBX
        mov    018h[ESP],EDX
        mov    01Ch[ESP],EAX
        mov    020h[ESP],ECX
        cmp    dword ptr 014h[ESP],0
        setnz    DL
        xor    DL,1
        je    LD7
        mov    EDX,ECX
        push    dword ptr FLAT:_DATA[054h]
        push    dword ptr FLAT:_DATA[050h]
        push    0B86h
        mov    EAX,028h[ESP]
        mov    EBX,028h[ESP]
        call    EDX
        push    EDX
        push    EAX
        call    near ptr _D3std9exception7bailOutFAyaixAaZv
LD7:        mov    EAX,[EDI]
        mov    EDX,4[EDI]
        dec    EAX
        mov    EBX,[EDI]
        add    EDX,4
        mov    4[EDI],EDX
        mov    EDX,0Ch[EDI]
        mov    EBX,EDI
        mov    [EDI],EAX
        mov    EAX,8[EDI]
        mov    ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral832MFZAxa
        mov    024h[ESP],EAX
        mov    028h[ESP],EDX
        mov    02Ch[ESP],EBX
        mov    030h[ESP],ECX
        cmp    dword ptr 024h[ESP],0
        setnz    DL
        xor    DL,1
        je    L12F
        push    dword ptr FLAT:_DATA[054h]
        push    dword ptr FLAT:_DATA[050h]
        push    0B86h
        mov    EAX,038h[ESP]
        call    ECX
        push    EDX
        push    EAX
        call    near ptr _D3std9exception7bailOutFAyaixAaZv
L12F:        lea    ESI,8[EDI]
        mov    EBX,[ESI]
        mov    EDX,4[ESI]
        dec    EBX
        mov    EAX,[ESI]
        mov    [ESI],EBX
        add    EDX,4
        mov    4[ESI],EDX
L142:        pop    EDI
        pop    ESI
        pop    EBP
        pop    EBX
        add    ESP,04Ch
        ret


_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb    comdat
L0:        sub    ESP,05Ch
        mov    ECX,010h[EAX]
        test    ECX,ECX
        push    EBX
        push    ESI
        mov    ESI,EAX
        je    L21
        cmp    ECX,1
        je    LC3
        cmp    ECX,2
        je    L55
        jmp    near ptr LC3
L21:        mov    EBX,[ESI]
        mov    EDX,4[ESI]
        mov    0Ch[ESP],EBX
        mov    010h[ESP],EDX
        cmp    dword ptr 0Ch[ESP],0
        je    L4A
        mov    EBX,8[ESI]
        mov    EDX,0Ch[ESI]
        mov    014h[ESP],EBX
        mov    018h[ESP],EDX
        cmp    dword ptr 014h[ESP],0
        jne    LC3
L4A:        pop    ESI
        mov    EAX,1
        pop    EBX
        add    ESP,05Ch
        ret
L55:        mov    EBX,[ESI]
        mov    EDX,4[ESI]
        mov    ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
        mov    01Ch[ESP],EBX
        mov    020h[ESP],EDX
        mov    02Ch[ESP],EAX
        mov    030h[ESP],ECX
        cmp    dword ptr 01Ch[ESP],0
        setz    DL
        mov    EBX,8[ESI]
        mov    024h[ESP],EBX
        mov    ECX,0Ch[ESI]
        mov    028h[ESP],ECX
        cmp    dword ptr 024h[ESP],0
        setz    CL
        cmp    DL,CL
        mov    EDX,1
        je    L98
        xor    EDX,EDX
L98:        xor    DL,1
        je    LC3
        push    dword ptr FLAT:_DATA[054h]
        push    dword ptr FLAT:_DATA[050h]
        push    0ADEh
        mov    EAX,038h[ESP]
        mov    EDX,03Ch[ESP]
        mov    EBX,038h[ESP]
        call    EDX
        push    EDX
        push    EAX
        call    near ptr _D3std9exception7bailOutFAyaixAaZv
LC3:        pop    ESI
        xor    EAX,EAX
        pop    EBX
        add    ESP,05Ch
        ret


_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi    comdat
L0:        push    EAX
        mov    EDX,offset FLAT:_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi14__dgliteral829MFZC6object9Throwable
        push    EBX
        push    ESI
        cmp    dword ptr 010h[ESP],4
        sbb    ECX,ECX
        inc    ECX
        push    ECX
        lea    EBX,0Ch[ESP]
        push    EDX
        push    EBX
        call    near ptr _D3std9exception14__T7enforceTbZ7enforceFbLC6object9ThrowableZb
        mov    ECX,014h[ESP]
        mov    EAX,014h[ESP]
        mov    ESI,8[ESP]
        mov    [ECX],ESI
        pop    ESI
        pop    EBX
        pop    ECX
        ret    8


(I have not included all the code, like _D3std9exception7bailOutFAyaixAaZv).

----------------------

Just loops program #3:

LDE:    xor    EBX,EBX
        cmp    010h[ESP],BL
        je    LF5
LE6:    mov    ECX,[EBX*4][EAX]
        add    ECX,[EBX*4][EDI]
        add    ESI,ECX
        inc    EBX
        cmp    EBX,014h[ESP]
        jb    LE6
LF5:    inc    EDX
        cmp    EDX,02FAF080h
        jb    LDE

----------------------

Just loops program #4:

LBF:    xor    EBX,EBX
LC1:    mov    ECX,[EBX*4][EAX]
        add    ECX,[EBX*4][EDI]
        add    ESI,ECX
        inc    EBX
        cmp    EBX,01Eh
        jb    LC1
        inc    EDX
        cmp    EDX,02FAF080h
        jb    LBF

----------------------


More information about the Digitalmars-d mailing list