std.range.zip performance
bearophile
bearophileHUGS at lycos.com
Tue Feb 15 16:24:41 PST 2011
While doing some benchmarks on the Clay language I've found something interesting.
I have timed a zip() done on two short arrays, and I have found this D version too much slow compared to normal array code. Higher order functions like zip, map, filter, etc are going to stay, they aren't toys, so I think the D compiler has to digest them better.
(There is lot of of asm at the tail of this post, I don't know if some of t will get cut away.)
Bye,
bearophile
Timings, seconds, best of 3:
#1: 1.95
#2: 61.50
#3: 2.25
#4: 1.52
----------------------
Binary sizes, bytes:
#1: 33_016
#2: 284_188
#3: 103_964
#4: 103_964
----------------------
Compilers and command line used:
DMD 2.051
clay compiler (hg id dec41ba07d58 tip, llvm r122866, Jan 5 2011)
clay -inline test1.clay -o test1.exe
dmd -O -release -inline test2.d
dmd -O -release -inline test3.d
To produce asm code with Clay:
clay -inline -asm test1.clay -o test1.s
Output: -1149672960 for all versions.
----------------------
Code used:
// program #1 (Clay)
main() {
var a = Vector([18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28]);
var b = Vector([9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25]);
var tot = 0; // 32 bit signed int
for (i in range(0, 50*1000*1000))
for (x, y in zipped(a, b))
tot += x + y;
println(tot);
}
----------------------
// program #2 (D2)
import std.c.stdio: printf;
import std.range: zip;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (xy; zip(a, b))
tot += xy[0] + xy[1];
printf("%d\n", tot);
}
----------------------
// program #3 (D2)
import std.c.stdio: printf;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (j; 0 .. a.length)
tot += a[j] + b[j];
printf("%d\n", tot);
}
----------------------
// program #4 (D2)
import std.c.stdio: printf;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (j; 0 .. 30)
tot += a[j] + b[j];
printf("%d\n", tot);
}
----------------------
Just loops program #1:
movl $50000000, %eax # imm = 0x2FAF080
.align 16, 0x90
LBB2_4: # %bb.nph.i.i
# =>This Loop Header: Depth=1
# Child Loop BB2_5 Depth 2
xorl %edx, %edx
.align 16, 0x90
LBB2_5: # %return410.i.i
# Parent Loop BB2_4 Depth=1
# => This Inner Loop Header: Depth=2
addl (%esi,%edx,4), %ecx
addl (%edi,%edx,4), %ecx
incl %edx
cmpl $30, %edx
jne LBB2_5
# BB#3: # %return272.loopexit.i.i
# in Loop: Header=BB2_4 Depth=1
decl %eax
jne LBB2_4
----------------------
Just loops program #2:
LCE: push dword ptr 018h[ESP]
mov ESI,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip6__initZ
push dword ptr 018h[ESP]
push dword ptr 028h[ESP]
push dword ptr 028h[ESP]
push 0
lea EDI,078h[ESP]
movsd
movsd
movsd
movsd
movsd
lea EAX,078h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip
mov ESI,EAX
lea EDI,044h[ESP]
movsd
movsd
movsd
movsd
movsd
lea ESI,044h[ESP]
lea EDI,024h[ESP]
movsd
movsd
movsd
movsd
movsd
lea EAX,024h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
xor AL,1
je L153
L11C: lea EAX,024h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
mov 07Ch[ESP],EAX
mov ECX,07Ch[ESP]
lea EAX,024h[ESP]
mov 080h[ESP],EDX
add ECX,080h[ESP]
add EBX,ECX
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv
lea EAX,024h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
xor AL,1
jne L11C
L153: inc EBP
cmp EBP,02FAF080h
jb LCE
_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip comdat
push EBX
mov ECX,8[ESP]
mov EBX,014h[ESP]
push ESI
mov ESI,EAX
mov EDX,01Ch[ESP]
mov 4[ESI],EDX
mov EDX,014h[ESP]
mov [ESI],EBX
mov EBX,010h[ESP]
mov 010h[ESI],ECX
mov 8[ESI],EBX
mov 0Ch[ESI],EDX
pop ESI
pop EBX
ret 014h
_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb comdat
L0: sub ESP,05Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push ESI
mov ESI,EAX
je L21
cmp ECX,1
je LC3
cmp ECX,2
je L55
jmp near ptr LC3
L21: mov EBX,[ESI]
mov EDX,4[ESI]
mov 0Ch[ESP],EBX
mov 010h[ESP],EDX
cmp dword ptr 0Ch[ESP],0
je L4A
mov EBX,8[ESI]
mov EDX,0Ch[ESI]
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
jne LC3
L4A: pop ESI
mov EAX,1
pop EBX
add ESP,05Ch
ret
L55: mov EBX,[ESI]
mov EDX,4[ESI]
mov ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
mov 01Ch[ESP],EBX
mov 020h[ESP],EDX
mov 02Ch[ESP],EAX
mov 030h[ESP],ECX
cmp dword ptr 01Ch[ESP],0
setz DL
mov EBX,8[ESI]
mov 024h[ESP],EBX
mov ECX,0Ch[ESI]
mov 028h[ESP],ECX
cmp dword ptr 024h[ESP],0
setz CL
cmp DL,CL
mov EDX,1
je L98
xor EDX,EDX
L98: xor DL,1
je LC3
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0ADEh
mov EAX,038h[ESP]
mov EDX,03Ch[ESP]
mov EBX,038h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LC3: pop ESI
xor EAX,EAX
pop EBX
add ESP,05Ch
ret
_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple comdat
assume CS:_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
L0: sub ESP,028h
mov EDX,4[EAX]
push EBX
push ESI
mov ESI,EAX
mov EBX,[ESI]
push EDI
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
je L31
lea EDX,0Ch[ESP]
mov EBX,[ESI]
push EDX
mov EDX,4[ESI]
mov EAX,[EDX]
push 4
call near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
jmp short L41
L31: lea ECX,0Ch[ESP]
mov EDI,4
push ECX
push EDI
call near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L41: mov EBX,8[ESI]
mov ECX,0Ch[ESI]
test EBX,EBX
je L61
lea EDI,010h[ESP]
mov EDX,0Ch[ESI]
mov EAX,8[ESI]
push EDI
mov EAX,[EDX]
push 4
call near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
jmp short L71
L61: lea ECX,010h[ESP]
mov ESI,4
push ECX
push ESI
call near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L71: mov EDX,010h[ESP]
mov EAX,0Ch[ESP]
pop EDI
pop ESI
pop EBX
add ESP,028h
ret
_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv comdat
L0: sub ESP,04Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push EBP
push ESI
push EDI
mov EDI,EAX
je L1F
cmp ECX,1
je L4A
cmp ECX,2
je L8C
jmp near ptr L142
L1F: mov ESI,[EDI]
mov EDX,4[EDI]
dec ESI
mov EBX,[EDI]
add EDX,4
lea EBP,8[EDI]
mov [EDI],ESI
mov 4[EDI],EDX
mov ESI,0[EBP]
mov EDX,4[EBP]
dec ESI
mov EBX,0[EBP]
add EDX,4
mov 0[EBP],ESI
mov 4[EBP],EDX
jmp near ptr L142
L4A: mov ESI,[EDI]
mov ECX,4[EDI]
test ESI,ESI
je L63
mov ESI,[EDI]
mov EDX,4[EDI]
dec ESI
mov EBX,[EDI]
add EDX,4
mov [EDI],ESI
mov 4[EDI],EDX
L63: mov ESI,8[EDI]
mov ECX,0Ch[EDI]
test ESI,ESI
je L142
lea EBP,8[EDI]
mov ESI,0[EBP]
mov EDX,4[EBP]
dec ESI
mov EBX,0[EBP]
add EDX,4
mov 0[EBP],ESI
mov 4[EBP],EDX
jmp near ptr L142
L8C: mov EBX,[EDI]
mov EDX,4[EDI]
mov ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral831MFZAxa
mov 014h[ESP],EBX
mov 018h[ESP],EDX
mov 01Ch[ESP],EAX
mov 020h[ESP],ECX
cmp dword ptr 014h[ESP],0
setnz DL
xor DL,1
je LD7
mov EDX,ECX
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0B86h
mov EAX,028h[ESP]
mov EBX,028h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LD7: mov EAX,[EDI]
mov EDX,4[EDI]
dec EAX
mov EBX,[EDI]
add EDX,4
mov 4[EDI],EDX
mov EDX,0Ch[EDI]
mov EBX,EDI
mov [EDI],EAX
mov EAX,8[EDI]
mov ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral832MFZAxa
mov 024h[ESP],EAX
mov 028h[ESP],EDX
mov 02Ch[ESP],EBX
mov 030h[ESP],ECX
cmp dword ptr 024h[ESP],0
setnz DL
xor DL,1
je L12F
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0B86h
mov EAX,038h[ESP]
call ECX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
L12F: lea ESI,8[EDI]
mov EBX,[ESI]
mov EDX,4[ESI]
dec EBX
mov EAX,[ESI]
mov [ESI],EBX
add EDX,4
mov 4[ESI],EDX
L142: pop EDI
pop ESI
pop EBP
pop EBX
add ESP,04Ch
ret
_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb comdat
L0: sub ESP,05Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push ESI
mov ESI,EAX
je L21
cmp ECX,1
je LC3
cmp ECX,2
je L55
jmp near ptr LC3
L21: mov EBX,[ESI]
mov EDX,4[ESI]
mov 0Ch[ESP],EBX
mov 010h[ESP],EDX
cmp dword ptr 0Ch[ESP],0
je L4A
mov EBX,8[ESI]
mov EDX,0Ch[ESI]
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
jne LC3
L4A: pop ESI
mov EAX,1
pop EBX
add ESP,05Ch
ret
L55: mov EBX,[ESI]
mov EDX,4[ESI]
mov ECX,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
mov 01Ch[ESP],EBX
mov 020h[ESP],EDX
mov 02Ch[ESP],EAX
mov 030h[ESP],ECX
cmp dword ptr 01Ch[ESP],0
setz DL
mov EBX,8[ESI]
mov 024h[ESP],EBX
mov ECX,0Ch[ESI]
mov 028h[ESP],ECX
cmp dword ptr 024h[ESP],0
setz CL
cmp DL,CL
mov EDX,1
je L98
xor EDX,EDX
L98: xor DL,1
je LC3
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0ADEh
mov EAX,038h[ESP]
mov EDX,03Ch[ESP]
mov EBX,038h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LC3: pop ESI
xor EAX,EAX
pop EBX
add ESP,05Ch
ret
_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi comdat
L0: push EAX
mov EDX,offset FLAT:_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi14__dgliteral829MFZC6object9Throwable
push EBX
push ESI
cmp dword ptr 010h[ESP],4
sbb ECX,ECX
inc ECX
push ECX
lea EBX,0Ch[ESP]
push EDX
push EBX
call near ptr _D3std9exception14__T7enforceTbZ7enforceFbLC6object9ThrowableZb
mov ECX,014h[ESP]
mov EAX,014h[ESP]
mov ESI,8[ESP]
mov [ECX],ESI
pop ESI
pop EBX
pop ECX
ret 8
(I have not included all the code, like _D3std9exception7bailOutFAyaixAaZv).
----------------------
Just loops program #3:
LDE: xor EBX,EBX
cmp 010h[ESP],BL
je LF5
LE6: mov ECX,[EBX*4][EAX]
add ECX,[EBX*4][EDI]
add ESI,ECX
inc EBX
cmp EBX,014h[ESP]
jb LE6
LF5: inc EDX
cmp EDX,02FAF080h
jb LDE
----------------------
Just loops program #4:
LBF: xor EBX,EBX
LC1: mov ECX,[EBX*4][EAX]
add ECX,[EBX*4][EDI]
add ESI,ECX
inc EBX
cmp EBX,01Eh
jb LC1
inc EDX
cmp EDX,02FAF080h
jb LBF
----------------------
More information about the Digitalmars-d
mailing list