String compare performance
bearophile
bearophileHUGS at lycos.com
Sat Nov 27 08:53:06 PST 2010
While translating a common Python script to D, I have found something interesting, so I have reduced it to a little benchmark.
Below there is the reduced Python2 code (it uses Psyco), and a little program to generate some test data. The timing of the first D2 version is not good compared to the Python-Psyco program (the generation of the *300 array is a quick thing), so I have created two more D2 versions to show myself that D2 wasn't broken :-)
The reduced code looks like a syntetic benchmark, but it has about the same performance profile of a 60 lines long Python script (the original code was using xrange(0,len(...)-3,3) instead of xrange(len(...)-3), but the situation doesn't change much).
Timings, dmd compiler, best of 4, seconds:
D #1: 5.72
Psy: 1.59
D #2: 0.55
D #3: 0.34
2.3 GHz CPU.
dmd 2.050, ldc based on DMD v1.057 and llvm 2.6, gdc head based on GCC 4.4.5.
--------------------------
# Python2 version
import psyco
def test(data):
count = 0
for i in xrange(len(data) - 3):
codon = data[i : i + 3]
if codon == "TAG" or codon == "TGA" or codon == "TAA":
count += 1
return count
def main():
data = open("data.txt").read()
data = data * 300
print test(data)
psyco.full()
main()
--------------------------
To produce some test data:
# Python2 code
from itertools import islice
def rnd(ia = 3877, ic = 29573, im = 139968):
seed = 42
imf = float(im)
while 1:
seed = (seed * ia + ic) % im
r = seed / imf
yield "ACGT"[int(r * 4)]
def main():
data = "".join(islice(rnd(), 0, 120000))
file("data.txt", "w").write(data)
main()
--------------------------
A D2 traslation (I have used printf to reduce a lot the asm produced):
// D2 version #1
import std.file: read;
import std.c.stdio: printf;
int test(char[] data) {
int count;
foreach (i; 0 .. data.length - 3) {
char[] codon = data[i .. i + 3];
if (codon == "TAG" || codon == "TGA" || codon == "TAA")
count++;
}
return count;
}
void main() {
char[] data0 = cast(char[])read("data.txt");
int n = 300;
char[] data = new char[data0.length * n];
for (size_t pos; pos < data.length; pos += data0.length)
data[pos .. pos+data0.length] = data0;
printf("%d\n", test(data));
}
--------------------------
// D2 version #2
import std.file: read;
import std.c.stdio: printf;
bool cmp3(char[] s1, string s2) {
return s1[0] == s2[0] && s1[1] == s2[1] && s1[2] == s2[2];
}
int test(char[] data) {
int count;
foreach (i; 0 .. data.length - 3) {
char[] codon = data[i .. i + 3];
if (cmp3(codon, "TAG") || cmp3(codon, "TGA") || cmp3(codon, "TAA"))
count++;
}
return count;
}
void main() {
char[] data0 = cast(char[])read("data.txt");
int n = 300;
char[] data = new char[data0.length * n];
for (size_t pos; pos < data.length; pos += data0.length)
data[pos .. pos+data0.length] = data0;
printf("%d\n", test(data));
}
--------------------------
// D2 version #3
import std.file: read;
import std.c.stdio: printf;
int test(char[] data) {
int count;
foreach (i; 0 .. data.length - 3) {
if (data[i] == 'T') {
if (data[i+1] == 'A') {
if (data[i+2] == 'G') {
count++;
} else if (data[i+2] == 'A') {
count++;
}
} else if (data[i+1] == 'G') {
if (data[i+2] == 'A')
count++;
}
}
}
return count;
}
void main() {
char[] data0 = cast(char[])read("data.txt");
int n = 300;
char[] data = new char[data0.length * n];
for (size_t pos; pos < data.length; pos += data0.length)
data[pos .. pos+data0.length] = data0;
printf("%d\n", test(data));
}
--------------------------
dmd -O -release -inline test1.d
_D5test14testFAaZi comdat
L0: sub ESP,024h
push EBX
xor EBX,EBX
push EBP
mov EBP,030h[ESP]
push ESI
xor ESI,ESI
add EBP,0FFFFFFFDh
push EDI
je LA8
mov EDX,03Ch[ESP]
mov EAX,038h[ESP]
mov EDI,EDX
L22: mov ECX,offset FLAT:_D11TypeInfo_Aa6__initZ
lea EAX,3[EBX]
sub EAX,EBX
push ECX
lea EDX,[EDI][EBX]
push dword ptr FLAT:_DATA[0Ch]
push dword ptr FLAT:_DATA[08h]
mov 020h[ESP],EAX
mov 024h[ESP],EDX
push EDX
push EAX
call near ptr __adEq2
add ESP,014h
test EAX,EAX
jne L9E
mov ECX,offset FLAT:_D11TypeInfo_Aa6__initZ
push ECX
push dword ptr FLAT:_DATA[01Ch]
push dword ptr FLAT:_DATA[018h]
push dword ptr 024h[ESP]
push dword ptr 024h[ESP]
call near ptr __adEq2
add ESP,014h
test EAX,EAX
jne L9E
mov EDX,offset FLAT:_D11TypeInfo_Aa6__initZ
push EDX
push dword ptr FLAT:_DATA[02Ch]
push dword ptr FLAT:_DATA[028h]
push dword ptr 024h[ESP]
push dword ptr 024h[ESP]
call near ptr __adEq2
add ESP,014h
test EAX,EAX
je L9F
L9E: inc ESI
L9F: inc EBX
cmp EBX,EBP
jb L22
LA8: pop EDI
mov EAX,ESI
pop ESI
pop EBP
pop EBX
add ESP,024h
ret 8
--------------------------
dmd -O -release -inline test2.d
_D5test24testFAaZi comdat
sub ESP,03Ch
mov ECX,040h[ESP]
push EBX
push EBP
push ESI
xor ESI,ESI
push EDI
xor EDI,EDI
add ECX,0FFFFFFFDh
mov 01Ch[ESP],ECX
je LC5
mov EDX,054h[ESP]
mov EAX,050h[ESP]
mov EBP,EDX
L26: lea EBX,3[ESI]
sub EBX,ESI
lea ECX,[EBP][ESI]
mov 028h[ESP],ECX
mov DL,[ECX]
mov ECX,FLAT:_DATA[0Ch]
mov 024h[ESP],EBX
mov EAX,FLAT:_DATA[08h]
cmp DL,[ECX]
mov 010h[ESP],ECX
jne L65
mov EDX,028h[ESP]
mov EAX,EBX
mov EBX,010h[ESP]
mov CL,1[EDX]
cmp CL,1[EBX]
jne L65
mov DL,2[EDX]
cmp DL,2[EBX]
je LB9
L65: mov EDX,028h[ESP]
mov CL,[EDX]
mov 018h[ESP],EDX
mov EAX,024h[ESP]
mov EDX,FLAT:_DATA[01Ch]
cmp CL,[EDX]
mov EAX,FLAT:_DATA[018h]
jne L96
mov EBX,018h[ESP]
mov AL,1[EBX]
cmp AL,1[EDX]
jne L96
mov AL,2[EBX]
cmp AL,2[EDX]
je LB9
L96: mov EDX,FLAT:_DATA[02Ch]
mov EAX,FLAT:_DATA[028h]
cmp CL,[EDX]
jne LBA
mov ECX,018h[ESP]
mov BL,1[ECX]
cmp BL,1[EDX]
jne LBA
mov CL,2[ECX]
cmp CL,2[EDX]
jne LBA
LB9: inc EDI
LBA: inc ESI
cmp ESI,01Ch[ESP]
jb L26
LC5: mov EAX,EDI
pop EDI
pop ESI
pop EBP
pop EBX
add ESP,03Ch
ret 8
--------------------------
dmd -O -release -inline test3.d
_D5test34testFAaZi comdat
push EAX
xor EDX,EDX
push EBX
xor EBX,EBX
push ESI
mov ESI,010h[ESP]
add ESI,0FFFFFFFDh
je L4A
mov 8[ESP],EDX
mov EDX,014h[ESP]
mov ECX,EDX
mov EAX,010h[ESP]
mov EDX,8[ESP]
L22: cmp [ECX][EDX],054h
jne L45
mov AL,1[ECX][EDX]
cmp AL,041h
jne L39
cmp byte ptr 2[ECX][EDX],047h
je L44
jmp short L3D
L39: cmp AL,047h
jne L45
L3D: cmp byte ptr 2[ECX][EDX],041h
jne L45
L44: inc EBX
L45: inc EDX
cmp EDX,ESI
jb L22
L4A: pop ESI
mov EAX,EBX
pop EBX
pop ECX
ret 8
--------------------------
gdc -O3 -frelease -inline -S test1.d -o test1.s
_D5test14testFAaZi:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $12, %esp
movl 8(%ebp), %eax
movl 12(%ebp), %edx
movl $0, -24(%ebp)
subl $3, %eax
movl %edx, -20(%ebp)
movl %eax, -16(%ebp)
je .L5
xorl %edx, %edx
jmp .L8
.p2align 4,,7
.p2align 3
.L6:
addl $1, -24(%ebp)
addl $1, %edx
cmpl %edx, -16(%ebp)
jbe .L5
.L8:
movl -20(%ebp), %eax
movl $.LC0, %edi
movl $3, %ecx
addl %edx, %eax
movl %eax, %esi
repz cmpsb
je .L6
movl %eax, %esi
movl $.LC1, %edi
movl $3, %ecx
repz cmpsb
je .L6
movl $.LC2, %edi
movl %eax, %esi
movl $3, %ecx
repz cmpsb
je .L6
addl $1, %edx
cmpl %edx, -16(%ebp)
ja .L8
.L5:
movl -24(%ebp), %eax
addl $12, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
--------------------------------------
gdc -O3 -frelease -inline -S test2.d -o test2.s
_D5test24testFAaZi:
pushl %ebp
xorl %edx, %edx
movl %esp, %ebp
xorl %eax, %eax
pushl %edi
pushl %esi
pushl %ebx
subl $16, %esp
movl 8(%ebp), %ebx
movl 12(%ebp), %esi
movl %ebx, %edi
subl $3, %edi
jne .L18
jmp .L9
.p2align 4,,7
.p2align 3
.L10:
movl -16(%ebp), %ecx
movzbl .LC1, %ebx
cmpb (%ecx), %bl
je .L15
.L12:
movzbl .LC2, %ebx
cmpb (%ecx), %bl
je .L21
.L13:
addl $1, %edx
cmpl %edx, %edi
jbe .L9
.p2align 4,,7
.p2align 3
.L18:
leal (%esi,%edx), %ecx
movzbl .LC0, %ebx
movl $3, -20(%ebp)
movl %ecx, -16(%ebp)
cmpb (%ecx), %bl
jne .L10
movzbl .LC0+1, %ebx
cmpb 1(%ecx), %bl
jne .L10
movzbl .LC0+2, %ebx
cmpb 2(%ecx), %bl
jne .L10
.L11:
addl $1, %edx
addl $1, %eax
cmpl %edx, %edi
ja .L18
.L9:
addl $16, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.p2align 4,,7
.p2align 3
.L15:
movzbl .LC1+1, %ebx
cmpb 1(%ecx), %bl
jne .L12
movzbl .LC1+2, %ebx
cmpb 2(%ecx), %bl
je .L11
movzbl .LC2, %ebx
cmpb (%ecx), %bl
jne .L13
.p2align 4,,7
.p2align 3
.L21:
movzbl .LC2+1, %ebx
cmpb 1(%ecx), %bl
jne .L13
movzbl .LC2+2, %ebx
cmpb 2(%ecx), %bl
jne .L13
jmp .L11
--------------------------------------
gdc -O3 -frelease -inline -S test3.d -o test3.s
_D5test34testFAaZi:
pushl %ebp
xorl %eax, %eax
movl %esp, %ebp
pushl %esi
movl 8(%ebp), %esi
pushl %ebx
movl 12(%ebp), %ebx
subl $3, %esi
je .L6
movl $1, %edx
jmp .L7
.p2align 4,,7
.p2align 3
.L3:
cmpl %edx, %esi
leal 1(%edx), %ecx
jbe .L6
.L11:
movl %ecx, %edx
.L7:
cmpb $84, -1(%ebx,%edx)
jne .L3
movzbl (%ebx,%edx), %ecx
cmpb $65, %cl
je .L10
cmpb $71, %cl
jne .L3
xorl %ecx, %ecx
cmpb $65, 1(%ebx,%edx)
sete %cl
addl %ecx, %eax
cmpl %edx, %esi
leal 1(%edx), %ecx
ja .L11
.p2align 4,,7
.p2align 3
.L6:
popl %ebx
popl %esi
popl %ebp
ret
---------------------------
ldc -O3 -release -inline -output-s test1.d
_D5test14testFAaZi:
pushl %ebp
pushl %ebx
pushl %edi
pushl %esi
subl $20, %esp
movl 40(%esp), %esi
cmpl $3, %esi
je .LBB1_8
addl $4294967293, %esi
xorl %ebx, %ebx
movl %ebx, %edi
.align 16
.LBB1_2:
movl 44(%esp), %eax
leal (%eax,%ebx), %ebp
movl %ebp, 4(%esp)
movl $_D11TypeInfo_Aa6__initZ, 16(%esp)
movl $.str, 12(%esp)
movl $3, 8(%esp)
movl $3, (%esp)
call _adEq
testl %eax, %eax
jne .LBB1_5
movl %ebp, 4(%esp)
movl $_D11TypeInfo_Aa6__initZ, 16(%esp)
movl $.str1, 12(%esp)
movl $3, 8(%esp)
movl $3, (%esp)
call _adEq
testl %eax, %eax
jne .LBB1_5
movl %ebp, 4(%esp)
movl $_D11TypeInfo_Aa6__initZ, 16(%esp)
movl $.str2, 12(%esp)
movl $3, 8(%esp)
movl $3, (%esp)
call _adEq
testl %eax, %eax
je .LBB1_6
.LBB1_5:
incl %edi
.LBB1_6:
incl %ebx
cmpl %esi, %ebx
jb .LBB1_2
.LBB1_7:
movl %edi, %eax
addl $20, %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
ret $8
.LBB1_8:
xorl %edi, %edi
jmp .LBB1_7
---------------------------
ldc -O3 -release -inline -output-s test2.d
_D5test24testFAaZi:
pushl %ebx
pushl %esi
movl 12(%esp), %ecx
cmpl $3, %ecx
je .LBB2_14
movl 16(%esp), %edx
addl $4294967293, %ecx
xorl %eax, %eax
movl %eax, %esi
.align 16
.LBB2_2:
movb (%edx,%esi), %bl
cmpb $84, %bl
jne .LBB2_12
movb 1(%edx,%esi), %bh
cmpb $65, %bh
jne .LBB2_5
cmpb $71, 2(%esi,%edx)
je .LBB2_11
.LBB2_5:
cmpb $84, %bl
jne .LBB2_12
cmpb $71, %bh
jne .LBB2_8
cmpb $65, 2(%esi,%edx)
je .LBB2_11
.LBB2_8:
cmpb $65, %bh
setne %bh
cmpb $84, %bl
jne .LBB2_12
testb %bh, %bh
jne .LBB2_12
cmpb $65, 2(%esi,%edx)
jne .LBB2_12
.LBB2_11:
incl %eax
.LBB2_12:
incl %esi
cmpl %ecx, %esi
jb .LBB2_2
.LBB2_13:
popl %esi
popl %ebx
ret $8
.LBB2_14:
xorl %eax, %eax
jmp .LBB2_13
---------------------------
ldc -O3 -release -inline -output-s test3.d
_D5test34testFAaZi:
pushl %ebx
pushl %edi
pushl %esi
movl 16(%esp), %ecx
cmpl $3, %ecx
je .LBB1_12
movl 20(%esp), %edx
addl $4294967293, %ecx
xorl %eax, %eax
movl %eax, %esi
.align 16
.LBB1_2:
cmpb $84, (%edx,%esi)
jne .LBB1_10
movb 1(%edx,%esi), %bl
cmpb $71, %bl
je .LBB1_8
cmpb $65, %bl
jne .LBB1_10
movb 2(%esi,%edx), %bl
cmpb $71, %bl
jne .LBB1_7
incl %eax
jmp .LBB1_10
.LBB1_7:
cmpb $65, %bl
jmp .LBB1_9
.LBB1_8:
cmpb $65, 2(%esi,%edx)
.LBB1_9:
sete %bl
movzbl %bl, %edi
addl %edi, %eax
.LBB1_10:
incl %esi
cmpl %ecx, %esi
jb .LBB1_2
.LBB1_11:
popl %esi
popl %edi
popl %ebx
ret $8
.LBB1_12:
xorl %eax, %eax
jmp .LBB1_11
---------------------------
Bye,
bearophile
More information about the Digitalmars-d
mailing list