String compare performance

bearophile bearophileHUGS at lycos.com
Sun Nov 28 14:32:23 PST 2010


Robert Jacques:

> Still, part of the point was that string comparisons in general were way  
> too slow. Anyways, I've applied the same technique in a partially unrolled  
> version if you want to check it out:

The version #11 (I have not reformatted your function):


// D version #11
import std.file: read;
import std.c.stdio: printf;

// not formatted
bool arrayComp(T, size_t N)(const T[] a, ref const T[N] b) pure nothrow {
     static if(T.sizeof*N <= uint.sizeof) {
         return a.length == N && !( (*(cast(uint*)a.ptr)  ^
*(cast(uint*)b.ptr))  & (uint.max >> 8*(uint.sizeof  - T.sizeof*N) ));
     } else static if(T.sizeof*N <= ulong.sizeof) {
         return a.length == N && !( (*(cast(ulong*)a.ptr) ^
*(cast(ulong*)b.ptr)) & (ulong.max>> 8*(ulong.sizeof - T.sizeof*N) ));
     } else { // Fall back to a loop
         if(a.length != N || (*(cast(ulong*)a.ptr) != *(cast(ulong*)b.ptr))
) return false;
         enum alignment = T.sizeof*N % ulong.sizeof > 0 ? T.sizeof*N %
ulong.sizeof : ulong.sizeof;
         auto pa      = cast(ulong*)(cast(ubyte*)a.ptr + alignment);
         auto pb      = cast(ulong*)(cast(ubyte*)b.ptr + alignment);
         auto pa_end  = cast(ulong*)(cast(ubyte*)a.ptr + T.sizeof*N);
         while (pa < pa_end) {
             if(*pa++ != *pb++ ) return false;
         }
         return true;
     }
}

pure int test(const char[] data) {
    int count;
    foreach (i; 0 ..  data.length - 3) {
        const char[] codon = data[i .. i + 3];
        if (arrayComp!(char, 3)(codon, "TAG") ||
            arrayComp!(char, 3)(codon, "TGA") ||
            arrayComp!(char, 3)(codon, "TAA"))
            count++;
    }
    return count;
}

void main() {
    char[] data0 = cast(char[])read("data.txt");
    int n = 300;
    char[] data = new char[data0.length * n];
    for (size_t pos; pos < data.length; pos += data0.length)
        data[pos .. pos + data0.length] = data0;

    printf("%d\n", test(data));
}


Timings, dmd compiler, best of 4, seconds:
  D #1:  5.72
  D #9:  5.04
  D #10: 3.31
  D #4:  1.84
  D #5:  1.73
  Psy:   1.59
  D #8:  1.51
  D #7:  0.56
  D #2:  0.55
  D #6:  0.47
  D #11: 0.47
  D #3:  0.34

Only the #3 (tree-based) version is faster.
This problem probably needs to be solved by the compiler.

Bye,
bearophile


More information about the Digitalmars-d mailing list