Suffix tree benchmark

Basile B. via Digitalmars-d digitalmars-d at puremagic.com
Wed May 18 13:06:33 PDT 2016


On Wednesday, 18 May 2016 at 19:55:40 UTC, Andrei Alexandrescu 
wrote:
> On 5/18/16 3:14 PM, Basile B. wrote:
>> Hey, los douchbagos...
>>
>> I've recently worked on a suffix array
>> (https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555)
>>
>> Test for presence are excelent (O1)
>>
>> canFind    % of canFind   : 100
>> SuffixArr1 % of canFind   : 2.78019
>> SuffixArr2 % of canFind   : 3.15439
>> runtime AA % of runtime AA: 100
>> EMSI hashs % of runtime AA: 167.434
>> SuffixArr1 % of runtime AA: 97.8582
>> SuffixArr2 % of runtime AA: 111.029
>> EMSI hashs % of EMSI hashs: 100
>> SuffixArr1 % of EMSI hashs: 58.4458
>> SuffixArr2 % of EMSI hashs: 66.3123
>
> Could you explain the numbers a bit? -- Andrei

the benchmark is:
----
#!runnable-flags: -O -release -inline -boundscheck=off
module runnable;

import std.stdio;
import containers.hashset, containers.internal.hash;
import std.experimental.allocator.mallocator;

void main(string[] args)
{
     import std.datetime, iz.strings, std.random, std.range;
     import core.memory: GC;
     GC.disable;

     string[] wordsA, wordsB;
     ubyte[] source = cast(ubyte[])"ABCDEFGHIJKL".dup;
     foreach(i; 0..64)
     {
         wordsA ~= cast(string) randomCover(source, rndGen).array;
         rndGen.popFront;
         if ((i & 1) == 0)
             wordsB ~= wordsA[i];
         else
         {
             wordsB ~= cast(string) randomCover(source, 
rndGen).array;
             rndGen.popFront;
         }
     }

     auto arrOld = SuffixArray!string(wordsA);
     auto arrNew = SuffixArrayTemp!string(wordsA);

     enum repeat = 2^^10;

     size_t test(T)(ref T t)
     {
         StopWatch sw;
         sw.start;
         foreach(i; 0..64)
             foreach(j; 0..repeat)
         {
             auto n = wordsB[i] in t;
         }
         return sw.peek.hnsecs;
     }

     size_t refTest()
     {
         import std.algorithm.searching: canFind;
         import std.algorithm.sorting;
         StopWatch sw;
         auto sorted = sort(wordsA).array;
         sw.start;
         foreach(i; 0..64)
             foreach(j; 0..repeat)
         {
             auto n = sorted.canFind(wordsB[i]);
         }
         return sw.peek.hnsecs;
     }

     size_t aaTest()
     {
         StopWatch sw;
         bool[string] aa;
         foreach(w; wordsA)
             aa[w] = true;
         sw.start;
         foreach(i; 0..64)
             foreach(j; 0..repeat)
         {
             auto n = wordsB[i] in aa;
         }
         return sw.peek.hnsecs;
     }

     size_t aa2Test()
     {
         static size_t hasher(string value) pure
         {
             size_t result;
             foreach(i; 1 .. value.length)
                 result += value[i-1] ^ value[i] << (i-1);
             return result;
         }
         StopWatch sw;
         alias HS = HashSet!(string, Mallocator, 
generateHash!string, false);
         HS aa;
         foreach(w; wordsA)
             aa.insert(w);
         sw.start;
         foreach(i; 0..64)
             foreach(j; 0..repeat)
         {
             auto n = wordsB[i] in aa;
         }
         return sw.peek.hnsecs;
     }


     auto tA = refTest();
     auto tB = aaTest();
     auto tC = aa2Test();
     auto t1 = test(arrOld);
     auto t2 = test(arrNew);

     writeln("canFind    % of canFind   : ", tA / (tA * 0.01));
     writeln("SuffixArr1 % of canFind   : ", t1 / (tA * 0.01));
     writeln("SuffixArr2 % of canFind   : ", t2 / (tA * 0.01));

     writeln("runtime AA % of runtime AA: ", tB / (tB * 0.01));
     writeln("EMSI hashs % of runtime AA: ", tC / (tB * 0.01));
     writeln("SuffixArr1 % of runtime AA: ", t1 / (tB * 0.01));
     writeln("SuffixArr2 % of runtime AA: ", t2 / (tB * 0.01));

     writeln("EMSI hashs % of EMSI hashs: ", tC / (tC * 0.01));
     writeln("SuffixArr1 % of EMSI hashs: ", t1 / (tC * 0.01));
     writeln("SuffixArr2 % of EMSI hashs: ", t2 / (tC * 0.01));
}
----


More information about the Digitalmars-d mailing list