Suffix tree benchmark
Basile B. via Digitalmars-d
digitalmars-d at puremagic.com
Wed May 18 13:06:33 PDT 2016
On Wednesday, 18 May 2016 at 19:55:40 UTC, Andrei Alexandrescu
wrote:
> On 5/18/16 3:14 PM, Basile B. wrote:
>> Hey, los douchbagos...
>>
>> I've recently worked on a suffix array
>> (https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555)
>>
>> Test for presence are excelent (O1)
>>
>> canFind % of canFind : 100
>> SuffixArr1 % of canFind : 2.78019
>> SuffixArr2 % of canFind : 3.15439
>> runtime AA % of runtime AA: 100
>> EMSI hashs % of runtime AA: 167.434
>> SuffixArr1 % of runtime AA: 97.8582
>> SuffixArr2 % of runtime AA: 111.029
>> EMSI hashs % of EMSI hashs: 100
>> SuffixArr1 % of EMSI hashs: 58.4458
>> SuffixArr2 % of EMSI hashs: 66.3123
>
> Could you explain the numbers a bit? -- Andrei
the benchmark is:
----
#!runnable-flags: -O -release -inline -boundscheck=off
module runnable;
import std.stdio;
import containers.hashset, containers.internal.hash;
import std.experimental.allocator.mallocator;
void main(string[] args)
{
import std.datetime, iz.strings, std.random, std.range;
import core.memory: GC;
GC.disable;
string[] wordsA, wordsB;
ubyte[] source = cast(ubyte[])"ABCDEFGHIJKL".dup;
foreach(i; 0..64)
{
wordsA ~= cast(string) randomCover(source, rndGen).array;
rndGen.popFront;
if ((i & 1) == 0)
wordsB ~= wordsA[i];
else
{
wordsB ~= cast(string) randomCover(source,
rndGen).array;
rndGen.popFront;
}
}
auto arrOld = SuffixArray!string(wordsA);
auto arrNew = SuffixArrayTemp!string(wordsA);
enum repeat = 2^^10;
size_t test(T)(ref T t)
{
StopWatch sw;
sw.start;
foreach(i; 0..64)
foreach(j; 0..repeat)
{
auto n = wordsB[i] in t;
}
return sw.peek.hnsecs;
}
size_t refTest()
{
import std.algorithm.searching: canFind;
import std.algorithm.sorting;
StopWatch sw;
auto sorted = sort(wordsA).array;
sw.start;
foreach(i; 0..64)
foreach(j; 0..repeat)
{
auto n = sorted.canFind(wordsB[i]);
}
return sw.peek.hnsecs;
}
size_t aaTest()
{
StopWatch sw;
bool[string] aa;
foreach(w; wordsA)
aa[w] = true;
sw.start;
foreach(i; 0..64)
foreach(j; 0..repeat)
{
auto n = wordsB[i] in aa;
}
return sw.peek.hnsecs;
}
size_t aa2Test()
{
static size_t hasher(string value) pure
{
size_t result;
foreach(i; 1 .. value.length)
result += value[i-1] ^ value[i] << (i-1);
return result;
}
StopWatch sw;
alias HS = HashSet!(string, Mallocator,
generateHash!string, false);
HS aa;
foreach(w; wordsA)
aa.insert(w);
sw.start;
foreach(i; 0..64)
foreach(j; 0..repeat)
{
auto n = wordsB[i] in aa;
}
return sw.peek.hnsecs;
}
auto tA = refTest();
auto tB = aaTest();
auto tC = aa2Test();
auto t1 = test(arrOld);
auto t2 = test(arrNew);
writeln("canFind % of canFind : ", tA / (tA * 0.01));
writeln("SuffixArr1 % of canFind : ", t1 / (tA * 0.01));
writeln("SuffixArr2 % of canFind : ", t2 / (tA * 0.01));
writeln("runtime AA % of runtime AA: ", tB / (tB * 0.01));
writeln("EMSI hashs % of runtime AA: ", tC / (tB * 0.01));
writeln("SuffixArr1 % of runtime AA: ", t1 / (tB * 0.01));
writeln("SuffixArr2 % of runtime AA: ", t2 / (tB * 0.01));
writeln("EMSI hashs % of EMSI hashs: ", tC / (tC * 0.01));
writeln("SuffixArr1 % of EMSI hashs: ", t1 / (tC * 0.01));
writeln("SuffixArr2 % of EMSI hashs: ", t2 / (tC * 0.01));
}
----
More information about the Digitalmars-d
mailing list