std.algorithm issue
bearophile
bearophileHUGS at lycos.com
Sat May 24 16:11:37 PDT 2008
Dee Girl:
> I did not install bearophile library. It would be interesting to see what the speed is of std.algorithm and d.func.
I am not using D 2.x, but I have performed some benchmarks, with quite interesting results (DMD 1.029 because my libs don't work at all with DMD 1.030, on a Core Duo, using 1 core only):
import std.stdio: put = writef, putr = writefln;
import d.func: map, range;
import d.time: clock;
import std.traits: ReturnType;
import d.extra: NewVoidCArray, delVoidC, NewVoidGCArray;
import std.c.stdlib: alloca;
static int inv(int x) { return -x; }
ReturnType!(f)[] Map2(alias f)(int[] arr) {
auto result = new ReturnType!(f)[arr.length];
foreach (i, el; arr)
result[i] = f(el);
return result;
}
ReturnType!(f)[] Map3(alias f)(int[] arr) {
auto result = NewVoidGCArray!(int)(arr.length);
foreach (i, el; arr)
result[i] = f(el);
return result;
}
void Map4(alias f)(int[] arr) {
auto result = (cast(int*)alloca(arr.length * int.sizeof))[0 .. arr.length];
if (result.ptr is null)
throw new Exception("Not enough free stack");
foreach (i, el; arr)
result[i] = f(el);
return result;
}
void main() {
auto data = range(6_000_000);
double t;
typeof(data) r;
t = clock();
for (uint i; i < 100U; ++i) {
r = new int[data.length];
foreach (pos, el; data)
r[pos] = -el;
}
putr("inline: ", clock() - t, " ", r[$-1]);
t = clock();
for (uint i; i < 100U; ++i)
r = map(&inv, data);
putr("map global func: ", clock() - t, " ", r[$-1]);
t = clock();
for (uint i; i < 100U; ++i)
r = map((int x) {return -x;}, data);
putr("map locally defined delegate: ", clock() - t, " ", r[$-1]);
t = clock();
for (uint i; i < 100U; ++i)
r = map(function(int x) {return -x;}, data);
putr("map locally defined function: ", clock() - t, " ", r[$-1]);
t = clock();
for (uint i; i < 100U; ++i)
r = Map2!(inv)(data);
putr("Map2 with global function: ", clock() - t, " ", r[$-1]);
t = clock();
for (uint i; i < 100U; ++i) {
auto r2 = NewVoidCArray!(int)(data.length);
foreach (pos, el; data)
r2[pos] = -el;
delVoidC(r2);
}
putr("inline C malloc: ", clock() - t);
t = clock();
for (uint i; i < 100U; ++i)
r = Map3!(inv)(data);
putr("Map3 with global function: ", clock() - t, " ", r[$-1]);
t = clock();
for (uint i; i < 100U; ++i)
Map4!(inv)(data);
putr("Map4 with global function: ", clock() - t);
}
Timings results, n = 6_000_000 of ints:
inline: 4.59
map global func: 4.95
map locally defined delegate: 4.73
map locally defined function: 4.31
Map2 with global function: 3.97
inline C malloc: 2.01
Map3 with global function: 2.53
Map4 with global function: 2.0
I can see two surprises there, one is that putting "function" in the map avoids it to become a delegate, speeding up the code a little. The other bigger surprise is the first version (inlined) being slower than the version "map locally defined function".
Bye,
bearophile
More information about the Digitalmars-d
mailing list