std.algorithm issue

bearophile bearophileHUGS at lycos.com
Sat May 24 16:11:37 PDT 2008


Dee Girl:
> I did not install bearophile library. It would be interesting to see what the speed is of std.algorithm and d.func.

I am not using D 2.x, but I have performed some benchmarks, with quite interesting results (DMD 1.029 because my libs don't work at all with DMD 1.030, on a Core Duo, using 1 core only):

import std.stdio: put = writef, putr = writefln;
import d.func: map, range;
import d.time: clock;
import std.traits: ReturnType;
import d.extra: NewVoidCArray, delVoidC, NewVoidGCArray;
import std.c.stdlib: alloca;

static int inv(int x) { return -x; }

ReturnType!(f)[] Map2(alias f)(int[] arr) {
    auto result = new ReturnType!(f)[arr.length];
    foreach (i, el; arr)
        result[i] = f(el);
    return result;
}

ReturnType!(f)[] Map3(alias f)(int[] arr) {
    auto result = NewVoidGCArray!(int)(arr.length);
    foreach (i, el; arr)
        result[i] = f(el);
    return result;
}

void Map4(alias f)(int[] arr) {
    auto result = (cast(int*)alloca(arr.length * int.sizeof))[0 .. arr.length];
    if (result.ptr is null)
        throw new Exception("Not enough free stack");

    foreach (i, el; arr)
        result[i] = f(el);
    return result;
}

void main() {
    auto data = range(6_000_000);
    double t;
    typeof(data) r;

    t = clock();
    for (uint i; i < 100U; ++i) {
        r = new int[data.length];
        foreach (pos, el; data)
            r[pos] = -el;
    }
    putr("inline: ", clock() - t, " ", r[$-1]);

    t = clock();
    for (uint i; i < 100U; ++i)
        r = map(&inv, data);
    putr("map global func: ", clock() - t, " ", r[$-1]);

    t = clock();
    for (uint i; i < 100U; ++i)
        r = map((int x) {return -x;}, data);
    putr("map locally defined delegate: ", clock() - t, " ", r[$-1]);

    t = clock();
    for (uint i; i < 100U; ++i)
        r = map(function(int x) {return -x;}, data);
    putr("map locally defined function: ", clock() - t, " ", r[$-1]);

    t = clock();
    for (uint i; i < 100U; ++i)
        r = Map2!(inv)(data);
    putr("Map2 with global function: ", clock() - t, " ", r[$-1]);

    t = clock();
    for (uint i; i < 100U; ++i) {
        auto r2 = NewVoidCArray!(int)(data.length);
        foreach (pos, el; data)
            r2[pos] = -el;
        delVoidC(r2);
    }
    putr("inline C malloc: ", clock() - t);

    t = clock();
    for (uint i; i < 100U; ++i)
        r = Map3!(inv)(data);
    putr("Map3 with global function: ", clock() - t, " ", r[$-1]);

    t = clock();
    for (uint i; i < 100U; ++i)
        Map4!(inv)(data);
    putr("Map4 with global function: ", clock() - t);
}



Timings results, n = 6_000_000 of ints:
  inline: 4.59
  map global func: 4.95
  map locally defined delegate: 4.73
  map locally defined function: 4.31
  Map2 with global function: 3.97
  inline C malloc: 2.01
  Map3 with global function: 2.53
  Map4 with global function: 2.0

I can see two surprises there, one is that putting "function" in the map avoids it to become a delegate, speeding up the code a little. The other bigger surprise is the first version (inlined) being slower than the version "map locally defined function".

Bye,
bearophile



More information about the Digitalmars-d mailing list