Holes in structs and opEquals

bearophile bearophileHUGS at lycos.com
Tue Mar 9 08:41:53 PST 2010


I guess what's holding back Walter on this improvement is the higher performance of the equal done ignoring the holes compared to the more correct one. So I have done a test to see how can large is such performance difference, because with not even a bit of experimental data it's hard to discuss:


import std.stdio: writeln;
import std.c.string: memcmp, memset;


/*
Basic uniform random generator: Minimal Standard in Park and
Miller (1988): "Random Number Generators: Good Ones Are Hard to
Find", Comm. of the ACM, 31, 1192-1201.
Parameters: m = 2^31-1, a=48271.

Translated to D from Pascal code by Jesper Lund:
http://www.gnu-pascal.de/crystal/gpc/en/mail1390.html
*/
struct Random {
    enum int m = int.max;
    enum int a = 48_271;
    enum int q = m / a;
    enum int r = m % a;
    int seed = 42;

    double nextDouble() {
        int k = seed / q;
        seed = a * (seed - k * q) - r * k;
        if (seed < 1)
            seed += m;
        return cast(double)seed / m;
    }

    int nextInt(int max) {
        int n = max + 1;
        int k = cast(int)(n * this.nextDouble());
        return (k == n) ? k - 1 : k;
    }
}


struct SA {
    short s;
    double d;
}

struct SB {
    short s;
    double d;
    bool opBinary(string S:"==")(SB other) {
        return (this.s == other.s) && (this.d == other.d);
    }
}


void main() {
    static assert(SA.sizeof == SB.sizeof);
    enum Versions { basic, defined, memcmp }

    enum int N = 1_000; // small, to keep data in CPU cache
    enum int NLOOPS = 100_000;
    enum Versions vers = Versions.defined; // basic, defined, memcmp

    SA[6] sa_items = [SA(20, 5.5), SA(20, 5.5),
                      SA(20, 6.5), SA(20, 6.5),
                      SA(30, 6.5), SA(30, 6.5)];

    memset(&(sa_items[1]), 255, SA.sizeof);
    sa_items[1].s = sa_items[0].s;
    sa_items[1].d = sa_items[0].d;

    memset(&(sa_items[3]), 255, SA.sizeof);
    sa_items[3].s = sa_items[2].s;
    sa_items[3].d = sa_items[2].d;

    memset(&(sa_items[5]), 255, SA.sizeof);
    sa_items[5].s = sa_items[4].s;
    sa_items[5].d = sa_items[4].d;


    auto sa1 = new SA[N];
    auto sb1 = new SB[sa1.length];
    auto sa2 = new SA[sa1.length];
    auto sb2 = new SB[sa1.length];

    auto rnd = Random(1);

    foreach (i; 0 .. sa1.length) {
        int r1 = rnd.nextInt(sa_items.length - 1);
        sa1[i] = sa_items[r1];
        sb1[i] = SB(sa1[i].s, sa1[i].d);

        int r2 = rnd.nextInt(sa_items.length - 1);
        sa2[i] = sa_items[r2];
        sb2[i] = SB(sa2[i].s, sa2[i].d);
    }

    int sum;

    final switch(vers) {
        case Versions.basic:
            for (int i; i < NLOOPS; i++)
                for (int j; j < N; j++)
                    sum += (sa1[j] == sa2[j]);
            break;
        case Versions.defined:
            for (int i; i < NLOOPS; i++)
                for (int j; j < N; j++)
                    sum += (sb1[j] == sb2[j]);
            break;
        case Versions.memcmp:
            for (int i; i < NLOOPS; i++)
                for (int j; j < N; j++)
                    sum += memcmp(&(sa1[j]), &(sa2[j]), SA.sizeof) == 0;
            break;
    }

    writeln(sum);
}


Timings, N=1_000, NLOOPS=100_000, seconds:
  basic:   3.76 (result sum = 16400000)
  defined: 4.48 (result sum = 34100000)
  memcmp:  4.81 (result sum = 16400000)

Bye,
bearophile



More information about the Digitalmars-d mailing list