Holes in structs and opEquals
bearophile
bearophileHUGS at lycos.com
Tue Mar 9 08:41:53 PST 2010
I guess what's holding back Walter on this improvement is the higher performance of the equal done ignoring the holes compared to the more correct one. So I have done a test to see how can large is such performance difference, because with not even a bit of experimental data it's hard to discuss:
import std.stdio: writeln;
import std.c.string: memcmp, memset;
/*
Basic uniform random generator: Minimal Standard in Park and
Miller (1988): "Random Number Generators: Good Ones Are Hard to
Find", Comm. of the ACM, 31, 1192-1201.
Parameters: m = 2^31-1, a=48271.
Translated to D from Pascal code by Jesper Lund:
http://www.gnu-pascal.de/crystal/gpc/en/mail1390.html
*/
struct Random {
enum int m = int.max;
enum int a = 48_271;
enum int q = m / a;
enum int r = m % a;
int seed = 42;
double nextDouble() {
int k = seed / q;
seed = a * (seed - k * q) - r * k;
if (seed < 1)
seed += m;
return cast(double)seed / m;
}
int nextInt(int max) {
int n = max + 1;
int k = cast(int)(n * this.nextDouble());
return (k == n) ? k - 1 : k;
}
}
struct SA {
short s;
double d;
}
struct SB {
short s;
double d;
bool opBinary(string S:"==")(SB other) {
return (this.s == other.s) && (this.d == other.d);
}
}
void main() {
static assert(SA.sizeof == SB.sizeof);
enum Versions { basic, defined, memcmp }
enum int N = 1_000; // small, to keep data in CPU cache
enum int NLOOPS = 100_000;
enum Versions vers = Versions.defined; // basic, defined, memcmp
SA[6] sa_items = [SA(20, 5.5), SA(20, 5.5),
SA(20, 6.5), SA(20, 6.5),
SA(30, 6.5), SA(30, 6.5)];
memset(&(sa_items[1]), 255, SA.sizeof);
sa_items[1].s = sa_items[0].s;
sa_items[1].d = sa_items[0].d;
memset(&(sa_items[3]), 255, SA.sizeof);
sa_items[3].s = sa_items[2].s;
sa_items[3].d = sa_items[2].d;
memset(&(sa_items[5]), 255, SA.sizeof);
sa_items[5].s = sa_items[4].s;
sa_items[5].d = sa_items[4].d;
auto sa1 = new SA[N];
auto sb1 = new SB[sa1.length];
auto sa2 = new SA[sa1.length];
auto sb2 = new SB[sa1.length];
auto rnd = Random(1);
foreach (i; 0 .. sa1.length) {
int r1 = rnd.nextInt(sa_items.length - 1);
sa1[i] = sa_items[r1];
sb1[i] = SB(sa1[i].s, sa1[i].d);
int r2 = rnd.nextInt(sa_items.length - 1);
sa2[i] = sa_items[r2];
sb2[i] = SB(sa2[i].s, sa2[i].d);
}
int sum;
final switch(vers) {
case Versions.basic:
for (int i; i < NLOOPS; i++)
for (int j; j < N; j++)
sum += (sa1[j] == sa2[j]);
break;
case Versions.defined:
for (int i; i < NLOOPS; i++)
for (int j; j < N; j++)
sum += (sb1[j] == sb2[j]);
break;
case Versions.memcmp:
for (int i; i < NLOOPS; i++)
for (int j; j < N; j++)
sum += memcmp(&(sa1[j]), &(sa2[j]), SA.sizeof) == 0;
break;
}
writeln(sum);
}
Timings, N=1_000, NLOOPS=100_000, seconds:
basic: 3.76 (result sum = 16400000)
defined: 4.48 (result sum = 34100000)
memcmp: 4.81 (result sum = 16400000)
Bye,
bearophile
More information about the Digitalmars-d
mailing list