Array append performance
bearophile
bearophileHUGS at lycoc.com
Wed Aug 27 16:03:52 PDT 2008
Fawzi Mohamed:
> > memory ordering probably, looping from 0 to 7 is faster than 7 to 0...
I don't think that's the problem.
> so
...
> should be better...
It is not, it seems. For reference below some code you can run:
import std.c.string: memcpy;
template Tuple(T...) {
alias T Tuple;
}
long timer() {
asm {
naked;
push ECX;
push EBX;
mov EAX, 0;
cpuid;
pop EBX;
pop ECX;
rdtsc;
ret;
}
}
template Range(int stop) {
static if (stop <= 0)
alias Tuple!() Range;
else
alias Tuple!(Range!(stop-1), stop-1) Range;
}
void main() {
const int N = 500_000_000;
alias uint DataType;
for (int i = 4; i < N; i *= 2) {
auto b = new DataType[i];
auto a = new DataType[i];
a[] = b[]; // pre-cache
auto count = N / i;
auto start = timer();
while (count--)
a[] = b[];
auto end = timer();
auto t1 = end - start;
count = N / i;
start = timer();
while (count--)
memcpy(a.ptr, b.ptr, a.length * DataType.sizeof);
end = timer();
auto t2 = end - start;
count = N / i;
start = timer();
while (count--)
for (int j; j < i; j++)
a[j] = b[j];
end = timer();
auto t3 = end - start;
count = N / i;
start = timer();
while (count--) {
auto pa = a.ptr;
auto pb = b.ptr;
int nloop = i;
while (nloop--)
*pa++ = *pb++;
}
end = timer();
auto t4 = end - start;
count = N / i;
start = timer();
while (count--) { // apparently the faster version
switch (i) {
case 0: break;
case 1: foreach (j; Range!(1)) a[j] = b[j]; break;
case 2: foreach (j; Range!(2)) a[j] = b[j]; break;
case 3: foreach (j; Range!(3)) a[j] = b[j]; break;
case 4: foreach (j; Range!(4)) a[j] = b[j]; break;
case 5: foreach (j; Range!(5)) a[j] = b[j]; break;
case 6: foreach (j; Range!(6)) a[j] = b[j]; break;
case 7: foreach (j; Range!(7)) a[j] = b[j]; break;
case 8: foreach (j; Range!(8)) a[j] = b[j]; break;
default: memcpy(a.ptr, b.ptr, a.length * DataType.sizeof);
}
}
end = timer();
auto t5 = end - start;
count = N / i;
start = timer();
while (count--) {
int j = i;
switch(i) {
case 8: a[j] = b[j]; ++j;
case 7: a[j] = b[j]; ++j;
case 6: a[j] = b[j]; ++j;
case 5: a[j] = b[j]; ++j;
case 4: a[j] = b[j]; ++j;
case 3: a[j] = b[j]; ++j;
case 1: a[j] = b[j];
case 0: break;
default: memcpy( a.ptr, b.ptr, a.length * DataType.sizeof );
}
}
end = timer();
auto t6 = end - start;
printf("i: %8d,\t[]=[]: %8lld, \tt1/t2: %.2f \tt1/t3: %.2f \tt1/t4: %.2f \tt1/t5: %.2f \tt1/t6: %.2f\n",
i, t1, cast(float)t1/t2, cast(float)t1/t3, cast(float)t1/t4, cast(float)t1/t5,
cast(float)t1/t6);
}
}
Bye,
bearophile
More information about the Digitalmars-d
mailing list