DMD 1.034 and 2.018 releases
bearophile
bearophileHUGS at mailas.com
Sat Aug 9 08:11:37 PDT 2008
C version too:
#include "errno.h"
#include "limits.h"
#include "stdio.h"
#include "stdlib.h"
#include "time.h"
/* Binary operator applied element-wise in the timed loop: a3[i] = a1[i] MYOP a2[i]. */
#define MYOP *
/* Element type of the three benchmark arrays. */
typedef int T;
/* printf format used to print one element of type T; must be kept in sync with T. */
#define TFORM "%d "
/*
 * Reports a fatal error and terminates the program.
 *
 * Writes "ERROR: <string>" followed by a newline to stderr, then exits with
 * EXIT_FAILURE. This function does not return.
 *
 * string: message text; it is only read, so string literals may be passed.
 */
void error(const char *string) {
    fprintf(stderr, "ERROR: %s\n", string);
    exit(EXIT_FAILURE);
}
/*
 * Returns the value of clock() converted to seconds, or 0.0 when clock()
 * reports that the processor time is unavailable.
 */
double myclock(void) {
    clock_t ticks = clock();

    /* The C standard specifies (clock_t)(-1) as the failure value. */
    if (ticks == (clock_t)-1) {
        return 0.0;
    }
    return ticks / (double)CLOCKS_PER_SEC;
}
int main(int argc, char** argv) {
int n = argc >= 2 ? atoi(argv[1]) : 10;
n *= 8; // to avoid problems with SSE2
int nloops = argc >= 3 ? atoi(argv[2]) : 1;
printf("array len= %d nloops= %d\n", n, nloops);
//__attribute__((aligned(16)))
T* __restrict a1 = (T*)malloc(sizeof(T) * n + 16);
T* __restrict a2 = (T*)malloc(sizeof(T) * n + 16);
T* __restrict a3 = (T*)malloc(sizeof(T) * n + 16);
if (a1 == NULL || a2 == NULL || a3 == NULL)
error("memory overflow");
int i, j;
for (i = 0; i < n; i++) {
a1[i] = i * 7 + 1;
a2[i] = i + 1;
}
double t = myclock();
for (j = 0; j < nloops; j++)
for (i = 0; i < n; i++) // Alignment of access forced using peeling.
a3[i] = a1[i] MYOP a2[i];
printf("time= %f s\n", myclock() - t);
if (n < 300) {
printf("\nResult:\n");
for (i = 0; i < n; i++)
printf(TFORM, a3[i]);
putchar('\n');
}
return 0;
}
/*
MYOP = *, compiled with:
gcc -Wall -O3 -s benchmark.c -o benchmark
C:\>benchmark 100 3000000
array len= 800 nloops= 3000000
time= 3.656000 s
C:\>benchmark 10000 10000
array len= 80000 nloops= 10000
time= 1.374000 s
C:\>benchmark 12000000 1
array len= 96000000 nloops= 1
time= 0.547000 s
MYOP = *, compiled with:
gcc -Wall -O3 -s -ftree-vectorize -msse3 -ftree-vectorizer-verbose=5 benchmark.c -o benchmark
C:\>benchmark 100 3000000
array len= 800 nloops= 3000000
time= 3.468000 s
C:\>benchmark 10000 10000
array len= 80000 nloops= 10000
time= 1.156000 s
C:\>benchmark 12000000 1
array len= 96000000 nloops= 1
time= 0.531000 s
With the largest array, cache effects may dominate the computation time.
*/
More information about the Digitalmars-d-announce
mailing list