DMD 1.034 and 2.018 releases

bearophile bearophileHUGS at mailas.com
Sat Aug 9 08:11:37 PDT 2008


Here is the C version of the benchmark too:

#include "errno.h"
#include "limits.h"
#include "stdio.h"
#include "stdlib.h"
#include "time.h"

// Benchmark configuration. T and TFORM must be changed together so that the
// printf format keeps matching the element type.

// Binary operator applied to each pair of elements in the timed loop.
#define MYOP *
// Element type of the three arrays.
typedef int T;
// printf format used to print one element of type T.
#define TFORM "%d "

// Report a fatal problem on stderr, prefixed with "ERROR: ", then terminate
// the program with a failure status. This function does not return.
void error(char *string) {
    FILE *out = stderr;

    fprintf(out, "ERROR: %s\n", string);
    exit(EXIT_FAILURE);
}

// Return the value of clock() converted to seconds.
// Yields 0.0 when clock() reports -1 (processor time not available).
double myclock() {
    const clock_t ticks = clock();

    if (ticks != -1)
        return ticks / (double)CLOCKS_PER_SEC;
    return 0.0;
}

int main(int argc, char** argv) {
    int n = argc >= 2 ? atoi(argv[1]) : 10;

    n *= 8; // to avoid problems with SSE2
    int nloops = argc >= 3 ? atoi(argv[2]) : 1;

    printf("array len= %d  nloops= %d\n", n, nloops);

    //__attribute__((aligned(16)))
    T* __restrict a1 = (T*)malloc(sizeof(T) * n + 16);
    T* __restrict a2 = (T*)malloc(sizeof(T) * n + 16);
    T* __restrict a3 = (T*)malloc(sizeof(T) * n + 16);
    if (a1 == NULL || a2 == NULL || a3 == NULL)
        error("memory overflow");

    int i, j;
    for (i = 0; i < n; i++) {
        a1[i] = i * 7 + 1;
        a2[i] = i + 1;
    }

    double t = myclock();
    for (j = 0; j < nloops; j++)
        for (i = 0; i < n; i++) // Alignment of access forced using peeling.
            a3[i] = a1[i] MYOP a2[i];
    printf("time= %f s\n", myclock() - t);

    if (n < 300) {
        printf("\nResult:\n");
        for (i = 0; i < n; i++)
            printf(TFORM, a3[i]);
        putchar('\n');
    }

    return 0;
}

/*

MYOP = *, compiled with:
gcc -Wall -O3 -s benchmark.c -o benchmark
    C:\>benchmark 100 3000000
    array len= 800  nloops= 3000000
    time= 3.656000 s

    C:\>benchmark 10000 10000
    array len= 80000  nloops= 10000
    time= 1.374000 s

    C:\>benchmark 12000000 1
    array len= 96000000  nloops= 1
    time= 0.547000 s


MYOP = *, compiled with:
gcc -Wall -O3 -s -ftree-vectorize -msse3 -ftree-vectorizer-verbose=5 benchmark.c -o benchmark
    C:\>benchmark 100 3000000
    array len= 800  nloops= 3000000
    time= 3.468000 s

    C:\>benchmark 10000 10000
    array len= 80000  nloops= 10000
    time= 1.156000 s

    C:\>benchmark 12000000 1
    array len= 96000000  nloops= 1
    time= 0.531000 s

With the larger arrays, cache effects may dominate the computation time.

*/



More information about the Digitalmars-d-announce mailing list