Struct method access speed

bearophile bearophileHUGS at lycos.com
Wed Dec 26 13:47:31 PST 2007


A tiny benchmark of mine, a reduced version of some real code (I hope it doesn't contain bugs or silly inefficiencies). It was compiled in four ways (gdc, dmd, gcc, and gcc with aggressive flags):

gcc version 3.4.5 (mingw special) (gdc 0.24, using dmd 1.020)
gdc -O3 -s -frelease -finline-functions -ffast-math -fomit-frame-pointer -funroll-loops -march=pentiumpro vec_test.d -o vec_test_gdc

dmd v1.024
dmd -O -release -inline vec_test.d

1) gcc version 3.4.2 (mingw-special)
g++ -s -O3 vec_testCpp.cpp -o vec_testCpp

2) gcc version 3.4.2 (mingw-special)
g++ -O3 -s -finline-functions -ffast-math -fomit-frame-pointer -funroll-loops -march=pentiumpro vec_testCpp.cpp -o vec_testCpp


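Timings in seconds, N = 20_000, on Pentium 3 (for each compiler, the first line is the Vec struct loop and the second line is the raw pointer loop):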

dmd (D):
  3.31
  1.66

gdc (D):
  8.92
  1.39

gcc 1 (C++):
  1.67
  1.66

gcc 2 (C++):
  1.36
  1.36

The interesting results are for dmd: I think these two benchmarks should take the same time to run, but the experiment shows that this is not the case for DMD.

Note that this isn't an inlining problem, because if you remove the -inline flag you obtain much worse timings.


// D ------------------------
import std.stdio, std.gc, std.c.time;

// Minimal fixed-length vector: N elements of type T reached through a raw
// pointer, with just enough surface for the benchmark (allocate + store).
struct Vec(T, int N) {
  T* ap;  // start of the element storage; null until create() has run

  // Allocates room for N elements with malloc and sets every one of them
  // to T.init.
  void create() {
    ap = cast(T*)malloc(N * T.sizeof);
    T* stop = ap + N;
    for (T* slot = ap; slot < stop; ++slot)
      *slot = T.init;
  }

  // Backs the `v[i] = x` syntax: stores x at index i. No bounds check.
  // This is the store that the struct half of the benchmark times, so it
  // is kept as a single indexed write.
  void opIndexAssign(T x, uint i) {
    ap[i] = x;
  }
}

// Benchmark driver: times N*N element stores made through Vec's index
// assignment operator, then the same N*N stores made through a plain
// pointer, and prints the elapsed clock() time of each phase in seconds.
void main() {
  const uint N = 20_000;
  alias uint T;

  // Phase 1: every store goes through Vec.opIndexAssign. The timed region
  // also covers create(), i.e. the allocation and the T.init fill.
  auto t = clock();
  Vec!(T, N) v1;
  v1.create;
  // i and j are declared without an initialiser; D default-initialises
  // them to 0.
  for(uint i; i < N; i++)
    for(uint j; j < N; j++)
      v1[j] = j;
  printf("%.2f\n", cast(float)(clock()-t)/CLOCKS_PER_SEC);

  // Phase 2: the identical loop nest over a bare malloc'ed buffer. Unlike
  // phase 1, nothing fills the buffer before the stores begin.
  t = clock();
  auto v2 = cast(T*)malloc(N * T.sizeof);
  for(uint i; i < N; i++)
    for(uint j; j < N; j++)
      v2[j] = j;
  printf("%.2f\n", cast(float)(clock()-t)/CLOCKS_PER_SEC);
}


// C++ ------------------------
#include <time.h>
#include <stdio.h>
#include <stdlib.h>

#define N 20000

// Minimal fixed-length vector: M elements of type T reached through a raw
// pointer. The struct never frees `ap`; whoever calls create() owns the
// buffer and is responsible for releasing it with free().
template <class T, int M> struct Vec {
  T* ap;  // element storage; indeterminate until create() has run

  // Allocates room for M elements and sets each of them to 0.
  // If the allocation fails the program is terminated with abort() rather
  // than writing through a null pointer.
  void create() {
    ap = static_cast<T*>(malloc(M * sizeof(T)));
    // malloc(0) is allowed to return NULL, so only a non-empty request
    // that comes back NULL counts as a failure.
    if (ap == NULL && M > 0)
      abort();
    for(int i = 0; i < M; i++)
      ap[i] = 0;
  }

  // Returns a reference to element i so it can be read or assigned.
  // No bounds checking is performed.
  T &operator[](unsigned int i) {
    return ap[i];
  }
};

// Benchmark driver: times N*N element stores made through Vec::operator[],
// then the same N*N stores made through a plain pointer, and prints the
// elapsed clock() time of each phase in seconds.
// Returns 0 on success, EXIT_FAILURE if the raw buffer cannot be allocated.
int main() {
  typedef unsigned int T;
  clock_t t;

  // Phase 1: every store goes through Vec::operator[]. The timed region
  // also covers create(), i.e. the allocation and the zero fill.
  t = clock();
  Vec<T, N> v1;
  v1.create();
  for(unsigned int i = 0; i < N; i++)
    for(unsigned int j = 0; j < N; j++)
      v1[j] = j;
  printf("%.2f\n", static_cast<float>(clock()-t)/CLOCKS_PER_SEC);

  // Phase 2: the identical loop nest over a bare malloc'ed buffer. Unlike
  // phase 1, nothing fills the buffer before the stores begin.
  t = clock();
  T* v2 = static_cast<T*>(malloc(N * sizeof(T)));
  if (v2 == NULL) {
    // Without this check the loop below would write through a null pointer.
    fprintf(stderr, "vec_testCpp: out of memory\n");
    free(v1.ap);
    return EXIT_FAILURE;
  }
  for(unsigned int i = 0; i < N; i++)
    for(unsigned int j = 0; j < N; j++)
      v2[j] = j;
  printf("%.2f\n", static_cast<float>(clock()-t)/CLOCKS_PER_SEC);

  // Release both buffers only after the last measurement has been printed,
  // so the cleanup is not part of either timed region.
  free(v2);
  free(v1.ap);
  return 0;
}

Bye,
bearophile



More information about the Digitalmars-d mailing list