Inlining Code Test

Craig Black craigblack2 at cox.net
Sun Dec 12 18:56:16 PST 2010


If the problem is not inlining, then why does the same code in C++ does not 
suffer from this performance limitation (when compiled with Visual C++ 
2008)?

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>

template <class T>
T min(T a, T b) { return a < b ? a : b; }
template <class T>
T max(T a, T b) { return a > b ? a : b; }

inline void *operator new(size_t, void *ptr) { return ptr; }
inline void operator delete(void *, void *) {}

template <class T>
class Array
{
public:

  Array() {}
  Array(int length) { resize(length); }
  Array(const Array<T> &a) { append(a); }

  void clear() { base.clear(); }

  void resize(int sz)
  {
    assert(sz >= 0);
    if(sz == 0) return clear();
    if(!base.array) reserve(sz, sz);
    *base.lengthPtr() = sz;
  }

  void reserve(int capacity, int length)
  {
    assert(length <= capacity);
    if(capacity == 0) return clear();
    ArrayBase old;
    if(base.array)
    {
      if(base.capacity() >= capacity) return;
      old.array = base.array;
      base.array = 0;
    }
    base.array = (char*)(new char[capacity*sizeof(T)+8]);
    *base.lengthPtr() = length;
    *base.capacityPtr() = capacity;
    for(int i = 0; i < capacity; i++) new(base.data()+i)T;
    if(old.array) copyData(old);
  }

  int length() const { return base.length(); }
  int capacity() const { return base.capacity(); }

  bool empty() const { return base.array == 0; }

  T &at(int i)
  {
   assert(!empty());
    assert(i >= 0 && i < length());
    return base.data()[i];
  }

  const T &at(int i) const
  {
    assert(!empty());
    assert(i >= 0 && i < length());
    return base.data()[i];
  }

  T &operator [](int i) { return at(i); }
  const T &operator [](int i) const { return at(i); }

  void operator = (const Array &array)
  {
    copy(array);
  }

  void copy(const Array &array)
  {
    if(array.empty()) return clear();
    int len = array.length();
    reserve(len, len);
    for(int i = 0; i < len; i++) at(i) = array[i];
  }

  T &front() { return at(0); }
  const T &front() const { return at(0); }
  T &back() { return at(length()-1); }
  const T &back() const { return at(length()-1); }

  T &append()
  {
    int sz = length();
    int sp = capacity();
    if(sp > sz) (*base.lengthPtr())++;
    else
    {
      sz++;
      sp = max(max(2, sp+sp/2), sz);
      reserve(sp, sz);
    }
    return back();
  }

  void append(const Array<T> &a)
  {
    int prevLen = length();
    resize(prevLen + a.length());
    for(int i = 0; i < a.length(); i++) at(i+prevLen) = a[i];
  }

private:

  class ArrayBase
  {
  public:

    ArrayBase() { array = 0; }
    ~ArrayBase() { clear(); }
    void clear() { if(array) delete array; }

    int length() const { return array ? *lengthPtr() : 0; }
    int capacity() const { return array ? *capacityPtr() : 0; }
    int* lengthPtr() const { return (int*)array; }
    int* capacityPtr() const { return (int*)(array+4); }
    T* data() const { return (T*)(array+8); }

    char* array;
  };

  ArrayBase base;

  void copyData(ArrayBase &array)
  {
    int copyLen = min(length(), array.length());
    for(int i = 0; i < copyLen; i++) { at(i) = array.data()[i]; }
  }
};

template <class T>
struct Less
{
  bool operator() (T a, T b) { return a < b; }
};

template <class T>
void swap(T &a, T &b)
{
      T temp = a;
      a = b;
      b = temp;
}

template <class T, class L>
void insertionSort1(Array<T> &a, int low, int high, L less)
{
  for(int i = low; i <= high; i++)
  {
    int min = i;
    for(int j = i + 1; j <= high; j++)
      if(less(a[j], a[min])) min = j;
    swap(a[i], a[min]);
  }
}

template<class T, class L>
int partition1(Array<T> &a, int p, int r, L less)
{
  T x = a[r];
  int j = p - 1;
  for (int i = p; i < r; i++)
  {
    if (less(x, a[i])) continue;
    swap(a[i], a[++j]);
  }
  a[r] = a[j + 1];
  a[j + 1] = x;
  return j + 1;
}

template<class T, class L>
void quickSort1(Array<T> &a, int p, int r, L less)
{
  if(p + 7 > r) return insertionSort1(a, p, r, less);
  if (p < r)
  {
    int q = partition1(a, p, r, less);
    quickSort1(a, p, q - 1, less);
    quickSort1(a, q + 1, r, less);
  }
}

template <class T, class L>
void sort1(Array<T> &a) { quickSort1(a, 0, a.length()-1, Less<T>()); }

template <class T, class L>
void insertionSort2(T *a, int low, int high, L less)
{
  for(int i = low; i <= high; i++)
  {
    int min = i;
    for(int j = i + 1; j <= high; j++)
      if(less(a[j], a[min])) min = j;
    swap(a[i], a[min]);
  }
}

template<class T, class L>
int partition2(T *a, int p, int r, L less)
{
  T x = a[r];
  int j = p - 1;
  for (int i = p; i < r; i++)
  {
    if (less(x, a[i])) continue;
    swap(a[i], a[++j]);
  }
  a[r] = a[j + 1];
  a[j + 1] = x;
  return j + 1;
}

template<class T, class L>
void quickSort2(T *a, int p, int r, L less)
{
  if(p + 7 > r) return insertionSort2(a, p, r, less);
  if (p < r)
  {
    int q = partition2(a, p, r, less);
    quickSort2(a, p, q - 1, less);
    quickSort2(a, q + 1, r, less);
  }
}

template <class T, class L>
void sort2(Array<T> &a) { quickSort2(&a[0], 0, a.length()-1, Less<T>()); }

typedef unsigned long long uint64;

uint64 getCycle() {
    __asm { RDTSC }
}

Array<double> vals;

uint64 bench1()
{
      uint64 startTime = getCycle();
  Array<double> v;
  for(int i = 0; i < 100; i++)
  {
    v = vals;
    sort1<double, Less<double> >(v);
  }
      return getCycle()-startTime;
}

uint64 bench2()
{
      uint64 startTime = getCycle();
  Array<double> v;
  for(int i = 0; i < 100; i++)
  {
    v = vals;
    sort2<double, Less<double> >(v);
  }
      return getCycle()-startTime;
}

void main()
{
  vals.resize(1000);
  for(int i = 0; i < 1000; i++) vals[i] = (rand() % 1000000) / 1000.0;

  uint64 time1 = 0;
  uint64 time2 = 0;

  for(int i = 0; i < 100; i++)
  {
    time1 += bench1();
    time2 += bench2();
  }
  printf("Sorting with Array: %f\n", time1/100000.0);
  printf("Sorting with pointers: %f\n", time2/100000.0);
  printf("%f percent faster\n", 100.0 * (time1-time2) / time1);
}




More information about the Digitalmars-d mailing list