Inlining Code Test
Craig Black
craigblack2 at cox.net
Sun Dec 12 18:56:16 PST 2010
If the problem is not inlining, then why does the same code in C++ does not
suffer from this performance limitation (when compiled with Visual C++
2008)?
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
template <class T>
T min(T a, T b) { return a < b ? a : b; }
template <class T>
T max(T a, T b) { return a > b ? a : b; }
inline void *operator new(size_t, void *ptr) { return ptr; }
inline void operator delete(void *, void *) {}
template <class T>
class Array
{
public:
Array() {}
Array(int length) { resize(length); }
Array(const Array<T> &a) { append(a); }
void clear() { base.clear(); }
void resize(int sz)
{
assert(sz >= 0);
if(sz == 0) return clear();
if(!base.array) reserve(sz, sz);
*base.lengthPtr() = sz;
}
void reserve(int capacity, int length)
{
assert(length <= capacity);
if(capacity == 0) return clear();
ArrayBase old;
if(base.array)
{
if(base.capacity() >= capacity) return;
old.array = base.array;
base.array = 0;
}
base.array = (char*)(new char[capacity*sizeof(T)+8]);
*base.lengthPtr() = length;
*base.capacityPtr() = capacity;
for(int i = 0; i < capacity; i++) new(base.data()+i)T;
if(old.array) copyData(old);
}
int length() const { return base.length(); }
int capacity() const { return base.capacity(); }
bool empty() const { return base.array == 0; }
T &at(int i)
{
assert(!empty());
assert(i >= 0 && i < length());
return base.data()[i];
}
const T &at(int i) const
{
assert(!empty());
assert(i >= 0 && i < length());
return base.data()[i];
}
T &operator [](int i) { return at(i); }
const T &operator [](int i) const { return at(i); }
void operator = (const Array &array)
{
copy(array);
}
void copy(const Array &array)
{
if(array.empty()) return clear();
int len = array.length();
reserve(len, len);
for(int i = 0; i < len; i++) at(i) = array[i];
}
T &front() { return at(0); }
const T &front() const { return at(0); }
T &back() { return at(length()-1); }
const T &back() const { return at(length()-1); }
T &append()
{
int sz = length();
int sp = capacity();
if(sp > sz) (*base.lengthPtr())++;
else
{
sz++;
sp = max(max(2, sp+sp/2), sz);
reserve(sp, sz);
}
return back();
}
void append(const Array<T> &a)
{
int prevLen = length();
resize(prevLen + a.length());
for(int i = 0; i < a.length(); i++) at(i+prevLen) = a[i];
}
private:
class ArrayBase
{
public:
ArrayBase() { array = 0; }
~ArrayBase() { clear(); }
void clear() { if(array) delete array; }
int length() const { return array ? *lengthPtr() : 0; }
int capacity() const { return array ? *capacityPtr() : 0; }
int* lengthPtr() const { return (int*)array; }
int* capacityPtr() const { return (int*)(array+4); }
T* data() const { return (T*)(array+8); }
char* array;
};
ArrayBase base;
void copyData(ArrayBase &array)
{
int copyLen = min(length(), array.length());
for(int i = 0; i < copyLen; i++) { at(i) = array.data()[i]; }
}
};
template <class T>
struct Less
{
bool operator() (T a, T b) { return a < b; }
};
template <class T>
void swap(T &a, T &b)
{
T temp = a;
a = b;
b = temp;
}
template <class T, class L>
void insertionSort1(Array<T> &a, int low, int high, L less)
{
for(int i = low; i <= high; i++)
{
int min = i;
for(int j = i + 1; j <= high; j++)
if(less(a[j], a[min])) min = j;
swap(a[i], a[min]);
}
}
template<class T, class L>
int partition1(Array<T> &a, int p, int r, L less)
{
T x = a[r];
int j = p - 1;
for (int i = p; i < r; i++)
{
if (less(x, a[i])) continue;
swap(a[i], a[++j]);
}
a[r] = a[j + 1];
a[j + 1] = x;
return j + 1;
}
template<class T, class L>
void quickSort1(Array<T> &a, int p, int r, L less)
{
if(p + 7 > r) return insertionSort1(a, p, r, less);
if (p < r)
{
int q = partition1(a, p, r, less);
quickSort1(a, p, q - 1, less);
quickSort1(a, q + 1, r, less);
}
}
template <class T, class L>
void sort1(Array<T> &a) { quickSort1(a, 0, a.length()-1, Less<T>()); }
template <class T, class L>
void insertionSort2(T *a, int low, int high, L less)
{
for(int i = low; i <= high; i++)
{
int min = i;
for(int j = i + 1; j <= high; j++)
if(less(a[j], a[min])) min = j;
swap(a[i], a[min]);
}
}
template<class T, class L>
int partition2(T *a, int p, int r, L less)
{
T x = a[r];
int j = p - 1;
for (int i = p; i < r; i++)
{
if (less(x, a[i])) continue;
swap(a[i], a[++j]);
}
a[r] = a[j + 1];
a[j + 1] = x;
return j + 1;
}
template<class T, class L>
void quickSort2(T *a, int p, int r, L less)
{
if(p + 7 > r) return insertionSort2(a, p, r, less);
if (p < r)
{
int q = partition2(a, p, r, less);
quickSort2(a, p, q - 1, less);
quickSort2(a, q + 1, r, less);
}
}
template <class T, class L>
void sort2(Array<T> &a) { quickSort2(&a[0], 0, a.length()-1, Less<T>()); }
typedef unsigned long long uint64;
uint64 getCycle() {
__asm { RDTSC }
}
Array<double> vals;
uint64 bench1()
{
uint64 startTime = getCycle();
Array<double> v;
for(int i = 0; i < 100; i++)
{
v = vals;
sort1<double, Less<double> >(v);
}
return getCycle()-startTime;
}
uint64 bench2()
{
uint64 startTime = getCycle();
Array<double> v;
for(int i = 0; i < 100; i++)
{
v = vals;
sort2<double, Less<double> >(v);
}
return getCycle()-startTime;
}
void main()
{
vals.resize(1000);
for(int i = 0; i < 1000; i++) vals[i] = (rand() % 1000000) / 1000.0;
uint64 time1 = 0;
uint64 time2 = 0;
for(int i = 0; i < 100; i++)
{
time1 += bench1();
time2 += bench2();
}
printf("Sorting with Array: %f\n", time1/100000.0);
printf("Sorting with pointers: %f\n", time2/100000.0);
printf("%f percent faster\n", 100.0 * (time1-time2) / time1);
}
More information about the Digitalmars-d
mailing list