summing large arrays - or - performance of tight loops

Dave Dave_member at pathlink.com
Thu Apr 19 13:19:01 PDT 2007


Manfred Nowak wrote:
> Dave wrote
>> Mine were run on a P4 chip and an AMD64 chip (original was
>> Turion). 
> 
> It is disgusting to see, that three random benchmarkers all get 
> different results. Mine were run on an AMD64 X2 under WinXP32 and 
> compiled with DMD1.012.
> 
> Quotients (foreach/other):
> Thomas: 200%
> Dave:   100%
> me:      85%
> 
> What causes those huge differences?
> 
> Here is my main:
> 
> import std.stdio, std.perf;
> void main(){
>   const int f= 256;
>   int[ ] a= new int[ 1024*1024*f];
>   auto c=new PerformanceCounter;
>   auto c2=new PerformanceCounter;
>   c.start;
>   for(int i= 1; i<=5120/f; i++)
>     volatile sum_foreach( a);
>   c.stop;
>   volatile auto t= c.microseconds;
>   
>   c2.start;
>   for(int i= 1; i<=5120/f; i++)
>     volatile sum_array( a);
>   c2.stop;
>   writefln( "%s %s %s"
>     , cast(real)t/c2.microseconds
>     , t
>     , c2.microseconds
>   );
> }
> 
> -manfred
>  

Before, I was using a plain 1024 x 1024-element array.

Using a 1024 x 1024 x 64 array, I got:

P4:    97% (linux32 FC5)
AMD64: 92% (WinXP32)

So, the array size seems to make some difference, at least on AMD machines.

Here's all of the code I tested with:

import std.date, std.perf, std.stdio;

// Benchmark driver: times sum_foreach against sum_array over the same large
// int array and prints the ratio of the two timings (foreach / indexed),
// followed by the raw timings and the sums each variant produced.
//
// Two timing back ends are available, selected at compile time:
//   - with the `perftime` version identifier set (e.g. -version=perftime),
//     std.perf's PerformanceCounter is used and times are in microseconds;
//   - otherwise std.date's getUTCtime is used and times are in whatever
//     tick unit getUTCtime reports.
void main()
{
   // 1024*1024*64 elements, shared by both timed sections.
   int[] arr = new int[1024*1024*64];
   // Keep the sums so they can be printed alongside the timings.
   int result1 = 0, result2 = 0;

   // initialize with values that won't cause an integer overflow
   // (elements cycle through 1..5)
   foreach(i, ref e; arr) e = i % 5 + 1;

  version(perftime)
  {
   // Time 10 passes of the foreach-based sum.
   auto t1 = new PerformanceCounter;
   t1.start;
   for(auto i = 0; i < 10; i++)
     result1 = sum_foreach(arr);
   t1.stop;

   // Time 10 passes of the index-based sum.
   auto t2 = new PerformanceCounter;
   t2.start;
   for(auto i = 0; i < 10; i++)
     result2 = sum_array(arr);
   t2.stop;

   // Output: ratio (foreach/indexed), foreach time, indexed time, both sums.
   // The cast forces a floating-point division for the ratio.
   writefln("%s %s %s %s %s"
     , cast(real)t1.microseconds/t2.microseconds
     , t1.microseconds
     , t2.microseconds
     , result1
     , result2
   );
  }
  else
  {
   // Time 10 passes of the foreach-based sum (start/end timestamps).
   d_time s1 = getUTCtime;
   for(auto i = 0; i < 10; i++)
     result1 = sum_foreach(arr);
   d_time e1 = getUTCtime;

   // Time 10 passes of the index-based sum (start/end timestamps).
   d_time s2 = getUTCtime;
   for(auto i = 0; i < 10; i++)
     result2 = sum_array(arr);
   d_time e2 = getUTCtime;

   // Output: ratio (foreach/indexed), foreach elapsed, indexed elapsed,
   // both sums. The cast forces a floating-point division for the ratio.
   writefln("%s %s %s %s %s"
     , cast(real)(e1-s1)/(e2-s2)
     , e1-s1
     , e2-s2
     , result1
     , result2
   );
  }
}

/**
 * Adds up all elements of an array by walking it with a foreach loop.
 *
 * Params:
 *     data = the elements to sum; an empty array yields 0.
 *
 * Returns: the sum, accumulated in a T that starts at 0.
 */
T sum_foreach(T)(T[] data)
{
	T total = 0;
	// foreach is kept on purpose: it is the loop form under comparison.
	foreach(value; data)
		total += value;
	return total;
}

/**
 * Adds up all elements of an array by indexing it from 0 up to its length.
 *
 * Params:
 *     data = the elements to sum; an empty array yields 0.
 *
 * Returns: the sum, accumulated in a T that starts at 0.
 */
T sum_array(T)(T[] data)
{
	T total = 0;
	// Explicit index access is kept: it is the loop form under comparison.
	for(size_t pos = 0; pos < data.length; pos++)
		total += data[pos];
	return total;
}



More information about the Digitalmars-d mailing list