Array fill performance differences between for, foreach, slice

Tue Mar 31 21:30:53 UTC 2020

I've observed large differences in timing performance while 
filling arrays using different methods (for vs foreach vs arr[] = 
x) and don't know why. I've looked at array.d 
(https://github.com/dlang/dmd/blob/9792735c82ac997d11d7fe6c3d6c604389b3f5bd/src/dmd/root/array.d) but I'm still none the wiser.

Here is an example:

```
/* fill.d */

import std.stdio: writeln;
import std.typecons: Tuple, tuple;
import std.algorithm.iteration: mean;
import std.algorithm.iteration: sum;
import std.datetime.stopwatch: AutoStart, StopWatch;

/* Benchmarking Function
 *
 * Calls `fun` n times, timing each call separately with a StopWatch, and
 * returns the mean and sample standard deviation of the per-call times.
 *
 * Template parameters:
 *   fun     - callable invoked with no arguments; its result is ignored
 *   units   - unit name handed to Duration.total (e.g. "msecs", "usecs")
 *   minN    - smallest n for which a standard deviation is computed
 *   doPrint - when true, print a one-line summary via writeln
 *
 * Runtime parameters:
 *   n   - number of timed calls
 *   msg - prefix for the printed summary line
 *
 * Returns: tuple with named fields `mean` and `sd` (both double, in
 * `units`). `sd` is NaN when n < minN or n < 2.
 */
auto bench(alias fun, string units = "msecs",
           ulong minN = 10, bool doPrint = false)(ulong n, string msg = "")
{
   auto times = new double[n];
   auto sw = StopWatch(AutoStart.no);
   foreach(ref t; times)
   {
     sw.start();
     fun();
     sw.stop();
     /* total!units is an integer count, so every sample is truncated to
        the resolution of `units`. */
     t = cast(double)sw.peek.total!units;
     sw.reset();
   }
   double ave = mean(times);
   double sd = double.nan;

   /* The sample standard deviation divides by (n - 1). n is unsigned, so
      n == 0 would wrap around, and n == 1 leaves no degrees of freedom;
      both cases keep sd at NaN. */
   if(n >= minN && n > 1)
   {
     double ss = 0;
     foreach(t; times)
       ss += (t - ave)^^2;
     sd = (ss / (n - 1)) ^^ 0.5;
   }

   /* Keep each string literal on a single line: D string literals may span
      lines, so a wrapped literal would embed a newline in the output. */
   static if(doPrint)
     writeln(msg ~ "Mean time("~ units ~ "): ", ave,
             ", Standard Deviation: ", sd);

   return tuple!("mean", "sd")(ave, sd);
}

/* Fill Functions */
/* Fill via an index-based `for` loop.
 *
 * Template parameters:
 *   x - symbol whose value is written to every element; the element type
 *       is typeof(x)
 *   n - number of elements to allocate
 *
 * Returns: the newly allocated array of n elements, each assigned x.
 */
auto fill_for(alias x, ulong n)()
{
   alias T = typeof(x);
   /* NOTE(review): the allocation sits inside the function, so a caller
      timing fill_for also times `new`, not just the fill loop. */
   auto arr = new T[n];

   /* Element-by-element assignment through arr[i]. */
   for(ulong i = 0; i < n; ++i)
     arr[i] = x;

   return arr;
}

/* Fill via `foreach` over the array by reference.
 *
 * Template parameters:
 *   x - symbol whose value is written to every element; the element type
 *       is typeof(x)
 *   n - number of elements to allocate
 *
 * Returns: the newly allocated array of n elements, each assigned x.
 */
auto fill_foreach(alias x, ulong n)()
{
   alias T = typeof(x);
   /* NOTE(review): the allocation sits inside the function, so a caller
      timing fill_foreach also times `new`, not just the fill loop. */
   auto arr = new T[n];

   /* `ref` is required so the assignment writes into the array element
      rather than into a per-iteration copy. */
   foreach(ref el; arr)
     el = x;

   return arr;
}

/* Fill via whole-array slice assignment (`arr[] = x`).
 *
 * Template parameters:
 *   x - symbol whose value is written to every element; the element type
 *       is typeof(x)
 *   n - number of elements to allocate
 *
 * Returns: the newly allocated array of n elements, each assigned x.
 */
auto fill_slice(alias x, ulong n)()
{
   alias T = typeof(x);
   /* NOTE(review): the allocation sits inside the function, so a caller
      timing fill_slice also times `new`, not just the fill itself. */
   auto arr = new T[n];

   /* Assigns x to every element in a single statement. */
   arr[] = x;

   return arr;
}

/* Runs each fill strategy 100 times on a 100_000-element double array and
   prints the mean and standard deviation of the per-call time in
   microseconds. */
void main()
{
   double x = 42;

   /* Each label stays on one line: a string literal wrapped across lines
      would carry the line break into the printed prefix. */
   bench!(fill_slice!(x, 100_000), "usecs", 10, true)(100, "Slice: ");
   bench!(fill_foreach!(x, 100_000), "usecs", 10, true)(100, "Foreach: ");
   bench!(fill_for!(x, 100_000), "usecs", 10, true)(100, "For: ");
}

/*
$ dmd fill.d && ./fill
Slice: Mean time(usecs): 87.38, Standard Deviation: 54.1542
Foreach: Mean time(usecs): 179.9, Standard Deviation: 41.4109
For: Mean time(usecs): 245.81, Standard Deviation: 53.0798

$ dmd --version
DMD64 D Compiler v2.090.1
...
*/
```

I would like to understand why there are such large differences in 
performance between these approaches, and to see where 
opSliceAssign (or the equivalent operation) for D's native arrays 
is implemented. Playing with `-boundscheck` made no difference to 
the relative performance. Thanks.