On 01/05/2016 10:47 AM, Charles Smith wrote:
 >          auto result = 1_000.iota.map!(a => a.repeat(count(arr[],
 > a))).chain.array;
You are traversing the array per index value (1000 times), which can be 
done once up front:
enum elementCount = 1_000_000;
enum valueCount = 1000;
void main() {
     import std.conv : to;
     import std.datetime;
     import std.random;
     import std.stdio;
     int[elementCount] arr;
     for(int i; i < arr.length; ++i)
         arr[i] = uniform(0, valueCount);
     // Now they get their own arrays:
     auto benchmarks = benchmark!(() => algorithmSort(arr.dup),
                                  () => countingSort(arr.dup))(1);
     writeln("Algorithm's Sort: ", to!Duration(benchmarks[0]));
     writeln("Counting Sort   : ", to!Duration(benchmarks[1]));
}
auto algorithmSort(int[] arr) {
     import std.algorithm : sort, sum;
     auto result = sort(arr[]);
     return result.sum;
}
auto countingSort(int[] arr) {
     import std.algorithm : count, map, joiner, sum, each;
     import std.range : array, repeat, enumerate;
     uint[valueCount] hist;
     arr.each!(a => ++hist[a]);
     auto result = hist[].enumerate.map!(t => t[0].repeat(t[1])).joiner;
     // To make sure that we consume the lazy range:
     return result.sum;
}
 > 2. I noticed that result within `countingSort` is an array of arrays. I
 > thought `chain` would concat them... did I miss something obvious?
I think .joiner is what you're looking for.
 > 3. It's kind of hard to compare performance because of (1), but is there
 > a better way to do this?
I changed the code to pass each function a different array.
My results with -O -inline -noboundscheck:
Algorithm's Sort: 76 ms, 910 μs, and 8 hnsecs
Counting Sort   : 21 ms, 563 μs, and 9 hnsecs
Ali