Proposed Changes to the Range API for Phobos v3

Tue Sep 24 14:36:05 UTC 2024

On Monday, 23 September 2024 at 08:20:59 UTC, Eyal Lotem wrote:
> Results on my laptop:
>
> **opApplyRange** took **13637** hnsecs
> **phobosRange** took **207873** hnsecs

I implemented a quick-and-dirty "getNext" style range as outlined 
in the post, which is not the greatest, and doesn't support ref 
ranges.

I also just did a "simple implementation" of Map and Filter as 
ranges.

The results tell an interesting story.

```d
import std: iota, unaryFun, ForeachType;
import std.datetime.stopwatch : benchmark;

struct MapResult(alias _Func, R) {
     alias Func = unaryFun!_Func;
     R underlying;
     auto getNext(ref bool valid) {
         auto x = underlying.getNext(valid);
         if(valid) return Func(x);
         return typeof(Func(x)).init;
     }
     int opApply(scope int delegate(ref 
typeof(Func(ForeachType!R.init))) dg) {
         foreach(ref x; underlying) {
             auto res = Func(x);
             if(int rc = dg(res)) return rc;
         }
         return 0;
     }
}

struct MapResult2(alias _Func, R) {
     alias Func = unaryFun!_Func;
     R underlying;
     auto front() => Func(underlying.front);
     void popFront() => underlying.popFront;
     bool empty() => underlying.empty;
}

struct FilterResult(alias _Func, R) {
     alias Func = unaryFun!_Func;
     R underlying;
     auto getNext(ref bool valid) {
         typeof(underlying.getNext(valid)) x;
         do {
             x = underlying.getNext(valid);
         } while(valid && !Func(x));
         return x;
     }
     int opApply(scope int delegate(ref ForeachType!R) dg) {
         foreach(ref x; underlying) {
             if(Func(x)) if(int rc = dg(x)) return rc;
         }
         return 0;
     }
}

struct FilterResult2(alias _Func, R) {
     alias Func = unaryFun!_Func;
     R underlying;
     this(R underlying) {
         while(!underlying.empty && !Func(underlying.front)) 
underlying.popFront;
         this.underlying = underlying;
     }

     auto front() => underlying.front;
     void popFront() {
         underlying.popFront;
         while(!underlying.empty && !Func(underlying.front)) 
underlying.popFront;
     }

     bool empty() => underlying.empty;
}

// strawman protocol, no ref support.
auto getNext(R)(ref R range, ref bool valid) {
     typeof(range.front()) x;
     if((valid = !range.empty) == true)
     {
         x = range.front;
         range.popFront;
     }
     return x;
}

auto map(alias Func, R)(R rng) { return MapResult!(Func, R)(rng); 
}
auto filter(alias Func, R)(R rng) { return FilterResult!(Func, 
R)(rng); }
auto map2(alias Func, R)(R rng) { return MapResult2!(Func, 
R)(rng); }
auto filter2(alias Func, R)(R rng) { return FilterResult2!(Func, 
R)(rng); }

auto opApplyRange() {
     uint res;
     foreach(x; 
iota(1000000).filter!"a%2".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3") res += x;
     assert(res == 1890804904);
}

auto newRangeStyle() {
     uint res;
     auto r = 
iota(1000000).filter!"a%2".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3";
     bool valid = false;
     while(true) {
         auto x = r.getNext(valid);
         if(!valid) break;
         res += x;
     }
     assert(res == 1890804904);
}

auto phobosRange() {
     uint res;
     import std: map, filter;
     foreach(x; 
iota(1000000).filter!"a%2".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3") res += x;
     assert(res == 1890804904);
}

auto simpleRanges() {
     uint res;
     foreach(x; 
iota(1000000).filter2!"a%2".map2!"a*2".filter2!"a%3".map2!"a*2".filter2!"a%3".map2!"a*2".filter2!"a%3") res += x;
     assert(res == 1890804904);
}

unittest {
     import std: writeln;
     immutable runs = 100;
     auto data = benchmark!(opApplyRange, phobosRange, 
newRangeStyle, simpleRanges)(runs);
     writeln(i"opApplyRange took $(data[0]) for $(runs) runs");
     writeln(i"phobosRange took $(data[1]) for $(runs) runs");
     writeln(i"newRangeStyle took $(data[2]) for $(runs) runs");
     writeln(i"simpleRanges took $(data[3]) for $(runs) runs");
}
```

Some notes here:

1. I used phobos benchmark instead of the custom one. My results 
were very unstable, so I wanted something that was somewhat 
immune from random CPU usage on my laptop.
2. I added an assert to ensure the functions were actually 
working as expected!

Results:
```
opApplyRange took 186 ms, 123 μs, and 9 hnsecs for 100 runs
phobosRange took 1 sec, 345 ms, 67 μs, and 5 hnsecs for 100 runs
newRangeStyle took 83 ms, 706 μs, and 2 hnsecs for 100 runs
simpleRanges took 136 ms and 47 μs for 100 runs
```

Interestingly here, the simple ranges seem properly inlined. So 
somehow the optimizer could see through those despite having 3 
function API, but not the normal ranges? Is there something I did 
special in there that the normal functions prevent?

But even then, the performance of the `getNext` API destroys 
everything. So there is still something to this to be explored.

For sure we should figure out why phobos does so poorly here when 
clearly it's possible to do much better.

My laptop details:
```
LDC - the LLVM D compiler (1.39.0):
   based on DMD v2.109.1 and LLVM 17.0.6
   built with LDC - the LLVM D compiler (1.39.0)
   Default target: arm64-apple-darwin23.5.0
   Host CPU: apple-m1
```
-Steve