Proposed Changes to the Range API for Phobos v3
Steven Schveighoffer
schveiguy at gmail.com
Tue Sep 24 14:36:05 UTC 2024
On Monday, 23 September 2024 at 08:20:59 UTC, Eyal Lotem wrote:
> Results on my laptop:
>
> **opApplyRange** took **13637** hnsecs
> **phobosRange** took **207873** hnsecs
I implemented a quick-and-dirty "getNext" style range as outlined
in the post, which is not the greatest, and doesn't support ref
ranges.
I also just did a "simple implementation" of Map and Filter as
ranges.
The results tell an interesting story.
```d
import std: iota, unaryFun, ForeachType;
import std.datetime.stopwatch : benchmark;
struct MapResult(alias _Func, R) {
alias Func = unaryFun!_Func;
R underlying;
auto getNext(ref bool valid) {
auto x = underlying.getNext(valid);
if(valid) return Func(x);
return typeof(Func(x)).init;
}
int opApply(scope int delegate(ref
typeof(Func(ForeachType!R.init))) dg) {
foreach(ref x; underlying) {
auto res = Func(x);
if(int rc = dg(res)) return rc;
}
return 0;
}
}
struct MapResult2(alias _Func, R) {
alias Func = unaryFun!_Func;
R underlying;
auto front() => Func(underlying.front);
void popFront() => underlying.popFront;
bool empty() => underlying.empty;
}
struct FilterResult(alias _Func, R) {
alias Func = unaryFun!_Func;
R underlying;
auto getNext(ref bool valid) {
typeof(underlying.getNext(valid)) x;
do {
x = underlying.getNext(valid);
} while(valid && !Func(x));
return x;
}
int opApply(scope int delegate(ref ForeachType!R) dg) {
foreach(ref x; underlying) {
if(Func(x)) if(int rc = dg(x)) return rc;
}
return 0;
}
}
struct FilterResult2(alias _Func, R) {
alias Func = unaryFun!_Func;
R underlying;
this(R underlying) {
while(!underlying.empty && !Func(underlying.front))
underlying.popFront;
this.underlying = underlying;
}
auto front() => underlying.front;
void popFront() {
underlying.popFront;
while(!underlying.empty && !Func(underlying.front))
underlying.popFront;
}
bool empty() => underlying.empty;
}
// strawman protocol, no ref support.
auto getNext(R)(ref R range, ref bool valid) {
typeof(range.front()) x;
if((valid = !range.empty) == true)
{
x = range.front;
range.popFront;
}
return x;
}
auto map(alias Func, R)(R rng) { return MapResult!(Func, R)(rng);
}
auto filter(alias Func, R)(R rng) { return FilterResult!(Func,
R)(rng); }
auto map2(alias Func, R)(R rng) { return MapResult2!(Func,
R)(rng); }
auto filter2(alias Func, R)(R rng) { return FilterResult2!(Func,
R)(rng); }
auto opApplyRange() {
uint res;
foreach(x;
iota(1000000).filter!"a%2".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3") res += x;
assert(res == 1890804904);
}
auto newRangeStyle() {
uint res;
auto r =
iota(1000000).filter!"a%2".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3";
bool valid = false;
while(true) {
auto x = r.getNext(valid);
if(!valid) break;
res += x;
}
assert(res == 1890804904);
}
auto phobosRange() {
uint res;
import std: map, filter;
foreach(x;
iota(1000000).filter!"a%2".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3".map!"a*2".filter!"a%3") res += x;
assert(res == 1890804904);
}
auto simpleRanges() {
uint res;
foreach(x;
iota(1000000).filter2!"a%2".map2!"a*2".filter2!"a%3".map2!"a*2".filter2!"a%3".map2!"a*2".filter2!"a%3") res += x;
assert(res == 1890804904);
}
unittest {
import std: writeln;
immutable runs = 100;
auto data = benchmark!(opApplyRange, phobosRange,
newRangeStyle, simpleRanges)(runs);
writeln(i"opApplyRange took $(data[0]) for $(runs) runs");
writeln(i"phobosRange took $(data[1]) for $(runs) runs");
writeln(i"newRangeStyle took $(data[2]) for $(runs) runs");
writeln(i"simpleRanges took $(data[3]) for $(runs) runs");
}
```
Some notes here:
1. I used phobos benchmark instead of the custom one. My results
were very unstable, so I wanted something that was somewhat
immune from random CPU usage on my laptop.
2. I added an assert to ensure the functions were actually
working as expected!
Results:
```
opApplyRange took 186 ms, 123 μs, and 9 hnsecs for 100 runs
phobosRange took 1 sec, 345 ms, 67 μs, and 5 hnsecs for 100 runs
newRangeStyle took 83 ms, 706 μs, and 2 hnsecs for 100 runs
simpleRanges took 136 ms and 47 μs for 100 runs
```
Interestingly here, the simple ranges seem properly inlined. So
somehow the optimizer could see through those despite having 3
function API, but not the normal ranges? Is there something I did
special in there that the normal functions prevent?
But even then, the performance of the `getNext` API destroys
everything. So there is still something to this to be explored.
For sure we should figure out why phobos does so poorly here when
clearly it's possible to do much better.
My laptop details:
```
LDC - the LLVM D compiler (1.39.0):
based on DMD v2.109.1 and LLVM 17.0.6
built with LDC - the LLVM D compiler (1.39.0)
Default target: arm64-apple-darwin23.5.0
Host CPU: apple-m1
```
-Steve
More information about the Digitalmars-d
mailing list