Proposal for SentinelInputRange
deadalnix
deadalnix at gmail.com
Thu Feb 28 10:12:35 PST 2013
On Thursday, 28 February 2013 at 17:00:51 UTC, Walter Bright
wrote:
> On 2/28/2013 6:31 AM, Steven Schveighoffer wrote:
>> If this doesn't translate to the same code, I don't know why
>> not.
>
> Try it and see with your favorite C compiler.
>
> Then try the lookahead cases I also posted.
You are being stubborn here.
You claim that this is significantly faster, but give no numbers.
You claim that the compiler can't do some optimization, when LLVM
does it already.
See sample code bellow, which is a typical structure find in
lexer.c :
int bar(int n) {
switch(n) {
case 0:
return 0; // Bad case !
case 25:
return 75;
case 42:
return 69;
case 666:
return 999;
default:
return -1;
}
}
int foo(int n) {
switch(n) {
case 1:
return 23;
case 2:
case 3:
return n;
default:
return bar(n);
}
}
As we see, foo don't check for 0, and bar does, but only when
necessary, as bar isn't called. Or this is what you think. Let's
see how SDC compile this :
define i32 @_D3barFiZi(i32 %arg.n) {
%n = alloca i32
store i32 %arg.n, i32* %n
br label %body
body: ; preds = %0
%1 = load i32* %n
switch i32 %1, label %default [
i32 0, label %.case
i32 25, label %.case1
i32 42, label %.case2
i32 666, label %.case3
]
switchstart: ; No
predecessors!
br label %.case
.case: ; preds =
%body, %switchstart
ret i32 0
.case1: ; preds = %body
ret i32 75
.case2: ; preds = %body
ret i32 69
.case3: ; preds = %body
ret i32 999
default: ; preds = %body
ret i32 -1
switchend: ; No
predecessors!
unreachable
}
define i32 @_D3fooFiZi(i32 %arg.n) {
%n = alloca i32
store i32 %arg.n, i32* %n
br label %body
body: ; preds = %0
%1 = load i32* %n
switch i32 %1, label %default [
i32 1, label %.case
i32 2, label %.case1
i32 3, label %.case2
]
switchstart: ; No
predecessors!
br label %.case
.case: ; preds =
%body, %switchstart
ret i32 23
.case1: ; preds = %body
br label %.case2
.case2: ; preds =
%body, %.case1
%2 = load i32* %n
ret i32 %2
default: ; preds = %body
%3 = load i32* %n
%4 = call i32 @_D3barFiZi(i32 %3)
ret i32 %4
switchend: ; No
predecessors!
unreachable
}
Here is the raw result of the frontend in LLVM IR. Now here is
what the optimizer give us :
define i32 @_D3barFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default [
i32 0, label %.case
i32 25, label %.case1
i32 42, label %.case2
i32 666, label %.case3
]
.case: ; preds =
%default, %.case3, %.case2, %.case1, %body
%merge = phi i32 [ 0, %body ], [ 75, %.case1 ], [ 69, %.case2
], [ 999, %.case3 ], [ -1, %default ]
ret i32 %merge
.case1: ; preds = %body
br label %.case
.case2: ; preds = %body
br label %.case
.case3: ; preds = %body
br label %.case
default: ; preds = %body
br label %.case
}
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default.i [
i32 1, label %.case
i32 2, label %.case2
i32 3, label %.case2
i32 0, label %_D3barFiZi.exit
i32 25, label %.case1.i
i32 42, label %.case2.i
i32 666, label %.case3.i
]
.case: ; preds =
%body, %_D3barFiZi.exit, %.case2
%merge = phi i32 [ 23, %body ], [ %arg.n, %.case2 ], [
%merge.i, %_D3barFiZi.exit ]
ret i32 %merge
.case2: ; preds =
%body, %body
br label %.case
.case1.i: ; preds = %body
br label %_D3barFiZi.exit
.case2.i: ; preds = %body
br label %_D3barFiZi.exit
.case3.i: ; preds = %body
br label %_D3barFiZi.exit
default.i: ; preds = %body
br label %_D3barFiZi.exit
_D3barFiZi.exit: ; preds =
%body, %.case1.i, %.case2.i, %.case3.i, %default.i
%merge.i = phi i32 [ 75, %.case1.i ], [ 69, %.case2.i ], [ 999,
%.case3.i ], [ -1, %default.i ], [ 0, %body ]
br label %.case
}
See, foo don't even call bar anymore. No let's include the null
check, as it would have been done if checking for empty on a
regular range (that happen to internally use a sentinel) :
int foo(int n) {
if(n != 0) {
switch(n) {
case 1:
return 23;
case 2:
case 3:
return n;
default:
return bar(n);
}
}
return bar(n);
}
I didn't repeated bar as it doesn't change.
Which is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default.i [
i32 0, label %_D3barFiZi.exit8
i32 1, label %.case
i32 2, label %.case2
i32 3, label %.case2
i32 666, label %.case3.i
i32 25, label %_D3barFiZi.exit
i32 42, label %.case2.i
]
.case: ; preds =
%body, %_D3barFiZi.exit8, %_D3barFiZi.exit, %.case2
%merge = phi i32 [ %arg.n, %.case2 ], [ %merge.i,
%_D3barFiZi.exit ], [ 0, %_D3barFiZi.exit8 ], [ 23, %body ]
ret i32 %merge
.case2: ; preds =
%body, %body
br label %.case
.case2.i: ; preds = %body
br label %_D3barFiZi.exit
.case3.i: ; preds = %body
br label %_D3barFiZi.exit
default.i: ; preds = %body
br label %_D3barFiZi.exit
_D3barFiZi.exit: ; preds =
%body, %.case2.i, %.case3.i, %default.i
%merge.i = phi i32 [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1,
%default.i ], [ 75, %body ]
br label %.case
_D3barFiZi.exit8: ; preds = %body
br label %.case
}
The exact damn same thing ! Let's try with a different schemes,
we check first an early return :
int foo(int n) {
if(n == 0) {
return bar(n);
}
switch(n) {
case 1:
return 23;
case 2:
case 3:
return n;
default:
return bar(n);
}
}
Guess what ? It is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default.i7 [
i32 0, label %_D3barFiZi.exit
i32 1, label %.case
i32 2, label %.case2
i32 3, label %.case2
i32 666, label %.case3.i6
i32 25, label %_D3barFiZi.exit8
i32 42, label %.case2.i5
]
_D3barFiZi.exit: ; preds =
%body, %_D3barFiZi.exit8, %.case2, %.case
%merge9 = phi i32 [ 0, %body ], [ 23, %.case ], [ %arg.n,
%.case2 ], [ %merge.i3, %_D3barFiZi.exit8 ]
ret i32 %merge9
.case: ; preds = %body
br label %_D3barFiZi.exit
.case2: ; preds =
%body, %body
br label %_D3barFiZi.exit
.case2.i5: ; preds = %body
br label %_D3barFiZi.exit8
.case3.i6: ; preds = %body
br label %_D3barFiZi.exit8
default.i7: ; preds = %body
br label %_D3barFiZi.exit8
_D3barFiZi.exit8: ; preds =
%body, %.case2.i5, %.case3.i6, %default.i7
%merge.i3 = phi i32 [ 69, %.case2.i5 ], [ 999, %.case3.i6 ], [
-1, %default.i7 ], [ 75, %body ]
br label %_D3barFiZi.exit
}
Yet again the same thing.
A range using a sentinel internally is clearly beneficial for a
lexer (like C strings). Defining explicitly a range that does so
is plain useless.
More information about the Digitalmars-d
mailing list