Proposal for SentinelInputRange

deadalnix deadalnix at gmail.com
Thu Feb 28 10:12:35 PST 2013


On Thursday, 28 February 2013 at 17:00:51 UTC, Walter Bright 
wrote:
> On 2/28/2013 6:31 AM, Steven Schveighoffer wrote:
>> If this doesn't translate to the same code, I don't know why 
>> not.
>
> Try it and see with your favorite C compiler.
>
> Then try the lookahead cases I also posted.

You are being stubborn here.

You claim that this is significantly faster, but give no numbers.
You claim that the compiler can't do some optimization, when LLVM 
does it already.

See sample code bellow, which is a typical structure find in 
lexer.c :

int bar(int n) {
	switch(n) {
		case 0:
			return 0; // Bad case !
		
		case 25:
			return 75;
		
		case 42:
			return 69;
		
		case 666:
			return 999;
		
		default:
			return -1;
	}
}

int foo(int n) {
	switch(n) {
		case 1:
			return 23;
		
		case 2:
		case 3:
			return n;
			
		default:
			return bar(n);
	}
}

As we see, foo don't check for 0, and bar does, but only when 
necessary, as bar isn't called. Or this is what you think. Let's 
see how SDC compile this :

define i32 @_D3barFiZi(i32 %arg.n) {
   %n = alloca i32
   store i32 %arg.n, i32* %n
   br label %body

body:                                             ; preds = %0
   %1 = load i32* %n
   switch i32 %1, label %default [
     i32 0, label %.case
     i32 25, label %.case1
     i32 42, label %.case2
     i32 666, label %.case3
   ]

switchstart:                                      ; No 
predecessors!
   br label %.case

.case:                                            ; preds = 
%body, %switchstart
   ret i32 0

.case1:                                           ; preds = %body
   ret i32 75

.case2:                                           ; preds = %body
   ret i32 69

.case3:                                           ; preds = %body
   ret i32 999

default:                                          ; preds = %body
   ret i32 -1

switchend:                                        ; No 
predecessors!
   unreachable
}

define i32 @_D3fooFiZi(i32 %arg.n) {
   %n = alloca i32
   store i32 %arg.n, i32* %n
   br label %body

body:                                             ; preds = %0
   %1 = load i32* %n
   switch i32 %1, label %default [
     i32 1, label %.case
     i32 2, label %.case1
     i32 3, label %.case2
   ]

switchstart:                                      ; No 
predecessors!
   br label %.case

.case:                                            ; preds = 
%body, %switchstart
   ret i32 23

.case1:                                           ; preds = %body
   br label %.case2

.case2:                                           ; preds = 
%body, %.case1
   %2 = load i32* %n
   ret i32 %2

default:                                          ; preds = %body
   %3 = load i32* %n
   %4 = call i32 @_D3barFiZi(i32 %3)
   ret i32 %4

switchend:                                        ; No 
predecessors!
   unreachable
}

Here is the raw result of the frontend in LLVM IR. Now here is 
what the optimizer give us :

define i32 @_D3barFiZi(i32 %arg.n) nounwind readnone {
body:
   switch i32 %arg.n, label %default [
     i32 0, label %.case
     i32 25, label %.case1
     i32 42, label %.case2
     i32 666, label %.case3
   ]

.case:                                            ; preds = 
%default, %.case3, %.case2, %.case1, %body
   %merge = phi i32 [ 0, %body ], [ 75, %.case1 ], [ 69, %.case2 
], [ 999, %.case3 ], [ -1, %default ]
   ret i32 %merge

.case1:                                           ; preds = %body
   br label %.case

.case2:                                           ; preds = %body
   br label %.case

.case3:                                           ; preds = %body
   br label %.case

default:                                          ; preds = %body
   br label %.case
}

define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
   switch i32 %arg.n, label %default.i [
     i32 1, label %.case
     i32 2, label %.case2
     i32 3, label %.case2
     i32 0, label %_D3barFiZi.exit
     i32 25, label %.case1.i
     i32 42, label %.case2.i
     i32 666, label %.case3.i
   ]

.case:                                            ; preds = 
%body, %_D3barFiZi.exit, %.case2
   %merge = phi i32 [ 23, %body ], [ %arg.n, %.case2 ], [ 
%merge.i, %_D3barFiZi.exit ]
   ret i32 %merge

.case2:                                           ; preds = 
%body, %body
   br label %.case

.case1.i:                                         ; preds = %body
   br label %_D3barFiZi.exit

.case2.i:                                         ; preds = %body
   br label %_D3barFiZi.exit

.case3.i:                                         ; preds = %body
   br label %_D3barFiZi.exit

default.i:                                        ; preds = %body
   br label %_D3barFiZi.exit

_D3barFiZi.exit:                                  ; preds = 
%body, %.case1.i, %.case2.i, %.case3.i, %default.i
   %merge.i = phi i32 [ 75, %.case1.i ], [ 69, %.case2.i ], [ 999, 
%.case3.i ], [ -1, %default.i ], [ 0, %body ]
   br label %.case
}

See, foo don't even call bar anymore. No let's include the null 
check, as it would have been done if checking for empty on a 
regular range (that happen to internally use a sentinel) :

int foo(int n) {
	if(n != 0) {
		switch(n) {
			case 1:
				return 23;
		
			case 2:
			case 3:
				return n;
			
			default:
				return bar(n);
		}
	}
	
	return bar(n);
}

I didn't repeated bar as it doesn't change.

Which is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
   switch i32 %arg.n, label %default.i [
     i32 0, label %_D3barFiZi.exit8
     i32 1, label %.case
     i32 2, label %.case2
     i32 3, label %.case2
     i32 666, label %.case3.i
     i32 25, label %_D3barFiZi.exit
     i32 42, label %.case2.i
   ]

.case:                                            ; preds = 
%body, %_D3barFiZi.exit8, %_D3barFiZi.exit, %.case2
   %merge = phi i32 [ %arg.n, %.case2 ], [ %merge.i, 
%_D3barFiZi.exit ], [ 0, %_D3barFiZi.exit8 ], [ 23, %body ]
   ret i32 %merge

.case2:                                           ; preds = 
%body, %body
   br label %.case

.case2.i:                                         ; preds = %body
   br label %_D3barFiZi.exit

.case3.i:                                         ; preds = %body
   br label %_D3barFiZi.exit

default.i:                                        ; preds = %body
   br label %_D3barFiZi.exit

_D3barFiZi.exit:                                  ; preds = 
%body, %.case2.i, %.case3.i, %default.i
   %merge.i = phi i32 [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1, 
%default.i ], [ 75, %body ]
   br label %.case

_D3barFiZi.exit8:                                 ; preds = %body
   br label %.case
}

The exact damn same thing ! Let's try with a different schemes, 
we check first an early return :

int foo(int n) {
	if(n == 0) {
		return bar(n);	
	}
	
	switch(n) {
		case 1:
			return 23;
	
		case 2:
		case 3:
			return n;
		
		default:
			return bar(n);
	}
}

Guess what ? It is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
   switch i32 %arg.n, label %default.i7 [
     i32 0, label %_D3barFiZi.exit
     i32 1, label %.case
     i32 2, label %.case2
     i32 3, label %.case2
     i32 666, label %.case3.i6
     i32 25, label %_D3barFiZi.exit8
     i32 42, label %.case2.i5
   ]

_D3barFiZi.exit:                                  ; preds = 
%body, %_D3barFiZi.exit8, %.case2, %.case
   %merge9 = phi i32 [ 0, %body ], [ 23, %.case ], [ %arg.n, 
%.case2 ], [ %merge.i3, %_D3barFiZi.exit8 ]
   ret i32 %merge9

.case:                                            ; preds = %body
   br label %_D3barFiZi.exit

.case2:                                           ; preds = 
%body, %body
   br label %_D3barFiZi.exit

.case2.i5:                                        ; preds = %body
   br label %_D3barFiZi.exit8

.case3.i6:                                        ; preds = %body
   br label %_D3barFiZi.exit8

default.i7:                                       ; preds = %body
   br label %_D3barFiZi.exit8

_D3barFiZi.exit8:                                 ; preds = 
%body, %.case2.i5, %.case3.i6, %default.i7
   %merge.i3 = phi i32 [ 69, %.case2.i5 ], [ 999, %.case3.i6 ], [ 
-1, %default.i7 ], [ 75, %body ]
   br label %_D3barFiZi.exit
}

Yet again the same thing.

A range using a sentinel internally is clearly beneficial for a 
lexer (like C strings). Defining explicitly a range that does so 
is plain useless.


More information about the Digitalmars-d mailing list