How to track down a bad llvm optimization pass

Joakim via digitalmars-d-ldc digitalmars-d-ldc at puremagic.com
Thu Jun 16 16:23:21 PDT 2016


Since the update to ddmdfe 2.070, a single assert trips up on 
Android/ARM when running the druntime/phobos tests:

https://github.com/dlang/phobos/blob/v2.070.2/std/conv.d#L5716

I've copy-pasted the relevant lines of the test into another 
file, testconv.d:

version(unittest) import std.conv, std.array, std.range;
unittest
{
     auto r = toChars!(16)(16u);
     assert(r.length == 2);
     assert(r[1..2].array == "0");
}

The test runs fine with -O1, but with -O2 or -O3, i.e. the levels 
at which inlining is enabled, the second assert fails.  If I compile 
with -O2 or -O3 plus -disable-inlining, _or_ comment out the first 
assert, it passes.  Here's the IR generated for the unittest block, 
first with the first assert commented out and then with it included.

./bin/ldc2 -unittest -O2 --output-ll -c testconv.d -of=without.ll

define void @_D8testconv14__unittestL2_1FZv() comdat {
   br label %forcond.i.i

forcond.i.i:                                      ; preds = 
%forcond.i.i, %0
   %indvars.iv.i.i = phi i32 [ %indvars.iv.next.i.i, %forcond.i.i 
], [ 1, %0 ] ; [#uses = 2, type = i32]
   %value.0.i.i = phi i32 [ %1, %forcond.i.i ], [ 16, %0 ] ; 
[#uses = 1, type = i32]
   %1 = lshr i32 %value.0.i.i, 4                   ; [#uses = 2]
   %2 = icmp eq i32 %1, 0                          ; [#uses = 1]
   %indvars.iv.next.i.i = add nuw nsw i32 %indvars.iv.i.i, 1 ; 
[#uses = 1]
   br i1 %2, label %bounds.ok.i, label %forcond.i.i

bounds.ok.i:                                      ; preds = 
%forcond.i.i
   %indvars.iv.i.i.lcssa = phi i32 [ %indvars.iv.i.i, %forcond.i.i 
] ; [#uses = 1, type = i32]
   %3 = tail call i8* 
@_D4core6memory2GC6mallocFNaNbkkxC8TypeInfoZPv(%object.TypeInfo* 
null, i32 2, i32 1) ; [#uses = 2]
   %4 = insertvalue { i32, i8* } { i32 1, i8* undef }, i8* %3, 1 ; 
[#uses = 1]
   %5 = shl i32 %indvars.iv.i.i.lcssa, 2           ; [#uses = 1]
   %6 = and i32 %5, 1020                           ; [#uses = 1]
   %7 = add nsw i32 %6, -12                        ; [#uses = 1]
   %8 = lshr i32 16, %7                            ; [#uses = 1]
   %9 = or i32 %8, 48                              ; [#uses = 1]
   %10 = trunc i32 %9 to i8                        ; [#uses = 1]
   store i8 %10, i8* %3, align 1
   %11 = tail call i32 @_adEq2({ i32, i8* } %4, { i32, i8* } { i32 
1, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, 
i32 0) }, %object.TypeInfo* nonnull @_D11TypeInfo_Aa6__initZ) #1 
; [#uses = 1]
   %12 = icmp eq i32 %11, 0                        ; [#uses = 1]
   br i1 %12, label %assertFailed, label %assertPassed

assertPassed:                                     ; preds = 
%bounds.ok.i
   ret void

assertFailed:                                     ; preds = 
%bounds.ok.i
   tail call void @_d_assert({ i32, i8* } { i32 10, i8* 
getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i32 0, i32 
0) }, i32 4) #2
   unreachable
}

./bin/ldc2 -unittest -O2 --output-ll -c testconv.d -of=with.ll

define void @_D8testconv14__unittestL2_1FZv() comdat {
   br label %forcond.i.i

forcond.i.i:                                      ; preds = 
%forcond.i.i, %0
   %indvars.iv.i.i = phi i32 [ %indvars.iv.next.i.i, %forcond.i.i 
], [ 1, %0 ] ; [#uses = 2, type = i32]
   %value.0.i.i = phi i32 [ %1, %forcond.i.i ], [ 16, %0 ] ; 
[#uses = 1, type = i32]
   %1 = lshr i32 %value.0.i.i, 4                   ; [#uses = 2]
   %2 = icmp eq i32 %1, 0                          ; [#uses = 1]
   %indvars.iv.next.i.i = add nuw nsw i32 %indvars.iv.i.i, 1 ; 
[#uses = 1]
   br i1 %2, label 
%_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit, label %forcond.i.i

_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit: ; preds = %forcond.i.i
   %indvars.iv.i.i.lcssa = phi i32 [ %indvars.iv.i.i, %forcond.i.i 
] ; [#uses = 1, type = i32]
   %3 = and i32 %indvars.iv.i.i.lcssa, 255         ; [#uses = 1]
   %4 = icmp eq i32 %3, 2                          ; [#uses = 1]
   br i1 %4, label %bounds.ok.i, label %assertFailed

bounds.ok.i:                                      ; preds = 
%_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit
   %5 = tail call i8* 
@_D4core6memory2GC6mallocFNaNbkkxC8TypeInfoZPv(%object.TypeInfo* 
null, i32 2, i32 1) ; [#uses = 2]
   %6 = insertvalue { i32, i8* } { i32 1, i8* undef }, i8* %5, 1 ; 
[#uses = 1]
   store i8 -1, i8* %5, align 1
   %7 = tail call i32 @_adEq2({ i32, i8* } %6, { i32, i8* } { i32 
1, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 
0, i32 0) }, %object.TypeInfo* nonnull @_D11TypeInfo_Aa6__initZ) 
#3 ; [#uses = 1]
   %8 = icmp eq i32 %7, 0                          ; [#uses = 1]
   br i1 %8, label %assertFailed2, label %assertPassed1

assertFailed:                                     ; preds = 
%_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit
   tail call void @_d_assert({ i32, i8* } { i32 10, i8* 
getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 
0) }, i32 3) #2
   unreachable

assertPassed1:                                    ; preds = 
%bounds.ok.i
   ret void

assertFailed2:                                    ; preds = 
%bounds.ok.i
   tail call void @_d_assert({ i32, i8* } { i32 10, i8* 
getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 
0) }, i32 4) #2
   unreachable
}

Clearly the problem is that the last seven instructions before the 
call to _adEq2 in the bounds.ok.i section of the first IR (the 
shl/and/add/lshr/or/trunc chain, %5 through %10, plus the store) 
get turned into this single nonsense instruction in the second IR:

store i8 -1, i8* %5, align 1

I don't know why including the first assert interacts with 
inlining and some optimization pass to produce this junk 
instruction.  I don't think this is something that needs to be 
fixed on the ldc end, but who knows.  Does anyone have any tips 
on tracking this down?


More information about the digitalmars-d-ldc mailing list