How to track down a bad llvm optimization pass
Joakim via digitalmars-d-ldc
digitalmars-d-ldc at puremagic.com
Thu Jun 16 16:23:21 PDT 2016
Since the update to ddmdfe 2.070, a single assert trips up on
Android/ARM when running the druntime/phobos tests:
https://github.com/dlang/phobos/blob/v2.070.2/std/conv.d#L5716
I've copy-pasted the relevant lines of the test into another
file, testconv.d:
version(unittest) import std.conv, std.array, std.range;
unittest
{
auto r = toChars!(16)(16u);
assert(r.length == 2);
assert(r[1..2].array == "0");
}
The test runs fine with -O1, but with -O2 or -O3, i.e. the levels
at which inlining is enabled, the second assert fails. If I compile
with -O2/-O3 plus -disable-inlining, _or_ comment out the first assert,
it passes. Here's the IR generated for the unittest block, first with
the first assert commented out, and then with it included.
./bin/ldc2 -unittest -O2 --output-ll -c testconv.d -of=without.ll
define void @_D8testconv14__unittestL2_1FZv() comdat {
br label %forcond.i.i
forcond.i.i: ; preds =
%forcond.i.i, %0
%indvars.iv.i.i = phi i32 [ %indvars.iv.next.i.i, %forcond.i.i
], [ 1, %0 ] ; [#uses = 2, type = i32]
%value.0.i.i = phi i32 [ %1, %forcond.i.i ], [ 16, %0 ] ;
[#uses = 1, type = i32]
%1 = lshr i32 %value.0.i.i, 4 ; [#uses = 2]
%2 = icmp eq i32 %1, 0 ; [#uses = 1]
%indvars.iv.next.i.i = add nuw nsw i32 %indvars.iv.i.i, 1 ;
[#uses = 1]
br i1 %2, label %bounds.ok.i, label %forcond.i.i
bounds.ok.i: ; preds =
%forcond.i.i
%indvars.iv.i.i.lcssa = phi i32 [ %indvars.iv.i.i, %forcond.i.i
] ; [#uses = 1, type = i32]
%3 = tail call i8*
@_D4core6memory2GC6mallocFNaNbkkxC8TypeInfoZPv(%object.TypeInfo*
null, i32 2, i32 1) ; [#uses = 2]
%4 = insertvalue { i32, i8* } { i32 1, i8* undef }, i8* %3, 1 ;
[#uses = 1]
%5 = shl i32 %indvars.iv.i.i.lcssa, 2 ; [#uses = 1]
%6 = and i32 %5, 1020 ; [#uses = 1]
%7 = add nsw i32 %6, -12 ; [#uses = 1]
%8 = lshr i32 16, %7 ; [#uses = 1]
%9 = or i32 %8, 48 ; [#uses = 1]
%10 = trunc i32 %9 to i8 ; [#uses = 1]
store i8 %10, i8* %3, align 1
%11 = tail call i32 @_adEq2({ i32, i8* } %4, { i32, i8* } { i32
1, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0,
i32 0) }, %object.TypeInfo* nonnull @_D11TypeInfo_Aa6__initZ) #1
; [#uses = 1]
%12 = icmp eq i32 %11, 0 ; [#uses = 1]
br i1 %12, label %assertFailed, label %assertPassed
assertPassed: ; preds =
%bounds.ok.i
ret void
assertFailed: ; preds =
%bounds.ok.i
tail call void @_d_assert({ i32, i8* } { i32 10, i8*
getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i32 0, i32
0) }, i32 4) #2
unreachable
}
./bin/ldc2 -unittest -O2 --output-ll -c testconv.d -of=with.ll
define void @_D8testconv14__unittestL2_1FZv() comdat {
br label %forcond.i.i
forcond.i.i: ; preds =
%forcond.i.i, %0
%indvars.iv.i.i = phi i32 [ %indvars.iv.next.i.i, %forcond.i.i
], [ 1, %0 ] ; [#uses = 2, type = i32]
%value.0.i.i = phi i32 [ %1, %forcond.i.i ], [ 16, %0 ] ;
[#uses = 1, type = i32]
%1 = lshr i32 %value.0.i.i, 4 ; [#uses = 2]
%2 = icmp eq i32 %1, 0 ; [#uses = 1]
%indvars.iv.next.i.i = add nuw nsw i32 %indvars.iv.i.i, 1 ;
[#uses = 1]
br i1 %2, label
%_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit, label %forcond.i.i
_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit: ; preds = %forcond.i.i
%indvars.iv.i.i.lcssa = phi i32 [ %indvars.iv.i.i, %forcond.i.i
] ; [#uses = 1, type = i32]
%3 = and i32 %indvars.iv.i.i.lcssa, 255 ; [#uses = 1]
%4 = icmp eq i32 %3, 2 ; [#uses = 1]
br i1 %4, label %bounds.ok.i, label %assertFailed
bounds.ok.i: ; preds =
%_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit
%5 = tail call i8*
@_D4core6memory2GC6mallocFNaNbkkxC8TypeInfoZPv(%object.TypeInfo*
null, i32 2, i32 1) ; [#uses = 2]
%6 = insertvalue { i32, i8* } { i32 1, i8* undef }, i8* %5, 1 ;
[#uses = 1]
store i8 -1, i8* %5, align 1
%7 = tail call i32 @_adEq2({ i32, i8* } %6, { i32, i8* } { i32
1, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32
0, i32 0) }, %object.TypeInfo* nonnull @_D11TypeInfo_Aa6__initZ)
#3 ; [#uses = 1]
%8 = icmp eq i32 %7, 0 ; [#uses = 1]
br i1 %8, label %assertFailed2, label %assertPassed1
assertFailed: ; preds =
%_D3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZS3std4conv47__T7toCharsVii16TaVE3std5ascii10LetterCasei1TkZ7toCharsFNaNbNiNfkZ6Result.exit
tail call void @_d_assert({ i32, i8* } { i32 10, i8*
getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32
0) }, i32 3) #2
unreachable
assertPassed1: ; preds =
%bounds.ok.i
ret void
assertFailed2: ; preds =
%bounds.ok.i
tail call void @_d_assert({ i32, i8* } { i32 10, i8*
getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32
0) }, i32 4) #2
unreachable
}
Clearly the problem is that those seven instructions before the
call to _adEq2 in the bounds.ok.i section of the first IR get
turned into this nonsense instruction in the second IR:
store i8 -1, i8* %5, align 1
Why including the first assert interacts with inlining and some
optimization pass to produce this junk instruction, I
don't know. I don't think this is something that needs to be
fixed on the ldc end, but who knows. Does anyone have any tips on
tracking this down?
More information about the digitalmars-d-ldc
mailing list