[Issue 3751] New: Optimalization error in some floating point code
d-bugmail at puremagic.com
d-bugmail at puremagic.com
Thu Jan 28 20:18:00 PST 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751
Summary: Optimalization error in some floating point code
Product: D
Version: 2.039
Platform: x86
OS/Version: Linux
Status: NEW
Severity: major
Priority: P2
Component: DMD
AssignedTo: nobody at puremagic.com
ReportedBy: baryluk at smp.if.uj.edu.pl
--- Comment #0 from Witold Baryluk <baryluk at smp.if.uj.edu.pl> 2010-01-28 20:17:58 PST ---
I tested this in 2.039 and 2.028, so probably all versions in between are
affected.
I don't know how far back this bug was introduced.
Here is a simplified test case (functionally it is buggy, but it is sufficient
to show the bug):
import std.math;
import std.stdio;
// Reduced test case for the optimizer bug.
// Bisects the interval [-7, 7] on f(x) = exp(-0.5*x*x) * (x + 0.7) - z.
// The loop can only exit through one of the two equality tests below, i.e.
// when the midpoint `half` compares equal to `left` or to `right`.
double bisect(double z) {
double left = -7.0, right = 7.0, half;
while (true) { // do {} while(true); // also has this problem
half = (left+right)*0.5;
version(something) {
writefln("%s", half); // adding this call makes the problem go away
}
// Exit: the midpoint has collapsed onto the left bound.
if (left == half) {
return half; // or break
}
// Exit: the midpoint has collapsed onto the right bound.
if (half == right) {
return half; // or break
}
/+
// Has the same effect as the two if statements above.
if ((left == half) || (half == right)) {
return half; // or break
}
+/
double fhalf = exp(-0.5*half*half) * (half + 0.7) - z;
// Narrow the interval. If fhalf is NaN neither branch runs and both
// bounds are left unchanged.
if (fhalf > 0.0) {
right = half;
} else if (fhalf <= 0.0) {
left = half;
}
};
//return half; // not reachable, irrelevant
}
// Driver for the test case: prints x and bisect(x) for x = 0.101 .. 0.109.
void main() {
// i runs over 1 .. 9; the upper bound of the range is exclusive.
foreach (i; 1 .. 10) {
auto x = 0.1 + 0.001*i;
writefln("%g %g", x, bisect(x));
}
}
Compile without optimization:
# dmd blad.d; ./blad
0.101 -0.580467
0.102 -0.579361
0.103 -0.578256
0.104 -0.577153
0.105 -0.57605
0.106 -0.574949
0.107 -0.573849
0.108 -0.57275
0.109 -0.571653
#
Compile with optimization:
# dmd -O blad.d; ./blad
0.101 -0.580467
.... nothing happens, CPU usage 100%
^C
#
Compile with optimization and the dummy write:
# dmd -O -version=something blad.d; ./blad
...
...
0.109 -0.571653
# // terminates correctly, just as it does without optimization
You can check the assembler code here:
Version without optimization:
; Unoptimized build of bisect(double z).
; Stack slots, matched against the D source:
;   -020h[EBP] = left, -018h[EBP] = right, -010h[EBP] = half,
;   -8[EBP] = fhalf, 8[EBP] = z (the argument).
; Every comparison in this listing reloads its operands from memory.
; NOTE(review): " at SYM32" / " at PC32" look like "@SYM32" / "@PC32"
; rewritten by the mail archive's address obfuscation -- confirm.
.text._D4blad6bisectFdZd segment
assume CS:.text._D4blad6bisectFdZd
_D4blad6bisectFdZd:
push EBP
mov EBP,ESP
sub ESP,020h
; Store the initial values of left, right and half from .rodata constants.
fld qword ptr FLAT:.rodata[08h]
fstp qword ptr -020h[EBP]
fld qword ptr FLAT:.rodata[019h]
fstp qword ptr -018h[EBP]
fld qword ptr FLAT:.rodata[02Ah]
fstp qword ptr -010h[EBP]
; Loop head: half = (left + right) * constant, stored to memory and popped.
L21: fld qword ptr -020h[EBP]
fadd qword ptr -018h[EBP]
fmul qword ptr _TMP6 at SYM32[09h]
fstp qword ptr -010h[EBP]
; if (left == half): both operands are loaded back from their stack slots.
fld qword ptr -020h[EBP]
fld qword ptr -010h[EBP]
fucompp ST(1),ST
; Copy the FPU condition codes into EFLAGS; not-equal or unordered skips
; the return.
fstsw AX
sahf
jne L46
jp L46
; return half (left on the FPU stack); ret 8 also pops the 8-byte argument.
fld qword ptr -010h[EBP]
leave
ret 8
; if (half == right): same pattern, operands loaded from memory.
L46: fld qword ptr -010h[EBP]
fld qword ptr -018h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jne L5C
jp L5C
fld qword ptr -010h[EBP]
leave
ret 8
; fhalf = exp(half * constant * half) * (half + constant) - z
; The argument for exp is passed in 8 bytes of stack.
L5C: fld qword ptr -010h[EBP]
fmul qword ptr _TMP6 at SYM32[049h]
fmul qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr _D3std4math3expFNaNbdZd at PC32
fld qword ptr -010h[EBP]
fadd qword ptr _TMP10 at SYM32
fmulp ST(1),ST
fsub qword ptr 8[EBP]
fstp qword ptr -8[EBP]
; if (fhalf > 0.0): ftst compares the reloaded fhalf with 0.0.
fld qword ptr -8[EBP]
ftst
fstsw AX
sahf
fstp ST
jbe L98
; right = half
fld qword ptr -010h[EBP]
fstp qword ptr -018h[EBP]
jmp short L21
; else if (fhalf <= 0.0): greater-than or unordered goes straight back to
; the loop head without touching the bounds.
L98: fld qword ptr -8[EBP]
ftst
fstsw AX
sahf
fstp ST
ja L21
jp L21
; left = half
fld qword ptr -010h[EBP]
fstp qword ptr -020h[EBP]
jmp near ptr L21
nop
nop
nop
.text._D4blad6bisectFdZd ends
Version with optimization:
; Optimized (-O) build of bisect(double z); this is the build that hangs.
; Stack slots, matched against the D source:
;   -020h[EBP] = left, -018h[EBP] = right, -010h[EBP] = half,
;   -8[EBP] = fhalf, 8[EBP] = z (the argument).
; The loop is laid out differently from the unoptimized build: the midpoint
; computation and the left == half test sit at L77, at the bottom of the loop.
; NOTE(review): " at SYM32" / " at PC32" look like "@SYM32" / "@PC32"
; rewritten by the mail archive's address obfuscation -- confirm.
_D4blad6bisectFdZd:
push EBP
mov EBP,ESP
sub ESP,020h
; The half slot is filled with zero by two dword stores (here and below),
; so half starts as 0.0; nothing is computed for it before the first pass
; through L28.
mov dword ptr -010h[EBP],0
fld qword ptr FLAT:.rodata[0Fh]
fld qword ptr FLAT:.rodata[01Dh]
fxch ST1
mov dword ptr -0Ch[EBP],0
; Store the initial values of left and right.
fstp qword ptr -020h[EBP]
fstp qword ptr -018h[EBP]
; if (half == right): both operands are loaded from memory.
L28: fld qword ptr -010h[EBP]
fld qword ptr -018h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jne L40
jp L40
; return half; ret 8 also pops the 8-byte argument.
L37: fld qword ptr -010h[EBP]
mov ESP,EBP
pop EBP
ret 8
; fhalf = exp(half * constant * half) * (half + constant) - z
L40: fld qword ptr -010h[EBP]
fmul qword ptr _TMP6 at SYM32[025h]
fmul qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr _D3std4math3expFNaNbdZd at PC32
fld qword ptr -010h[EBP]
fadd qword ptr _TMP6 at SYM32[044h]
fmulp ST(1),ST
fsub qword ptr 8[EBP]
; fhalf is stored without popping and tested directly in the register.
fst qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
jbe L93
; right = half
fld qword ptr -010h[EBP]
fstp qword ptr -018h[EBP]
; half = (left + right) * constant, then if (left == half).
; A copy of left is kept in ST(1). The product is written to the half slot
; with fst (no pop), and fucompp then compares the value still held in
; ST(0) with that copy of left; half is NOT reloaded from memory here.
; NOTE(review): an x87 register can carry more precision than the 64-bit
; value just stored, so this test can disagree with the unoptimized
; left == half, which compares the stored double. That looks like the
; cause of the endless loop -- confirm.
L77: fld qword ptr -020h[EBP]
fld ST0
fadd qword ptr -018h[EBP]
fmul qword ptr _TMP10 at SYM32[09h]
fst qword ptr -010h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
; Unordered continues the loop, equal returns half, anything else continues.
jp L28
je L37
jmp short L28
; else if (fhalf <= 0.0): fhalf is reloaded from memory for this test;
; greater-than or unordered skips the assignment.
L93: fld qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
ja L77
jp L77
; left = half
fld qword ptr -010h[EBP]
fstp qword ptr -020h[EBP]
jmp short L77
nop
nop
nop
.text._D4blad6bisectFdZd ends
Version with "something" added artificially:
; Optimized build of bisect(double z) with -version=something, i.e. with the
; extra writefln call in the loop; this build terminates.
; Stack slots, matched against the D source:
;   -020h[EBP] = left, -018h[EBP] = right, -010h[EBP] = half,
;   -8[EBP] = fhalf, 8[EBP] = z (the argument).
; NOTE(review): " at SYM32" / " at PC32" look like "@SYM32" / "@PC32"
; rewritten by the mail archive's address obfuscation -- confirm.
.text._D4blad6bisectFdZd segment
assume CS:.text._D4blad6bisectFdZd
_D4blad6bisectFdZd:
push EBP
mov EBP,ESP
sub ESP,020h
; The half slot is filled with zero by two dword stores (here and below),
; so half starts as 0.0.
mov dword ptr -010h[EBP],0
fld qword ptr _TMP0 at SYM32[017h]
fld qword ptr _TMP0 at SYM32[025h]
fxch ST1
mov dword ptr -0Ch[EBP],0
; Store the initial values of left and right.
fstp qword ptr -020h[EBP]
fstp qword ptr -018h[EBP]
; First writefln call, made before the loop body is entered.
; NOTE(review): the two zero dwords are presumably the initial half (0.0)
; and the other two pushes the format string -- confirm.
push dword ptr _TMP0 at SYM32[02Eh]
push dword ptr _TMP0 at SYM32[030h]
push 0
push 0
call near ptr ...writeflnTAyaTdZ8writeflnFAyadZv at PC32
; if (half == right): both operands are loaded from memory.
L3D: fld qword ptr -010h[EBP]
fld qword ptr -018h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jne L55
jp L55
; return half; ret 8 also pops the 8-byte argument.
L4C: fld qword ptr -010h[EBP]
mov ESP,EBP
pop EBP
ret 8
; fhalf = exp(half * constant * half) * (half + constant) - z
L55: fld qword ptr -010h[EBP]
fmul qword ptr _TMP7 at SYM32[03Ah]
fmul qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr _D3std4math3expFNaNbdZd at PC32
fld qword ptr -010h[EBP]
fadd qword ptr _TMP10 at SYM32[09h]
fmulp ST(1),ST
fsub qword ptr 8[EBP]
; fhalf is stored without popping and tested directly in the register.
fst qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
jbe LCA
; right = half
fld qword ptr -010h[EBP]
fstp qword ptr -018h[EBP]
; half = (left + right) * constant. The result is written to the half slot
; and a second copy is popped onto the stack as the writefln argument.
L8C: push dword ptr _TMP10 at SYM32[02h]
push dword ptr _TMP10 at SYM32[04h]
fld qword ptr -020h[EBP]
fadd qword ptr -018h[EBP]
fmul qword ptr _TMP11 at SYM32[028h]
fst qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr ...writeflnTAyaTdZ8writeflnFAyadZv at PC32
; if (left == half): after the call both left and half are loaded from
; memory, so the comparison uses the stored value of half.
fld qword ptr -020h[EBP]
fld qword ptr -010h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
; Unordered continues the loop, equal returns half, anything else continues.
jp L3D
je L4C
jmp near ptr L3D
; else if (fhalf <= 0.0): fhalf is reloaded from memory for this test;
; greater-than or unordered skips the assignment.
LCA: fld qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
ja L8C
jp L8C
; left = half
fld qword ptr -010h[EBP]
fstp qword ptr -020h[EBP]
jmp short L8C
.text._D4blad6bisectFdZd ends
What is interesting is that adding just a single writefln changes this assembler
code in many more places than just the call to this function.
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
More information about the Digitalmars-d-bugs
mailing list