sclytrack sclytrack at fake.com
Tue Jan 24 09:49:07 PST 2012

```
Yes, I missed my lessons in clear communication.

-------------------------------------------------
+	The complete source files.
-------------------------------------------------

import std.stdio;

// Baseline case: all-float arithmetic. Prints a * b twice, once via the
// local s2 and once as a direct expression, so the two lines can be compared.
void calculate1(float a, float b)
{
// s1 uses the float literal 1.0f; it is never read afterwards.
float s1 = 1.0f - a;
float s2 = a * b;
writeln(s2);
writeln(a * b);
}

// Failing case: identical to calculate1 except that s1 uses the double
// literal 1.0 instead of 1.0f. In the posted output the first writeln
// prints 0 while the second prints 1.5.
void calculate2(float a, float b)
{
// 1.0 is a double literal, so this subtraction is done in double precision
// and narrowed back to float; s1 is never read afterwards.
float s1 = 1.0 - a;
// Expected to equal the a * b printed by the second writeln below.
float s2 = a * b;
writeln(s2);
writeln(a * b);
}

// Control case: the same statements as calculate2, but every parameter and
// local is double, so no float/double conversion occurs inside the function.
void calculate3(double a, double b)
{
// s1 is never read afterwards.
double s1 = 1.0 - a;
double s2 = a * b;
writeln(s2);
writeln(a * b);
}

// Driver: calls each variant with the same inputs (0.75f, 2.0f), printing a
// label before each call so the output lines can be attributed.
int main()
{
writeln("calculate1:");
calculate1(0.75f,2.0f);
writeln("calculate2:");
calculate2(0.75f,2.0f);
writeln("calculate3:");
// Float literals are passed to double parameters here.
calculate3(0.75f,2.0f);
return 0;
}

-------------------------------------------------
+	The complete output
-------------------------------------------------

calculate1:
1.5
1.5
calculate2:
0
1.5
calculate3:
1.5
1.5

-------------------------------------------------
+	Compiler
-------------------------------------------------

DMD64 D Compiler v2.057
Copyright (c) 1999-2011 by Digital Mars written by Walter Bright
Documentation: http://www.digitalmars.com/d/2.0/index.html
Usage:

I'm using Eclipse to compile it, with the -debug flag added.

-------------------------------------------------
+	Ubuntu 11.10 64 bit
-------------------------------------------------

uname -r
3.0.0-15-generic

-------------------------------------------------
+	BEHAVIOUR
-------------------------------------------------

As for the behaviour on 32 bit, I wish
somebody else would check it. :-)

In the previous message here below is the assembly
output of calculate2. Pay close attention to the
cvtss2sd, the cvtsd2ss and the mulss.

float s1 = 1.0 - a;
float s2 = a * b;

It converts the float a to double precision (cvtss2sd),
then performs the s1 calculation (subsd).

It then does the s2 calculation (mulss) with the
double-precision a and the single-precision float b.

Also, it performs the "cvtsd2ss %xmm2,%xmm2"
for no reason at all, as the result is no longer used
in the rest of the code.

-------------------------------------------------
+	objdump -S test1.o
-------------------------------------------------

0000000000000000 <_D4main10calculate2FffZv>:
0:	55                   	push   %rbp
1:	48 8b ec             	mov    %rsp,%rbp
4:	48 83 ec 20          	sub    $0x20,%rsp
8:	f3 0f 11 45 f0       	movss  %xmm0,-0x10(%rbp)
d:	f3 0f 11 4d f8       	movss  %xmm1,-0x8(%rbp)
12:	48 b8 00 00 00 00 00 	movabs $0x3ff0000000000000,%rax
19:	00 f0 3f
1c:	48 89 45 e0          	mov    %rax,-0x20(%rbp)
20:	f2 0f 10 55 e0       	movsd  -0x20(%rbp),%xmm2
25:	f3 0f 5a c9          	cvtss2sd %xmm1,%xmm1
29:	f2 0f 5c d1          	subsd  %xmm1,%xmm2
2d:	f2 0f 5a d2          	cvtsd2ss %xmm2,%xmm2
31:	f3 0f 59 c8          	mulss  %xmm0,%xmm1
35:	f3 0f 11 4d e8       	movss  %xmm1,-0x18(%rbp)
3a:	f3 0f 10 45 e8       	movss  -0x18(%rbp),%xmm0
3f:	e8 00 00 00 00       	callq  44 <_D4main10calculate2FffZv+0x44>
44:	f3 0f 10 45 f8       	movss  -0x8(%rbp),%xmm0
49:	f3 0f 10 4d f0       	movss  -0x10(%rbp),%xmm1
4e:	f3 0f 59 c1          	mulss  %xmm1,%xmm0
52:	e8 00 00 00 00       	callq  57 <_D4main10calculate2FffZv+0x57>
57:	c9                   	leaveq
58:	c3                   	retq
59:	90                   	nop
5a:	90                   	nop
5b:	90                   	nop

I'm going to eat now.

```