sin, cos, other languages and DMD/LDC difference

Sat Feb 8 06:13:36 PST 2014

Philippe Sigaud:

> Now, what I found more confusing is that, compiling with DMD or 
> LDC, I got different results. Since Phobos code defining sin 
> and cos in std.math and core.stdc.math is the same for DMD and 
> LDC (duh!), I guess that means different intrinsics are used?

LDC2 optimizes this code even worse than DMD.

I opened a related thread:
http://forum.dlang.org/thread/rrryhcuqdffownpmlaen@forum.dlang.org

--------------------------

import core.stdc.stdio: printf;
import std.math: sin, cos;

double g(in double x) pure nothrow {
     return sin(2.3 * x) + cos(3.7 * x);
}

void main() {
     double x = 0;
     foreach (immutable _; 0 .. 100_000_000)
         x = x.g;

     printf("%f\n", x);
}

/*
-O -release -inline -noboundscheck

DMD:

_D4test1gFNaNbxdZd  comdat
         fld qword ptr 4[ESP]
         fmul    qword ptr FLAT:_DATA[00h]
         fsin
         fld qword ptr 4[ESP]
         fmul    qword ptr FLAT:_DATA[08h]
         fcos
         faddp   ST(1),ST
         ret 8

__Dmain comdat
L0:     sub ESP,0Ch
         xor EAX,EAX
         mov dword ptr 4[ESP],0
         mov dword ptr 8[ESP],0
L15:    fld qword ptr 4[ESP]
         inc EAX
         cmp EAX,05F5E100h
         fmul    qword ptr FLAT:_DATA[00h]
         fsin
         fld qword ptr 4[ESP]
         fmul    qword ptr FLAT:_DATA[08h]
         fcos
         faddp   ST(1),ST
         fstp    qword ptr 4[ESP]
         jb  L15
         push    dword ptr 8[ESP]
         mov EAX,offset FLAT:_DATA[010h]
         push    dword ptr 8[ESP]
         push    EAX
         call    near ptr _printf
         add ESP,0Ch
         add ESP,0Ch
         xor EAX,EAX
         ret

// -------------------------

LDC2:

__D4test1gFNaNbxdZd:
     pushl   %ebp
     movl    %esp, %ebp
     andl    $-8, %esp
     subl    $56, %esp
     movsd   LCPI0_0, %xmm0
     mulsd   8(%ebp), %xmm0
     movsd   %xmm0, 40(%esp)
     fldl    40(%esp)
     fstpt   (%esp)
     calll   __D3std4math3sinFNaNbNfeZe
     subl    $12, %esp
     fstpt   12(%esp)
     movsd   8(%ebp), %xmm0
     mulsd   LCPI0_1, %xmm0
     movsd   %xmm0, 48(%esp)
     fldl    48(%esp)
     fstpt   (%esp)
     calll   __D3std4math3cosFNaNbNfeZe
     subl    $12, %esp
     fldt    12(%esp)
     faddp   %st(1)
     fstpl   32(%esp)
     movsd   32(%esp), %xmm0
     movsd   %xmm0, 24(%esp)
     fldl    24(%esp)
     movl    %ebp, %esp
     popl    %ebp
     ret $8

__Dmain:
     pushl   %ebp
     movl    %esp, %ebp
     pushl   %esi
     andl    $-8, %esp
     subl    $72, %esp
     xorps   %xmm0, %xmm0
     movl    $100000000, %esi
     .align  16, 0x90
LBB1_1:
     movsd   %xmm0, 16(%esp)
     mulsd   LCPI1_0, %xmm0
     movsd   %xmm0, 48(%esp)
     fldl    48(%esp)
     fstpt   (%esp)
     calll   __D3std4math3sinFNaNbNfeZe
     subl    $12, %esp
     fstpt   28(%esp)
     movsd   16(%esp), %xmm0
     mulsd   LCPI1_1, %xmm0
     movsd   %xmm0, 56(%esp)
     fldl    56(%esp)
     fstpt   (%esp)
     calll   __D3std4math3cosFNaNbNfeZe
     subl    $12, %esp
     fldt    28(%esp)
     faddp   %st(1)
     fstpl   40(%esp)
     movsd   40(%esp), %xmm0
     decl    %esi
     jne LBB1_1
     movsd   %xmm0, 4(%esp)
     movl    $_.str, (%esp)
     calll   ___mingw_printf
     xorl    %eax, %eax
     leal    -4(%ebp), %esp
     popl    %esi
     popl    %ebp
     ret

--------------------------

import core.stdc.stdio: printf;
import core.stdc.math: sin, cos;

double g(in double x) pure nothrow {
     return sin(2.3 * x) + cos(3.7 * x);
}

void main() {
     double x = 0;
     foreach (immutable _; 0 .. 100_000_000)
         x = x.g;

     printf("%f\n", x);
}

/*
-O -release -inline -noboundscheck

LDC2:

__D5test21gFNaNbxdZd:
     pushl   %ebp
     movl    %esp, %ebp
     andl    $-8, %esp
     subl    $40, %esp
     movsd   LCPI0_0, %xmm0
     mulsd   8(%ebp), %xmm0
     movsd   %xmm0, (%esp)
     calll   _sin
     fstpl   32(%esp)
     movsd   32(%esp), %xmm0
     movsd   %xmm0, 8(%esp)
     movsd   8(%ebp), %xmm0
     mulsd   LCPI0_1, %xmm0
     movsd   %xmm0, (%esp)
     calll   _cos
     fstpl   24(%esp)
     movsd   8(%esp), %xmm0
     addsd   24(%esp), %xmm0
     movsd   %xmm0, 16(%esp)
     fldl    16(%esp)
     movl    %ebp, %esp
     popl    %ebp
     ret $8

__Dmain:
     pushl   %ebp
     movl    %esp, %ebp
     pushl   %esi
     andl    $-8, %esp
     subl    $56, %esp
     xorps   %xmm0, %xmm0
     movl    $100000000, %esi
     .align  16, 0x90
LBB1_1:
     movsd   %xmm0, 16(%esp)
     mulsd   LCPI1_0, %xmm0
     movsd   %xmm0, (%esp)
     calll   _sin
     fstpl   40(%esp)
     movsd   40(%esp), %xmm0
     movsd   %xmm0, 24(%esp)
     movsd   16(%esp), %xmm0
     mulsd   LCPI1_1, %xmm0
     movsd   %xmm0, (%esp)
     calll   _cos
     fstpl   32(%esp)
     movsd   24(%esp), %xmm0
     addsd   32(%esp), %xmm0
     decl    %esi
     jne LBB1_1
     movsd   %xmm0, 4(%esp)
     movl    $_.str, (%esp)
     calll   ___mingw_printf
     xorl    %eax, %eax
     leal    -4(%ebp), %esp
     popl    %esi
     popl    %ebp
     ret

--------------------------

import core.stdc.stdio: printf;

version(LDC) {
     import ldc.intrinsics;

     double g(in double x) pure nothrow {
         return llvm_sin(2.3 * x) + llvm_cos(3.7 * x);
     }
}

void main() {
     double x = 0;
     foreach (immutable _; 0 .. 100_000_000)
         x = x.g;

     printf("%f\n", x);
}

/*
-O -release -inline -noboundscheck

LDC2:

__D5test31gFNaNbxdZd:
     pushl   %ebp
     movl    %esp, %ebp
     andl    $-8, %esp
     subl    $40, %esp
     movsd   LCPI0_0, %xmm0
     mulsd   8(%ebp), %xmm0
     movsd   %xmm0, (%esp)
     calll   _sin
     fstpl   24(%esp)
     movsd   24(%esp), %xmm0
     movsd   %xmm0, 8(%esp)
     movsd   8(%ebp), %xmm0
     mulsd   LCPI0_1, %xmm0
     movsd   %xmm0, (%esp)
     calll   _cos
     fstpl   16(%esp)
     movsd   8(%esp), %xmm0
     addsd   16(%esp), %xmm0
     movsd   %xmm0, 32(%esp)
     fldl    32(%esp)
     movl    %ebp, %esp
     popl    %ebp
     ret $8

__Dmain:
     pushl   %ebp
     movl    %esp, %ebp
     pushl   %esi
     andl    $-8, %esp
     subl    $56, %esp
     xorps   %xmm0, %xmm0
     movl    $100000000, %esi
     .align  16, 0x90
LBB1_1:
     movsd   %xmm0, 16(%esp)
     mulsd   LCPI1_0, %xmm0
     movsd   %xmm0, (%esp)
     calll   _sin
     fstpl   40(%esp)
     movsd   40(%esp), %xmm0
     movsd   %xmm0, 24(%esp)
     movsd   16(%esp), %xmm0
     mulsd   LCPI1_1, %xmm0
     movsd   %xmm0, (%esp)
     calll   _cos
     fstpl   32(%esp)
     movsd   24(%esp), %xmm0
     addsd   32(%esp), %xmm0
     decl    %esi
     jne LBB1_1
     movsd   %xmm0, 4(%esp)
     movl    $_.str, (%esp)
     calll   ___mingw_printf
     xorl    %eax, %eax
     leal    -4(%ebp), %esp
     popl    %esi
     popl    %ebp
     ret

--------------------------

// C99 code
#include <stdio.h>
#include <math.h>

double g(const double x) {
     return sin(2.3 * x) + cos(3.7 * x);
}

int main() {
     double x = 0;
     for (int i = 0; i < 100000000; i++)
         x = g(x);

     printf("%f\n", x);
     return 0;
}

/*
gcc -fkeep-inline-functions -std=c99 -flto -S -Ofast test4.c -o 
test4.s

_g:
     fldl    4(%esp)
     fldl    LC0
     fmul    %st(1), %st
     fsin
     fxch    %st(1)
     fmull   LC1
     fcos
     faddp   %st, %st(1)
     ret

_main:
     pushl   %ebp
     movl    %esp, %ebp
     andl    $-16, %esp
     subl    $16, %esp
     call    ___main
     movl    $100000000, %eax
     fld1
     fldz
     fldl    LC0
     fxch    %st(2)
     jmp L19
     .p2align 4,,7
L22:
     fld %st(0)
     fmul    %st(2), %st
     fsin
     fxch    %st(1)
     fmull   LC1
     fcos
L19:
     subl    $1, %eax
     faddp   %st, %st(1)
     jne L22
     fstp    %st(1)
     fstpl   4(%esp)
     movl    $LC5, (%esp)
     call    _printf
     xorl    %eax, %eax
     leave
     .cfi_restore 5
     .cfi_def_cfa 4, 4
     ret

Bye,
bearophile