GDC vs dmd speed

Mon Oct 14 12:24:28 PDT 2013

Hello,

Whilst porting some C++ code I have discovered that the compiled output
from the gdc compiler seems to be 47% quicker than that from the dmd
compiler. The code I believe to be the 'busy' code is below, although I
could provide a complete test if anyone is interested. Is this an
expected result, and/or is there something I could change to make the
compilers perform similarly? The function render_minecraft gets called
repeatedly to render single frames. framebufindex is a simple function
that returns a buffer index; perhaps it is not being inlined? rgba is
another simple function.

Further details are:

dmd32 v2.063.2
with flags: ["-O", "-release", "-noboundscheck", "-inline"]

gdc 4.6 (0.29.1-4.6.4-1ubuntu4), which I assume might be v2.020?
with flags: ["-O2"]

// render the next frame into the given 'frame_buf'
//
// For every pixel (x, y) a view direction is built, rotated by two
// time-dependent angles, and stepped through info.map once per axis
// (d = 0, 1, 2). The nearest non-empty texel found supplies the colour,
// which is then shaded and written to frame_buf[framebufindex(x, y)].
//
// Params:
//   private_renderer_data = opaque pointer, cast to render_info* here; its
//                           'map' and 'texmap' members are read.
//   frame_buf             = destination buffer; one rgba(r, g, b) value is
//                           stored per pixel.
//
// width, height, mapindex, framebufindex and rgba are defined outside this
// snippet.
void render_minecraft(void * private_renderer_data, uint32_t * frame_buf)
{
     render_info * info = cast(render_info *)private_renderer_data;
     const float pi = 3.14159265f;

     // dx is the current position within a repeating 10-second period,
     // expressed as a fraction in [0, 1) of that period.
     float dx = cast(float)(Clock.currSystemTick.length % 
(TickDuration.ticksPerSec * 10)) / (TickDuration.ticksPerSec * 10);
     // Two rotation angles that oscillate once per period with amplitude
     // 0.4; xRot is centred on pi / 2, yRot on 0.
     float xRot = sin(dx * pi * 2) * 0.4f + pi / 2;
     float yRot = cos(dx * pi * 2) * 0.4f;
     float yCos = cos(yRot);
     float ySin = sin(yRot);
     float xCos = cos(xRot);
     float xSin = sin(xRot);

     // Ray origin. Only ox changes over time (by up to 64 per period).
     float ox = 32.5f + dx * 64;
     float oy = 32.5f;
     float oz = 32.5f;

     for (int x = 0; x < width; ++x) {
         // Horizontal offset from the centre column; note that it is
         // divided by height (not width), the same divisor as __yd below.
         float ___xd = cast(float)(x - width / 2) / height;
         for (int y = 0; y < height; ++y) {
             // Un-rotated direction for this pixel: (___xd, __yd, 1).
             float __yd = cast(float)(y - height / 2) / height;
             float __zd = 1;

             // Rotate in the y/z plane by yRot ...
             float ___zd = __zd * yCos + __yd * ySin;
             float _yd = __yd * yCos - __zd * ySin;

             // ... then in the x/z plane by xRot.
             float _xd = ___xd * xCos + ___zd * xSin;
             float _zd = ___zd * xCos - ___xd * xSin;

             // Best hit so far for this pixel. 'closest' starts at 32, so
             // nothing at distance 32 or more is ever sampled.
             uint32_t col = 0;
             uint32_t br = 255;
             float ddist = 0;
             float closest = 32;

             // One pass per axis: d == 0 steps along x, 1 along y, 2 along z.
             for (int d = 0; d < 3; ++d) {
                 // Component of the direction along the axis of this pass.
                 float dimLength = _xd;
                 if (d == 1)
                     dimLength = _yd;
                 if (d == 2)
                     dimLength = _zd;

                 // Scale factor that makes the component along axis d equal
                 // to +1 or -1, so each step advances one unit on that axis.
                 float ll = 1 / (dimLength < 0 ? -dimLength : dimLength);
                 float xd = (_xd) * ll;
                 float yd = (_yd) * ll;
                 float zd = (_zd) * ll;

                 // Origin coordinate on axis d minus its integer part
                 // (cast(int) truncates); when moving in the positive
                 // direction the remaining distance to the next integer
                 // is used instead.
                 float initial = ox - cast(int)ox;
                 if (d == 1)
                     initial = oy - cast(int)oy;
                 if (d == 2)
                     initial = oz - cast(int)oz;
                 if (dimLength > 0)
                     initial = 1 - initial;

                 // Distance at which the first sample on this axis is taken.
                 float dist = ll * initial;

                 // Position of that first sample.
                 float xp = ox + xd * initial;
                 float yp = oy + yd * initial;
                 float zp = oz + zd * initial;

                 // NOTE(review): presumably shifts the sample into the cell
                 // on the negative side of the boundary being crossed --
                 // confirm against mapindex.
                 if (dimLength < 0) {
                     if (d == 0)
                         xp--;
                     if (d == 1)
                         yp--;
                     if (d == 2)
                         zp--;
                 }

                 // Step until the sample is no nearer than the best hit
                 // found so far (on this axis or an earlier one).
                 while (dist < closest) {
                     uint tex = info.map[mapindex(xp, yp, zp)];

                     // A map value of 0 is skipped; anything else selects a
                     // texture.
                     if (tex > 0) {
                         // Texel coordinates: u is 0..15; v is 16..31 for
                         // the d == 0 and d == 2 passes.
                         uint u = cast(uint32_t)((xp + zp) * 16) & 15;
                         uint v = (cast(uint32_t)(yp * 16) & 15) + 16;
                         if (d == 1) {
                             // The y pass uses x/z for the texel and v in
                             // 0..15, or 32..47 when stepping in -y.
                             u = cast(uint32_t)(xp * 16) & 15;
                             v = (cast(uint32_t)(zp * 16) & 15);
                             if (yd < 0)
                                 v += 32;
                         }

                         // Each texture occupies 256 * 3 consecutive texmap
                         // entries, addressed as 16 entries per v row.
                         uint32_t cc = info.texmap[u + v * 16 + tex * 
256 * 3];
                         // A zero texel does not count as a hit; stepping
                         // continues past it.
                         if (cc > 0) {
                             col = cc;
                             // Distance factor: 255 at dist 0, falling
                             // towards 0 as dist approaches 32.
                             ddist = 255 - cast(int)(dist / 32 * 255);
                             // Per-axis brightness: 155 for d == 0, 255 for
                             // d == 1, 205 for d == 2.
                             br = 255 * (255 - ((d + 2) % 3) * 50) / 255;
                             // Setting closest = dist ends this while loop
                             // and bounds the remaining axis passes.
                             closest = dist;
                         }
                     }
                     xp += xd;
                     yp += yd;
                     zp += zd;
                     dist += ll;
                 }
             }

             // Take the three 8-bit channels of col (bits 16-23, 8-15 and
             // 0-7) and scale each by br * ddist / (255 * 255).
             const uint32_t r = cast(uint32_t)(((col >> 16) & 0xff) * br 
* ddist / (255 * 255));
             const uint32_t g = cast(uint32_t)(((col >> 8) & 0xff) * br 
* ddist / (255 * 255));
             const uint32_t b = cast(uint32_t)(((col) & 0xff) * br * 
ddist / (255 * 255));

             frame_buf[framebufindex(x, y)] = rgba(r, g, b);
         }
     }
}