Intrinsics, std.bind problems, list comphrensions, recursivity test

bearophile bearophileHUGS at lycos.com
Fri Aug 24 01:06:19 PDT 2007


Some time ago I have tried to improve the Shootout nsieve-bits test, but I have failed:
http://shootout.alioth.debian.org/gp4sandbox/benchmark.php?test=nsievebits&lang=all
On my PC that version that uses intrinsics is faster, but on the PC used by the Shootout the version that uses local functions seems faster. Do you know how is this possibile?
BTW, now that I know D a bit better I think my version of the program can be improved some more with:
uint[] flags = void;
But probably the speed difference isn't enough to make it go up in the time rank.

---------------------------------

import std.stdio, std.bind, std.traits, std.random;

int randInt(int min, int max) {
  int k, n;
  n = (max - min) + 1;
  k = cast(int)(n * (rand() / (uint.max + 1.0)));
  return (k == n) ? k + min - 1 : k + min;
}

int randInt(int max) {
  int k, n;
  n = max + 1;
  k = cast(int)(n * (rand() / (uint.max + 1.0)));
  return (k == n) ? k - 1 : k;
}

void main() {
  // This line:
  // auto rnd = bind(&randInt, 100);

  // Raises:
  // ...\std\bind.d(395): static assert  is false

  // Line 394-395 of std.bind.d:
  // // make sure we'll copy all args the function is going to need
  // static assert (res >= minFuncArgs);

  auto rnd100 = bind(&randInt, 0, 99);

  writefln( typeid(typeof(rnd100)) , '\n');
  // Prints:
  // std.bind.BoundFunc!(int(*)(int min, int max),NullAlias,Tuple!(int,int) ).BoundFunc

  writefln(rnd100(), ' ', rnd100(), '\n'); // OK

  // This line:
  // ReturnType!(typeof(rnd100)) x;

  // Raises:
  // ...\std\traits.d(34): alias std.traits.ReturnType!(BoundFunc).ReturnType recursive alias declaration

  static if (is(rnd100 TyOut == return)) {
    TyOut x;
    writefln(typeid(typeof(x)));
  } else
    writefln("No return"); // Prints this
}

(In the end I have simply solved the problem without std.bind, defining a lambda function.)

---------------------------------

I think D developers can take a look at ShedSkin, its type inferencing is among the most powerful you can see around (but it's slow too, partially because it's written in Python). It shows how some Python can be compiled to C++. Python list comphrensions are a quite easy syntax to build various arrays (the latest Javascript of Mozilla has them too), this is a little Python code that shows few usages of LCs:

print [x*x for x in xrange(16) if x & 1]
# Prints: [1, 9, 25, 49, 81, 121, 169, 225]

print [x+y for x in xrange(5) if x & 1 for y in xrange(5)]
# Prints: [1, 2, 3, 4, 5, 3, 4, 5, 6, 7]

print [[x+y for x in xrange(3)] for y in xrange(3)]
# Prints: [[0, 1, 2], [1, 2, 3], [2, 3, 4]]

def odds():
  x = 0
  while x < 36:
    yield x
    x += 1

print [x*x for x in odds() if x > 30]
# Prints: [961, 1024, 1089, 1156, 1225]


This is how ShedSkin translates them to C++ (this is most of the produced code):

str *__name__;
int __12;

static inline list<int> *list_comp_0() {
  int x, __1, __0;
  list<int> *result = new list<int>();

  FAST_FOR(x,0,16,1,0,1)
    if ((x&1)) {
      result->append((x*x));
    }
  END_FOR

  return result;
}

static inline list<int> *list_comp_1() {
  int __5, __4, __3, __2, y, x;
  list<int> *result = new list<int>();

  FAST_FOR(x,0,5,1,2,3)
    if ((x&1)) {
      FAST_FOR(y,0,5,1,4,5)
        result->append((x+y));
      END_FOR

    }
  END_FOR

  return result;
}

static inline list<int> *list_comp_2(int y) {
  int x, __9, __8;
  list<int> *result = new list<int>();

  FAST_FOR(x,0,3,1,8,9)
    result->append((x+y));
  END_FOR

  return result;
}

static inline list<list<int> *> *list_comp_3() {
  int y, __7, __6;
  list<list<int> *> *result = new list<list<int> *>();

  FAST_FOR(y,0,3,1,6,7)
    result->append(list_comp_2(y));
  END_FOR

  return result;
}

static inline list<int> *list_comp_4(__iter<int> *__13) {
  __iter<int> *__11, *__10;
  int x;
  list<int> *result = new list<int>();

  FOR_IN(x,__13,11)
    if ((x>30)) {
      result->append((x*x));
    }
  END_FOR

  return result;
}

class __gen_odds : public __iter<int> {
public:
  int x;
  int __last_yield;

  __gen_odds() {
    __last_yield = -1;
  }

  int next() {
    switch(__last_yield) {
      case 0: goto __after_yield_0;
      default: break;
    }
    x = 0;

    while((x<36)) {
      __last_yield = 0;
      return x;
      __after_yield_0:;
      x = (x+1);
    }
    throw new StopIteration();
  }

};

__iter<int> *odds() {
  return new __gen_odds();
}

void __init() { // the main program
  __name__ = new str("__main__");

  print("%s\n", list_comp_0());
  print("%s\n", list_comp_1());
  print("%s\n", list_comp_3());
  print("%s\n", list_comp_4(odds()));
}


The list_comp_0 shows a small function and an if too.
list_comp_1 shows that LCs can be attached too.
list_comp_2 and list_comp_3 are nested, and list_comp_4 shows a case where you don't know how much long the resulting array will become, so it is built with successive appends.

-------------------------------

In one of my recent emails I have tried to show some code that D executes relatively slowly, but Walter has spotted a stupid error of mine. So I try again (it's a bit of code that comes from the Shootout):

First D version:

import std.conv;
int ack(int m, int n) {
  if (m == 0) return n+1;
  if (n == 0) return ack(m-1, 1);
  return ack(m-1, ack(m, n-1));
}
void main(string[] args) {
  int n = args.length > 1 ? toInt(args[1]) : 11;
  printf("ack(3,%d): %d\n", n, ack(3, n));
}


Second D version (nested function):

import std.conv;
void main(string[] args) {
  int ack(int m, int n) {
    if (m == 0) return n+1;
    if (n == 0) return ack(m-1, 1);
    return ack(m-1, ack(m, n-1));
  }
  int n = args.length > 1 ? toInt(args[1]) : 11;
  printf("ack(3,%d): %d\n", n, ack(3, n));
}


C version:

#include <stdio.h>
#include <stdlib.h>
int ack(int m, int n) {
  if (m == 0) return n+1;
  if (n == 0) return ack(m-1, 1);
  return ack(m-1, ack(m, n-1));
}
int main(int argc, char *argv[]) {
  int n = argc > 1 ? atoi(argv[1]) : 11;
  printf("Ack(3,%d): %d\n", n, ack(3, n));
  return 0;
}


Object Pascal version:

*{$APPTYPE CONSOLE}
uses SysUtils;
// integer is signed 32-bit
function ack(m: integer; n: integer): integer;
  begin
    if m = 0 then
        ack := n+1
    else
       if n = 0 then
         ack := ack(m-1, 1)
       else
         ack := ack(m-1, ack(m, n-1));
  end;
var n, code: Integer;
begin
  if ParamCount > 0 then
    Val(ParamStr(1), n, code)
  else
    n := 6;
  writeln('ack(3,', n, '): ', ack(3, n));
end.



Running time, input n = 11 (Pentium3 CPU):
  C 1:           2.91 seconds
  C 2:           4.61 s
  ObjectPascal:  6.78 s
  D 1:           8.15 s
  D 2:          16.36 s

Compilers used and settings:
  C 1: MinGW -O3 -fomit-frame-pointer
  C 2: MinGW -O3 (same code as C 1).
  ObjectPascal: Delphi 5, runtime errors disabled
  D 1: dmd -O -inline -release
  D 2: dmd -O -inline -release, nested function

Bye,
bearophile


More information about the Digitalmars-d-learn mailing list