Speed of synchronized

Sun Oct 16 01:41:26 PDT 2016

Hi,

for an exercise I had to implement a thread-safe counter.
This is what I came up with:

---SNIP---

import std.stdio;
import core.thread;
import std.conv;
import std.datetime;
static import core.atomic;
import core.sync.mutex;

// Benchmark parameters. They are never modified, so they are declared
// `immutable`: a plain module-level `int` in D is a mutable thread-local
// variable, which would give every worker thread its own separate copy.
immutable int NR_OF_THREADS = 100;      // worker threads started per run
immutable int NR_OF_INCREMENTS = 10000; // increments performed by each thread

/// Common interface of the counter variants compared by the benchmark.
/// Both methods are declared `shared`, i.e. they are called on a `shared`
/// instance.
interface Counter {
  /// Advances the counter (every implementation in this file adds one).
  void increment() shared;
  /// Returns the current counter value.
  long get() shared;
}
/// Baseline counter that performs NO synchronisation at all. It exists only
/// to show the cost (and the lost updates) of an unprotected increment.
class ThreadUnsafeCounter : Counter {
  long counter;

  /// Adds one to the counter without locking or atomics (intentional race).
  void increment() shared {
    // A plain `counter++` on a `shared` field is a read-modify-write that the
    // compiler flags (it suggests core.atomic.atomicOp instead). The race is
    // the whole point of this class, so `shared` is stripped explicitly.
    ++*cast(long*) &counter;
  }

  /// Returns the current value; the read is not synchronised either.
  long get() shared {
    return counter;
  }
}

/// Counter protected by the object's own monitor via `synchronized` methods.
class ThreadSafe1Counter : Counter {
  private long counter;

  /// Adds one to the counter while holding the object's monitor.
  synchronized void increment() shared {
    // The monitor is held here, so it is safe to strip `shared` for the
    // increment; a bare `counter++` on a shared field is flagged by the
    // compiler as a non-atomic read-modify-write.
    ++*cast(long*) &counter;
  }

  /// Returns the current value. Also `synchronized`, so the read takes the
  /// same monitor as `increment()` instead of racing with it.
  synchronized long get() shared {
    return counter;
  }
}

/// Counter protected by an explicit `core.sync.mutex.Mutex`.
class ThreadSafe2Counter : Counter {
  private long counter;

  // Per-instance lock. The original declared this `__gshared`, which makes
  // the member effectively static: all instances shared one mutex and every
  // constructor call replaced it. Reference kept from the original post:
  // http://forum.dlang.org/post/rzyooanimrynpmqlywmf@forum.dlang.org
  Mutex lock;

  /// Creates the mutex that guards this instance's counter.
  this() shared {
    // Inside a shared instance the field is typed shared(Mutex); the mutex
    // is created unshared and cast so no shared Mutex constructor is needed.
    lock = cast(shared) new Mutex;
  }

  /// Adds one to the counter while holding the mutex.
  void increment() shared {
    synchronized (cast(Mutex) lock) {
      // Lock is held, so stripping `shared` for the increment is safe; a
      // bare `counter++` on a shared field is flagged by the compiler.
      ++*cast(long*) &counter;
    }
  }

  /// Returns the current value, read under the same mutex as the writes.
  long get() shared {
    synchronized (cast(Mutex) lock) {
      return counter;
    }
  }
}

/// Lock-free counter built on `core.atomic`.
class AtomicCounter : Counter {
  private long counter;

  /// Atomically adds one to the counter.
  void increment() shared {
    core.atomic.atomicOp!"+="(this.counter, 1);
  }

  /// Returns the current value using an atomic load, so the 64-bit read
  /// is paired with the atomic writes instead of being a plain access.
  long get() shared {
    return core.atomic.atomicLoad(this.counter);
  }
}
/// Times each Counter implementation under NR_OF_THREADS concurrent threads,
/// then times the mutex example adapted from the core.sync.mutex docs.
void main() {
  // Runs the benchmark once for the given counter implementation and prints
  // the final count, the expected count and the elapsed time.
  void runWith(Impl)() {
    shared Impl counter = new shared Impl();

    // Workload: create all threads, start them, then wait for all of them.
    void hammer() {
      Thread[] workers;
      foreach (threadNo; 0 .. NR_OF_THREADS) {
        workers ~= new Thread({
            foreach (step; 0 .. NR_OF_INCREMENTS) {
              counter.increment();
            }
          });
      }
      foreach (worker; workers) {
        worker.start();
      }
      foreach (worker; workers) {
        worker.join();
      }
    }

    auto timings = benchmark!(hammer)(1);
    auto expected = NR_OF_THREADS * NR_OF_INCREMENTS;
    writeln(typeid(counter), ": got: ", counter.get(), " expected: ",
            expected, " in ", to!Duration(timings[0]));
  }

  runWith!(AtomicCounter)();
  runWith!(ThreadSafe1Counter)();
  runWith!(ThreadSafe2Counter)();
  runWith!(ThreadUnsafeCounter)();

  // Mutex example taken from the core.sync.mutex documentation: a plain
  // local int incremented under a Mutex by a ThreadGroup.
  void mutexExample() {
    auto guard = new Mutex;
    int threadCount = NR_OF_THREADS;
    int triesPerThread = NR_OF_INCREMENTS;
    int lockCount = 0;

    void worker() {
      foreach (attempt; 0 .. triesPerThread) {
        synchronized (guard) {
          lockCount += 1;
        }
      }
    }

    auto pool = new ThreadGroup;
    foreach (n; 0 .. threadCount) {
      pool.create(&worker);
    }
    pool.joinAll();

    assert(lockCount == threadCount * triesPerThread);
  }

  auto exampleTimings = benchmark!(mutexExample)(1);
  writeln("from example got: ", to!Duration(exampleTimings[0]));
}

---SNIP---

For completeness, I also added the example from core.sync.mutex
(https://dlang.org/phobos/core_sync_mutex.html) at the end.

My question now is: why is each mutex-based thread-safe variant so slow
compared to a similar Java program? The only hint I could find is
something like:
https://blogs.oracle.com/dave/entry/java_util_concurrent_reentrantlock_vs which
mentions that there is some magic going on underneath.
For the atomic and the non-thread-safe variants, the D solution seems to
be twice as fast as my Java program, but for the locked variants, the Java
program seems to be 40 times faster.

By the way, I run the code with: dub run --build=release

Thanks in advance,
Christian