Slow performance compared to C++, ideas?

Joseph Rushton Wakeling joseph.wakeling at webdrake.net
Fri May 31 04:44:47 PDT 2013


On 05/31/2013 01:05 PM, Timon Gehr wrote:
> That wouldn't make any sense though, since after template expansion there is no
> difference between the generated version and a particular handwritten version.

That's what I'd assumed too, but there _is_ a speed difference.  I'm open to
suggestions as to why.

Compare these profile results for the core inner function -- first the original
version (its mangled name contains 'New', but it is the older code):

  %   cumulative   self              self     total
 time   seconds   seconds    calls   s/call   s/call  name
 93.33      4.69     4.69  4084637     0.00     0.00
_D8infected5model115__T13NewSimulationS248infected5model8StateSISS238infected5model7SeedSISS388infected5model21NewUpdateMeanFieldSISTdZ13NewSimulation19__T11UpdateStateTdZ6updateMFKC8infected5model15__T8StateSISTdZ8StateSISKxC8infected5model14__T7SeedSISTdZ7SeedSISZd


... and the newer version:

  %   cumulative   self              self     total
 time   seconds   seconds    calls   s/call   s/call  name
 92.73      5.23     5.23  4078287     0.00     0.00
_D8infected5model292__T10SimulationS358infected5model18UpdateMeanFieldSISTC8infected5model15__T8StateSISTdZ8StateSISTC8infected5model164__T7SeedSISTdTAyAS3std8typecons50__T5TupleTmVAyaa2_6964TdVAyaa9_696e666c75656e6365Z5TupleTyS3std8typecons50__T5TupleTmVAyaa2_6964TdVAyaa9_696e666c75656e6365Z5TupleZ7SeedSISTydZ10Simulation17__T11UpdateStateZ163__T6updateTdTAyAS3std8typecons50__T5TupleTmVAyaa2_6964TdVAyaa9_696e666c75656e6365Z5TupleTyS3std8typecons50__T5TupleTmVAyaa2_6964TdVAyaa9_696e666c75656e6365Z5TupleZ6updateMFNbNfKC8infected5model15__T8StateSISTdZ8StateSISKxC8infected5model164__T7SeedSISTdTAyAS3std8typecons50__T5TupleTmVAyaa2_6964TdVAyaa9_696e666c75656e6365Z5TupleTyS3std8typecons50__T5TupleTmVAyaa2_6964TdVAyaa9_696e666c75656e6365Z5TupleZ7SeedSISZd

I'm not sure what, other than a change of template design, could be responsible
here.  The key bits of code follow -- the original version:

	// Mixin template that injects an `update` method; the element type T is
	// fixed when the template is mixed in.
	mixin template UpdateState(T)
	{
		// Performs one update of `st.infected` in place, using the parameter
		// arrays and network stored in `sd`.
		// Returns the largest absolute per-element change made by this update.
		T update(ref StateSIS!T st, const ref SeedSIS!T sd)
		{
			T d = to!T(0);
			// Function-level static, so the buffer persists across calls
			// (presumably to avoid reallocating every call -- TODO confirm intent).
			static T[] sick;
			// Snapshot the current values so every element below is computed
			// from the pre-update state rather than partially updated data.
			sick.length = st.infected.length;
			sick[] = st.infected[];

			foreach(i; 0..sick.length) {
				// Product over the entries of sd.network[i] of
				// (1 - sick[link.id] * link.influence).
				T noTransmission = to!T(1);
				foreach(link; sd.network[i])
					noTransmission *= (to!T(1) - sick[link.id] * link.influence);
				// Contribution from the not-yet-sick fraction (1 - sick[i]).
				T getSick = (to!T(1) - sick[i]) * (sd.susceptible[i] + (to!T(1) -
sd.susceptible[i]) * (to!T(1) - noTransmission));
				// Contribution from the already-sick fraction, scaled by (1 - recover[i]).
				T staySick = sick[i] * (to!T(1) - sd.recover[i]);
				st.infected[i] = (to!T(1) - sd.immune[i]) * (getSick + staySick);
				// The new value is required to lie in [0, 1].
				assert(to!T(0) <= st.infected[i]);
				assert(st.infected[i] <= to!T(1));
				// Track the running maximum of |new - old|.
				d = max(abs(st.infected[i] - sick[i]), d);
			}

			return d;
		}
	}

... and for clarity, the StateSIS and SeedSIS classes:

	// State container: wraps a single array `infected` of element type T.
	class StateSIS(T)
	{
		// One value of type T per element.
		T[] infected;

		// Default constructor: leaves `infected` as an empty (null) array.
		this(){}

		// Stores the slice `inf` directly; no element copy is made here.
		this(T[] inf)
		{
			infected = inf;
		}

		// Number of elements in `infected`.
		auto size() @property pure const nothrow
		{
			return infected.length;
		}

		// Sum of all entries of `infected`, accumulated from to!T(0).
		T infection() @property pure const nothrow
		{
			return reduce!"a+b"(to!T(0), infected);
		}
	}

	// Parameter container read by `update`: three per-element arrays of T
	// plus a per-element list of links.
	class SeedSIS(T)
	{
		T[] immune;
		T[] susceptible;
		T[] recover;
		// network[i] is the list of links iterated for element i.
		Link!T[][] network;

		// Default constructor: all arrays start empty (null).
		this() {}

		// Stores the given slices directly; no element copies are made here.
		this(T[] imm, T[] sus, T[] rec, Link!T[][] net)
		{
			immune = imm;
			susceptible = sus;
			recover = rec;
			network = net;
		}

		// Returns immune.length. The `in` contract asserts that all four
		// arrays have the same length.
		auto size() @property pure const nothrow
			in
			{
				assert(immune.length == susceptible.length);
				assert(immune.length == recover.length);
				assert(immune.length == network.length);
			}
		body
		{
			return immune.length;
		}
	}

... and the "Link" template:

	// Link!T is a Tuple with two named fields: `id` (size_t) and
	// `influence` (T).
	template Link(T)
	{
		alias Tuple!(size_t, "id", T, "influence") Link;
	}

... and now for comparison the new versions:

	// Mixin template that injects an `update` method. Unlike a mixin
	// parameterised on T, the type parameters here (T, N, L) belong to
	// `update` itself, with N constrained to match L[][].
	mixin template UpdateState()
	{
		// Performs one update of `st.infected` in place, using the parameter
		// arrays and network stored in `sd`.
		// Returns the largest absolute per-element change made by this update.
		T update(T, N : L[][], L)(ref StateSIS!T st, const ref SeedSIS!(T, N, L) sd)
		{
			T d = to!T(0);
			// Function-level static, so the buffer persists across calls
			// (presumably to avoid reallocating every call -- TODO confirm intent).
			static T[] sick;
			// Snapshot the current values so every element below is computed
			// from the pre-update state rather than partially updated data.
			sick.length = st.infected.length;
			sick[] = st.infected[];

			foreach(i; 0..sick.length) {
				// Product over the entries of sd.network[i] of
				// (1 - sick[link.id] * link.influence).
				T noTransmission = to!T(1);
				foreach(link; sd.network[i])
					noTransmission *= (to!T(1) - sick[link.id] * link.influence);
				// Contribution from the not-yet-sick fraction (1 - sick[i]).
				T getSick = (to!T(1) - sick[i]) * (sd.susceptible[i] + (to!T(1) -
sd.susceptible[i]) * (to!T(1) - noTransmission));
				// Contribution from the already-sick fraction, scaled by (1 - recover[i]).
				T staySick = sick[i] * (to!T(1) - sd.recover[i]);
				st.infected[i] = (to!T(1) - sd.immune[i]) * (getSick + staySick);
				// The new value is required to lie in [0, 1].
				assert(to!T(0) <= st.infected[i]);
				assert(st.infected[i] <= to!T(1));
				// Track the running maximum of |new - old|.
				d = max(abs(st.infected[i] - sick[i]), d);
			}

			return d;
		}
	}

	// State container: wraps a single array `infected` of element type T.
	class StateSIS(T)
	{
		// One value of type T per element.
		T[] infected;

		// Default constructor: leaves `infected` as an empty (null) array.
		this() {}

		// Stores the slice `inf` directly; no element copy is made here.
		this(T[] inf)
		{
			infected = inf;
		}

		// Number of elements in `infected`.
		auto size() @property pure const nothrow
		{
			return infected.length;
		}

		// Sum of all entries of `infected`, accumulated from to!T(0).
		T infection() @property pure const nothrow
		{
			return reduce!"a+b"(to!T(0), infected);
		}
	}

	// Convenience factory: allocates and returns a new StateSIS!T wrapping `inf`.
	auto stateSIS(T)(T[] inf)
	{
		return new StateSIS!T(inf);
	}

	// Parameter container read by `update`: three per-element arrays of T
	// plus a network whose type is a template parameter. `Network` must
	// match the pattern L[][], i.e. an array of arrays of L.
	class SeedSIS(T, Network : L[][], L)
	{
		T[] immune;
		T[] susceptible;
		T[] recover;
		// network[i] is the list of L entries iterated for element i.
		Network network;

		// Default constructor: all arrays start empty (null).
		this() {}

		// Stores the given slices directly; no element copies are made here.
		this(T[] imm, T[] sus, T[] rec, Network net)
		{
			immune = imm;
			susceptible = sus;
			recover = rec;
			network = net;
		}

		// Returns immune.length. The `in` contract asserts that all four
		// arrays have the same length.
		auto size() @property pure const nothrow
			in
			{
				assert(immune.length == susceptible.length);
				assert(immune.length == recover.length);
				assert(immune.length == network.length);
			}
		body
		{
			return immune.length;
		}
	}

	// Convenience factory: allocates and returns a new SeedSIS!(T, Network, L)
	// built from the four given arrays.
	auto seedSIS(T, Network : L[][], L)(T[] imm, T[] sus, T[] rec, Network net)
	{
		return new SeedSIS!(T, Network, L)(imm, sus, rec, net);
	}

... note that the Network that is passed to SeedSIS is still always a Link!T[][].


More information about the Digitalmars-d mailing list