Code speed
bearophile
bearophileHUGS at lycos.com
Tue Apr 13 19:33:21 PDT 2010
This is a single module, and on my PC is more than two times faster than the original code. Now I think there are no naive ways left to speed it up.
Compile it with:
dmd -O -release -inline
The low performance was caused by:
- virtual methods that don't need to be virtual;
- ^^0.5 and pow(x,2) doesn't perfectly efficient;
- the two nested loops in the main are more efficient as ref double, this is something dmd will need to fix;
- Mersenne twister is slow. I have used a Kiss rnd generator here, adapted from Tango.
- the uniform of std.random is bad, it contains nextafter that's slow, it can be avoided using a closed range: "[]" instead of the default one "[)".
- Somewhere in the middle of the random generator there was an enforce. I am now not so sure it's good to use a lot of enforce() in the std lib of a system language. Maybe Andrei will need to go back using asserts inside contracts.
- The temporary struct inside the two main loops didn't get optimized away
- the append was messy and slow, I have removed the appends and replaced with simpler and tidier code with an index, that's quite faster.
Now the profiling result is clean:
16394266 main
8079735 2performance510FastRandom7uniformMFddZd
2841876 2performance510FastRandom8randUintMFZk
1923377 2performance54Yzlm20userReputationUpdateMFKAS12performance56RatingKAdKAdZv
1729333 2performance54Yzlm22objectReputationUpdateMFKAS12performance56RatingKAdKAdZd
Most of the time is spent in the main and generating rnd values, but the timings are clean.
The resulting code can be slower than the C++ code, but now it's mostly a matter of backend. If I port this code to D1 for Tango and I compile it very well with LDC the performance can be about as good as the C++ version or better :-) If necessary I can use an even faster random number generator, but that's for tomorrow, if you want.
import std.stdio: printf;
import std.math: sqrt, pow;
struct FastRandom {
uint kiss_x = 1;
uint kiss_y = 2;
uint kiss_z = 4;
uint kiss_w = 8;
uint kiss_carry = 0;
uint kiss_k, kiss_m;
this(uint seed) {
this.seed(seed);
}
void seed(uint seed) {
kiss_x = seed | 1;
kiss_y = seed | 2;
kiss_z = seed | 4;
kiss_w = seed | 8;
kiss_carry = 0;
}
uint randUint() {
kiss_x = kiss_x * 69069 + 1;
kiss_y ^= kiss_y << 13;
kiss_y ^= kiss_y >> 17;
kiss_y ^= kiss_y << 5;
kiss_k = (kiss_z >> 2) + (kiss_w >> 3) + (kiss_carry >> 2);
kiss_m = kiss_w + kiss_w + kiss_z + kiss_carry;
kiss_z = kiss_w;
kiss_w = kiss_m;
kiss_carry = kiss_k >> 30;
return kiss_x + kiss_y + kiss_w;
}
double random() {
return this.randUint() / (uint.max + 1.0);
}
double uniform(double a, double b) {
double r = cast(double)this.randUint() / (uint.max + 1.0);
return a + (b - a) * r;
}
}
struct Rating {
uint user, object;
double value;
}
abstract class ReputationAlgorithm {
this() {}
}
final class Yzlm : ReputationAlgorithm {
double beta;
double convergenceRequirement;
double errorMin;
// uint[] userLinks; // commented out because for now it
// has a constant value for all users
double[] weightSum;
double[] oldReputationObject;
double objectReputationUpdate(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject) {
double diff = 0;
double[] temp = oldReputationObject;
// Original version had:
//
// oldReputationObject[] = reputationObject[]
//
// This version is an attempt to save effort
// by just switching round the memory the two
// arrays are pointing at -- not sure if it
// actually does what I'm expecting it to.
// Doesn't seem to improve speed. :-(
oldReputationObject = reputationObject;
reputationObject = temp;
reputationObject[] = 0;
weightSum[] = 0;
foreach (Rating r; ratings) {
reputationObject[r.object] += reputationUser[r.user] * r.value;
weightSum[r.object] += reputationUser[r.user];
}
foreach (uint object, ref double r; reputationObject) {
r /= (weightSum[object] > 0) ? weightSum[object] : 1;
auto aux = (r - oldReputationObject[object]);
diff += aux * aux;
}
return sqrt(diff);
}
void userReputationUpdate(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject) {
reputationUser[] = 0;
foreach (Rating r; ratings) {
auto aux = (r.value - reputationObject[r.object]);
reputationUser[r.user] += aux * aux;
}
foreach (uint user, ref double r; reputationUser) {
//if(userLinks[user]>0)
r = pow( (r/reputationObject.length/*userLinks[user]*/) + errorMin, -beta);
}
}
void opCall(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject) {
// userLinks.length = reputationUser.length;
// userLinks[] = 0;
weightSum.length = reputationObject.length;
oldReputationObject.length = reputationObject.length;
// foreach (Rating r; ratings)
// userLinks[r.user]++;
double diff;
uint iterations = 0;
do {
userReputationUpdate(ratings, reputationUser, reputationObject);
diff = objectReputationUpdate(ratings, reputationUser, reputationObject);
++iterations;
} while (diff > convergenceRequirement);
printf("Exited in %u iterations with diff = %g < %g\n",
iterations, diff, convergenceRequirement);
}
this() {}
this(double b, double c, double e) {
beta = b;
convergenceRequirement = c;
errorMin = e;
assert(beta >= 0);
assert(convergenceRequirement > 0);
assert(errorMin >= 0);
}
this(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject,
double b, double c, double e) {
this(b,c,e);
opCall(ratings, reputationUser, reputationObject);
}
}
class AvgWeighted : ReputationAlgorithm {
double[] weightSum;
void opCall(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject) {
weightSum.length = reputationObject.length;
weightSum[] = 0;
reputationObject[] = 0;
foreach (Rating r; ratings) {
reputationObject[r.object] += reputationUser[r.user]*r.value;
weightSum[r.object] += reputationUser[r.user];
}
foreach (uint o, ref double r; reputationObject)
r /= weightSum[o];
}
this() {}
this(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject) {
opCall(ratings, reputationUser, reputationObject);
}
}
final class AvgArithmetic : AvgWeighted {
override void opCall(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject) {
reputationUser[] = 1;
super.opCall(ratings, reputationUser, reputationObject);
}
this() {}
this(ref Rating[] ratings,
ref double[] reputationUser,
ref double[] reputationObject) {
opCall(ratings, reputationUser, reputationObject);
}
}
void main() {
Rating[] ratings;
double[] reputationObject, reputationUser;
double[] objectQuality, userError;
auto aa = new AvgArithmetic;
auto yzlm = new Yzlm(0.8, 1e-12, 1e-36);
auto rnd = FastRandom(1001);
reputationObject.length = 1_000;
reputationUser.length = 1_000;
objectQuality.length = reputationObject.length;
userError.length = reputationUser.length;
ratings.length = reputationObject.length * reputationUser.length;
foreach (uint i; 0 .. 100) {
foreach (ref double Q; objectQuality)
Q = rnd.uniform(0.0, 10.0);
foreach (ref double sigma2; userError)
sigma2 = rnd.random();
int pos;
foreach (uint object, ref double Q; objectQuality)
foreach (uint user, ref double sigma2; userError)
ratings[pos++] = Rating(user, object, rnd.uniform(Q - sigma2, Q + sigma2));
printf("We now have %u ratings.\n", ratings.length);
aa(ratings, reputationUser, reputationObject);
yzlm(ratings, reputationUser, reputationObject);
double deltaQ = 0;
foreach (uint object, double r; reputationObject) {
auto aux = (r - objectQuality[object]);
deltaQ += aux * aux;
}
deltaQ = sqrt(deltaQ / reputationObject.length);
printf("[%u] Error in quality estimate: %g\n", i, deltaQ);
}
}
Bye,
bearophile
More information about the Digitalmars-d-learn
mailing list