Code speed

bearophile bearophileHUGS at lycos.com
Tue Apr 13 19:33:21 PDT 2010


This is a single module, and on my PC is more than two times faster than the original code. Now I think there are no naive ways left to speed it up.

Compile it with:
dmd -O -release -inline

The low performance was caused by:
- virtual methods that don't need to be virtual;
- ^^0.5 and pow(x,2) doesn't perfectly efficient;
- the two nested loops in the main are more efficient as ref double, this is something dmd will need to fix;
- Mersenne twister is slow. I have used a Kiss rnd generator here, adapted from Tango.
- the uniform of std.random is bad, it contains nextafter that's slow, it can be avoided using a closed range: "[]" instead of the default one "[)".
- Somewhere in the middle of the random generator there was an enforce. I am now not so sure it's good to use a lot of enforce() in the std lib of a system language. Maybe Andrei will need to go back using asserts inside contracts.
- The temporary struct inside the two main loops didn't get optimized away
- the append was messy and slow, I have removed the appends and replaced with simpler and tidier code with an index, that's quite faster.

Now the profiling result is clean:
16394266 main
 8079735 2performance510FastRandom7uniformMFddZd
 2841876 2performance510FastRandom8randUintMFZk
 1923377 2performance54Yzlm20userReputationUpdateMFKAS12performance56RatingKAdKAdZv
 1729333 2performance54Yzlm22objectReputationUpdateMFKAS12performance56RatingKAdKAdZd

Most of the time is spent in the main and generating rnd values, but the timings are clean.

The resulting code can be slower than the C++ code, but now it's mostly a matter of backend. If I port this code to D1 for Tango and I compile it very well with LDC the performance can be about as good as the C++ version or better :-) If necessary I can use an even faster random number generator, but that's for tomorrow, if you want.


import std.stdio: printf;
import std.math: sqrt, pow;

struct FastRandom {
    uint kiss_x = 1;
    uint kiss_y = 2;
    uint kiss_z = 4;
    uint kiss_w = 8;
    uint kiss_carry = 0;
    uint kiss_k, kiss_m;

    this(uint seed) {
        this.seed(seed);
    }

    void seed(uint seed) {
        kiss_x = seed | 1;
        kiss_y = seed | 2;
        kiss_z = seed | 4;
        kiss_w = seed | 8;
        kiss_carry = 0;
    }

    uint randUint() {
        kiss_x = kiss_x * 69069 + 1;
        kiss_y ^= kiss_y << 13;
        kiss_y ^= kiss_y >> 17;
        kiss_y ^= kiss_y << 5;
        kiss_k = (kiss_z >> 2) + (kiss_w >> 3) + (kiss_carry >> 2);
        kiss_m = kiss_w + kiss_w + kiss_z + kiss_carry;
        kiss_z = kiss_w;
        kiss_w = kiss_m;
        kiss_carry = kiss_k >> 30;
        return kiss_x + kiss_y + kiss_w;
    }

    double random() {
        return this.randUint() / (uint.max + 1.0);
    }

    double uniform(double a, double b) {
        double r = cast(double)this.randUint() / (uint.max + 1.0);
        return a + (b - a) * r;
    }
}


struct Rating {
    uint user, object;
    double value;
}


abstract class ReputationAlgorithm {
    this() {}
}


final class Yzlm : ReputationAlgorithm {
    double beta;
    double convergenceRequirement;
    double errorMin;
//    uint[] userLinks; // commented out because for now it
                        // has a constant value for all users
    double[] weightSum;
    double[] oldReputationObject;

    double objectReputationUpdate(ref Rating[] ratings,
                                  ref double[] reputationUser,
                                  ref double[] reputationObject) {
        double diff = 0;
        double[] temp = oldReputationObject;

        // Original version had:
        //
        //    oldReputationObject[] = reputationObject[]
        //
        // This version is an attempt to save effort
        // by just switching round the memory the two
        // arrays are pointing at -- not sure if it
        // actually does what I'm expecting it to.
        // Doesn't seem to improve speed. :-(
        oldReputationObject = reputationObject;
        reputationObject = temp;
        reputationObject[] = 0;

        weightSum[] = 0;

        foreach (Rating r; ratings) {
            reputationObject[r.object] += reputationUser[r.user] * r.value;
            weightSum[r.object] += reputationUser[r.user];
        }

        foreach (uint object, ref double r; reputationObject) {
            r /= (weightSum[object] > 0) ? weightSum[object] : 1;
            auto aux = (r - oldReputationObject[object]);
            diff += aux * aux;
        }

        return sqrt(diff);
    }

    void userReputationUpdate(ref Rating[] ratings,
                              ref double[] reputationUser,
                              ref double[] reputationObject) {
        reputationUser[] = 0;

        foreach (Rating r; ratings) {
            auto aux = (r.value - reputationObject[r.object]);
            reputationUser[r.user] += aux * aux;
        }

        foreach (uint user, ref double r; reputationUser) {
            //if(userLinks[user]>0)
                r = pow( (r/reputationObject.length/*userLinks[user]*/) + errorMin, -beta);
        }
    }

    void opCall(ref Rating[] ratings,
                ref double[] reputationUser,
                ref double[] reputationObject) {
    //    userLinks.length = reputationUser.length;
    //    userLinks[] = 0;

        weightSum.length = reputationObject.length;
        oldReputationObject.length = reputationObject.length;

    //    foreach (Rating r; ratings)
    //        userLinks[r.user]++;

        double diff;
        uint iterations = 0;
        do {
            userReputationUpdate(ratings, reputationUser, reputationObject);
            diff = objectReputationUpdate(ratings, reputationUser, reputationObject);
            ++iterations;
        } while (diff > convergenceRequirement);
        printf("Exited in %u iterations with diff = %g < %g\n",
               iterations, diff, convergenceRequirement);
    }

    this() {}

    this(double b, double c, double e) {
        beta = b;
        convergenceRequirement = c;
        errorMin = e;
        assert(beta >= 0);
        assert(convergenceRequirement > 0);
        assert(errorMin >= 0);
    }

    this(ref Rating[] ratings,
         ref double[] reputationUser,
         ref double[] reputationObject,
         double b, double c, double e) {
        this(b,c,e);

        opCall(ratings, reputationUser, reputationObject);
    }
}


class AvgWeighted : ReputationAlgorithm {
    double[] weightSum;

    void opCall(ref Rating[] ratings,
                ref double[] reputationUser,
                ref double[] reputationObject) {
        weightSum.length = reputationObject.length;
        weightSum[] = 0;

        reputationObject[] = 0;

        foreach (Rating r; ratings) {
            reputationObject[r.object] += reputationUser[r.user]*r.value;
            weightSum[r.object] += reputationUser[r.user];
        }

        foreach (uint o, ref double r; reputationObject)
            r /= weightSum[o];
    }

    this() {}

    this(ref Rating[] ratings,
         ref double[] reputationUser,
         ref double[] reputationObject) {
        opCall(ratings, reputationUser, reputationObject);
    }
}


final class AvgArithmetic : AvgWeighted {
    override void opCall(ref Rating[] ratings,
            ref double[] reputationUser,
            ref double[] reputationObject) {
        reputationUser[] = 1;
        super.opCall(ratings, reputationUser, reputationObject);
    }

    this() {}

    this(ref Rating[] ratings,
         ref double[] reputationUser,
         ref double[] reputationObject) {
        opCall(ratings, reputationUser, reputationObject);
    }
}


void main() {
    Rating[] ratings;
    double[] reputationObject, reputationUser;
    double[] objectQuality, userError;
    auto aa = new AvgArithmetic;
    auto yzlm = new Yzlm(0.8, 1e-12, 1e-36);

    auto rnd = FastRandom(1001);

    reputationObject.length = 1_000;
    reputationUser.length = 1_000;

    objectQuality.length = reputationObject.length;
    userError.length = reputationUser.length;

    ratings.length = reputationObject.length * reputationUser.length;

    foreach (uint i; 0 .. 100) {
        foreach (ref double Q; objectQuality)
            Q = rnd.uniform(0.0, 10.0);

        foreach (ref double sigma2; userError)
            sigma2 = rnd.random();

        int pos;
        foreach (uint object, ref double Q; objectQuality)
            foreach (uint user, ref double sigma2; userError)
                ratings[pos++] = Rating(user, object, rnd.uniform(Q - sigma2, Q + sigma2));

        printf("We now have %u ratings.\n", ratings.length);

        aa(ratings, reputationUser, reputationObject);
        yzlm(ratings, reputationUser, reputationObject);

        double deltaQ = 0;
        foreach (uint object, double r; reputationObject) {
            auto aux = (r - objectQuality[object]);
            deltaQ += aux * aux;
        }
        deltaQ = sqrt(deltaQ / reputationObject.length);
        printf("[%u] Error in quality estimate: %g\n", i, deltaQ);
    }
}


Bye,
bearophile


More information about the Digitalmars-d-learn mailing list