D vs Haskell

Juan Manuel Cabo juanmanuel.cabo at gmail.com
Sat Jun 22 14:11:08 PDT 2013


On 06/22/2013 03:25 PM, Szymon Gatner wrote:
> Word counting problem in D and Haskell:
> 
> http://leonardo-m.livejournal.com/109201.html

I thought that the D time could be improved further with
a few small changes.

Testing against Complete Works of William Shakespeare (5.3 MiB
plaintext): http://www.gutenberg.org/ebooks/100
and using "dmd -O -inline -noboundscheck -release" on both
the author's latest version and my version using "byWord",
I got these times (minimum of 10 runs):

    before: 2781 ms

    after:   805 ms


Here is the code, with a "byWord" range using std.ascii.isAlpha:


    import std.stdio, std.conv, std.file, std.string,
           std.algorithm, std.range, std.traits, std.ascii;


    /**
     * Counts how many times each distinct element occurs in `items`.
     *
     * Returns: a range of (element, count) tuples, produced by zipping the
     * keys and the values of the internal associative array.
     */
    auto hashCounter(R)(R items) if (isForwardRange!R) {
        size_t[ForeachType!R] counts;
        foreach (item; items) {
            if (auto slot = item in counts) {
                // Already seen: bump the stored count in place.
                ++*slot;
            } else {
                // First occurrence.
                counts[item] = 1;
            }
        }
        return zip(counts.byKey, counts.byValue);
    }

    /**
     * Prints the most frequent words of a text file, one "word count" pair
     * per line, most frequent first.
     *
     * Params:
     *   args = args[1] is the path of the text file to read,
     *          args[2] is the number of entries to print (parsed as uint).
     */
    void main(string[] args) {
        // Both arguments are mandatory. Without this check a missing argument
        // is an out-of-bounds index, which is not even detected when the
        // program is built with -noboundscheck.
        if (args.length < 3) {
            stderr.writeln("Usage: ", args.length ? args[0] : "wordcount",
                           " <file> <count>");
            return;
        }

        //Slow:
        //      args[1]
        //      .readText
        //      .toLower
        //      .tr("A-Za-z", "\n", "cs")
        //      .split

        //Faster:
        args[1]
        .readText
        .byWord
        .map!toLower()
        .array
        .hashCounter
        .array
        // Descending by count. The counts are unsigned, so compare them
        // directly instead of relying on wraparound of a negated size_t.
        .sort!"a[1] > b[1]"()
        .take(args[2].to!uint)
        .map!q{ text(a[0], " ", a[1]) }
        .join("\n")
        .writeln;
    }

    /** Forward range that extracts words from a string. A word is a maximal
        run of chars accepted by std.ascii.isAlpha(); every other char acts
        as a separator. Each word is a slice of the original string. */
    struct byWord {
        string s;      // text being scanned; set to null once the range is exhausted
        size_t pos;    // index into s where the next scan starts
        string word;   // current word, returned by front

        /// Starts scanning `s`; the first word (if any) is located immediately.
        this(string s) {
            this.s = s;
            popFront();
        }

        /// True once no more words are available.
        @property bool empty() const {
            return s.length == 0;
        }

        /// The current word. Only meaningful while the range is not empty.
        @property string front() {
            return word;
        }

        /// Independent copy of the range at its current position, which makes
        /// byWord a forward range (the struct holds only a slice and an index).
        @property byWord save() {
            return this;
        }

        /// Advances to the next word, or marks the range empty if none is left.
        void popFront() {
            // `>=` rather than `==`: once s has been reset to null, pos is
            // still non-zero, and a stray extra popFront must not fall through
            // to the slicing below.
            if (pos >= s.length) {
                //Mark the range as empty, only after popFront fails:
                s = null;
                return;
            }

            // Skip the separators in front of the next word.
            while (pos < s.length && !std.ascii.isAlpha(s[pos])) {
                ++pos;
            }
            auto start = pos;
            // Consume the word itself.
            while (pos < s.length && std.ascii.isAlpha(s[pos])) {
                ++pos;
            }

            if (start == s.length) {
                //No more words. Range empty:
                s = null;
            } else {
                word = s[start .. pos];
            }
        }
    }

    // Checks byWord on empty input, separator-only input, and words with
    // leading, trailing and repeated separators.
    unittest {
        // No words at all.
        assert(array(byWord("")).length == 0);
        assert(array(byWord("!@#$")).length == 0);

        // Single-letter words split by single separators.
        assert(["a", "b"] == array(byWord("a b")));
        assert(["a", "b", "c"] == array(byWord("a b c")));

        // Multi-letter words, leading separators, and separators after the
        // last word (the "no more words" branch of popFront).
        assert(["a"] == array(byWord("a ")));
        assert(["Hello", "world"] == array(byWord("  Hello,  world!  ")));
    }


--jm




More information about the Digitalmars-d mailing list