D vs Haskell
Juan Manuel Cabo
juanmanuel.cabo at gmail.com
Sat Jun 22 14:11:08 PDT 2013
On 06/22/2013 03:25 PM, Szymon Gatner wrote:
> Word counting problem in D and Haskell:
>
> http://leonardo-m.livejournal.com/109201.html
I thought that the D time could be improved further with
a few small changes.
Testing against Complete Works of William Shakespeare (5.3 MiB
plaintext): http://www.gutenberg.org/ebooks/100
and using "dmd -O -inline -noboundscheck -release" on both
the author's latest version and my version using "byWord",
I got these times (minimum of 10 runs):
before: 2781 ms
after: 805 ms
Here is the code, with a "byWord" range using std.ascii.isAlpha:
import std.stdio, std.conv, std.file, std.string,
std.algorithm, std.range, std.traits, std.ascii;
/**
 * Counts how many times each distinct element occurs in `items`.
 *
 * The tallies are accumulated in an associative array keyed by element.
 * The result is a range of (element, count) tuples built by zipping the
 * array's keys with its values, in the associative array's iteration order.
 */
auto hashCounter(R)(R items) if (isForwardRange!R) {
    size_t[ForeachType!R] tally;
    foreach (item; items) {
        // Indexing a missing key in an increment inserts it with count 0 first.
        ++tally[item];
    }
    return zip(tally.byKey, tally.byValue);
}
/**
 * Prints the N most frequent words of a text file, one "word count" pair
 * per line, most frequent first.
 *
 * Usage: <program> <text-file> <top-n>
 *
 * args[1] is the path of the file to read and args[2] is the number of
 * entries to print. If either is missing, a usage message is written to
 * stderr and the process exits with a failure status.
 */
void main(string[] args) {
    // Scoped import: only needed for the usage-error exit path.
    import core.stdc.stdlib : exit, EXIT_FAILURE;

    // Validate the command line up front; indexing args[1]/args[2] blindly
    // is an out-of-bounds access (unchecked under -noboundscheck).
    if (args.length < 3) {
        stderr.writeln("Usage: ", args.length ? args[0] : "wordcount",
                       " <text-file> <top-n>");
        exit(EXIT_FAILURE);
    }

    //Slow:
    // args[1]
    // .readText
    // .toLower
    // .tr("A-Za-z", "\n", "cs")
    // .split
    //Faster:
    args[1]
        .readText
        .byWord
        .map!toLower()
        .array
        .hashCounter
        .array
        // Descending by count. The counts are unsigned, so compare them
        // directly instead of negating them and relying on wraparound.
        .sort!"a[1] > b[1]"()
        .take(args[2].to!uint)
        .map!q{ text(a[0], " ", a[1]) }
        .join("\n")
        .writeln;
}
/**
 * Forward range that extracts words from a string.
 *
 * A word is a maximal run of characters accepted by std.ascii.isAlpha();
 * every other code unit acts as a separator. Each element is a slice of the
 * original string, so no copying takes place.
 */
struct byWord {
    string s;    /// Text being scanned; set to null once the range is exhausted.
    size_t pos;  /// Index in `s` at which the next scan starts.
    string word; /// Current front element.

    /// Starts scanning `s` and positions the range on its first word, if any.
    this(string s) {
        this.s = s;
        popFront();
    }

    /// True once no words remain.
    @property bool empty() const {
        return s.length == 0;
    }

    /// The current word. Only meaningful while the range is not empty.
    @property string front() const {
        return word;
    }

    /// Returns an independent copy that iterates from the current position.
    @property byWord save() {
        return this;
    }

    /// Advances to the next word, or marks the range empty if none is left.
    void popFront() {
        // Skip the separators that precede the next word.
        while (pos < s.length && !std.ascii.isAlpha(s[pos])) {
            ++pos;
        }
        if (pos == s.length) {
            // No more words. Reset all state so that any further popFront()
            // call stays empty instead of slicing past the end of a null string.
            s = null;
            pos = 0;
            word = null;
            return;
        }
        immutable start = pos;
        while (pos < s.length && std.ascii.isAlpha(s[pos])) {
            ++pos;
        }
        word = s[start .. pos];
    }
}
// Exercises byWord on empty input, separator-only input, and inputs with
// leading, trailing and non-space separators.
unittest {
    // No words at all.
    assert(array(byWord("")).length == 0);
    assert(array(byWord("!@#$")).length == 0);

    // Plain space-separated words.
    assert(["a", "b"] == array(byWord("a b")));
    assert(["a", "b", "c"] == array(byWord("a b c")));

    // Leading and trailing separators must not produce empty words.
    assert(["a", "b"] == array(byWord("  a,b  ")));
    assert(["word"] == array(byWord("word!")));

    // Digits are not alphabetic, so they split words too.
    assert(["abc", "def"] == array(byWord("abc123def")));
}
--jm
More information about the Digitalmars-d
mailing list