How to use sets in D?
mark
mark at qtrac.eu
Sun Mar 8 08:43:10 UTC 2020
I use sets a lot and am now working on a program that will need
to hold sets of 65,000+ items, so I thought I do some timings for
the different approaches.
Here are some timings (uset uses the AA Unit approach, tset uses
an rbtree, and aset uses an AA with bool values):
$ ./sets.d
size 50,000
uset 1 ms, 340 μs, and 8 hnsecs
tset 4 ms, 637 μs, and 1 hnsec
aset 1 ms, 402 μs, and 6 hnsecs
$ ./sets.d
size 100,000
uset 2 ms, 338 μs, and 4 hnsecs
tset 12 ms, 262 μs, and 6 hnsecs
aset 2 ms and 991 μs
$ ./sets.d
size 200,000
uset 5 ms, 971 μs, and 5 hnsecs
tset 30 ms, 675 μs, and 5 hnsecs
aset 6 ms, 74 μs, and 6 hnsecs
$ ./sets.d
size 400,000
uset 11 ms, 823 μs, and 4 hnsecs
tset 74 ms, 146 μs, and 2 hnsecs
aset 12 ms, 560 μs, and 5 hnsecs
What seems pretty clear is that for my purposes (checking
presence or absence of membership in a set), AAs are much faster
than rbtrees. (This is to be expected since if an AA uses a hash
is should be O(1) vs O(lg n) for an rbtree).
Here is the test code I used:
#!/usr/bin/env rdmd
enum SIZE = 400_000;
enum SUB_SIZE = SIZE / 10;
enum MIN_LEN = 10;
enum MAX_LEN = 50;
enum AZ = "abcdefghijklmnopqrstuvwxyz";
void main() {
import std.stdio: writefln;
auto data = Data.populate();
auto sets = Sets(data.words);
writefln("size %,d", SIZE);
check(sets.uset, data.present, data.absent, "uset");
check(sets.tset, data.present, data.absent, "tset");
check(sets.aset, data.present, data.absent, "aset");
}
struct Sets {
import std.container.rbtree: RedBlackTree;
alias Unit = void[0];
enum unit = Unit.init;
Unit[string] uset;
RedBlackTree!string tset;
bool[string] aset;
this(string[] words) {
tset = new RedBlackTree!string;
foreach (word; words) {
uset[word] = unit;
tset.insert(word);
aset[word] = false;
}
}
}
struct Data {
string[] words;
string[] present;
string[] absent;
static Data populate() {
Data data;
for (int i = 0; i < SIZE; i++) {
auto word = makeWord;
data.words ~= word;
if (data.present.length < SUB_SIZE)
data.present ~= word;
if (data.absent.length < SUB_SIZE)
data.absent ~= word ~ "9";
}
return data;
}
}
string makeWord() {
import std.random: randomCover, uniform;
import std.range: take;
import std.string: join, split;
enum AZS = (AZ ~ AZ ~ AZ ~ AZ ~ AZ ~ AZ).split("");
return randomCover(AZS).take(uniform(MIN_LEN,
MAX_LEN)).join("");
}
void check(T)(T set, string[] present, string[] absent, string
name) {
import std.datetime.stopwatch: AutoStart, StopWatch;
import std.stdio: writeln;
auto timer = StopWatch(AutoStart.yes);
foreach (string p; present)
assert(p in set);
foreach (string a; absent)
assert(a !in set);
writeln(name, " ", timer.peek);
}
More information about the Digitalmars-d-learn
mailing list