Fast string search
bearophile
bearophileHUGS at lycos.com
Sun Dec 12 23:28:03 PST 2010
Found through Reddit, it may be useful for Phobos:
http://volnitsky.com/project/str_search/
A possible partial D2 translation:
const(char)* volnitsky(string haystack, string needle) {
enum size_t wSize = ushort.sizeof;
enum size_t hSize = 64 * 1024;
immutable char* S = haystack.ptr;
immutable char* Se = &haystack[$ - 1];
immutable char* SS = needle.ptr;
immutable char* SSe = &needle[$ - 1];
immutable size_t SS_size = SSe - SS; // needle length
immutable size_t step = SS_size - wSize + 1;
// if S is too small or SS is too big, fallback to std::search()
if (step < wSize || SS_size >= (1 << 8) - 1) {
throw new Exception("search!");
//return std::search(S, Se, SS, SSe);
}
// make SS hash
//auto H = new ubyte[hSize]; // 64 KB
ubyte[hSize] H; // 64 KB
for (size_t SSi = 0; SSi < (SS_size - wSize + 1); SSi++) {
size_t h = *cast(ushort*)(SS + SSi) % hSize;
while (H[h])
h = (h + 1) % hSize; // find free cell
H[h] = cast(ubyte)(SSi + 1); // can't overflow!
}
// step through text
for (char* p = cast(char*)(S + step); p <= Se - wSize; p += step) {
size_t h = *cast(ushort*)p % hSize;
if (H[h]) {
TRY_HASH_CELL:
int SSi = H[h] - 1;
const char* R = p - SSi;
for (size_t i = 0; i < SS_size; i++) {
if (R[i] != SS[i]) {
size_t h_next = (h + 1) % hSize;
if (H[h_next]) {
h = h_next;
goto TRY_HASH_CELL;
} else
goto NEXT_STEP;
}
}
return R;
}
NEXT_STEP:;
}
return Se;
}
//---------------------------------------------
import std.stdio: writeln;
void main() {
string s1 = "hello, how are you? I'm well, thanks";
string s2 = "wello"; // doesn't work with strings of odd lenght!
auto ptr = volnitsky(s1, s2);
writeln(ptr - s1.ptr);
writeln(s1[ptr - s1.ptr]);
}
But as the example shows it doesn't seem to work with strings of odd length. Also it lacks the call to std::search() still.
Bye,
bearophile
More information about the Digitalmars-d
mailing list