Strange CTFE issue, string replacement
Jethro via Digitalmars-d-learn
digitalmars-d-learn at puremagic.com
Sun Apr 9 12:38:33 PDT 2017
I tried to make a string like replacement called fstring which
uses buffering to avoid lots of little allocations. The problem
is, that it actually causes the compiler to use 10x the memory
and about 10 times slower.
It doesn't do anything special but tries to emulate string(so it
can be a drop in replacement, more or less)
Why would it be so ineffective in CTFE? I know strings are built
in and the compiler probably has some optimizations, but an order
of magnitude more memory usage is was not expected(should be
lower)? I could understand a small speed slowdown, but not 10x.
Any ideas?
class fstring(T = char)
{
import std.traits;
T[] buf;
int length = 0;
int Inc = 65535;
this(int inc = 65535)
{
Inc = inc;
buf.length = Inc;
for(int i = 0; i < buf.length; i++) buf[i] = 0;
}
// foreach
ReturnType!(D) opApply(D)(scope D dg)
{
ReturnType!(D) result;
for (int i = 0; i < length; i++)
{
result = dg(buf[i]);
if (result) break;
}
return result;
}
// Append string to end
auto Append(S)(S s)
{
while (length + s.length >= buf.length) buf.length += Inc;
for (int i = 0; i < s.length; i++) buf[length + i] =
cast(T)(s[i]);
length += s.length;
}
// Prepends string to start
auto Prepend(S)(S s)
{
while (length + s.length >= buf.length) buf.length += Inc;
for (int i = 0; i < length; i++) buf[s.length + length - i - 1]
= buf[length - i - 1];
for (int i = 0; i < s.length; i++) buf[i] = cast(T)(s[i]);
length += s.length;
}
auto opOpAssign(string op, S)(S s) if (op == "~") { Append(s); }
void opAssign(S)(S s) if (!is(S == typeof(this))) { length = 0;
Append(s); }
auto opBinary(string op, S)(S s) if (op == "~") { Append(s);
return this; }
auto opBinaryRight(string op, S)(S s) if (op == "~") {
Prepend(s); return this; }
bool opEquals(S)(S s)
{
if (buf.length != s.length) return false;
for(int i = 0; i < buf.length; i++) if (buf[i] != s[i]) return
false;
return true;
}
// Forward replaces string a with string b in place(no memory
allocations) and greedily(as they are found from left to right)
auto replace(A, B)(A a, B b)
{
if (length < a.length || a.length == 0 || b.length == 0) return
this;
auto small = (a.length >= b.length);
int idx = 0;
int count = 0; // number of matches, used when b.length >
a.length to determine how much to overallocate(which depends on
the number of matches)
for(int i = 0; i <= length - a.length; i++)
{
if (buf[i] == a[0])
{
auto found = true;
for(int j = 1; j < a.length; j++) // Work backwards as more
likely to be a mismatch(faster)
if (buf[i + a.length - j] != a[a.length - j])
{
found = false;
break;
}
if (found)
{
count++;
i += a.length - 1; // dec by 1 because for loop will inc
if (small) for(int j = 0; j < b.length; j++) buf[idx++] =
b[j];
continue;
}
}
if (small) buf[idx++] = buf[i];
}
int extra = -count*(a.length - b.length);
if (small) { length += extra; if (buf.length > length)
buf[length] = 0; return this; }
// We now know the count so the (a.length - b.length)*count is
the amount to expand/contract buf
auto end = length + extra;
if (end >= buf.length) buf.length += Inc;
// shift the string to make space at the head, this allows us
to use essentially the same algorithm as above, the difference is
that we must first know the number of matches to overallocate the
buffer if necessary(could do dynamically) and to shift the string
ahead
for(int i = 0; i < length; i++)
buf[end - 1 - i] = buf[length - i - 1];
idx = 0;
for(int i = extra; i <= length - a.length + extra; i++)
{
if (buf[i] == a[0])
{
auto found = true;
for(int j = 1; j < a.length; j++) // Work backwards as more
likely to be a mismatch(faster)
if (buf[i + a.length - j] != a[a.length - j])
{
found = false;
break;
}
if (found)
{
count--;
i += a.length - 1; // dec by 1 because for loop will inc
for(int j = 0; j < b.length; j++)
buf[idx++] = b[j];
if (count == 0) break;
continue;
}
}
buf[idx++] = buf[i];
}
length = length + extra;
assert(count == 0, "fstring: Programmatic Bug - Replace String
Error!");
return this;
}
// Strips away the begining(dir = -1), the end(dir = 1), or
both(dir = 0) that match s
auto strip(int dir = 0, Args...)(Args s)
{
int start = 0, end = length;
if (dir <= 0)
for(int i = 0; i <= length/2 + 1; i++)
{
bool anyFound = false;
foreach(ss; s)
{
auto found = true;
for(int j = 0; j < ss.length; j++)
{
if (i + j >= length) break;
if (buf[i + j] != ss[j])
found = false;
}
anyFound = anyFound | found;
if (found) i += ss.length - 1;
}
if (anyFound)
start = i+1;
else
break;
}
if (dir >= 0)
for(int i = length/2; i < length; i++)
{
bool anyFound = false;
foreach(ss; s)
{
auto found = true;
for(int j = 0; j < ss.length; j++)
{
if (i + j >= length) break;
if (buf[i + j] != ss[j])
found = false;
}
anyFound = anyFound | found;
if (found) i += ss.length - 1;
}
if (!anyFound)
end = i+1;
}
for(int i = start; i < end; i++)
buf[i - start] = buf[i];
length = end - start;
return this;
}
auto opSlice(int start, int end) { return buf[start..end]; }
@property int opDollar(size_t dim : 0)() { return length; }
auto opIndex() { return buf[0..length]; }
auto opIndex(size_t i) { return buf[i]; }
auto opIndexAssign(S)(S v, size_t i) { buf[i] = v; }
override string toString() { return to!string(buf[0..length]); }
}
More information about the Digitalmars-d-learn
mailing list