Strange CTFE issue, string replacement
Stefan Koch via Digitalmars-d-learn
digitalmars-d-learn at puremagic.com
Sun Apr 9 12:55:57 PDT 2017
On Sunday, 9 April 2017 at 19:38:33 UTC, Jethro wrote:
> I tried to make a string like replacement called fstring which
> uses buffering to avoid lots of little allocations. The problem
> is, that it actually causes the compiler to use 10x the memory
> and about 10 times slower.
>
>
> It doesn't do anything special but tries to emulate string(so
> it can be a drop in replacement, more or less)
>
>
>
> Why would it be so ineffective in CTFE? I know strings are
> built in and the compiler probably has some optimizations, but
> an order of magnitude more memory usage is was not
> expected(should be lower)? I could understand a small speed
> slowdown, but not 10x.
>
> Any ideas?
>
> class fstring(T = char)
> {
> import std.traits;
> T[] buf;
> int length = 0;
> int Inc = 65535;
>
> this(int inc = 65535)
> {
> Inc = inc;
> buf.length = Inc;
> for(int i = 0; i < buf.length; i++) buf[i] = 0;
> }
>
> // foreach
> ReturnType!(D) opApply(D)(scope D dg)
> {
> ReturnType!(D) result;
> for (int i = 0; i < length; i++)
> {
> result = dg(buf[i]);
> if (result) break;
> }
> return result;
> }
>
> // Append string to end
> auto Append(S)(S s)
> {
> while (length + s.length >= buf.length) buf.length += Inc;
> for (int i = 0; i < s.length; i++) buf[length + i] =
> cast(T)(s[i]);
> length += s.length;
> }
>
> // Prepends string to start
> auto Prepend(S)(S s)
> {
> while (length + s.length >= buf.length) buf.length += Inc;
> for (int i = 0; i < length; i++) buf[s.length + length - i -
> 1] = buf[length - i - 1];
> for (int i = 0; i < s.length; i++) buf[i] = cast(T)(s[i]);
> length += s.length;
> }
>
>
> auto opOpAssign(string op, S)(S s) if (op == "~") { Append(s);
> }
> void opAssign(S)(S s) if (!is(S == typeof(this))) { length =
> 0; Append(s); }
> auto opBinary(string op, S)(S s) if (op == "~") { Append(s);
> return this; }
> auto opBinaryRight(string op, S)(S s) if (op == "~") {
> Prepend(s); return this; }
>
> bool opEquals(S)(S s)
> {
> if (buf.length != s.length) return false;
> for(int i = 0; i < buf.length; i++) if (buf[i] != s[i])
> return false;
> return true;
> }
>
> // Forward replaces string a with string b in place(no memory
> allocations) and greedily(as they are found from left to right)
> auto replace(A, B)(A a, B b)
> {
> if (length < a.length || a.length == 0 || b.length == 0)
> return this;
> auto small = (a.length >= b.length);
> int idx = 0;
> int count = 0; // number of matches, used when b.length >
> a.length to determine how much to overallocate(which depends on
> the number of matches)
> for(int i = 0; i <= length - a.length; i++)
> {
> if (buf[i] == a[0])
> {
> auto found = true;
> for(int j = 1; j < a.length; j++) // Work backwards as more
> likely to be a mismatch(faster)
> if (buf[i + a.length - j] != a[a.length - j])
> {
> found = false;
> break;
> }
>
> if (found)
> {
> count++;
> i += a.length - 1; // dec by 1 because for loop will inc
> if (small) for(int j = 0; j < b.length; j++) buf[idx++] =
> b[j];
> continue;
> }
> }
>
> if (small) buf[idx++] = buf[i];
> }
>
> int extra = -count*(a.length - b.length);
> if (small) { length += extra; if (buf.length > length)
> buf[length] = 0; return this; }
>
> // We now know the count so the (a.length - b.length)*count
> is the amount to expand/contract buf
> auto end = length + extra;
> if (end >= buf.length) buf.length += Inc;
>
>
> // shift the string to make space at the head, this allows us
> to use essentially the same algorithm as above, the difference
> is that we must first know the number of matches to
> overallocate the buffer if necessary(could do dynamically) and
> to shift the string ahead
> for(int i = 0; i < length; i++)
> buf[end - 1 - i] = buf[length - i - 1];
>
> idx = 0;
> for(int i = extra; i <= length - a.length + extra; i++)
> {
> if (buf[i] == a[0])
> {
> auto found = true;
> for(int j = 1; j < a.length; j++) // Work backwards as more
> likely to be a mismatch(faster)
> if (buf[i + a.length - j] != a[a.length - j])
> {
> found = false;
> break;
> }
>
> if (found)
> {
> count--;
> i += a.length - 1; // dec by 1 because for loop will inc
> for(int j = 0; j < b.length; j++)
> buf[idx++] = b[j];
> if (count == 0) break;
> continue;
> }
> }
>
> buf[idx++] = buf[i];
> }
>
> length = length + extra;
> assert(count == 0, "fstring: Programmatic Bug - Replace
> String Error!");
> return this;
> }
>
> // Strips away the begining(dir = -1), the end(dir = 1), or
> both(dir = 0) that match s
> auto strip(int dir = 0, Args...)(Args s)
> {
>
> int start = 0, end = length;
> if (dir <= 0)
> for(int i = 0; i <= length/2 + 1; i++)
> {
> bool anyFound = false;
> foreach(ss; s)
> {
>
> auto found = true;
> for(int j = 0; j < ss.length; j++)
> {
> if (i + j >= length) break;
> if (buf[i + j] != ss[j])
> found = false;
> }
> anyFound = anyFound | found;
>
> if (found) i += ss.length - 1;
> }
>
> if (anyFound)
> start = i+1;
> else
> break;
> }
>
> if (dir >= 0)
> for(int i = length/2; i < length; i++)
> {
> bool anyFound = false;
> foreach(ss; s)
> {
> auto found = true;
> for(int j = 0; j < ss.length; j++)
> {
> if (i + j >= length) break;
> if (buf[i + j] != ss[j])
> found = false;
> }
> anyFound = anyFound | found;
>
> if (found) i += ss.length - 1;
> }
>
> if (!anyFound)
> end = i+1;
> }
>
> for(int i = start; i < end; i++)
> buf[i - start] = buf[i];
> length = end - start;
> return this;
> }
>
> auto opSlice(int start, int end) { return buf[start..end]; }
> @property int opDollar(size_t dim : 0)() { return length; }
> auto opIndex() { return buf[0..length]; }
> auto opIndex(size_t i) { return buf[i]; }
> auto opIndexAssign(S)(S v, size_t i) { buf[i] = v; }
> override string toString() { return to!string(buf[0..length]);
> }
>
> }
The constructor is nuts.
You do not need to zero the string!
Also avoid templates if you can.
More information about the Digitalmars-d-learn
mailing list