Strange CTFE issue, string replacement

Sun Apr 9 12:55:57 PDT 2017

On Sunday, 9 April 2017 at 19:38:33 UTC, Jethro wrote:
> I tried to make a string like replacement called fstring which 
> uses buffering to avoid lots of little allocations. The problem 
> is, that it actually causes the compiler to use 10x the memory 
> and about 10 times slower.
>
>
> It doesn't do anything special but tries to emulate string(so 
> it can be a drop in replacement, more or less)
>
>
>
> Why would it be so ineffective in CTFE? I know strings are 
> built in and the compiler probably has some optimizations, but 
> an order of magnitude more memory usage is was not 
> expected(should be lower)? I could understand a small speed 
> slowdown, but not 10x.
>
> Any ideas?
>
> class fstring(T = char)
> {
> 	import std.traits;
> 	T[] buf;	
> 	int length = 0;
> 	int Inc = 65535;
>
> 	this(int inc = 65535)
> 	{
> 		Inc = inc;
> 		buf.length = Inc;
> 		for(int i = 0; i < buf.length; i++)	buf[i] = 0;
> 	}
>
> 	// foreach
> 	ReturnType!(D) opApply(D)(scope D dg)
>     {
> 		ReturnType!(D) result;
> 		for (int i = 0; i < length; i++)
> 		{
> 			result = dg(buf[i]);
> 			if (result) break;
> 		}
> 		return result;
>     }
>
> 	// Append string to end
> 	auto Append(S)(S s)
> 	{
> 		while (length + s.length >= buf.length) buf.length += Inc;
> 		for (int i = 0; i < s.length; i++) buf[length + i] = 
> cast(T)(s[i]);
> 		length += s.length;
> 	}
>
> 	// Prepends string to start
> 	auto Prepend(S)(S s)
> 	{
> 		while (length + s.length >= buf.length) buf.length += Inc;
> 		for (int i = 0; i < length; i++) buf[s.length + length - i - 
> 1] = buf[length - i - 1];
> 		for (int i = 0; i < s.length; i++) buf[i] = cast(T)(s[i]);
> 		length += s.length;
> 	}
>
>
> 	auto opOpAssign(string op, S)(S s) if (op == "~") { Append(s); 
> }	
> 	void opAssign(S)(S s) if (!is(S == typeof(this))) { length = 
> 0; Append(s); }
> 	auto opBinary(string op, S)(S s) if (op == "~") { Append(s); 
> return this; }
> 	auto opBinaryRight(string op, S)(S s) if (op == "~") { 
> Prepend(s); return this; }
>
> 	bool opEquals(S)(S s)
> 	{		
> 		if (buf.length != s.length) return false;
> 		for(int i = 0; i < buf.length; i++)	if (buf[i] != s[i]) 
> return false;
> 		return true;
> 	}
>
> 	// Forward replaces string a with string b in place(no memory 
> allocations) and greedily(as they are found from left to right)
> 	auto replace(A, B)(A a, B b)
> 	{
> 		if (length < a.length || a.length == 0 || b.length == 0) 
> return this;
> 		auto small = (a.length >= b.length);
> 		int idx = 0;
> 		int count = 0; // number of matches, used when b.length > 
> a.length to determine how much to overallocate(which depends on 
> the number of matches)
> 		for(int i = 0; i <= length - a.length; i++)
> 		{
> 			if (buf[i] == a[0])
> 			{
> 				auto found = true;
> 				for(int j = 1; j < a.length; j++)	// Work backwards as more 
> likely to be a mismatch(faster)
> 					if (buf[i + a.length - j] != a[a.length - j])
> 					{
> 						found = false;
> 						break;
> 					}
>
> 				if (found)
> 				{
> 					count++;
> 					i += a.length - 1;	// dec by 1 because for loop will inc
> 					if (small) for(int j = 0; j < b.length; j++) buf[idx++] = 
> b[j];
> 					continue;
> 				}
> 			}
>
> 			if (small) buf[idx++] = buf[i]; 			
> 		}
>
> 		int extra = -count*(a.length - b.length);
> 		if (small) { length += extra; if (buf.length > length) 
> buf[length] = 0; return this; }
>
> 		// We now know the count so the (a.length - b.length)*count 
> is the amount to expand/contract buf		
> 		auto end = length + extra;
> 		if (end >= buf.length) buf.length += Inc;
>
>
> 		// shift the string to make space at the head, this allows us 
> to use essentially the same algorithm as above, the difference 
> is that we must first know the number of matches to 
> overallocate the buffer if necessary(could do dynamically) and 
> to shift the string ahead
> 		for(int i = 0; i < length; i++)	
> 			buf[end - 1 - i] = buf[length - i - 1];
>
> 		idx = 0;		
> 		for(int i = extra; i <= length - a.length + extra; i++)
> 		{
> 			if (buf[i] == a[0])
> 			{
> 				auto found = true;
> 				for(int j = 1; j < a.length; j++)	// Work backwards as more 
> likely to be a mismatch(faster)
> 					if (buf[i + a.length - j] != a[a.length - j])
> 					{
> 						found = false;
> 						break;
> 					}
>
> 				if (found)
> 				{
> 					count--;
> 					i += a.length - 1;	// dec by 1 because for loop will inc
> 					for(int j = 0; j < b.length; j++)
> 						buf[idx++] = b[j];
> 					if (count == 0) break;
> 					continue;
> 				}
> 			}
>
> 			buf[idx++] = buf[i]; 			
> 		}
>
> 		length = length + extra;
> 		assert(count == 0, "fstring: Programmatic Bug - Replace 
> String Error!");
> 		return this;
> 	}
>
> 	// Strips away the begining(dir = -1), the end(dir = 1), or 
> both(dir = 0) that match s
> 	auto strip(int dir = 0, Args...)(Args s)
> 	{
>
> 		int start = 0, end = length;
>                 if (dir <= 0)
> 		for(int i = 0; i <= length/2 + 1; i++)
> 		{
> 			bool anyFound = false;
> 			foreach(ss; s)
> 			{			
> 				
> 				auto found = true;
> 				for(int j = 0; j < ss.length; j++)
> 				{
> 					if (i + j >= length) break;
> 					if (buf[i + j] != ss[j])
> 						found = false;
> 				}
> 				anyFound = anyFound | found;
>
> 				if (found) i += ss.length - 1;
> 			}
>
> 			if (anyFound)
> 				start = i+1;
> 			else
> 				break;
> 		}
>
> 	        if (dir >= 0)	
> 		for(int i = length/2; i < length; i++)
> 		{
> 			bool anyFound = false;
> 			foreach(ss; s)
> 			{			
> 				auto found = true;
> 				for(int j = 0; j < ss.length; j++)
> 				{
> 					if (i + j >= length) break;
> 					if (buf[i + j] != ss[j])
> 						found = false;
> 				}
> 				anyFound = anyFound | found;
>
> 				if (found) i += ss.length - 1;
> 			}
>
> 			if (!anyFound)
> 				end = i+1;
> 		}
> 		
> 		for(int i = start; i < end; i++)
> 			buf[i - start] = buf[i];
> 		length = end - start;
> 		return this;
> 	}
>
> 	auto opSlice(int start, int end) { return buf[start..end]; }
> 	@property int opDollar(size_t dim : 0)() { return length; }
> 	auto opIndex() { return buf[0..length]; }
> 	auto opIndex(size_t i) { return buf[i]; }
> 	auto opIndexAssign(S)(S v, size_t i) { buf[i] = v; }
> 	override string toString() { return to!string(buf[0..length]); 
> }
>
> }

The constructor is nuts.
You do not need to zero the string!

Also avoid templates if you can.