Strange CTFE issue, string replacement

Jethro via Digitalmars-d-learn digitalmars-d-learn at puremagic.com
Sun Apr 9 12:38:33 PDT 2017


I tried to make a string like replacement called fstring which 
uses buffering to avoid lots of little allocations. The problem 
is, that it actually causes the compiler to use 10x the memory 
and about 10 times slower.


It doesn't do anything special but tries to emulate string(so it 
can be a drop in replacement, more or less)



Why would it be so ineffective in CTFE? I know strings are built 
in and the compiler probably has some optimizations, but an order 
of magnitude more memory usage is was not expected(should be 
lower)? I could understand a small speed slowdown, but not 10x.

Any ideas?

class fstring(T = char)
{
	import std.traits;
	T[] buf;	
	int length = 0;
	int Inc = 65535;

	this(int inc = 65535)
	{
		Inc = inc;
		buf.length = Inc;
		for(int i = 0; i < buf.length; i++)	buf[i] = 0;
	}

	// foreach
	ReturnType!(D) opApply(D)(scope D dg)
     {
		ReturnType!(D) result;
		for (int i = 0; i < length; i++)
		{
			result = dg(buf[i]);
			if (result) break;
		}
		return result;
     }

	// Append string to end
	auto Append(S)(S s)
	{
		while (length + s.length >= buf.length) buf.length += Inc;
		for (int i = 0; i < s.length; i++) buf[length + i] = 
cast(T)(s[i]);
		length += s.length;
	}

	// Prepends string to start
	auto Prepend(S)(S s)
	{
		while (length + s.length >= buf.length) buf.length += Inc;
		for (int i = 0; i < length; i++) buf[s.length + length - i - 1] 
= buf[length - i - 1];
		for (int i = 0; i < s.length; i++) buf[i] = cast(T)(s[i]);
		length += s.length;
	}


	auto opOpAssign(string op, S)(S s) if (op == "~") { Append(s); }	
	void opAssign(S)(S s) if (!is(S == typeof(this))) { length = 0; 
Append(s); }
	auto opBinary(string op, S)(S s) if (op == "~") { Append(s); 
return this; }
	auto opBinaryRight(string op, S)(S s) if (op == "~") { 
Prepend(s); return this; }

	bool opEquals(S)(S s)
	{		
		if (buf.length != s.length) return false;
		for(int i = 0; i < buf.length; i++)	if (buf[i] != s[i]) return 
false;
		return true;
	}

	// Forward replaces string a with string b in place(no memory 
allocations) and greedily(as they are found from left to right)
	auto replace(A, B)(A a, B b)
	{
		if (length < a.length || a.length == 0 || b.length == 0) return 
this;
		auto small = (a.length >= b.length);
		int idx = 0;
		int count = 0; // number of matches, used when b.length > 
a.length to determine how much to overallocate(which depends on 
the number of matches)
		for(int i = 0; i <= length - a.length; i++)
		{
			if (buf[i] == a[0])
			{
				auto found = true;
				for(int j = 1; j < a.length; j++)	// Work backwards as more 
likely to be a mismatch(faster)
					if (buf[i + a.length - j] != a[a.length - j])
					{
						found = false;
						break;
					}

				if (found)
				{
					count++;
					i += a.length - 1;	// dec by 1 because for loop will inc
					if (small) for(int j = 0; j < b.length; j++) buf[idx++] = 
b[j];
					continue;
				}
			}

			if (small) buf[idx++] = buf[i]; 			
		}

		int extra = -count*(a.length - b.length);
		if (small) { length += extra; if (buf.length > length) 
buf[length] = 0; return this; }

		// We now know the count so the (a.length - b.length)*count is 
the amount to expand/contract buf		
		auto end = length + extra;
		if (end >= buf.length) buf.length += Inc;


		// shift the string to make space at the head, this allows us 
to use essentially the same algorithm as above, the difference is 
that we must first know the number of matches to overallocate the 
buffer if necessary(could do dynamically) and to shift the string 
ahead
		for(int i = 0; i < length; i++)	
			buf[end - 1 - i] = buf[length - i - 1];

		idx = 0;		
		for(int i = extra; i <= length - a.length + extra; i++)
		{
			if (buf[i] == a[0])
			{
				auto found = true;
				for(int j = 1; j < a.length; j++)	// Work backwards as more 
likely to be a mismatch(faster)
					if (buf[i + a.length - j] != a[a.length - j])
					{
						found = false;
						break;
					}

				if (found)
				{
					count--;
					i += a.length - 1;	// dec by 1 because for loop will inc
					for(int j = 0; j < b.length; j++)
						buf[idx++] = b[j];
					if (count == 0) break;
					continue;
				}
			}

			buf[idx++] = buf[i]; 			
		}

		length = length + extra;
		assert(count == 0, "fstring: Programmatic Bug - Replace String 
Error!");
		return this;
	}

	// Strips away the begining(dir = -1), the end(dir = 1), or 
both(dir = 0) that match s
	auto strip(int dir = 0, Args...)(Args s)
	{

		int start = 0, end = length;
                 if (dir <= 0)
		for(int i = 0; i <= length/2 + 1; i++)
		{
			bool anyFound = false;
			foreach(ss; s)
			{			
				
				auto found = true;
				for(int j = 0; j < ss.length; j++)
				{
					if (i + j >= length) break;
					if (buf[i + j] != ss[j])
						found = false;
				}
				anyFound = anyFound | found;

				if (found) i += ss.length - 1;
			}

			if (anyFound)
				start = i+1;
			else
				break;
		}

	        if (dir >= 0)	
		for(int i = length/2; i < length; i++)
		{
			bool anyFound = false;
			foreach(ss; s)
			{			
				auto found = true;
				for(int j = 0; j < ss.length; j++)
				{
					if (i + j >= length) break;
					if (buf[i + j] != ss[j])
						found = false;
				}
				anyFound = anyFound | found;

				if (found) i += ss.length - 1;
			}

			if (!anyFound)
				end = i+1;
		}
		
		for(int i = start; i < end; i++)
			buf[i - start] = buf[i];
		length = end - start;
		return this;
	}

	auto opSlice(int start, int end) { return buf[start..end]; }
	@property int opDollar(size_t dim : 0)() { return length; }
	auto opIndex() { return buf[0..length]; }
	auto opIndex(size_t i) { return buf[i]; }
	auto opIndexAssign(S)(S v, size_t i) { buf[i] = v; }
	override string toString() { return to!string(buf[0..length]); }

}


More information about the Digitalmars-d-learn mailing list