D memory consumption/runtime speed problem

sybrandy sybrandy at gmail.com
Thu Jan 14 15:48:24 PST 2010


<snip>

Using a small buffer as suggested by Daniel Keep and Steven 
Schveighoffer significantly improved performance.  Now down to about 5 
seconds.  I ended up using the static array buffer since the 
encodeNumber function will be in its own file in my resulting program, 
so I can keep it private.  Doing something similar to the output buffer 
had a similar effect and it's now processing everything in less than 2 
seconds!

I didn't realize that all those little arrays were being created.  Perhaps 
this is something that should be detailed in the arrays documentation 
or, perhaps even better, in an optimization guide?  I had honestly hoped 
the GC would keep memory in check so that I wouldn't have to allocate for 
the worst-case scenario (which, with my RLE implementation, is 2 * the 
input buffer size), but I guess I have to.

Well, perhaps my original code will be of use if Walter and the gang 
decide to try to revamp the GC and want some "bad" code to test it with.

Thanks all!

By the way, the updated code is below:

import std.conv;
import std.stdio;
import std.stream;
import std.date;
import std.mmfile;
import std.array;

// Path of the file to encode. Starts out as a built-in default and is
// replaced by main() with the first command-line argument.
string filename = "enwik8_small";

private immutable
{
     // Largest run length representable in a 1-, 2-, 3- and 4-byte encoding.
     // Two bits of the first byte are reserved for the width tag, leaving
     // 6, 14, 22 and 30 payload bits respectively.
     uint ONE_BYTE_VAL   = 0x0000_003F;
     uint TWO_BYTE_VAL   = 0x0000_3FFF;
     uint THREE_BYTE_VAL = 0x003F_FFFF;
     uint FOUR_BYTE_VAL  = 0x3FFF_FFFF;

     // Width tag OR-ed into the top two bits of the first encoded byte.
     uint ONE_BYTE_MASK   = 0x00;
     uint TWO_BYTE_MASK   = 0x40;
     uint THREE_BYTE_MASK = 0x80;
     uint FOUR_BYTE_MASK  = 0xC0;
}

// Scratch storage that encodeNumber() fills and returns slices of, so that
// no array is allocated per encoded number.
private static ubyte[4] encodeBuff;

/**
 * Encodes a run length as a 1- to 4-byte big-endian number whose first byte
 * carries a width tag in its top two bits.
 *
 * Params:
 *     count = value to encode; must not exceed FOUR_BYTE_VAL
 *
 * Returns: a slice of the module-level encodeBuff holding the encoded bytes.
 *          The slice aliases that shared buffer, so its contents are
 *          overwritten by the next call; copy the bytes out before calling
 *          again.
 *
 * Throws: Exception if count is larger than FOUR_BYTE_VAL.
 */
ubyte[] encodeNumber(in uint count)
{
     // Choose the smallest width that can hold the value, plus its tag.
     uint width;
     uint tag;
     if (count <= ONE_BYTE_VAL)
     {
         width = 1;
         tag = ONE_BYTE_MASK;
     }
     else if (count <= TWO_BYTE_VAL)
     {
         width = 2;
         tag = TWO_BYTE_MASK;
     }
     else if (count <= THREE_BYTE_VAL)
     {
         width = 3;
         tag = THREE_BYTE_MASK;
     }
     else if (count <= FOUR_BYTE_VAL)
     {
         width = 4;
         tag = FOUR_BYTE_MASK;
     }
     else
     {
         throw new Exception("Invalid count provided!");
     }

     // Most significant byte first; it shares its byte with the width tag.
     uint shift = 8 * (width - 1);
     encodeBuff[0] = cast(ubyte)(tag | (count >>> shift));

     // Remaining bytes, from high to low.
     for (uint pos = 1; pos < width; ++pos)
     {
         shift -= 8;
         encodeBuff[pos] = cast(ubyte)((count >>> shift) & 0x000000ff);
     }

     return encodeBuff[0 .. width];
}

/**
 * Run-length encodes buff into output.
 *
 * Each run of identical bytes is written as the run length (in the
 * variable-width form produced by encodeNumber) followed by the byte value.
 *
 * Params:
 *     buff   = bytes to encode; may be empty
 *     output = destination buffer. It is grown to 2 * buff.length if it is
 *              smaller than that, and on return its length is set to the
 *              number of encoded bytes actually written.
 *
 * Throws: whatever encodeNumber throws when a run is too long for it to
 *         encode.
 */
void encode(in ubyte[] buff, ref ubyte[] output)
{
     // Empty input has no first byte to seed the run with.
     if (buff.length == 0)
     {
         output.length = 0;
         return;
     }

     // Worst case is a run of length one per input byte: one count byte
     // plus one value byte each. Longer runs never exceed that ratio.
     immutable size_t worstCase = buff.length * 2;
     if (output.length < worstCase)
     {
         output.length = worstCase;
     }

     // size_t so the write position cannot wrap on large inputs.
     size_t outIdx = 0;

     // Writes one (length, value) pair at the current output position.
     void emitRun(uint runLength, ubyte value)
     {
         foreach (t; encodeNumber(runLength))
         {
             output[outIdx++] = t;
         }
         output[outIdx++] = value;
     }

     ubyte currByte = buff[0];
     uint count = 0;
     foreach (byteVal; buff)
     {
         // The first byte always equals currByte, so count is non-zero
         // whenever a change of value is seen.
         if (byteVal != currByte)
         {
             emitRun(count, currByte);
             currByte = byteVal;
             count = 0;
         }
         count++;
     }

     // Flush the run that was still open when the input ended.
     emitRun(count, currByte);

     // Expose the encoded size to the caller.
     output.length = outIdx;
}

/**
 * One benchmark iteration: memory-maps the file named by the module-level
 * `filename`, and run-length encodes its contents into a freshly allocated
 * buffer of twice the file size.
 */
void benchCode()
{
     // `scope` destroys the MmFile when this function returns instead of
     // leaving it for the garbage collector, so repeated benchmark
     // iterations do not accumulate open mappings.
     scope MmFile buff = new MmFile(filename);

     // Twice the input size, matching the encoder's worst-case output.
     ubyte[] encodedBytes;
     encodedBytes.length = cast(size_t)buff.length * 2;

     // The mapped slice is only used for the duration of this call.
     encode(cast(ubyte[])buff[], encodedBytes);
}

/**
 * Program entry point.
 *
 * Expects two arguments: the path of the file to encode and the number of
 * times to run the benchmark. Prints a usage message and returns if either
 * is missing.
 */
void main(string[] args)
{
     // Both arguments are required; indexing args without this check would
     // fail with a range violation when they are absent.
     if (args.length < 3)
     {
         writeln("Usage: ", args.length > 0 ? args[0] : "rle",
                 " <input-file> <iterations>");
         return;
     }

     filename = args[1];
     writeln("Benchmark time: ", benchmark!(benchCode)(to!(uint)(args[2])));
}



More information about the Digitalmars-d-learn mailing list