D memory consumption/runtime speed problem

sybrandy sybrandy at gmail.com
Wed Jan 13 16:54:22 PST 2010


Hello,

I've been writing a bit of compression code and I noticed some strange 
behavior that's driving me a bit batty.  I don't know if it's a bug with 
D or something I did.  All I know is I can't figure it out.

Below is the simplified version of the code as a single file.  It takes 
two parameters.  The first is a file to "compress" and the second is the 
number of times to run the benchmark.  E.g. bugtest foo.txt 2

Now, if I set the second parameter to 1, it runs decently fast: 26 
seconds on my work laptop for a half-sized enwik8 from the Hutter 
Prize.  If I set it to 2, then it takes about 142 seconds.  In both 
cases a lot of memory is used, and I'm not really sure why.  Also, after 
it prints out the results, it takes several seconds for the program to exit.

Am I doing something wrong?  I've tried every trick that I could find by 
reading the documentation.  Btw: The last time I tried this was with the 
latest version of D released at the beginning of the month.

__CODE__

import std.conv;
import std.stdio;
import std.stream;
import std.date;
import std.mmfile;
import std.array;

// Path of the file to process; holds a default until it is reassigned.
string filename = "enwik8_small";

// Largest value that fits in an encoding of the given byte length.
// The first byte contributes 6 bits, every further byte 8 bits.
private immutable uint ONE_BYTE_VAL   = 0x3F;         //  6 bits
private immutable uint TWO_BYTE_VAL   = 0x3FFF;       // 14 bits
private immutable uint THREE_BYTE_VAL = 0x3F_FFFF;    // 22 bits
private immutable uint FOUR_BYTE_VAL  = 0x3FFF_FFFF;  // 30 bits

// Length tag OR-ed into the top two bits of the first encoded byte.
private immutable uint ONE_BYTE_MASK   = 0x00;
private immutable uint TWO_BYTE_MASK   = 0x40;
private immutable uint THREE_BYTE_MASK = 0x80;
private immutable uint FOUR_BYTE_MASK  = 0xC0;

/**
 * Encodes a number as a variable-length sequence of 1 to 4 bytes.
 *
 * The top two bits of the first byte carry the length tag taken from the
 * *_BYTE_MASK constants; the low six bits of that byte and all following
 * bytes carry the value, most significant byte first.
 *
 * Params:
 *     count = value to encode; must not exceed FOUR_BYTE_VAL
 *
 * Returns: a newly allocated array holding the encoded bytes.
 *
 * Throws: Exception if count is larger than FOUR_BYTE_VAL.
 */
ubyte[] encodeNumber(in uint count)
{
     // Each branch builds ONE array literal. Concatenating several
     // single-element literals with `~` allocates a temporary array per
     // operand plus one per concatenation, which adds up quickly when this
     // function is called once per run in a large input.
     if (count <= ONE_BYTE_VAL)
     {
         return [cast(ubyte)(ONE_BYTE_MASK | count)];
     }
     else if (count <= TWO_BYTE_VAL)
     {
         return [cast(ubyte)(TWO_BYTE_MASK | (count >>> 8)),
                 cast(ubyte)(count & 0x000000ff)];
     }
     else if (count <= THREE_BYTE_VAL)
     {
         return [cast(ubyte)(THREE_BYTE_MASK | (count >>> 16)),
                 cast(ubyte)((count >>> 8) & 0x000000ff),
                 cast(ubyte)(count & 0x000000ff)];
     }
     else if (count <= FOUR_BYTE_VAL)
     {
         return [cast(ubyte)(FOUR_BYTE_MASK | (count >>> 24)),
                 cast(ubyte)((count >>> 16) & 0x000000ff),
                 cast(ubyte)((count >>> 8) & 0x000000ff),
                 cast(ubyte)(count & 0x000000ff)];
     }
     else
     {
         throw new Exception("Invalid count provided!");
     }
}

/**
 * Run-length encodes buff into output.
 *
 * Every run of identical bytes is written as the run length (encoded by
 * encodeNumber) followed by the byte value itself.
 *
 * Params:
 *     buff   = bytes to encode; may be empty
 *     output = receives the encoded bytes; being an `out` parameter it is
 *              reset on entry, so an empty buff yields an empty output
 *
 * Throws: Exception (from encodeNumber) if a single run is longer than
 *         encodeNumber can represent.
 */
void encode(in ubyte[] buff, out ubyte[] output)
{
     // Nothing to encode; also guards the buff[0] access below.
     if (buff.length == 0)
     {
         return;
     }

     ubyte currByte = buff[0];
     uint count = 0;
     auto appOutput = appender(&output);
     foreach (byteVal; buff)
     {
         // A different byte ends the current run: flush it, start a new one.
         if (byteVal != currByte && count > 0)
         {
             appOutput.put(encodeNumber(count));
             appOutput.put(currByte);
             currByte = byteVal;
             count = 0;
         }
         count++;
     }
     // Flush the final run, which the loop never sees the end of.
     appOutput.put(encodeNumber(count));
     appOutput.put(currByte);
}

/**
 * One benchmark iteration: memory-maps the file named by the module-level
 * `filename` and run-length encodes its whole contents.
 *
 * The encoded result is discarded; only the work is being timed.
 */
void benchCode()
{
     // `scope` destroys the MmFile when this function returns, so the
     // mapping is released after every iteration instead of lingering
     // until the garbage collector finalizes the object.
     scope MmFile buff = new MmFile(filename);
     ubyte[] encodedBytes;
     encode(cast(ubyte[])buff[], encodedBytes);
}

/**
 * Entry point.
 *
 * Expects two command-line arguments: the file to encode and the number
 * of times to run the benchmark. Prints the benchmark result.
 */
void main(string[] args)
{
     // Both arguments are required; without this check the indexing below
     // fails with a range error instead of a helpful message.
     if (args.length < 3)
     {
         stderr.writeln("Usage: ", args.length > 0 ? args[0] : "bugtest",
                        " <file> <iterations>");
         return;
     }

     filename = args[1];
     writeln("Benchmark time: ", benchmark!(benchCode)(to!(uint)(args[2])));
}


More information about the Digitalmars-d-learn mailing list