GC performances

Mon Nov 26 05:41:01 PST 2007

Lutger Wrote:

> Another interesting thing here is that the Java version beats all other 
> languages too, including the ones with manual memory management.

Because the MMM is done in a vey naive way. See at the bottom of the page for faster versions:

http://shootout.alioth.debian.org/gp4sandbox/benchmark.php?test=binarytrees&lang=all#alt

> But this example does show how relative these microbenchmarks are. After 
> all, it costs the Java version more than ten times the memory to 
> outperform D, how will that affect the speed of a complex application?

At the moment people use Java on big-iron servers, not D. Programming efforts often cost less than the RAM, it seems.

> btw. I tried implementing this with a freelist, as suggested by the 
> article at the digitalmars site, but it was slower than manual memory 
> management. Might be because the code for allocation wasn't inlined for 
> some reason. 

Try beating this on the last DMD:

static import std.gc;
import std.c.stdlib: malloc;
import std.conv: toInt;

struct TyTreeNode {
    TyTreeNode* left, right;
    int item;
}

static TyTreeNode* freelist;

TyTreeNode* newTreeNode(int item, TyTreeNode* left=null, TyTreeNode* right=null) {
    TyTreeNode* t = freelist;
    freelist = freelist.left;
    t.left = left;
    t.right = right;
    t.item = item;
    return t;
}

int itemCheck(TyTreeNode* tree) {
    if (tree.left is null)
        return tree.item;
    else
        return tree.item + itemCheck(tree.left) - itemCheck(tree.right);
}

TyTreeNode* makeTree(int item, uint depth) {
    if (depth > 0)
        return newTreeNode(item, makeTree(2*item-1, depth-1), makeTree(2*item, depth-1));
    else
        return newTreeNode(item);
}

void deleteTree(TyTreeNode* tree) {
    if (tree.left !is null) {
        deleteTree(tree.left);
        deleteTree(tree.right);
    }

    tree.left = freelist;
    freelist = tree;
}

void main(string[] args) {
    std.gc.disable();
    const minDepth = 4;
    int n = (args.length > 1) ? toInt(args[1]) : 2;
    int maxDepth = (minDepth + 2) > n ? minDepth + 2 : n;

    const int nnodes = ((1 << 16) - 1) / TyTreeNode.sizeof;
    const int block_len = nnodes > 0 ? nnodes : 1;

    uint m = n > 6 ? (1<<(n+2)) : 255;
    while (m > 0) {
        uint toAllocate = m > block_len ? block_len : m;
        m -= toAllocate;
        auto pool = cast(TyTreeNode*)malloc(TyTreeNode.sizeof * toAllocate);
        pool[toAllocate-1].left = freelist;
        freelist = pool;
        for (uint i = 0; i < toAllocate-1; i++)
            pool[i].left = &(pool[i+1]);
    }

    auto stretchTree = makeTree(0, maxDepth + 1);
    printf("stretch tree of depth %u\t check: %li\n", maxDepth + 1, itemCheck(stretchTree));
    deleteTree(stretchTree);

    auto longLivedTree = makeTree(0, maxDepth);
    for (int depth = minDepth; depth <= maxDepth; depth += 2) {
        int iterations = 1 << (maxDepth - depth + minDepth);
        int check = 0;

        for (int i = 1; i <= iterations; i++) {
            auto tempTree = makeTree(i, depth);
            check += itemCheck(tempTree);
            deleteTree(tempTree);

            tempTree = makeTree(-i, depth);
            check += itemCheck(tempTree);
            deleteTree(tempTree);
        }

        printf("%li\t trees of depth %u\t check: %li\n", iterations*2, depth, check);
    }

    printf("int lived tree of depth %u\t check: %li\n", maxDepth, itemCheck(longLivedTree) );
}

> Likely not the intent of the benchmark, but an array based 
> implementation could be faster here.

It seems slower.

Bye,
bearophile