GC performances
bearophile
bearophileHUGS at lycos.com
Mon Nov 26 05:41:01 PST 2007
Lutger Wrote:
> Another interesting thing here is that the Java version beats all other
> languages too, including the ones with manual memory management.
Because the MMM is done in a vey naive way. See at the bottom of the page for faster versions:
http://shootout.alioth.debian.org/gp4sandbox/benchmark.php?test=binarytrees&lang=all#alt
> But this example does show how relative these microbenchmarks are. After
> all, it costs the Java version more than ten times the memory to
> outperform D, how will that affect the speed of a complex application?
At the moment people use Java on big-iron servers, not D. Programming efforts often cost less than the RAM, it seems.
> btw. I tried implementing this with a freelist, as suggested by the
> article at the digitalmars site, but it was slower than manual memory
> management. Might be because the code for allocation wasn't inlined for
> some reason.
Try beating this on the last DMD:
static import std.gc;
import std.c.stdlib: malloc;
import std.conv: toInt;
struct TyTreeNode {
TyTreeNode* left, right;
int item;
}
static TyTreeNode* freelist;
TyTreeNode* newTreeNode(int item, TyTreeNode* left=null, TyTreeNode* right=null) {
TyTreeNode* t = freelist;
freelist = freelist.left;
t.left = left;
t.right = right;
t.item = item;
return t;
}
int itemCheck(TyTreeNode* tree) {
if (tree.left is null)
return tree.item;
else
return tree.item + itemCheck(tree.left) - itemCheck(tree.right);
}
TyTreeNode* makeTree(int item, uint depth) {
if (depth > 0)
return newTreeNode(item, makeTree(2*item-1, depth-1), makeTree(2*item, depth-1));
else
return newTreeNode(item);
}
void deleteTree(TyTreeNode* tree) {
if (tree.left !is null) {
deleteTree(tree.left);
deleteTree(tree.right);
}
tree.left = freelist;
freelist = tree;
}
void main(string[] args) {
std.gc.disable();
const minDepth = 4;
int n = (args.length > 1) ? toInt(args[1]) : 2;
int maxDepth = (minDepth + 2) > n ? minDepth + 2 : n;
const int nnodes = ((1 << 16) - 1) / TyTreeNode.sizeof;
const int block_len = nnodes > 0 ? nnodes : 1;
uint m = n > 6 ? (1<<(n+2)) : 255;
while (m > 0) {
uint toAllocate = m > block_len ? block_len : m;
m -= toAllocate;
auto pool = cast(TyTreeNode*)malloc(TyTreeNode.sizeof * toAllocate);
pool[toAllocate-1].left = freelist;
freelist = pool;
for (uint i = 0; i < toAllocate-1; i++)
pool[i].left = &(pool[i+1]);
}
auto stretchTree = makeTree(0, maxDepth + 1);
printf("stretch tree of depth %u\t check: %li\n", maxDepth + 1, itemCheck(stretchTree));
deleteTree(stretchTree);
auto longLivedTree = makeTree(0, maxDepth);
for (int depth = minDepth; depth <= maxDepth; depth += 2) {
int iterations = 1 << (maxDepth - depth + minDepth);
int check = 0;
for (int i = 1; i <= iterations; i++) {
auto tempTree = makeTree(i, depth);
check += itemCheck(tempTree);
deleteTree(tempTree);
tempTree = makeTree(-i, depth);
check += itemCheck(tempTree);
deleteTree(tempTree);
}
printf("%li\t trees of depth %u\t check: %li\n", iterations*2, depth, check);
}
printf("int lived tree of depth %u\t check: %li\n", maxDepth, itemCheck(longLivedTree) );
}
> Likely not the intent of the benchmark, but an array based
> implementation could be faster here.
It seems slower.
Bye,
bearophile
More information about the Digitalmars-d
mailing list