why allocation of large amount of small objects so slow (x10) in D?

Robert Fraser fraserofthenight at gmail.com
Thu May 21 18:36:36 PDT 2009


Robert Fraser wrote:
> nobody wrote:
>> $ g++     alloc.cpp   -o alloc
>> $ time ./alloc
>> real    0m1.946s
>> user    0m1.688s
>> sys     0m0.256s
>>
>> $ dmd -O -release allocd.d
>> $ time ./allocd
>> real    0m22.734s
>> user    0m22.353s
>> sys     0m0.360s
>>
>> $ cat alloc.cpp
>> #include <vector>
>>
>> typedef std::vector<int> intvec;
>> typedef intvec* intvecp;
>>
>> int main() {
>>   int i, n = 20000000;
>>   intvecp* iva;
>>   iva = new intvecp[n];
>>   for (i = n; i-- > 0; ) {
>>     iva[i] = new intvec();
>>   }
>>
>>   return 0;
>> }
>>
>> $ cat allocd.d
>> int main() {
>>   int i, n = 20000000;
>>   Object[] oa;
>>   oa = new Object[n];
>>   for (i = n; i-- > 0; ) {
>>     oa[i] = new Object();
>>   }
>>
>>   return 0;
>> }
> 
> I use this a structure for arena-based memory allocation (attached).
> 
> Example of use:
> 
> import candy.util.MemPool
> 
> MemStack!() stack;
> 
> class MyObject
> {
>     mixin MemPoolNew!(stack);
> }
> 
> int main()
> {
>     stack.push();
>     int i, n = 20000000;
>     MyObject[] oa;
>     oa = new MyObject[n];
>     for (i = n; i-- > 0; )
>     {
>         oa[i] = new MyObject();
>     }
>     stack.pop();
>     return 0;
> }
> 
> The push() and pop() allows memory to be allocated and deallocated as 
> large blocks. However, you shouldn't need to deallocate manually -- it's 
> GCed memory, so ideally the GC should free it when it's no longer 
> referenced. That being said, I've run some tests, and the GC will free 
> it *eventually*, but it allocates 4-6x as much memory as it needs before 
> it starts freeing it, even when GC.collect() is called manually.
> 
> --------------------
> 
> /**
>  * Provides a pool of GCed memory to allocate things from a block.
>  * This maintains cache coherency for related types (i.e. tree nodes).
>  * It doesn't garuntee any ordering, though, the array struct should be
>  * used for that. Also, everything has to be freed at once, freeing one
>  * portion of this has no effect.
>  *
>  * Based on a similar concept posted by bearophile at:
>  * 
> http://www.digitalmars.com/webnews/newsgroups.php?art_group=digitalmars.D&article_id=88227 
> 
>  */
> public struct MemPool(size_t BLOCK_SIZE = 1 << 14)
> {
>     private void* next; // Next available block
>     private void* end;  // End of the current block
>     private void*[] blocks;
> 
>     public void* alloc(size_t sz)
>     {
>         sz = ((sz + 7) & ~7); // Next multiple of 8 if this isn't a 
> multiple of 8
>         if (this.next + sz >= this.end)
>         {
>             void* blk = GC.calloc(BLOCK_SIZE);
>             this.blocks.length = this.blocks.length + 1;
>             this.blocks[$ - 1] = blk;
>             this.next = blk;
>             this.end = blk + BLOCK_SIZE;
>         }
> 
>         void* ret = this.next;
>         this.next += sz;
>         return ret;
>     }
> 
>     public void free()
>     {
>         foreach(blk; this.blocks)
>             GC.free(blk);
>         this.blocks = null;
>         this.blocks.length = 0;
>         this.next = null;
>         this.end = null;
>     }
> }
> 
> /**
>  * Wrapper for MemPool that allocates the given struct
>  */
> public struct StructPool(T)
> {
>     private MemPool!() pool;
>     public T* alloc()  { return cast(T*) pool.alloc(T.sizeof); }
> }
> 
> public struct MemStack(size_t BLOCK_SIZE = 1 << 14)
> {
>     private Stack!(MemPool!(BLOCK_SIZE)*, 16, true, true) stack;
>     public static const size_t MAX_ALLOC = BLOCK_SIZE;
>     
>     public void* alloc(size_t sz) { return 
> stack.peek().alloc(sz);          }
>     public void  push()           { stack.push(new 
> MemPool!(BLOCK_SIZE));   }
>     public void  pop()            { 
> stack.pop().free();                     }
> }
> 
> /**
>  * Placement new mixin for allocating from a memory pool. Benchmarks 
> show this
>  * as faster than the D new in real usage (i.e. the parser runs about 1.2x
>  * faster using this).
>  */
> public template MemPoolNew(alias Pool)
> {
>     version(NoMemPool) { } else
>     {
>         public final new(uint sz)    { return Pool.alloc(sz); }
>         public final delete(void *p) {                        }
>     }
> }

Oops, that needs another module. Okay, both are attached in a 
compile-able form.
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: memory.d
URL: <http://lists.puremagic.com/pipermail/digitalmars-d/attachments/20090521/1e56f1f4/attachment.ksh>
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: array.d
URL: <http://lists.puremagic.com/pipermail/digitalmars-d/attachments/20090521/1e56f1f4/attachment-0001.ksh>


More information about the Digitalmars-d mailing list