why does array appending add a page size instead of doubling ?

timotheecour thelastmammoth at gmail.com
Sun Feb 3 00:53:10 PST 2013


> Note that dynamic arrays are generic containers, so they aren't 
> exactly optimized for anything. You could try that test with 
> the "made for appending" std.array.appender: It always tries to 
> extend without reallocating, and only relocates when the 
> current memory segment is 100% full.
>
> Furthermore, when it *does* relocate, it use D-style memmove 
> without calling postblit. Independent of language, and with the 
> requirement of contiguous memory, I can't really think of a 
> scheme that could be better than that...?


modified a bit (see below) to include timing etc. I'm on OSX 
(16GB ram, core i7 2.3GHz).
dmd is slow even with optimizations so I used ldc:

ldmd2 -inline -O -release -noboundscheck  (similar results with 
ldc2 -O3 -release)
for C++: g++ -O2

It seems that:
* the number of moves is up to 2x (resp. 1.8x) as the C++ version 
for T[] (resp. appender)
* the C++ version is 7x (resp 1.8x) times faster .
* it seems even the appender version grows super-linearly whereas 
C++ version stays roughly linear in that regime in terms of time.

Based on this, I'm curious whether the current growing scheme 
could be improved, or be closer to the simple doubling scheme 
used in C++.

T[]: 94 s
appender!(T[]): 26.130
std::vector<T> (C++): 14.635s



D version with T[]:
Elements[0]: 1, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[1]: 2, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[2]: 4, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[3]: 8, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[4]: 16, Allocations: 2, Moved: 15; Moved/Elements = 
0.9375; time/Elements=0
Elements[5]: 32, Allocations: 3, Moved: 46; Moved/Elements = 
1.4375; time/Elements=0
Elements[6]: 64, Allocations: 4, Moved: 109; Moved/Elements = 
1.70312; time/Elements=0
Elements[7]: 128, Allocations: 5, Moved: 236; Moved/Elements = 
1.84375; time/Elements=0
Elements[8]: 256, Allocations: 6, Moved: 491; Moved/Elements = 
1.91797; time/Elements=0
Elements[9]: 512, Allocations: 7, Moved: 1001; Moved/Elements = 
1.95508; time/Elements=0
Elements[10]: 1024, Allocations: 8, Moved: 2023; Moved/Elements = 
1.97559; time/Elements=0
Elements[11]: 2048, Allocations: 9, Moved: 4069; Moved/Elements = 
1.98682; time/Elements=0
Elements[12]: 4096, Allocations: 9, Moved: 4069; Moved/Elements = 
0.993408; time/Elements=0
Elements[13]: 8192, Allocations: 9, Moved: 4069; Moved/Elements = 
0.496704; time/Elements=0
Elements[14]: 16384, Allocations: 9, Moved: 4069; Moved/Elements 
= 0.248352; time/Elements=0
Elements[15]: 32768, Allocations: 9, Moved: 4069; Moved/Elements 
= 0.124176; time/Elements=0
Elements[16]: 65536, Allocations: 9, Moved: 4069; Moved/Elements 
= 0.062088; time/Elements=1.52588e-05
Elements[17]: 131072, Allocations: 9, Moved: 4069; Moved/Elements 
= 0.031044; time/Elements=1.52588e-05
Elements[18]: 262144, Allocations: 9, Moved: 4069; Moved/Elements 
= 0.015522; time/Elements=2.28882e-05
Elements[19]: 524288, Allocations: 10, Moved: 114644; 
Moved/Elements = 0.218666; time/Elements=2.09808e-05
Elements[20]: 1048576, Allocations: 11, Moved: 745411; 
Moved/Elements = 0.710879; time/Elements=2.19345e-05
Elements[21]: 2097152, Allocations: 12, Moved: 2342834; 
Moved/Elements = 1.11715; time/Elements=2.24113e-05
Elements[22]: 4194304, Allocations: 13, Moved: 7344033; 
Moved/Elements = 1.75095; time/Elements=2.19345e-05
Elements[23]: 8388608, Allocations: 13, Moved: 10469281; 
Moved/Elements = 1.24804; time/Elements=2.18153e-05
Elements[24]: 16777216, Allocations: 13, Moved: 17981345; 
Moved/Elements = 1.07177; time/Elements=2.15173e-05
Elements[25]: 33554432, Allocations: 14, Moved: 59359120; 
Moved/Elements = 1.76904; time/Elements=2.18451e-05
Elements[26]: 67108864, Allocations: 14, Moved: 127885200; 
Moved/Elements = 1.90564; time/Elements=2.17557e-05
Elements[27]: 134217728, Allocations: 15, Moved: 196579199; 
Moved/Elements = 1.46463; time/Elements=2.16886e-05
Elements[28]: 268435456, Allocations: 14, Moved: 408059792; 
Moved/Elements = 1.52014; time/Elements=2.19792e-05
Elements[29]: 536870912, Allocations: 14, Moved: 1094434704; 
Moved/Elements = 2.03854; time/Elements=2.21152e-05
Elements[30]: 1073741824, Allocations: 14, Moved: 1874734992; 
Moved/Elements = 1.74598; time/Elements=2.17576e-05
Elements[31]: 2147483648, Allocations: 13, Moved: 2723954593; 
Moved/Elements = 1.26844; time/Elements=2.20812e-05
./test_010  91.65s user 2.97s system 99% cpu 1:34.79 total

D version with appender:
Elements[0]: 1, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[1]: 2, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[2]: 4, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[3]: 8, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[4]: 16, Allocations: 1, Moved: 0; Moved/Elements = 0; 
time/Elements=0
Elements[5]: 32, Allocations: 2, Moved: 16; Moved/Elements = 0.5; 
time/Elements=0
Elements[6]: 64, Allocations: 2, Moved: 16; Moved/Elements = 
0.25; time/Elements=0
Elements[7]: 128, Allocations: 3, Moved: 80; Moved/Elements = 
0.625; time/Elements=0
Elements[8]: 256, Allocations: 3, Moved: 80; Moved/Elements = 
0.3125; time/Elements=0
Elements[9]: 512, Allocations: 4, Moved: 336; Moved/Elements = 
0.65625; time/Elements=0
Elements[10]: 1024, Allocations: 4, Moved: 336; Moved/Elements = 
0.328125; time/Elements=0
Elements[11]: 2048, Allocations: 5, Moved: 1360; Moved/Elements = 
0.664062; time/Elements=0
Elements[12]: 4096, Allocations: 6, Moved: 3408; Moved/Elements = 
0.832031; time/Elements=0
Elements[13]: 8192, Allocations: 6, Moved: 3408; Moved/Elements = 
0.416016; time/Elements=0
Elements[14]: 16384, Allocations: 6, Moved: 3408; Moved/Elements 
= 0.208008; time/Elements=0
Elements[15]: 32768, Allocations: 6, Moved: 3408; Moved/Elements 
= 0.104004; time/Elements=0
Elements[16]: 65536, Allocations: 6, Moved: 3408; Moved/Elements 
= 0.052002; time/Elements=0
Elements[17]: 131072, Allocations: 6, Moved: 3408; Moved/Elements 
= 0.026001; time/Elements=0
Elements[18]: 262144, Allocations: 6, Moved: 3408; Moved/Elements 
= 0.0130005; time/Elements=3.8147e-06
Elements[19]: 524288, Allocations: 7, Moved: 462160; 
Moved/Elements = 0.8815; time/Elements=3.8147e-06
Elements[20]: 1048576, Allocations: 7, Moved: 527696; 
Moved/Elements = 0.50325; time/Elements=4.76837e-06
Elements[21]: 2097152, Allocations: 9, Moved: 3083600; 
Moved/Elements = 1.47038; time/Elements=5.24521e-06
Elements[22]: 4194304, Allocations: 9, Moved: 3304784; 
Moved/Elements = 0.787922; time/Elements=5.00679e-06
Elements[23]: 8388608, Allocations: 9, Moved: 6622544; 
Moved/Elements = 0.789469; time/Elements=5.36442e-06
Elements[24]: 16777216, Allocations: 10, Moved: 14069072; 
Moved/Elements = 0.838582; time/Elements=5.36442e-06
Elements[25]: 33554432, Allocations: 10, Moved: 29244752; 
Moved/Elements = 0.871562; time/Elements=5.42402e-06
Elements[26]: 67108864, Allocations: 10, Moved: 57773392; 
Moved/Elements = 0.860891; time/Elements=5.39422e-06
Elements[27]: 134217728, Allocations: 11, Moved: 241548624; 
Moved/Elements = 1.79968; time/Elements=5.93811e-06
Elements[28]: 268435456, Allocations: 10, Moved: 478076240; 
Moved/Elements = 1.78097; time/Elements=5.68479e-06
Elements[29]: 536870912, Allocations: 9, Moved: 773229904; 
Moved/Elements = 1.44025; time/Elements=5.64754e-06
Elements[30]: 1073741824, Allocations: 9, Moved: 1165659472; 
Moved/Elements = 1.0856; time/Elements=5.46221e-06
Elements[31]: 2147483648, Allocations: 10, Moved: 3299478864; 
Moved/Elements = 1.53644; time/Elements=6.32741e-06
./test_010  22.63s user 3.31s system 99% cpu 26.130 total


C++ version with std::vector:
g++ -O2 test8.cc -o test && time ./test 	
Elements[0] : 1 Allocations: 1 Moved: 0 Moved/Elements: 0 
time/Elements: 0
Elements[1] : 2 Allocations: 2 Moved: 1 Moved/Elements: 0.5 
time/Elements: 0
Elements[2] : 4 Allocations: 3 Moved: 3 Moved/Elements: 0.75 
time/Elements: 0
Elements[3] : 8 Allocations: 4 Moved: 7 Moved/Elements: 0.875 
time/Elements: 0
Elements[4] : 16 Allocations: 5 Moved: 15 Moved/Elements: 0.9375 
time/Elements: 0
Elements[5] : 32 Allocations: 6 Moved: 31 Moved/Elements: 0.96875 
time/Elements: 0
Elements[6] : 64 Allocations: 7 Moved: 63 Moved/Elements: 
0.984375 time/Elements: 0
Elements[7] : 128 Allocations: 8 Moved: 127 Moved/Elements: 
0.992188 time/Elements: 0
Elements[8] : 256 Allocations: 9 Moved: 255 Moved/Elements: 
0.996094 time/Elements: 0
Elements[9] : 512 Allocations: 10 Moved: 511 Moved/Elements: 
0.998047 time/Elements: 0
Elements[10] : 1024 Allocations: 11 Moved: 1023 Moved/Elements: 
0.999023 time/Elements: 0
Elements[11] : 2048 Allocations: 12 Moved: 2047 Moved/Elements: 
0.999512 time/Elements: 0
Elements[12] : 4096 Allocations: 13 Moved: 4095 Moved/Elements: 
0.999756 time/Elements: 0
Elements[13] : 8192 Allocations: 14 Moved: 8191 Moved/Elements: 
0.999878 time/Elements: 0
Elements[14] : 16384 Allocations: 15 Moved: 16383 Moved/Elements: 
0.999939 time/Elements: 0
Elements[15] : 32768 Allocations: 16 Moved: 32767 Moved/Elements: 
0.999969 time/Elements: 0
Elements[16] : 65536 Allocations: 17 Moved: 65535 Moved/Elements: 
0.999985 time/Elements: 0
Elements[17] : 131072 Allocations: 18 Moved: 131071 
Moved/Elements: 0.999992 time/Elements: 0
Elements[18] : 262144 Allocations: 19 Moved: 262143 
Moved/Elements: 0.999996 time/Elements: 3.8147e-06
Elements[19] : 524288 Allocations: 20 Moved: 524287 
Moved/Elements: 0.999998 time/Elements: 3.8147e-06
Elements[20] : 1048576 Allocations: 21 Moved: 1048575 
Moved/Elements: 0.999999 time/Elements: 2.86102e-06
Elements[21] : 2097152 Allocations: 22 Moved: 2097151 
Moved/Elements: 1 time/Elements: 2.86102e-06
Elements[22] : 4194304 Allocations: 23 Moved: 4194303 
Moved/Elements: 1 time/Elements: 2.86102e-06
Elements[23] : 8388608 Allocations: 24 Moved: 8388607 
Moved/Elements: 1 time/Elements: 3.09944e-06
Elements[24] : 16777216 Allocations: 25 Moved: 16777215 
Moved/Elements: 1 time/Elements: 3.03984e-06
Elements[25] : 33554432 Allocations: 26 Moved: 33554431 
Moved/Elements: 1 time/Elements: 3.15905e-06
Elements[26] : 67108864 Allocations: 27 Moved: 67108863 
Moved/Elements: 1 time/Elements: 3.08454e-06
Elements[27] : 134217728 Allocations: 28 Moved: 134217727 
Moved/Elements: 1 time/Elements: 3.09944e-06
Elements[28] : 268435456 Allocations: 29 Moved: 268435455 
Moved/Elements: 1 time/Elements: 3.24473e-06
Elements[29] : 536870912 Allocations: 30 Moved: 536870911 
Moved/Elements: 1 time/Elements: 3.32668e-06
Elements[30] : 1073741824 Allocations: 31 Moved: 1073741823 
Moved/Elements: 1 time/Elements: 3.34531e-06
Elements[31] : 2147483648 Allocations: 32 Moved: 2147483647 
Moved/Elements: 1 time/Elements: 3.36301e-06

real	0m14.635s
user	0m11.665s
sys	0m2.962s



----
import std.stdio;
import std.datetime;

import std.array;


static private StopWatch sw;//TEMP:move?
auto TIC(){
   sw.reset();
   sw.start();
}
auto TOC(){
   sw.stop();
   return sw.peek().msecs;
}

void main(){
   enum N=32;
   size_t maxElements=1;
   alias ubyte T;
    for (size_t iter=0;iter<N;iter++ , maxElements*=2) {
     //if(iter<N-1) continue;


        size_t totalElementsMoved = 0;
        size_t totalAllocations = 0;
        TIC();
        version(none){
        T[] s;
        foreach (i; 0 .. maxElements) {
            auto oldPtr = s.ptr;
            s ~= 0;
            if (s.ptr != oldPtr) {
                totalElementsMoved += (s.length - 1);
                ++totalAllocations;
            }
        }
      }
      else{
        auto s = appender!(T[])();
        foreach (i; 0 .. maxElements) {
            auto oldPtr = s.data.ptr;
            s.put(cast(T)0);
            if (s.data.ptr != oldPtr) {
                totalElementsMoved += (s.data.length - 1);
                ++totalAllocations;
            }
        }
      }
        auto t=TOC();
        writefln("Elements[%s]: %s, Allocations: %s, Moved: %s; 
Moved/Elements = %s; time/Elements=%s",
         iter, maxElements, totalAllocations, totalElementsMoved, 
totalElementsMoved/cast(double)(maxElements), 
t/cast(double)(maxElements));
    }
}
----

----
#include <iostream>
#include <vector>

#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

using namespace std;

struct timeval start, end;
     long mtime, seconds, useconds;

void tic(){
     gettimeofday(&start, NULL);
   }
long toc(){
     gettimeofday(&end, NULL);

     seconds  = end.tv_sec  - start.tv_sec;
     useconds = end.tv_usec - start.tv_usec;

     mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5;
     return mtime;
}


int main(){
   int N=32;
   size_t maxElements=1;
   typedef unsigned char T;
    for (size_t iter=0;iter<N;iter++ , maxElements*=2) {
        vector<T> s;
        size_t totalElementsMoved = 0;
        size_t totalAllocations = 0;

tic();
        for (size_t i = 0; i < maxElements; ++i) {
            T *oldPtr = s.empty() ? NULL : &s[0];
            s.push_back(0);

            if (&s[0] != oldPtr) {
                totalElementsMoved += (s.size() - 1);
                ++totalAllocations;
            }
        }
long t=toc();

        cout << "Elements[" << iter << "] : " << maxElements
             << " Allocations: " << totalAllocations
             << " Moved: " << totalElementsMoved
             << " Moved/Elements: " << 
totalElementsMoved/static_cast<double>(maxElements)
             << " time/Elements: " << 
t/static_cast<double>(maxElements)
             << '\n';
    }
}
----


More information about the Digitalmars-d-learn mailing list