[Issue 5813] [patch] std.array.Appender has severe performance and memory leak problems.

d-bugmail at puremagic.com d-bugmail at puremagic.com
Fri May 20 10:34:13 PDT 2011


http://d.puremagic.com/issues/show_bug.cgi?id=5813



--- Comment #1 from Rob Jacques <sandford at jhu.edu> 2011-05-20 10:29:57 PDT ---
*Update*
The previous version ended up having value semantics, not reference semantics,
due to the fact _tail wouldn't get updated and later nodes would be ignored.
For example:

    auto test = Appender!string(16);
    void func(Writer)(Writer w) {
        w.put("The quick brown fox jumps over the lazy dog");
    }
    func(test);
    assert(test.data == "The quick brown ");
    //assert(test.data == "The quick brown fox jumps over the lazy dog");

I've updated the code to check/update tail lazily as needed. As an out of date
_tail naturally triggers memory allocation, this doesn't overly change the
performance of put on average.

==============================================================================

struct Appender(A : T[], T) {
    private {
        enum  PageSize = 4096;          // Memory page size
        alias Unqual!T E;               // Internal element type

        struct Data {
            Data*       next;           // The next data segment
            size_t      capacity;       // Capacity of this segment
            E[]         arr;            // This segment's array

            // Initialize a segment using an existing array
            void opAssign(E[] _arr) {
                next           = null;
                capacity       = _arr.capacity;
                arr            = _arr;
                if(_arr.length < capacity) {
                    arr.length = capacity;
                    arr.length = _arr.length;
                }
                assert(_arr.ptr is arr.ptr,"Unexpected reallocation occurred");
            }

            // Create a new segment using an existing array
            this(Unqual!T[] _arr) { this = _arr; }

            // Create a new segment with at least size bytes
            this(size_t size) {
                if(size > PageSize)
                    size = (size +  PageSize-1) & ~(PageSize-1);
                auto bi  = GC.qalloc(size, !hasIndirections!T * 2);
                next     = null;
                capacity = bi.size / T.sizeof;
                arr      = (cast(E*)bi.base)[0..0];
                static assert(!false*2 == GC.BlkAttr.NO_SCAN);
            }
        }
        Data*  _head = null;                   // The head data segment
        Data*  _tail = null;                   // The last data segment

        // Returns: the total number of elements in the appender
        size_t _length() {
            size_t len = 0;
            for(auto d = _head; d !is null; d = d.next)
                len   += d.arr.length;
            return len;
        }

        // Flatten all the data segments into a single array
        E[] flatten() {
            if(!_head) return null;
            if( _head && _head.next is null)
                return _head.arr;

            size_t N   = _length;
            size_t len = N;
            size_t i   = 0;
            auto arr   = new E[N];
            for(auto d = _head; N > 0; d = d.next, N -= len, i += len) {
                len    = min(N, d.arr.length);
                memcpy(arr.ptr+i, d.arr.ptr, len * T.sizeof);
            }
            return arr;
        }

        // Returns: the next capacity size
        size_t nextCapacity() nothrow pure {
            auto   cap = _tail.capacity * T.sizeof * 2;
            return cap < PageSize ? cap : PageSize;
        }
    }

    /** Construct an appender with a given array.  Note that this does not copy
     *  the data.  If the array has a larger capacity as determined by
     *  arr.capacity, it will be used by the appender.  After initializing an
     *  appender on an array, appending to the original array will reallocate.
     */
    this(T[] arr) {
        if(arr is null) _head = _tail = new Data( 16 * T.sizeof );
        else            _head = _tail = new Data( cast(E[]) arr );
    }

    /// Construct an appender with a capacity of at least N elements.
    this(size_t N) { _head = _tail = new Data( N * T.sizeof ); }

    /// Returns: the maximum length that can be accommodated without allocation
    size_t capacity() {
        size_t cap = 0;
        for(auto d = _head; d !is null; d = d.next)
            cap   += d.capacity;
        return cap;
    }

    /// Returns: a mutable copy of the data.
    E[] dup()  {
        if(_head && _head.next is null)
            return flatten.dup;
        return flatten;
    }

    /// Returns: a immutable copy of the data.
    immutable(E)[] idup() {
        return cast(immutable(E)[]) dup;
    }

    /// Returns: the appender's data as an array.
    T[] data() {
        auto arr = flatten;
        if(_head !is _tail) {
            *_head = arr;
             _tail = _head;
        }
        return cast(T[]) arr;
    }

    /** Reserve at least newCapacity elements for appending.  Note that more
     *  elements may be reserved than requested.  If newCapacity < capacity,
     *  then nothing is done.
     */
    void reserve(size_t newCapacity) {
        auto cap  = capacity;
        if(  cap >= newCapacity) return;

        auto size = ( newCapacity - cap) * T.sizeof;

        // Initialize if not done so.
        if(!_head) return _head = _tail = new Data( size );

        // Update tail
        while(_tail.next !is null) _tail = _tail.next;

        // Try extending
        if( auto u = GC.extend(_tail.arr.ptr, size, size) )
            return _tail.capacity = u / T.sizeof;

        // If full, add a segment
        if(_tail.arr.length == _tail.capacity)
             return _tail = _tail.next = new Data( size );

        // Allocate & copy
        auto next = Data(size);
        memcpy(next.arr.ptr, _tail.arr.ptr, _tail.arr.length * T.sizeof);
        _tail.arr       = next.arr.ptr[0.._tail.arr.length];
        _tail.capacity  = next.capacity;
    }

    /// Appends to the output range
    void put(U)(U item) if ( isOutputRange!(Unqual!T[],U) ){
        // Transcoding is required to support char[].put(dchar)
        static if(isSomeChar!T && isSomeChar!U &&  T.sizeof < U.sizeof){
            E[T.sizeof == 1 ? 4 : 2] encoded;
            auto len = std.utf.encode(encoded, item);
            return put(encoded[0 .. len]);

        // put(T)
        } else static if(isImplicitlyConvertible!(U, E)) {
            if(!_head)
                _head = _tail  = new Data( 16 * T.sizeof );
            else if( _tail.arr.length == _tail.capacity  ) {   // Try extending
                while(_tail.next !is null) _tail = _tail.next; // Update tail
                if( auto u = GC.extend(_tail.arr.ptr, T.sizeof, nextCapacity) )
                     _tail.capacity     = u / T.sizeof;
                else _tail = _tail.next = new Data( nextCapacity );
            }
            auto          len  = _tail.arr.length;
            _tail.arr.ptr[len] = item;
            _tail.arr          = _tail.arr.ptr[0 .. len + 1];

        // fast put(T[])
        } else static if (is(typeof(_tail.arr[0..1] = item[0..1]))) {
            auto items  = cast(E[]) item[];
            if(!_head)
                _head   = _tail = new Data(  items.length * T.sizeof );
            auto arr    = _tail.arr.ptr[_tail.arr.length.._tail.capacity];
            size_t len  = items.length;
            if(arr.length < len) {                             // Try extending
                while(_tail.next !is null) {                   // Update tail
                    _tail = _tail.next;
                    arr   = _tail.arr.ptr[_tail.arr.length.._tail.capacity];
                }
                auto size  = max(items.length*T.sizeof, nextCapacity);
                if( auto u = GC.extend(_tail.arr.ptr, T.sizeof, size) ) {
                    _tail.capacity = u / T.sizeof;
                    arr    = _tail.arr.ptr[_tail.arr.length.._tail.capacity];
                }
                if(arr.length < len) len = arr.length;
            }
            arr[0..len] = items[0..len];
            items       = items[len..$];
            _tail.arr   = _tail.arr.ptr[0 .. _tail.arr.length + len];
            if( items.length > 0 ) {               // Add a segment and advance
                _tail.next = new Data(max(items.length*T.sizeof,nextCapacity));
                _tail      = _tail.next;
                _tail.arr.ptr[0..items.length] = items[];
                _tail.arr   = _tail.arr.ptr[0..items.length];
            }

        // Kitchen sink
        } else {
            .put!(typeof(this),U,true)(this,item);
        }
    }

    // only allow overwriting data on non-immutable and non-const data
    static if(!is(T == immutable) && !is(T == const)) {
        /** Clears the managed array. This function may reduce the appender's
         *  capacity.
         */
        void clear() {
            _head     = _tail;            // Save the largest chunk and move on
            if(_head) {
                _head.arr  = _head.arr.ptr[0..0];
                _head.next = null;
            }
        }

        /** Shrinks the appender to a given length. Passing in a length that's
         *  greater than the current array length throws an enforce exception.
         *  This function may reduce the appender's capacity.
         */
        void shrinkTo(size_t newlength) {
            for(auto d = _head; d !is null; d = d.next) {
                if(d.arr.length >= newlength) {
                    d.arr  = d.arr.ptr[0..newlength];
                    d.next = null;
                }
                newlength -= d.arr.length;
            }
            enforce(newlength == 0, "Appender.shrinkTo: newlength > capacity");
        }
    }
}

-- 
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------


More information about the Digitalmars-d-bugs mailing list