[Issue 5813] [patch] std.array.Appender has severe performance and memory leak problems.
d-bugmail at puremagic.com
d-bugmail at puremagic.com
Fri May 20 10:34:13 PDT 2011
http://d.puremagic.com/issues/show_bug.cgi?id=5813
--- Comment #1 from Rob Jacques <sandford at jhu.edu> 2011-05-20 10:29:57 PDT ---
*Update*
The previous version ended up having value semantics, not reference semantics,
due to the fact _tail wouldn't get updated and later nodes would be ignored.
For example:
auto test = Appender!string(16);
void func(Writer)(Writer w) {
w.put("The quick brown fox jumps over the lazy dog");
}
func(test);
assert(test.data == "The quick brown ");
//assert(test.data == "The quick brown fox jumps over the lazy dog");
I've updated the code to check/update tail lazily as needed. As an out of date
_tail naturally triggers memory allocation, this doesn't overly change the
performance of put on average.
==============================================================================
struct Appender(A : T[], T) {
private {
enum PageSize = 4096; // Memory page size
alias Unqual!T E; // Internal element type
struct Data {
Data* next; // The next data segment
size_t capacity; // Capacity of this segment
E[] arr; // This segment's array
// Initialize a segment using an existing array
void opAssign(E[] _arr) {
next = null;
capacity = _arr.capacity;
arr = _arr;
if(_arr.length < capacity) {
arr.length = capacity;
arr.length = _arr.length;
}
assert(_arr.ptr is arr.ptr,"Unexpected reallocation occurred");
}
// Create a new segment using an existing array
this(Unqual!T[] _arr) { this = _arr; }
// Create a new segment with at least size bytes
this(size_t size) {
if(size > PageSize)
size = (size + PageSize-1) & ~(PageSize-1);
auto bi = GC.qalloc(size, !hasIndirections!T * 2);
next = null;
capacity = bi.size / T.sizeof;
arr = (cast(E*)bi.base)[0..0];
static assert(!false*2 == GC.BlkAttr.NO_SCAN);
}
}
Data* _head = null; // The head data segment
Data* _tail = null; // The last data segment
// Returns: the total number of elements in the appender
size_t _length() {
size_t len = 0;
for(auto d = _head; d !is null; d = d.next)
len += d.arr.length;
return len;
}
// Flatten all the data segments into a single array
E[] flatten() {
if(!_head) return null;
if( _head && _head.next is null)
return _head.arr;
size_t N = _length;
size_t len = N;
size_t i = 0;
auto arr = new E[N];
for(auto d = _head; N > 0; d = d.next, N -= len, i += len) {
len = min(N, d.arr.length);
memcpy(arr.ptr+i, d.arr.ptr, len * T.sizeof);
}
return arr;
}
// Returns: the next capacity size
size_t nextCapacity() nothrow pure {
auto cap = _tail.capacity * T.sizeof * 2;
return cap < PageSize ? cap : PageSize;
}
}
/** Construct an appender with a given array. Note that this does not copy
* the data. If the array has a larger capacity as determined by
* arr.capacity, it will be used by the appender. After initializing an
* appender on an array, appending to the original array will reallocate.
*/
this(T[] arr) {
if(arr is null) _head = _tail = new Data( 16 * T.sizeof );
else _head = _tail = new Data( cast(E[]) arr );
}
/// Construct an appender with a capacity of at least N elements.
this(size_t N) { _head = _tail = new Data( N * T.sizeof ); }
/// Returns: the maximum length that can be accommodated without allocation
size_t capacity() {
size_t cap = 0;
for(auto d = _head; d !is null; d = d.next)
cap += d.capacity;
return cap;
}
/// Returns: a mutable copy of the data.
E[] dup() {
if(_head && _head.next is null)
return flatten.dup;
return flatten;
}
/// Returns: a immutable copy of the data.
immutable(E)[] idup() {
return cast(immutable(E)[]) dup;
}
/// Returns: the appender's data as an array.
T[] data() {
auto arr = flatten;
if(_head !is _tail) {
*_head = arr;
_tail = _head;
}
return cast(T[]) arr;
}
/** Reserve at least newCapacity elements for appending. Note that more
* elements may be reserved than requested. If newCapacity < capacity,
* then nothing is done.
*/
void reserve(size_t newCapacity) {
auto cap = capacity;
if( cap >= newCapacity) return;
auto size = ( newCapacity - cap) * T.sizeof;
// Initialize if not done so.
if(!_head) return _head = _tail = new Data( size );
// Update tail
while(_tail.next !is null) _tail = _tail.next;
// Try extending
if( auto u = GC.extend(_tail.arr.ptr, size, size) )
return _tail.capacity = u / T.sizeof;
// If full, add a segment
if(_tail.arr.length == _tail.capacity)
return _tail = _tail.next = new Data( size );
// Allocate & copy
auto next = Data(size);
memcpy(next.arr.ptr, _tail.arr.ptr, _tail.arr.length * T.sizeof);
_tail.arr = next.arr.ptr[0.._tail.arr.length];
_tail.capacity = next.capacity;
}
/// Appends to the output range
void put(U)(U item) if ( isOutputRange!(Unqual!T[],U) ){
// Transcoding is required to support char[].put(dchar)
static if(isSomeChar!T && isSomeChar!U && T.sizeof < U.sizeof){
E[T.sizeof == 1 ? 4 : 2] encoded;
auto len = std.utf.encode(encoded, item);
return put(encoded[0 .. len]);
// put(T)
} else static if(isImplicitlyConvertible!(U, E)) {
if(!_head)
_head = _tail = new Data( 16 * T.sizeof );
else if( _tail.arr.length == _tail.capacity ) { // Try extending
while(_tail.next !is null) _tail = _tail.next; // Update tail
if( auto u = GC.extend(_tail.arr.ptr, T.sizeof, nextCapacity) )
_tail.capacity = u / T.sizeof;
else _tail = _tail.next = new Data( nextCapacity );
}
auto len = _tail.arr.length;
_tail.arr.ptr[len] = item;
_tail.arr = _tail.arr.ptr[0 .. len + 1];
// fast put(T[])
} else static if (is(typeof(_tail.arr[0..1] = item[0..1]))) {
auto items = cast(E[]) item[];
if(!_head)
_head = _tail = new Data( items.length * T.sizeof );
auto arr = _tail.arr.ptr[_tail.arr.length.._tail.capacity];
size_t len = items.length;
if(arr.length < len) { // Try extending
while(_tail.next !is null) { // Update tail
_tail = _tail.next;
arr = _tail.arr.ptr[_tail.arr.length.._tail.capacity];
}
auto size = max(items.length*T.sizeof, nextCapacity);
if( auto u = GC.extend(_tail.arr.ptr, T.sizeof, size) ) {
_tail.capacity = u / T.sizeof;
arr = _tail.arr.ptr[_tail.arr.length.._tail.capacity];
}
if(arr.length < len) len = arr.length;
}
arr[0..len] = items[0..len];
items = items[len..$];
_tail.arr = _tail.arr.ptr[0 .. _tail.arr.length + len];
if( items.length > 0 ) { // Add a segment and advance
_tail.next = new Data(max(items.length*T.sizeof,nextCapacity));
_tail = _tail.next;
_tail.arr.ptr[0..items.length] = items[];
_tail.arr = _tail.arr.ptr[0..items.length];
}
// Kitchen sink
} else {
.put!(typeof(this),U,true)(this,item);
}
}
// only allow overwriting data on non-immutable and non-const data
static if(!is(T == immutable) && !is(T == const)) {
/** Clears the managed array. This function may reduce the appender's
* capacity.
*/
void clear() {
_head = _tail; // Save the largest chunk and move on
if(_head) {
_head.arr = _head.arr.ptr[0..0];
_head.next = null;
}
}
/** Shrinks the appender to a given length. Passing in a length that's
* greater than the current array length throws an enforce exception.
* This function may reduce the appender's capacity.
*/
void shrinkTo(size_t newlength) {
for(auto d = _head; d !is null; d = d.next) {
if(d.arr.length >= newlength) {
d.arr = d.arr.ptr[0..newlength];
d.next = null;
}
newlength -= d.arr.length;
}
enforce(newlength == 0, "Appender.shrinkTo: newlength > capacity");
}
}
}
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
More information about the Digitalmars-d-bugs
mailing list