Small Buffer Optimization for string and friends
Per Nordlöw
per.nordlow at gmail.com
Tue Apr 17 15:27:42 UTC 2018
On Sunday, 8 April 2012 at 05:56:36 UTC, Andrei Alexandrescu
wrote:
> Walter and I discussed today about using the small string
> optimization in string and other arrays of immutable small
> objects.
I put together SSOString at
https://github.com/nordlow/phobos-next/blob/967eb1088fbfab8be5ccd811b66e7b5171b46acf/src/sso_string.d
that uses small-string-optimization on top of a normal D string
(slice).
I'm satisfied with everything excepts that -dip1000 doesn't
vorbids `f` from compiling.
I also don't understand why `x[0]` cannot be returned by ref in
the function `g`.
Comments are welcome.
Contents of sso_string.d follows:
module sso_string;
/** Small-size-optimized string.
*
* Store on the stack if constructed with <= `smallCapacity`
number of
* characters, otherwise on the GC heap.
*/
struct SSOString
{
private alias E = immutable(char); // immutable element type
private alias ME = char; // mutable element type
pure nothrow:
/** Construct from `elements`, with potential GC-allocation
(iff
* `elements.length > smallCapacity`).
*/
this()(scope ME[] elements) @trusted // template-lazy
{
if (elements.length <= smallCapacity)
{
small.data[0 .. elements.length] = elements;
small.length =
cast(typeof(small.length))(2*elements.length);
}
else
{
large = elements.idup; // GC-allocate
raw.length *= 2; // shift up
raw.length |= 1; // tag as large
}
}
@nogc:
// TODO add @nogc overload to construct from mutable static
array <= smallCapacity
/** Construct from `elements` without any kind of heap
allocation.
*/
this()(immutable(E)[] elements) @trusted // template-lazy
{
if (elements.length <= smallCapacity)
{
small.data[0 .. elements.length] = elements;
small.length =
cast(typeof(small.length))(2*elements.length);
}
else
{
large = elements; // @nogc
raw.length *= 2; // shift up
raw.length |= 1; // tag as large
}
}
@property size_t length() const @trusted
{
if (isLarge)
{
return large.length/2; // skip first bit
}
else
{
return small.length/2; // skip fist bit
}
}
scope ref inout(E) opIndex(size_t index) inout return @trusted
{
return opSlice()[index]; // automatic range checking
}
scope inout(E)[] opSlice() inout return @trusted
{
if (isLarge)
{
union RawLarge
{
Raw raw;
Large large;
}
RawLarge copy = void;
copy.large = cast(Large)large;
copy.raw.length /= 2; // adjust length
return copy.large;
}
else
{
return small.data[0 .. small.length/2]; // scoped
}
}
private @property bool isLarge() const @trusted
{
return large.length & 1; // first bit discriminates small
from large
}
private:
struct Raw // same memory layout as `E[]`
{
size_t length; // can be bit-fiddled without GC
allocation
E* ptr;
}
alias Large = E[];
enum smallCapacity = Large.sizeof - Small.length.sizeof;
static assert(smallCapacity > 0, "No room for small elements
for E being " ~ E.stringof);
version(LittleEndian) // see:
http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
{
struct Small
{
ubyte length;
E[smallCapacity] data;
}
}
else
{
static assert(0, "BigEndian support and test");
}
union
{
Raw raw;
Large large;
Small small;
}
}
///
@safe pure nothrow @nogc unittest
{
import container_traits : mustAddGCRange;
alias S = SSOString;
static assert(S.sizeof == 2*size_t.sizeof); // two words
static assert(S.smallCapacity == 15);
static assert(mustAddGCRange!S); // `Large large.ptr` must be
scanned
auto s0 = S.init;
assert(s0.length == 0);
assert(!s0.isLarge);
assert(s0[] == []);
const s7 = S("0123456");
static assert(is(typeof(s7[]) == string));
assert(!s7.isLarge);
assert(s7.length == 7);
assert(s7[] == "0123456");
// TODO assert(s7[0 .. 4] == "0123");
const s15 = S("012345678901234");
static assert(is(typeof(s15[]) == string));
assert(!s15.isLarge);
assert(s15.length == 15);
assert(s15[] == "012345678901234");
const s16 = S("0123456789abcdef");
static assert(is(typeof(s16[]) == string));
assert(s16.isLarge);
assert(s16.length == 16);
assert(s16[] == "0123456789abcdef");
assert(s16[0] == '0');
assert(s16[10] == 'a');
assert(s16[15] == 'f');
// TODO static assert(!__traits(compiles, { auto _ =
S((char[]).init); }));
string f() @safe pure nothrow @nogc
{
S x;
return x[]; // TODO should fail with -dip1000
}
// TODO activate
// ref char g() @safe pure nothrow @nogc
// {
// S x;
// return x[0]; // TODO should fail with
-dip1000
// }
}
More information about the Digitalmars-d
mailing list