Small Buffer Optimization for string and friends

Per Nordlöw per.nordlow at gmail.com
Tue Apr 17 15:27:42 UTC 2018


On Sunday, 8 April 2012 at 05:56:36 UTC, Andrei Alexandrescu 
wrote:
> Walter and I discussed today about using the small string 
> optimization in string and other arrays of immutable small 
> objects.

I put together SSOString at

https://github.com/nordlow/phobos-next/blob/967eb1088fbfab8be5ccd811b66e7b5171b46acf/src/sso_string.d

that uses small-string-optimization on top of a normal D string 
(slice).

I'm satisfied with everything excepts that -dip1000 doesn't 
vorbids `f` from compiling.

I also don't understand why `x[0]` cannot be returned by ref in 
the function `g`.

Comments are welcome.

Contents of sso_string.d follows:

module sso_string;

/** Small-size-optimized string.
  *
  * Store on the stack if constructed with <= `smallCapacity` 
number of
  * characters, otherwise on the GC heap.
  */
struct SSOString
{
     private alias E = immutable(char); // immutable element type
     private alias ME = char;           // mutable element type

     pure nothrow:

     /** Construct from `elements`, with potential GC-allocation 
(iff
      * `elements.length > smallCapacity`).
      */
     this()(scope ME[] elements) @trusted // template-lazy
     {
         if (elements.length <= smallCapacity)
         {
             small.data[0 .. elements.length] = elements;
             small.length = 
cast(typeof(small.length))(2*elements.length);
         }
         else
         {
             large = elements.idup; // GC-allocate
             raw.length *= 2;  // shift up
             raw.length |= 1;  // tag as large
         }
     }

     @nogc:

     // TODO add @nogc overload to construct from mutable static 
array <= smallCapacity

     /** Construct from `elements` without any kind of heap 
allocation.
      */
     this()(immutable(E)[] elements) @trusted // template-lazy
     {
         if (elements.length <= smallCapacity)
         {
             small.data[0 .. elements.length] = elements;
             small.length = 
cast(typeof(small.length))(2*elements.length);
         }
         else
         {
             large = elements;   // @nogc
             raw.length *= 2;    // shift up
             raw.length |= 1;    // tag as large
         }
     }

     @property size_t length() const @trusted
     {
         if (isLarge)
         {
             return large.length/2; // skip first bit
         }
         else
         {
             return small.length/2; // skip fist bit
         }
     }

     scope ref inout(E) opIndex(size_t index) inout return @trusted
     {
         return opSlice()[index]; // automatic range checking
     }

     scope inout(E)[] opSlice() inout return @trusted
     {
         if (isLarge)
         {
             union RawLarge
             {
                 Raw raw;
                 Large large;
             }
             RawLarge copy = void;
             copy.large = cast(Large)large;
             copy.raw.length /= 2; // adjust length
             return copy.large;
         }
         else
         {
             return small.data[0 .. small.length/2]; // scoped
         }
     }

     private @property bool isLarge() const @trusted
     {
         return large.length & 1; // first bit discriminates small 
from large
     }

private:
     struct Raw                  // same memory layout as `E[]`
     {
         size_t length;          // can be bit-fiddled without GC 
allocation
         E* ptr;
     }

     alias Large = E[];

     enum smallCapacity = Large.sizeof - Small.length.sizeof;
     static assert(smallCapacity > 0, "No room for small elements 
for E being " ~ E.stringof);
     version(LittleEndian) // see: 
http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
     {
         struct Small
         {
             ubyte length;
             E[smallCapacity] data;
         }
     }
     else
     {
         static assert(0, "BigEndian support and test");
     }

     union
     {
         Raw raw;
         Large large;
         Small small;
     }
}

///
@safe pure nothrow @nogc unittest
{
     import container_traits : mustAddGCRange;
     alias S = SSOString;

     static assert(S.sizeof == 2*size_t.sizeof); // two words
     static assert(S.smallCapacity == 15);
     static assert(mustAddGCRange!S); // `Large large.ptr` must be 
scanned

     auto s0 = S.init;
     assert(s0.length == 0);
     assert(!s0.isLarge);
     assert(s0[] == []);

     const s7 = S("0123456");
     static assert(is(typeof(s7[]) == string));
     assert(!s7.isLarge);
     assert(s7.length == 7);
     assert(s7[] == "0123456");
     // TODO assert(s7[0 .. 4] == "0123");

     const s15 = S("012345678901234");
     static assert(is(typeof(s15[]) == string));
     assert(!s15.isLarge);
     assert(s15.length == 15);
     assert(s15[] == "012345678901234");

     const s16 = S("0123456789abcdef");
     static assert(is(typeof(s16[]) == string));
     assert(s16.isLarge);
     assert(s16.length == 16);
     assert(s16[] == "0123456789abcdef");
     assert(s16[0] == '0');
     assert(s16[10] == 'a');
     assert(s16[15] == 'f');

     // TODO static assert(!__traits(compiles, { auto _ = 
S((char[]).init); }));

     string f() @safe pure nothrow @nogc
     {
         S x;
         return x[];             // TODO should fail with -dip1000
     }

     // TODO activate
     // ref char g() @safe pure nothrow @nogc
     // {
     //     S x;
     //     return x[0];             // TODO should fail with 
-dip1000
     // }
}



More information about the Digitalmars-d mailing list