[review] new string type
Steven Schveighoffer
schveiguy at yahoo.com
Tue Nov 30 07:48:03 PST 2010
In a prior thread, I promised to create a narrow string type that would
enforce the requirements of narrow strings. That is, the new string type
should respect the encoding of narrow strings.
Here is a rough draft, tested minimally, but it does compile and pass
simple tests. It's pretty simple, which is what I would expect. I copied
a lot of stuff from std.array to get this to work.
The point of this type is -- if we replace what the compiler considers
"strings" with this type, then we get both the compiler *and* phobos
agreeing as to what this type is: A bi-directional range of dchar.
As a bonus, char[] and wchar[] would become ordinary arrays and be
manipulated consistently with other arrays. If not done carefully this
could cause problems, but it may also provide more flexibility and more
opportunity for performance, instead of the library fighting you on it.
Anyways, here it is, released under the boost license, commence attack ;)
// Written in the D programming language.
/**
Copyright: Copyright Andrei Alexandrescu and Steven Schveighoffer 2008-2010
License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License
1.0</a>.
Authors: $(WEB erdani.org, Andrei Alexandrescu, Steven Schveighoffer)
Copyright Andrei Alexandrescu and Steven Schveighoffer 2008-2010.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
import std.utf;
import std.traits;
/**
 * Wrapper around a narrow (char or wchar) array that exposes it as a
 * bidirectional range of dchar.
 *
 * front/back/popFront/popBack operate on whole encoded characters, while
 * opIndex, opSlice and codeUnits are expressed in code units of the
 * underlying array.  The wrapped data is never copied or validated on
 * construction; malformed input is reported through assertions or by
 * std.utf when it is decoded.
 */
struct string_t(T) if (is(Unqual!T == char) || is(Unqual!T == wchar))
{
    private T[] _data;

    /// Wraps `data` without copying it.
    this(T[] data)
    {
        this._data = data;
    }

    /**
     * Returns the code-unit index at which the character containing code
     * unit `idx` begins.
     *
     * Note: this assumes that idx is a valid index (idx < _data.length);
     * the raw pointer is used, so no bounds check happens here.
     */
    private size_t _charStart(size_t idx) const
    {
        static if(is(Unqual!T == wchar))
        {
            immutable c = _data.ptr[idx];
            if(c >= 0xDC00 && c <= 0xDFFF)
            {
                // Trailing surrogate detected: verify we have at least 2
                // wchars, and that the preceding wchar is a leading surrogate.
                assert(idx > 0, "Invalid UTF character at beginning of string");
                immutable lead = _data.ptr[idx - 1];
                assert(lead >= 0xD800 && lead <= 0xDBFF, "Invalid UTF character in string");
                return idx-1;
            }
            else
                return idx;
        }
        else
        {
            // A UTF-8 continuation byte has the form 0b10xx_xxxx.  Walk back
            // at most three bytes looking for the first non-continuation byte.
            const p = _data.ptr + idx;
            if ((p[0] & 0b1100_0000) != 0b1000_0000)
            {
                return idx;
            }
            else if (idx >= 1 && (p[-1] & 0b1100_0000) != 0b1000_0000)
            {
                return idx - 1;
            }
            else if (idx >= 2 && (p[-2] & 0b1100_0000) != 0b1000_0000)
            {
                return idx - 2;
            }
            else if (idx >= 3 && (p[-3] & 0b1100_0000) != 0b1000_0000)
            {
                return idx - 3;
            }
            else
            {
                assert(false, "Invalid UTF character in string");
            }
        }
    }

    /// Removes the first encoded character.  The string must not be empty.
    void popFront()
    {
        // stride() indexes _data[0], so reject the empty case up front.
        assert(_data.length, "Attempting to pop front of an empty string");
        auto nc = std.utf.stride(_data, 0);
        assert(nc <= _data.length && nc != 0xFF, "Invalid sequence at beginning of string");
        _data = _data[nc .. $];
    }

    /// Removes the last encoded character.  The string must not be empty.
    void popBack()
    {
        immutable n = _data.length;
        assert(n, "Attempting to pop back of an empty string");
        _data = _data.ptr[0.._charStart(n-1)];
    }

    /// Decodes and returns the first character.  The string must not be empty.
    @property dchar front() const
    {
        assert(_data.length, "Attempting to fetch the front of an empty string");
        size_t i = 0;
        return decode(_data, i);
    }

    /// Decodes and returns the last character.  The string must not be empty.
    @property dchar back() const
    {
        immutable n = _data.length;
        assert(n, "Attempting to fetch the back of an empty string");
        auto idx = _charStart(n-1);
        return std.utf.decode(_data, idx);
    }

    /// True when there are no code units left.
    @property bool empty() const
    {
        return !_data.length;
    }

    /// Returns a copy of this range (the underlying array is shared).
    @property typeof(this) save()
    {
        return this;
    }

    /**
     * Read-only random access via code unit index: returns the character
     * whose encoding contains code unit `idx`.
     */
    dchar opIndex(size_t idx) const
    {
        // _charStart reads through a raw pointer, so check the bound here.
        assert(idx < _data.length, "Index out of range");
        idx = _charStart(idx);
        return std.utf.decode(_data, idx);
    }

    /// Returns the whole string.
    string_t opSlice()
    {
        return this;
    }

    /**
     * Slices by code unit index.  Each bound that falls inside an encoded
     * character is moved back to the start of that character.
     */
    string_t opSlice(size_t start, size_t end)
    {
        // _charStart reads through a raw pointer, so check the bounds here.
        assert(start <= end && end <= _data.length, "Slice bounds out of range");
        if(start != _data.length)
            start = _charStart(start);
        if(end != _data.length)
            end = _charStart(end);
        return string_t(_data[start..end]);
    }

    // note we don't call this length because length can be assumed to be the
    // number of elements, which this isn't.
    /// Number of code units (not characters) in the underlying array.
    @property size_t codeUnits() const
    {
        return _data.length;
    }

    // support append and concat
    // TODO: need to support appending various types of strings to each other
    // (right now only same-type strings can be appended, or raw arrays)

    /// Appends another string_t of the same type in place.
    ref string_t opOpAssign(string op, U)(U data)
        if (op == "~" && is(U == string_t))
    {
        _data ~= data._data;
        return this;
    }

    /// Appends anything the underlying array itself accepts with `~=`.
    ref string_t opOpAssign(string op, U)(U data)
        if (op == "~" && !is(U == string_t) && is(typeof(_data ~= U.init)))
    {
        _data ~= data;
        return this;
    }

    /// Concatenates with another string_t of the same type.
    string_t opBinary(string op, U)(U data)
        if (op == "~" && is(U == string_t))
    {
        return string_t(_data ~ data._data);
    }

    /// Concatenates with anything the underlying array accepts with `~`.
    string_t opBinary(string op, U)(U data)
        if (op == "~" && !is(U == string_t) && is(typeof(_data ~ U.init)))
    {
        return string_t(_data ~ data);
    }
}
/**
 * Overload of string_t selected when T is a (possibly qualified) dchar:
 * the name resolves directly to the plain array type T[].
 */
template string_t(T)
    if (is(dchar == Unqual!T))
{
    alias T[] string_t;
}
/** begin test code **/
import std.stdio;
// Short names for the three immutable instantiations used by the test code.
alias string_t!(immutable(char))  mystring;   // char-based string_t
alias string_t!(immutable(wchar)) mywstring;  // wchar-based string_t
alias string_t!(immutable(dchar)) mydstring;  // dchar-based string_t
/// Smoke test: builds a string through construction and both append forms,
/// then prints the underlying array.
void main()
{
    mystring greeting = mystring("hello");

    // Append a raw array first, then another mystring.
    greeting ~= " world";
    greeting ~= mystring("!!!");

    // _data is private to the struct but reachable from within this module.
    writeln(greeting._data);
}
More information about the Digitalmars-d
mailing list