How to erase chars from char[]?
Ben Hanson
Ben.Hanson at tfbplc.co.uk
Mon Jun 21 04:43:02 PDT 2010
I've changed the code to use CharT[] again, which simplified things
substantially. However, I can't find a way to erase characters from a char[].
Can anyone help?
See the current code below.
Thanks,
Ben
module main;
import std.algorithm;
import std.string;
template regex(CharT)
{
struct basic_string_token
{
bool _negated = false;
CharT[] _charset;
enum size_t MAX_CHARS = CharT.max + 1;
enum size_t START_CHAR = cast(CharT) 0x80 < 0 ? 0x80 : 0;
this(const bool negated_, ref CharT[] charset_)
{
_negated = negated_;
_charset = charset_;
}
void remove_duplicates()
{
_charset.sort;
_charset = squeeze(_charset.idup).dup;
}
void normalise()
{
if (_charset.length == MAX_CHARS)
{
_negated = !_negated;
_charset.clear();
}
else if (_charset.length > MAX_CHARS / 2)
{
negate();
}
}
void negate()
{
CharT curr_char_ = START_CHAR;
CharT[] temp_;
CharT *ptr_;
CharT *curr_ = _charset.ptr;
CharT *end_ = curr_ + _charset.length;
size_t i_ = 0;
_negated = !_negated;
temp_.length = MAX_CHARS - _charset.length;
ptr_ = temp_.ptr;
while (curr_ < end_)
{
while (*curr_ > curr_char_)
{
*ptr_ = curr_char_;
++ptr_;
++curr_char_;
++i_;
}
++curr_char_;
++curr_;
++i_;
}
for (; i_ < MAX_CHARS; ++i_)
{
*ptr_ = curr_char_;
++ptr_;
++curr_char_;
}
_charset = temp_;
}
bool empty()
{
return _charset.length == 0 && !_negated;
}
bool any()
{
return _charset.length == 0 && _negated;
}
void clear()
{
_negated = false;
_charset.length = 0;
}
void intersect(ref basic_string_token rhs_,
ref basic_string_token overlap_)
{
if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
!any () && !rhs_.any ()))
{
intersect_same_types (rhs_, overlap_);
}
else
{
intersect_diff_types (rhs_, overlap_);
}
}
private:
void intersect_same_types (ref basic_string_token rhs_,
ref basic_string_token overlap_)
{
if (any ())
{
clear ();
overlap_._negated = true;
rhs_.clear ();
}
else
{
CharT *iter_ = _charset.ptr;
CharT *end_ = iter_ + _charset.length;
CharT *rhs_iter_ = rhs_._charset.ptr;
CharT *rhs_end_ = rhs_iter_ + rhs_._charset.length;
overlap_._negated = _negated;
while (iter_ != end_ && rhs_iter_ != rhs_end_)
{
if (*iter_ < *rhs_iter_)
{
++iter_;
}
else if (*iter_ > *rhs_iter_)
{
++rhs_iter_;
}
else
{
overlap_._charset ~= *iter_;
_charset.erase(i_);
iter_ = _charset.ptr;
end_ = iter_ + _charset.length;
rhs_._charset.erase(rhs_i_);
rhs_iter_ = rhs_._charset.ptr;
rhs_end_ = rhs_iter_ + rhs_._charset.length;
}
}
if (_negated)
{
}
else if (!overlap_._charset.length == 0)
{
normalise ();
overlap_.normalise ();
rhs_.normalise ();
}
}
}
void intersect_diff_types(ref basic_string_token rhs_,
ref basic_string_token overlap_)
{
if (any ())
{
intersect_any(rhs_, overlap_);
}
else if (_negated)
{
intersect_negated(rhs_, overlap_);
}
else // _negated == false
{
intersect_charset(rhs_, overlap_);
}
}
void intersect_any (ref basic_string_token rhs_, ref basic_string_token overlap_)
{
if (rhs_._negated)
{
rhs_.intersect_negated (this, overlap_);
}
else // rhs._negated == false
{
rhs_.intersect_charset (this, overlap_);
}
}
void intersect_negated (ref basic_string_token rhs_,
ref basic_string_token overlap_)
{
if (rhs_.any ())
{
overlap_._negated = true;
overlap_._charset = _charset;
rhs_._negated = false;
rhs_._charset = _charset;
clear ();
}
else // rhs._negated == false
{
rhs_.intersect_charset (this, overlap_);
}
}
void intersect_charset (ref basic_string_token rhs_,
ref basic_string_token overlap_)
{
if (rhs_.any ())
{
overlap_._charset = _charset;
rhs_._negated = true;
rhs_._charset = _charset;
clear ();
}
else // rhs_._negated == true
{
}
}
};
}
int main(char[][]argv)
{
regex!(char).basic_string_token token_;
token_._charset = "cccbba";
token_.remove_duplicates();
token_.negate();
return 0;
}
More information about the Digitalmars-d
mailing list