How to erase chars from char[]?

Ben Hanson Ben.Hanson at tfbplc.co.uk
Mon Jun 21 04:43:02 PDT 2010


I've changed the code to use CharT[] again, which simplified things
substantially. However, I can't find a way to erase characters from a char[].
Can anyone help?

See the current code below.

Thanks,

Ben

module main;

import std.algorithm;
import std.string;

// Groups the lexer helper types for one character type, e.g. regex!(char).
template regex(CharT)
{
// A set of characters held as a character list plus a "negated" flag.
// With _negated == false the set is exactly the listed characters; with
// _negated == true it is every character that is NOT listed. An empty list
// therefore means "no character" (see empty()) or "any character" (see any()).
struct basic_string_token
{
	// true when _charset lists the characters excluded from the set.
	bool _negated = false;
	// The listed characters. negate() and intersect_same_types() walk this
	// in ascending order and assume it is sorted with no duplicates; call
	// remove_duplicates() first to establish that.
	CharT[] _charset;
	// Number of distinct values a CharT can hold.
	enum size_t MAX_CHARS = CharT.max + 1;
	// Value negate() starts counting from: 0x80 if casting 0x80 to CharT
	// yields a negative value, otherwise 0.
	enum size_t START_CHAR = cast(CharT) 0x80 < 0 ? 0x80 : 0;

	// Stores the flag and the character list. The array is assigned, not
	// copied, so the token refers to the caller's storage and
	// remove_duplicates() will sort that storage in place.
	this(const bool negated_, ref CharT[] charset_)
	{
		_negated = negated_;
		_charset = charset_;
	}

	// Sorts the character list and collapses runs of equal characters so
	// each character appears once.
	void remove_duplicates()
	{
		_charset.sort;
		_charset = squeeze(_charset.idup).dup;
	}

	// Switches to the shorter of the two equivalent representations:
	// a list naming every character becomes the empty list with the flag
	// flipped, and a list naming more than half of all characters is
	// replaced by its complement.
	void normalise()
	{
		if (_charset.length == MAX_CHARS)
		{
			_negated = !_negated;
			// Arrays have no clear(); truncate the same way clear() below does.
			_charset.length = 0;
		}
		else if (_charset.length > MAX_CHARS / 2)
		{
			negate();
		}
	}

	// Flips _negated and replaces the list by every character that was not
	// listed, so the token still describes the same set.
	// Requires _charset to be sorted ascending without duplicates: the
	// output buffer is sized for exactly MAX_CHARS - _charset.length
	// characters and written through a raw pointer.
	void negate()
	{
		CharT curr_char_ = START_CHAR;
		CharT[] temp_;
		CharT *ptr_;
		CharT *curr_ = _charset.ptr;
		CharT *end_ = curr_ + _charset.length;
		// Counts how many character values have been visited so far;
		// curr_char_ itself cannot be the loop bound because it wraps.
		size_t i_ = 0;

		_negated = !_negated;
		temp_.length = MAX_CHARS - _charset.length;
		ptr_ = temp_.ptr;

		while (curr_ < end_)
		{
			// Emit every character below the next listed one...
			while (*curr_ > curr_char_)
			{
				*ptr_ = curr_char_;
				++ptr_;
				++curr_char_;
				++i_;
			}

			// ...then step over the listed character itself.
			++curr_char_;
			++curr_;
			++i_;
		}

		// Emit everything above the last listed character.
		for (; i_ < MAX_CHARS; ++i_)
		{
			*ptr_ = curr_char_;
			++ptr_;
			++curr_char_;
		}

		_charset = temp_;
	}

	// true when the token matches no character at all.
	bool empty()
	{
		return _charset.length == 0 && !_negated;
	}

	// true when the token matches every character.
	bool any()
	{
		return _charset.length == 0 && _negated;
	}

	// Resets the token to the empty (matches nothing) state.
	void clear()
	{
		_negated = false;
		_charset.length = 0;
	}

	// Separates what this token and rhs_ have in common into overlap_,
	// modifying both tokens. Dispatches on representation: two "any"
	// tokens, or two tokens with the same _negated flag where neither is
	// "any", go to intersect_same_types(); everything else goes to
	// intersect_diff_types().
	void intersect(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if ((any() && rhs_.any()) || (_negated == rhs_._negated &&
			!any() && !rhs_.any()))
		{
			intersect_same_types(rhs_, overlap_);
		}
		else
		{
			intersect_diff_types(rhs_, overlap_);
		}
	}

private:
	// Intersection of two tokens with the same representation.
	// If both are "any", overlap_ becomes negated and both inputs are
	// cleared. Otherwise every character listed in both tokens is appended
	// to overlap_._charset and removed from both lists.
	void intersect_same_types(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (any())
		{
			clear();
			overlap_._negated = true;
			rhs_.clear();
		}
		else
		{
			CharT[] lhs_rest_;
			CharT[] rhs_rest_;
			size_t lhs_i_ = 0;
			size_t rhs_i_ = 0;

			overlap_._negated = _negated;

			// Arrays have no erase(), so instead of deleting common
			// characters one at a time, walk both sorted lists once and
			// rebuild each list from the characters that are not shared.
			while (lhs_i_ < _charset.length &&
				rhs_i_ < rhs_._charset.length)
			{
				if (_charset[lhs_i_] < rhs_._charset[rhs_i_])
				{
					lhs_rest_ ~= _charset[lhs_i_];
					++lhs_i_;
				}
				else if (_charset[lhs_i_] > rhs_._charset[rhs_i_])
				{
					rhs_rest_ ~= rhs_._charset[rhs_i_];
					++rhs_i_;
				}
				else
				{
					overlap_._charset ~= _charset[lhs_i_];
					++lhs_i_;
					++rhs_i_;
				}
			}

			// Whatever is left in either list has no partner in the other.
			lhs_rest_ ~= _charset[lhs_i_ .. $];
			rhs_rest_ ~= rhs_._charset[rhs_i_ .. $];
			_charset = lhs_rest_;
			rhs_._charset = rhs_rest_;

			if (_negated)
			{
				// NOTE(review): nothing is done after splitting two negated
				// lists; this branch looks unfinished - confirm the
				// intended handling.
			}
			else if (overlap_._charset.length != 0)
			{
				normalise();
				overlap_.normalise();
				rhs_.normalise();
			}
		}
	}

	// Intersection of two tokens with different representations; picks the
	// handler that matches this token's own representation.
	void intersect_diff_types(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (any())
		{
			intersect_any(rhs_, overlap_);
		}
		else if (_negated)
		{
			intersect_negated(rhs_, overlap_);
		}
		else // _negated == false
		{
			intersect_charset(rhs_, overlap_);
		}
	}

	// This token is "any": swap roles and let rhs_'s handler do the work
	// with this token as its right-hand side.
	void intersect_any(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (rhs_._negated)
		{
			rhs_.intersect_negated(this, overlap_);
		}
		else // rhs._negated == false
		{
			rhs_.intersect_charset(this, overlap_);
		}
	}

	// This token is a negated list. Against "any", overlap_ takes over this
	// token's negated list, rhs_ becomes the plain (non-negated) list of the
	// same characters and this token is cleared. Otherwise the work is
	// handed to rhs_.intersect_charset().
	void intersect_negated(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (rhs_.any())
		{
			overlap_._negated = true;
			overlap_._charset = _charset;
			rhs_._negated = false;
			// Array assignment shares storage; copy so that rhs_ and
			// overlap_ do not end up referring to the same buffer.
			rhs_._charset = _charset.dup;
			clear();
		}
		else // rhs._negated == false
		{
			rhs_.intersect_charset(this, overlap_);
		}
	}

	// This token is a plain list. Against "any", overlap_ takes over this
	// token's list, rhs_ becomes the negated list of the same characters and
	// this token is cleared.
	void intersect_charset(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (rhs_.any())
		{
			overlap_._charset = _charset;
			rhs_._negated = true;
			// Array assignment shares storage; copy so that rhs_ and
			// overlap_ do not end up referring to the same buffer.
			rhs_._charset = _charset.dup;
			clear();
		}
		else // rhs_._negated == true
		{
			// NOTE(review): plain list versus negated list is not handled;
			// this branch looks unfinished - confirm the intended handling.
		}
	}
}
}

// Smoke test: builds a token from "cccbba", reduces it to its sorted unique
// characters and then switches it to the negated representation.
int main(char[][]argv)
{
	regex!(char).basic_string_token token_;

	// String literals are immutable; .dup produces the mutable char[] that
	// _charset requires and that remove_duplicates() can sort in place.
	token_._charset = "cccbba".dup;
	token_.remove_duplicates();
	token_.negate();
	return 0;
}


More information about the Digitalmars-d mailing list