[Issue 1395] New: Regex does not support range [something to \*]
d-bugmail at puremagic.com
d-bugmail at puremagic.com
Thu Aug 2 01:13:03 PDT 2007
http://d.puremagic.com/issues/show_bug.cgi?id=1395
Summary: Regex does not support range [something to \*]
Product: D
Version: 1.021
Platform: All
OS/Version: All
Status: NEW
Severity: normal
Priority: P2
Component: Phobos
AssignedTo: bugzilla at digitalmars.com
ReportedBy: alan at akbkhome.com
test case:
r = new Regex("^(#)?([\w-\*]+)");
Error: inverted range in character class w > * in ^(#)?([\w-\*]+)
(The error message has been expanded a bit here.)
The problem appears to be the '*' character used as the end of a range, as in [...-*],
along with having \w as the start character of the range (which does resolve well in
the check in regex).
This is a fix for parseRange() that seems to work.
for (;;)
{
int lastStart = 0;
if (p == pattern.length)
goto Lerr;
switch (pattern[p])
{
case ']':
switch (rs)
{
case RS.dash:
r.setbit2('-');
case RS.rliteral:
r.setbit2(c);
break;
case RS.start:
break;
default:
assert(0);
}
p++;
break;
case '\\':
p++;
r.setbitmax(cmax);
if (p == pattern.length)
goto Lerr;
switch (pattern[p])
{
case 'd':
for (i = '0'; i <= '9'; i++)
r.bits[i] = 1;
lastStart = '0';
goto Lrs;
case 'D':
for (i = 1; i < '0'; i++)
r.bits[i] = 1;
for (i = '9' + 1; i <= cmax;
i++)
r.bits[i] = 1;
lastStart = 1;
goto Lrs;
case 's':
lastStart = -1;
for (i = 0; i <= cmax; i++)
if (isspace(i)) {
r.bits[i] = 1;
lastStart =
lastStart > -1 ? lastStart : i;
}
goto Lrs;
case 'S':
lastStart = -1;
for (i = 1; i <= cmax; i++)
if (!isspace(i)) {
r.bits[i] = 1;
lastStart =
lastStart > -1 ? lastStart : i;
}
goto Lrs;
case 'w':
lastStart = -1;
for (i = 1; i <= cmax; i++)
if (isword(i)) {
r.bits[i] = 1;
lastStart =
lastStart > -1 ? lastStart : i;
}
goto Lrs;
case 'W':
for (i = 1; i <= cmax; i++)
if (!isword(i)) {
r.bits[i] = 1;
lastStart =
lastStart > -1 ? lastStart : i;
}
goto Lrs;
Lrs:
switch (rs)
{
case RS.dash:
r.setbit2('-');
case RS.rliteral:
r.setbit2(c);
break;
default:
break;
}
rs = RS.start;
continue;
default:
break;
}
c2 = escape();
goto Lrange;
case '-':
p++;
if (rs == RS.start)
goto Lrange;
else if (rs == RS.rliteral)
rs = RS.dash;
else if (rs == RS.dash)
{
r.setbit2(c);
r.setbit2('-');
rs = RS.start;
}
continue;
default:
lastStart = c;
c2 = pattern[p];
p++;
Lrange:
switch (rs)
{
case RS.rliteral:
r.setbit2(c);
case RS.start:
c = c2;
rs = RS.rliteral;
break;
case RS.dash:
if (c2 == '*') { // [a-\*]
for (i = lastStart; i
<= cmax; i++)
r.bits[i] = 1;
r.setbitmax(cmax);
rs = RS.start;
break;
} else if (c > c2) {
error("inverted range
in character class "
~ cast(char)c ~
" > " ~ cast(char)c2 ~ " in " ~ pattern ~ " \n" );
return 0;
}
r.setbitmax(c2);
//printf("c = %x, c2 =
%x\n",c,c2);
for (; c <= c2; c++)
r.bits[c] = 1;
rs = RS.start;
break;
default:
assert(0);
}
continue;
}
break;
}
--
More information about the Digitalmars-d-bugs
mailing list