D wrapper of PCRE 7.0 by iceeLyne
yidabu
yidabu.nospam at gmail.com
Fri Jan 18 02:03:24 PST 2008
Regular Expression Module in D based on the Powerful PCRE C library by iceeLyne
http://icube.freezope.org/pcred/
how to build pcre.lib:
download pcred.zip from http://icube.freezope.org/pcred/pcred.zip and unpack it to \pcred\
download the PCRE 7.0 source distribution from PCRE.ORG and unpack it to \pcre-7.0-src\
copy the files (config.h, makefile) from \pcred\pcre_c_src\pcre-7.0\ to \pcre-7.0-src\src\pcre\7.0\pcre-7.0-src\
if you need some extra functions, also replace the original files (pcre.h, pcre_globals.c) in \pcre-7.0-src\src\pcre\7.0\pcre-7.0-src\ with the ones from \pcred\pcre_c_src\pcre-7.0\
run "make LIBALL", then copy pcre.lib to the \tango\lib\ directory.
You can find the wrapper here (tested with DMD 1.024, Tango 0.994):
http://svn.dsource.org/projects/dwin/trunk/text/pcre/
Example:
//group
scope regex1 = new RegExp(r"(Sample):(?P<num>[0-9]{1,2})");
//,CompileTimeOption.MULTILINE|CompileTimeOption.NEWLINE_CRLF);
//regex1.study();
char[] str1 = "Now Sample:1 Ok, Sample:99 Yes. Sample:22 and Sample:48 is important.";
RegMatch m1 = regex1.execute(str1);
assert( m1.group() == "Sample:1", "m1.group" );
assert( m1.group(1) == "Sample", "m1.group(1)" );
assert( m1.group(2) == "1", "m1.group(2)" );
//iterate
RegIter iter1 = regex1.iterate(str1, 0); //str, start, end
//while((m = iter1.next()) !is null) {
foreach(RegMatch m; iter1)
{
//Trace.formatln(m.group());
/*
Sample:1
Sample:99
Sample:22
Sample:48
*/
}
assert(iter1.next() is null);
//split
char[][] ch = regex1.split(str1);
foreach(char[] c; ch)
{
//Trace.format(c);
}
/*
Now Sample1 Ok, Sample99 Yes. Sample22 and Sample48 is important.
*/
//expand
RegTemplate tmp = new RegTemplate(r"\0||\1\2||\g(0),\g<num>\0\0\0");
m1 = regex1.execute(str1);
assert(m1, "m1");
assert( m1.expand(tmp) == "Sample:1||Sample1||Sample:1,1Sample:1Sample:1Sample:1", "expand");
// back reference
// Some more complex PCRE pattern samples:
// NOTE:
// These patterns are NOT for general HTML parsing,
// just some attempts to match some particular cases.
scope regex2 = new RegExp(r"<(?P<name>\w+)>.*?</(?i:(?P=name))>");
char[] str2 = "<br/><br/><p><pre>char[] str2 = "";</pRe></p><div><div></div></div>";
RegIter iter2 = regex2.iterate(str2);
foreach(RegMatch m; iter2)
{
//Trace.formatln("PCRE BACKREF: {}", m.group());
}
/*
<p><pre>char[] str2 = ;</pRe></p>
<div><div></div>
*/
scope regex3 = new RegExp(r"<(?P<name>\w+)(?P<closed>/)?>(?(closed)|[^<>]*?</(?i:(?P=name))>)");
char[] str3 = "<br/><br/><p><pre>char[] str2 = "";</pRe></p><div><div></div></div>";
RegIter iter3 = regex3.iterate(str3);
foreach(RegMatch m; iter3)
{
Trace.formatln("PCRE CONDITION: {}" , m.group());
}
/*
PCRE CONDITION: <br/>
PCRE CONDITION: <br/>
PCRE CONDITION: <pre>char[] str2 = ;</pRe>
PCRE CONDITION: <div></div>
*/
--
yidabu <yidabu.nospam at gmail.com>
D Programming Language China:
http://www.d-programming-language-china.org/
More information about the Digitalmars-d-announce
mailing list