"spirit" like template parser generator in 100 Lines of code (almost)
BCS
BCS at pathlink.com
Mon Dec 18 11:50:37 PST 2006
(The almost is with regards to it working not the 100 LOC, if all blank
lines and comments are removed it is exactly 100 LOC.)
It actual doesn't work because of several things. However, some of these
are bugs (I think) and the rest I am putting forward as feature requests.
What I have actually done is make an almost working prototype of a
template system that generates functions for a recursive decent parser.
It is to be mixed into a class or struct. The parameter for the template
is a string containing almost normal BNF reduction definitions.
It extends BNF by allowing "|" for "or" but doesn't implement optional
or repeated terms.
It isn't implemented yet, but actions will be defined by providing
specializations for the "Action" template.
Example:
<code>
struct P
{
char[] data;
int pos;
PObject Action(char[]string:"ADD_ACT")(PObject l, PObject e, PObject r)
{
Numeric ln = cast(Numeric) l;
Numeric rn = cast(Numeric) r;
return new AddExp(ln, rn);
}
... //some more actions
PObject ParseRule(char[] string : "PLUS")()
{
if (data[pos] == '+')
{
pos++;
return PObject.pass;
}
return PObject.fail;
}
... // more terminals
// ADD_EXP is the name of the reduction
// MUL2ADD, ADD_ACT, SUB_ACT, CAT2ADD are action names
mixin Parser!("ADD_EXP :
MUL2ADD / MUL_EXP |
ADD_ACT / ADD_EXP PLUS MUL_EXP |
SUB_ACT / ADD_EXP MINUS MUL_EXP |
CAT2ADD / CAT_EXP;"); // also has more non terminals.
}
vod main()
{
P p;
p.data = GetData();
p.pos = 0;
if(ret = cast(AddExp)p.ParseRule!("ADD_EXP")())
//good
}
</code>
(The templates are at the bottom of the post)
Now for why it doesn't work.
===Bugs:
-- Mixins don't work quite right. --
this for instance doesn't work unless the second line in foo is removed:
template First(char[] string) { const char[] First = string[0..1] }
template Temp(char[] string)
{
static if(string != "")
{
void TempFn(char[] s : First!(string)) {}
mixin Temp!(string[1..$]);
}
}
struct S
{
mixin Temp!("ab");
void Foo()
{
TempFn!("a")();
TempFn!("b")();// this fails
}
}
what seems to be happening is that only the first iteration of the
template is actual mixed in. Use of pragma(msg,...) seems to show that
template does recurse correctly.
I think this is a bug
-- The parameter on a foreach isn't const. --
Actual this doesn't stop anything from working, it just makes some
things more difficult.
foreach(i;TupleMaker!())
Template!(i)();
becomes:
alias TupleMaker!() tempTuple
foreach(j,_;tempTuple)
{
const i = tempTuple[j];
Template!()();
}
This is not a trivial complaint. While I was working on my BigNum
template I noticed that the const variable adds ops to the function even
with the -O flag.
===New Feature Needed To Make This Work.
-- Abstract Templates --
The idea here is that a template marked as abstract would have no
generic implementation, only specializations. Furthermore the compiler
would assume that any given specializations it is asked for exists and
leave it up to the linker to find the correct implementation. this would
allow cyclical calls by specializations of a template.
abstract template foo(char[] s)
{
void foo(uint);
}
template foo(char[] s : "A")
{
void foo(uint i)
{ // nothing generated here
if(i) foo!("B")(i-1);
}
}
template foo(char[] s : "B") // foo!("B") generated here.
{
void foo(uint i)
{
if(i>1) foo!("A")(i-2);
if(i>1) foo!("C")(0); // will cause a link error
}
}
-- "static foreach" at same level as "static if" --
two parts:
make it "static forach"
allow it the same places as "static if"
template foo(char[] string)
{
static foreach(i;BreakBy!(string,','))
mixin Other!(i);
}
this isn't strictly needed to make my template work, but it makes it a
whole lot cleaner.
===================
Now for what you all have been waiting for!!!!!
origonal formatting at:
http://www.webpages.uidaho.edu/~shro8822/dspirit.d
<code>
/************
Discard leading whitespace
*/
template DropWhite(char[] string)
{
static if(string == "")
alias string DropWhite;
else static if(string[0] == ' ' || string[0] == '\t' || string[0] ==
'\r' || string[0] == '\n')
alias DropWhite!(string[1..$]) DropWhite;
else
alias string DropWhite;
}
/************
return a slice upto the first whitespace char
*/
template UseTillWhite(char[] string)
{
static if(string == "" || string[0] == ' ' || string[0] == '\t' ||
string[0] == '\r' || string[0] == '\n')
const char[] UseTillWhite = "";
else
const char[] UseTillWhite = string[0] ~ UseTillWhite!(string[1..$]);
}
/************
return a slice starting with the first whitespace char
*/
template DiscardTillWhite(char[] string)
{
static if(string == "" || string[0] == ' ' || string[0] == '\t' ||
string[0] == '\r' || string[0] == '\n')
alias string DiscardTillWhite;
else
alias DiscardTillWhite!(string[1..$]) DiscardTillWhite;
}
/************
Return a slice up-to but not including the first instance of t.
*/
template UseTill(char[] string, char t)
{
static if(string == "" || string[0] == t)
const char[] UseTill = "";
else
const char[] UseTill = string[0..1] ~ UseTill!(string[1..$],t);
}
/***********
Return a slice starting after the first instance of t and containing
the rest of the string.
*/
template DiscardTill(char[] string, char t)
{
static if(string == "")
const char[] DiscardTill = "";
else static if(string[0] == t)
const char[] DiscardTill = string[1..$];
else
const char[] DiscardTill = DiscardTill!(string[1..$],t);
}
/***********
discard [ ]*
then return [a-zA-Z_][a-zA-Z0-9_]*
*/
template GetID(char[] string) { alias GetID_A!(DropWhite!(string)) GetID; }
template GetID_A(char[] string)
{
static if(string == "")
const char[] GetID_A = "";
else static if('_' == string[0] || ('a' <= string[0] && string[0] <=
'z') || ('A' <= string[0] && string[0] <= 'Z'))
const char[] GetID_A = string[0..1] ~ GetID_B!(string[1..$]);
else
const char[] GetID_A = "";
}
template GetID_B(char[] string)
{
static if(string == "")
const char[] GetID_B = "";
else static if('_' == string[0] || ('a' <= string[0] && string[0] <=
'z') || ('A' <= string[0] && string[0] <= 'Z') || ('0' <= string[0] &&
string[0] <= '9'))
const char[] GetID_B = string[0..1] ~ GetID_B!(string[1..$]);
else
const char[] GetID_B = "";
}
/***********
mixin temp with {V + 'string' broken up by 'c'}
*/
template BreakBy(char[] string, char d, V...)
{
static if(string == "")
alias V BreakBy;
else static
alias BreakBy!(DiscardTill!(string,d), d, V, UseTill!(string,d)) BreakBy;
}
/***********
mixin temp with {V + 'string' broken up by 'c'}
*/
template SplitWhite(char[] string, V...)
{
static if(DropWhite!(string).length == 0)
alias V SplitWhite;
else
alias SplitWhite!(DropWhite!(DiscardTillWhite!(string)), V,
UseTillWhite!(DropWhite!(string))) SplitWhite;
}
/****
PARSER : RULE PARSER | ;
RULE : ID.name ":" CASE ALT ";";
ALT : "|" CASE ALT | ;
CASE : ID.action "/" SEQUENCE;
SEQUENCE : ID SEQUENCE | ;
*/
template Parser(char[] string)
{
static if(string != "") // test for PARSER.2
{
// get RULE
private static const char[] rule = DropWhite!(UseTill!(string, ';'));
// report
pragma(msg, rule);
// instance RULE
bool ParseRule(char[] name : GetID!(rule))()
{
// record start location
uint start = this.pos;
// try caes
alias BreakBy!(DropWhite!(DiscardTill!(rule,':')),'|') cases;
caseLoop: foreach(ci,c;cases)
{
const char[] Case = cases[ci];
pragma(msg, "caseLoop: "~Case);
// return to start location
this.pos = start;
// allocate storage for returns
alias SplitWhite!(DiscardTill!(Case,'/')) clauses;
// try clauses
foreach(index, cl;clauses)
{
const char[] clause = clauses[index];
pragma(msg, "clauseLoop: "~clause);
// parse item
bool fail = ParseRule!(GetID!(clause))();
// if failed try next case
if(fail) continue caseLoop;
// otherwise try next clause
}
// enact reduction when all clauses fit.
return false;
}
return true;
}
// recur on remainder of PARSER
mixin Parser!(DiscardTill!(string, ';'));
}
}
</code>
More information about the Digitalmars-d-announce
mailing list