"spirit" like template parser generator in 100 Lines of code (almost)

BCS BCS at pathlink.com
Mon Dec 18 11:50:37 PST 2006


(The almost is with regards to it working not the 100 LOC, if all blank 
lines and comments are removed it is exactly 100 LOC.)

It actual doesn't work because of several things. However, some of these 
are bugs (I think) and the rest I am putting forward as feature requests.

What I have actually done is make an almost working prototype of a 
template system that generates functions for a recursive decent parser. 
It is to be mixed into a class or struct. The parameter for the template 
is a string containing almost normal BNF reduction definitions.

It extends BNF by allowing "|" for "or" but doesn't implement optional 
or repeated terms.

It isn't implemented yet, but actions will be defined by providing 
specializations for the "Action" template.

Example:

<code>
struct P
{
  char[] data;
  int pos;

  PObject Action(char[]string:"ADD_ACT")(PObject l, PObject e, PObject r)
  {
   Numeric ln = cast(Numeric) l;
   Numeric rn = cast(Numeric) r;
   return new AddExp(ln, rn);
  }
  ... //some more actions

  PObject ParseRule(char[] string : "PLUS")()
  {
   if (data[pos] == '+')
   {
    pos++;
    return PObject.pass;
   }
   return PObject.fail;
  }
  ... // more terminals

// ADD_EXP is the name of the reduction
// MUL2ADD, ADD_ACT, SUB_ACT, CAT2ADD are action names
  mixin Parser!("ADD_EXP :
       MUL2ADD / MUL_EXP |
       ADD_ACT / ADD_EXP PLUS MUL_EXP |
       SUB_ACT / ADD_EXP MINUS MUL_EXP |
       CAT2ADD / CAT_EXP;"); // also has more non terminals.
}

vod main()
{
  P p;
  p.data = GetData();
  p.pos = 0;
  if(ret = cast(AddExp)p.ParseRule!("ADD_EXP")())
   //good
}
</code>

(The templates are at the bottom of the post)

Now for why it doesn't work.

===Bugs:

-- Mixins don't work quite right. --
this for instance doesn't work unless the second line in foo is removed:

template First(char[] string) { const char[] First = string[0..1] }
template Temp(char[] string)
{
  static if(string != "")
  {
   void TempFn(char[] s : First!(string)) {}
   mixin Temp!(string[1..$]);
  }
}
struct S
{
  mixin Temp!("ab");
  void Foo()
  {
   TempFn!("a")();
   TempFn!("b")();// this fails
  }
}

what seems to be happening is that only the first iteration of the 
template is actual mixed in. Use of pragma(msg,...) seems to show that 
template does recurse correctly.

I think this is a bug

-- The parameter on a foreach isn't const. --
Actual this doesn't stop anything from working, it just makes some 
things more difficult.

foreach(i;TupleMaker!())
  Template!(i)();

becomes:

alias TupleMaker!() tempTuple
foreach(j,_;tempTuple)
{
  const i = tempTuple[j];
  Template!()();
}

This is not a trivial complaint. While I was working on my BigNum 
template I noticed that the const variable adds ops to the function even 
with the -O flag.


===New Feature Needed To Make This Work.


-- Abstract Templates --

The idea here is that a template marked as abstract would have no 
generic implementation, only specializations. Furthermore the compiler 
would assume that any given specializations it is asked for exists and 
leave it up to the linker to find the correct implementation. this would 
allow cyclical calls by specializations of a template.

abstract template foo(char[] s)
{
  void foo(uint);
}

template foo(char[] s : "A")
{
  void foo(uint i)
  {  // nothing generated here
   if(i) foo!("B")(i-1);
  }
}

template foo(char[] s : "B") // foo!("B") generated here.
{
  void foo(uint i)
  {
   if(i>1) foo!("A")(i-2);
   if(i>1) foo!("C")(0); // will cause a link error
  }
}

-- "static foreach" at same level as "static if" --

two parts:
  make it "static forach"
  allow it the same places as "static if"

template foo(char[] string)
{
  static foreach(i;BreakBy!(string,','))
   mixin Other!(i);
}

this isn't strictly needed to make my template work, but it makes it a 
whole lot cleaner.


===================


Now for what you all have been waiting for!!!!!

origonal formatting at:
http://www.webpages.uidaho.edu/~shro8822/dspirit.d

<code>
/************
  Discard leading whitespace
*/
template DropWhite(char[] string)
{
  static if(string == "")
   alias string DropWhite;
  else static if(string[0] == ' ' || string[0] == '\t' || string[0] == 
'\r' || string[0] == '\n')
   alias DropWhite!(string[1..$]) DropWhite;
  else
   alias string DropWhite;
}

/************
  return a slice upto the first whitespace char
*/
template UseTillWhite(char[] string)
{
  static if(string == "" || string[0] == ' ' || string[0] == '\t' || 
string[0] == '\r' || string[0] == '\n')
   const char[] UseTillWhite = "";
  else
   const char[] UseTillWhite = string[0] ~ UseTillWhite!(string[1..$]);
}

/************
  return a slice starting with the first whitespace char
*/
template DiscardTillWhite(char[] string)
{
  static if(string == "" || string[0] == ' ' || string[0] == '\t' || 
string[0] == '\r' || string[0] == '\n')
   alias string DiscardTillWhite;
  else
   alias DiscardTillWhite!(string[1..$]) DiscardTillWhite;
}

/************
  Return a slice up-to but not including the first instance of t.
*/
template UseTill(char[] string, char t)
{
  static if(string == "" || string[0] == t)
   const char[] UseTill = "";
  else
   const char[] UseTill = string[0..1] ~ UseTill!(string[1..$],t);
}

/***********
  Return a slice starting after the first instance of t and containing 
the rest of the string.
*/
template DiscardTill(char[] string, char t)
{
  static if(string == "")
   const char[] DiscardTill = "";
  else static if(string[0] == t)
   const char[] DiscardTill = string[1..$];
  else
   const char[] DiscardTill = DiscardTill!(string[1..$],t);
}

/***********
  discard [ ]*
  then return [a-zA-Z_][a-zA-Z0-9_]*
*/
template GetID(char[] string) { alias GetID_A!(DropWhite!(string)) GetID; }
template GetID_A(char[] string)
{
  static if(string == "")
   const char[] GetID_A = "";
  else static if('_' == string[0] || ('a' <= string[0] && string[0] <= 
'z') || ('A' <= string[0] && string[0] <= 'Z'))
   const char[] GetID_A = string[0..1] ~ GetID_B!(string[1..$]);
  else
   const char[] GetID_A = "";
}
template GetID_B(char[] string)
{
  static if(string == "")
   const char[] GetID_B = "";
  else static if('_' == string[0] || ('a' <= string[0] && string[0] <= 
'z') || ('A' <= string[0] && string[0] <= 'Z') || ('0' <= string[0] && 
string[0] <= '9'))
   const char[] GetID_B = string[0..1] ~ GetID_B!(string[1..$]);
  else
   const char[] GetID_B = "";
}

/***********
  mixin temp with {V + 'string' broken up by 'c'}
*/
template BreakBy(char[] string, char d, V...)
{
  static if(string == "")
   alias V BreakBy;
  else static
   alias BreakBy!(DiscardTill!(string,d), d, V, UseTill!(string,d)) BreakBy;
}

/***********
  mixin temp with {V + 'string' broken up by 'c'}
*/
template SplitWhite(char[] string, V...)
{
  static if(DropWhite!(string).length == 0)
   alias V SplitWhite;
  else
   alias SplitWhite!(DropWhite!(DiscardTillWhite!(string)), V, 
UseTillWhite!(DropWhite!(string))) SplitWhite;
}

/****
PARSER  : RULE PARSER | ;
RULE  : ID.name ":" CASE ALT ";";
ALT  : "|" CASE ALT | ;
CASE  : ID.action "/" SEQUENCE;
SEQUENCE : ID SEQUENCE | ;
*/
template Parser(char[] string)
{
  static if(string != "") // test for PARSER.2
  {
    // get RULE
   private static const char[] rule = DropWhite!(UseTill!(string, ';'));

    // report
   pragma(msg, rule);

    // instance RULE
   bool ParseRule(char[] name : GetID!(rule))()
   {
     // record start location
    uint start = this.pos;
     // try caes
    alias BreakBy!(DropWhite!(DiscardTill!(rule,':')),'|') cases;
    caseLoop: foreach(ci,c;cases)
    {
     const char[] Case = cases[ci];
     pragma(msg, "caseLoop: "~Case);
      // return to start location
     this.pos = start;

      // allocate storage for returns
     alias SplitWhite!(DiscardTill!(Case,'/')) clauses;

      // try clauses
     foreach(index, cl;clauses)
     {
      const char[] clause = clauses[index];
      pragma(msg, "clauseLoop: "~clause);
       // parse item
      bool fail = ParseRule!(GetID!(clause))();
       // if failed try next case
      if(fail) continue caseLoop;
       // otherwise try next clause
     }
      // enact reduction when all clauses fit.
     return false;
    }

    return true;
   }
    // recur on remainder of PARSER
   mixin Parser!(DiscardTill!(string, ';'));
  }
}
</code>



More information about the Digitalmars-d-announce mailing list