Recursive-descent parsing

Andrew Edwards via Digitalmars-d-learn digitalmars-d-learn at puremagic.com
Sat Dec 24 04:42:31 PST 2016


The authors of "The Art of Java" present, as a first coding 
example, a recursive-descent parser to demonstrate Java's ability 
to facilitate low level programming commonly performed in C and 
C++.

I took the opportunity to port the code to D. By doing this, I 
now have an understanding of how a recursive-descent parser is 
written and works. What I am now interested in, is learning about 
what makes this particular implementation a poor or strong one, 
how to improve upon it and other, possibly superior, 
implementations made possible by D.

Comments are invited and appreciated.

Andrew Edwards
If at first you don't succeed...

=========================
rdp2.d
=========================
//module rdp2;

class ParserException : Throwable
{
     this(string msg)
     {
         super(msg);
     }
}

// These are the token types.
enum
{
     NONE,
     DELIMITER,
     VARIABLE,
     NUMBER
}

// These are the types of syntax errors.
enum
{
     SYNTAX,
     UNBALPARENS,
     NOEXP,
     DIVBYZERO
}

// This token indecates end-of-expression.
enum EOE = ";";

string exp;     // refers to expression string
int ndx;     // current index into the expression
string token;   // holds current token
int tokType;    // holds token's types

// Array for variables.
double[] vars = new double[26];

// Parser entry poing.
double evaluate(string expstr)
{
     double result;
     exp = expstr;
     ndx = 0;

     getToken();
     if (token == EOE)
         handleErr(NOEXP); // no expression present

     // Parse and evaluate the expression.
     result = evalExp1();

     if (token != EOE)
         handleErr(SYNTAX); // last token must be EOE

     return result;
}

// Process an assignment.
double evalExp1()
{
     double result;
     int varNdx;
     int ttokType;
     string tmpToken;

     if (tokType == VARIABLE)
     {
         // save old token
         tmpToken = token;
         ttokType = tokType;

         // Compute the index of the variable.
         import std.ascii: toUpper;
         varNdx = token[0].toUpper - 'A';

         getToken();
         if (token != "=")
         {
             putBack(); // return current token
             // restore old token -- not an assignment
             token = tmpToken;
             tokType = ttokType;
         }
         else
         {
             getToken(); // get next part of exp
             result = evalExp2();
             vars[varNdx] = result;
             return result;
         }
     }
     return evalExp2();
}

// Add or subtract two terms
double evalExp2()
{
     char op;
     double result;
     double partialResult;

     result = evalExp3();

     while ((op = token[0]) == '+' || op == '-')
     {
         getToken();
         partialResult = evalExp3();
         final switch (op)
         {
             case '-':
             {
                 result -= partialResult;
                 break;
             }
             case '+':
             {
                 result += partialResult;
                 break;
             }
         }
     }
     return result;
}

// Multiply or divide two factors
double evalExp3()
{
     char op;
     double result;
     double partialResult;

     result = evalExp4();

     while ((op = token[0]) == '*' || op == '/' || op == '%')
     {
         getToken();
         partialResult = evalExp4();
         final switch (op)
         {
             case '*':
             {
                 result *= partialResult;
                 break;
             }
             case '/':
             {
                 if (partialResult == 0.0)
                     handleErr(DIVBYZERO);
                 result /= partialResult;
                 break;
             }
             case '%':
             {
                 if (partialResult == 0.0)
                     handleErr(DIVBYZERO);
                 result %= partialResult;
                 break;
             }
         }
     }
     return result;
}

// Process an exponent.
double evalExp4()
{
     double result;
     double partialResult;
     double ex;

     result = evalExp5();

     if (token == "^")
     {
         getToken();
         partialResult = evalExp4();
         ex = result;
         if (partialResult == 0.0)
         {
             result = 1.0;
         }
         else
         {
             for (int t = cast(int)partialResult-1; t > 0; t--)
                 result *= ex;
         }
     }
     return result;
}

// Evaluate a unary + or -
double evalExp5()
{
     double result;
     string op;

     op = null;
     if ((tokType == DELIMITER) && token == "+" || token == "-")
     {

         op = token;
         getToken();
     }
     result = evalExp6();

     //if (op == "-") result = -result;

     return (op == "-") ? -result : result;
}

// Process a parenthesized expression.
double evalExp6()
{
     double result;

     if (token == "(")
     {
         getToken();
         result = evalExp2();
         if (token != ")")
             handleErr(UNBALPARENS);
         getToken();
     }
     else
         result = atom();

     return result;
}

// Get the value of a number.
double atom()
{
     double result = 0.0;

     switch (tokType)
     {
         case NUMBER:
             try
             {
                 import std.conv: parse;
                 result = token.parse!double;
             }
             catch (Exception e)
             {
                 handleErr(SYNTAX);
             }
             getToken();
             break;
         case VARIABLE:
             result = findVar(token);
             getToken();
             break;
         default:
             handleErr(SYNTAX);
             break;
     }
     return result;
}

// Return the value of a variable.
double findVar(string vname)
{
     import std.ascii: isAlpha, toUpper;
     if (!vname[0].isAlpha)
     {
         handleErr(SYNTAX);
         return 0.0;
     }

     return vars[(vname[0] - 'A').toUpper];
}

// Return a token to the input stream.
void putBack()
{
     if (token == EOE) return;
     foreach(i; 0 .. token.length) ndx--;
}

// Handle an error.
void handleErr(int error)
{
     string[] err = [
         "Syntax Error",
         "Unbalanced Parentheses",
         "No Expression Present",
         "Division by Zero"
     ];

     throw new ParserException(err[error]);
}

// Obtain the next token.
void getToken()
{
     import std.ascii: isAlpha, isDigit, isWhite;
     tokType = NONE;
     token = null;

     // Check for end of expression
     if (ndx == exp.length)
     {
         token = EOE.dup;
         return;
     }

     // Skip over white space
     while (ndx < exp.length && exp[ndx].isWhite) ++ndx;

     // Trailing whitespace ends expression.
     if (ndx == exp.length)
     {
         token = EOE.dup;
         return;
     }

     if (exp[ndx].isDelim) // is operator
     {
         token ~= exp[ndx];
         ndx++;
         tokType = DELIMITER;
     }
     else if (exp[ndx].isAlpha) // is variable
     {
         while (!exp[ndx].isDelim)
         {
             token ~= exp[ndx];
             ndx++;
             if (ndx >= exp.length) break;
         }
         tokType = VARIABLE;
     }
     else if (exp[ndx].isDigit) // is number
     {
         while (!exp[ndx].isDelim)
         {
             token ~= exp[ndx];
             ndx++;
             if (ndx >= exp.length) break;

         }
         tokType = NUMBER;
     }
     else // unknown character terminates expression
     {
         token = EOE.dup;
         return;
     }
}

// Returns true if c is a delimiter.
bool isDelim(char c)
{
     import std.algorithm: canFind;
     return canFind(" +-/*%^=()", c);
}

=========================
demordp.d
=========================
mixin(import("rdp2.d"));

void main(string[] args)
{
     string expr;

     import std.stdio: write, writeln, readln;
     writeln("Enter an empty expression to stop.");

     for(;;)
     {
         write("Enter an expression: ");
         expr = readln();
         if (expr == "\n") break;
         try
         {
             writeln("Result: ", expr.evaluate);
         }
         catch (ParserException e)
         {
             e.msg.writeln;
         }
     }
}


More information about the Digitalmars-d-learn mailing list