Template regexes, version 2
Craig Black
cblack at ara.com
Tue Feb 21 10:53:33 PST 2006
Since there seems to be some interest, and to prove that I'm not bullshitting, I thought I would post the code that I mentioned earlier. It describes a parser for an unnamed object-oriented language of my own design (which borrows a lot from D). It is actually fewer than 230 lines if you don't count the comments and whitespace. It is all one big string that gets parsed at run time, but perhaps D could turn it into optimized source code.
// languageParser: the complete grammar of the language, held in one string.
//
// The adjacent string literals below are concatenated by the compiler into a
// single string with nothing inserted between them. The `//` comments that sit
// between or after the literals are ordinary C++ comments and are NOT part of
// the string.
//
// Notation, as far as it can be read from the rules and their trailing
// comments (the engine that interprets the string is not in this file, so
// treat this as a reading aid and confirm against the engine):
//   Name:(...) / Name::(...)  defines a rule. Only Real, Char, Text, Comment1,
//                             Comment2 and Delims use `::`; the difference
//                             between `:` and `::` is not shown here.
//   !Name                     refers to another rule by name.
//   /x                        `/` acts as the escape character (e.g. `/(`,
//                             `/;`, `//` for a literal slash), which avoids
//                             doubling backslashes inside a C string.
//   /d /i /s /n               presumably digit, identifier, whitespace and
//                             newline classes -- TODO confirm.
//   &(...)@~  $~  @@~  `tag  ;  engine-specific markers; their meaning is not
//                             defined in this file.
const char *languageParser =
"Real::(!Delims&(-?(/d+(/.(/d)+)?)|(/d*/.(/d)+)([eE][/-+]?/d{1,3})?)@~;)" // A real number
"Char::(!Delims&(\')@~&((\\)?.)@~\';;)" // char literal
"Text::(!Delims&(\")@~&([^\".]*)@~\";;)" // text literal
// Delimiters
// Delims consumes one `/` itself before trying Comment1 or Comment2, so those
// two rules describe only the remainder of a comment (presumably `/...` to end
// of line and `*...*/` respectively -- confirm against the engine).
"Comment1::(//(^(/n).)*)"
"Comment2::(/*(^(/*//).)*/*//)"
"Delims::(/s(//(!Comment1|!Comment2)/s)*)"
//
// Expressions
//
// Binary operator groups. The BiExprN/BiSubN rules further down chain these
// groups together in the order BiOp1 .. BiOp5.
"BiOp1:([/./^])" // . ^ (exponentiation)
"BiOp2:([///*/%/#])" // / div * mul % mod # concat
"BiOp3:([/-+])" // - +
"BiOp4:(=|/!=|<|>|<=|>=|in|/~|/!/~)" // = != < > <= >= in ~ !~
"BiOp5:(/&|/||/./.)" // & | .. (range operator)
"Parens:(/(!Expr/))" // Expression bound by parenthesis
"MParam:(((`=)@~&(/i)@~;;=)?!Expr)"
"MParens:(&(/()@~(!MParam(,!MParam)*)?/);)" // 0 or more comma delimited expressions bound by parens
"Index:(&(/[)@~!Expr(,!Expr)*/];)" // Array index
"Tuple1:(&(/[)@~!MParam(,!MParam)*/];)" // Array index tuple
"Tuple2:(&(/()@~!MParam(,!MParam)*/);)" // Parens tuple
"Tuple:(!Tuple1|!Tuple2)"
"TBang:(&(/!)@~;)" // template instantiation bang
"DotExpr:(&(/.)@~;&(/i)@~;)" // dots in an id expression
"IdExpr:((`/:idexpr)@~&(/i)@~;(!DotExpr)*(((!TBang)?!MParens)|!Index)*;)" // Id expression
// LExpr: alternation over the operand forms (literals, id expressions,
// tuples, parenthesised expressions, method/delegate/closure expressions and
// prefix expressions).
"LExpr:(!Real|!Text|!Char|!TypeofExpr|!IdExpr|!Tuple|!Parens|!MethodExpr|!DelegateExpr|!ClosureExpr|!PrefixExpr)"
// Prefix operators
"PrefixOp:([/*/-/!/&]|sizeof|typeid|typeof)"
"PrefixExpr:(&(!PrefixOp)@~!LExpr;)"
// Exponentiation (BiOp1, which also contains `.`)
"BiExpr1:((`t1)@~!LExpr&(!BiOp1)$~!BiSub2;)"
// Multiplication, division, and modulus
"BiSub2:(!BiExpr1|!LExpr)"
"BiExpr2:((`t2)@~!BiSub2&(!BiOp2)$~!BiSub3;)"
// Addition and Subtraction
"BiSub3:(!BiExpr2|!BiSub2)"
"BiExpr3:((`t3)@~!BiSub3&(!BiOp3)$~!BiSub4;)"
// Comparison, membership and match operators (BiOp4): = != < > <= >= in ~ !~
"BiSub4:(!BiExpr3|!BiSub3)"
"BiExpr4:((`t4)@~!BiSub4&(!BiOp4)$~!BiSub5;)"
// & | .. (BiOp5)
"BiSub5:(!BiExpr4|!BiSub4)"
"BiSub6:(!BiExpr5|!BiSub5)"
"BiExpr5:((`t5)@~!BiSub5&(!BiOp5)$~!BiSub6;)"
// Reserved words; referenced by TypeDots below.
"Keywords:(module|interface|class|enum|static|const|delegate|closure|method)"
// Type expression
"TypeofExpr:(&(typeof)@~!Expr(/.&(/i)@~;)*;)"
"TypeDots:(^(!Keywords)&(/i)@~(/.&(/i)@~;)*;)"
"Array:(&(/[)@~(!Expr(,!Expr)*)?/];)" // Array type
"Const:((&(/!)@~;)|((const(`/!))@~;))"
"MTypeExpr:(((!Const)?(&(/*)@~;|!Array))*(!Const)?(!DelegateType|!ClosureType|!TypeofExpr|!TypeDots))"
"TypeExpr:((`/:type)@~!MTypeExpr;)"
// delegate, closure types
// Examples in the target language (not part of the grammar string):
// @void(int) myDelegate = @myMethod;
// @void(int) myDelegate = @myInstance.myMethod;
// #void(int) myClosure = #myMethod(5);
// #void(int) myDelegate = #myInstance.myMethod(5);
"SID:(&(/i)@~;^(/i))"
"DParam:((`/:var)@~!TypeExpr(!SID(,!SID)*)?(,;!DParam)?)"
"DParens:(/((!DParam;)?/))"
"Delegate:(/@|delegate)"
"Closure:(/#|closure)"
"DelegateType:((`/@)@~!Delegate!TypeExpr!DParens;)"
"ClosureType:((`/#)@~!Closure!TypeExpr!DParens;)"
// Anonymous method expression
// Example in the target language (not part of the grammar string):
// %void(int a) myDelegate = %void(int a) Write(a);;
"AParam:((`/:var)@~!TypeExpr!SID(,!SID)*(,;!AParam)?)"
"AParens:(/((!AParam;)?/))"
"MethodExpr:((`/%)@~!Method!TypeExpr!AParens!Block;)"
"DelegateExpr:((`/@)@~!Delegate(!IdExpr/.)?&(/i)@~;;)"
// Closure expression
"ClosureExpr:((`/#)@~!Closure!IdExpr;)"
// New expression
"NewExpr:(&(new)@~!TypeExpr(!MParens)?;)"
// CastExpr
"CastExpr:((`cast)@~!Expr&((!Static)?as)$~!TypeExpr;)"
// An expression
"BiOps:(!BiOp1|!BiOp2|!BiOp3|!BiOp4|!BiOp5)"
"PreLExpr:(!LExpr^(!BiOps))"
// The Expr rule is split across the next two literals; they concatenate into
// one rule.
"Expr:(^[/]/;/)/}](!PreLExpr|!BiExpr5|!BiExpr4|!BiExpr3|"
"!BiExpr2|!BiExpr1|!NewExpr|!CastExpr))"
//
// Statements
//
// Assignment statement
"AssOp:(=|/+=|/-=|/*=|//=|/^=)"
"AssSt:((`ass)@~!IdExpr(&(!AssOp)$~)!Expr;)"
// Delete statement
"DeleteSt:(&(delete)@~!IdExpr;/;)"
// Return, break, continue statements
"ReturnSt:(&(return)@~(!Expr)?;/;)"
"BreakSt:(&(break)@~;/;)"
"ContinueSt:(&(continue)@~;/;)"
// Postfix statement
"PostfixOp:(/+/+|/-/-)"
"PostfixSt:((`pf)@~!IdExpr&(!PostfixOp)$~;)"
// Variable declaration statement
// VarSt accepts an optional !Static prefix; SVarSt is the same rule without it
// (SVarSt is the form listed in ModuleSt, VarSt in ClassSt and Statement).
"Var:(&(/i)@~(=(!NewExpr|!Expr))?;)" // var = expr
"Static:((&(/$)@~;)|(static(`/$)@~;))"
"VarSt:((`/:var)@~(!Static)?!TypeExpr!Var(,!Var)*;/;)"
"SVarSt:((`/:var)@~!TypeExpr!Var(,!Var)*;/;)"
// If statement
"IfSt:(&(if)@~!Expr(/:)?!Block(else!Block)?;)"
// Static if statement
"SIfSt:(&(!Static(if))@~!Expr(/:)?!Block(else!Block)?;)"
// Switch statement
"CaseSt:((`case)@~(case)?!Expr*(/:)?!Block;)"
"DefaultSt:(&(default)@~(/:)?!Block;)"
"SwitchSt:(&(switch)@~/{(!CaseSt)*(!DefaultSt)?/};)"
// While statement
"WhileSt:(&(while)@~!Expr!Block;)"
// Do statement
"DoSt:(&(do)@~!Block(while)!Expr;/;)"
// Method call statement
"MethodSt:((`/(/))@~!IdExpr;/;)"
// Foreach statement
// NOTE(review): `(else!Block)` has no trailing `?` here, unlike IfSt/SIfSt --
// confirm whether a foreach is really meant to require an else block.
"FEVarDecl:((`/:var)@~!TypeExpr&(/i)@~;;)"
"ForeachHeader:(!FEVarDecl(,!FEVarDecl;)?/;!Expr(&(step)@~!Expr;)?)"
"ForeachSt:(&(foreach)@~/(!ForeachHeader/)!Block(else!Block);)"
// For statement
// NOTE(review): `(=!Expr)` in FVarDecl has no `?`, whereas Var and TID write
// the initializer as optional -- confirm this is intended.
"FVarDecl:((`/:var)@~!TypeExpr&(/i)@~;(=!Expr);)"
"ForInit:(!FVarDecl|!AssSt)"
"FStatement:(!AssSt|!PostfixSt)"
"ForSt:(&(for)@~/((!ForInit(,!ForInit)*)?/;!Expr/;(!FStatement(,!FStatement)*)?/)!Block;)"
// In and out blocks can include preconditions and postconditions
"InBlock:(&(in)@~!Block;)"
"OutBlock:(&(out)@~!Block;)"
// Trace block
"TraceBlock:(&(trace)@~!Block;)"
// Echo, assert
"EchoSt:(&(echo)@~!Expr;)"
"AssertSt:(&(assert)@~!Expr;)"
// Exceptions
// NOTE(review): DefaultSt below repeats, character for character, the
// DefaultSt rule already defined in the switch-statement section above. How
// the engine treats a duplicate rule name is not visible here -- confirm
// before removing either copy.
"CatchSt:((`catch)@~(catch)?&(/i)@~;*(/:)?!Block;)"
"DefaultSt:(&(default)@~(/:)?!Block;)"
"TryBlock:(&(try)@~/{(!CatchSt)*(!DefaultSt)?/};)"
"ThrowSt:(&(throw)@~;)"
// Enumeration definition
// NOTE(review): `(=!Expr)` in EnumNode has no `?`, so as written every
// enumerator appears to need an explicit value -- confirm this is intended.
"EnumNode:(&(/i)@~(=!Expr);)"
"EnumDef:(&(enum)@~/{!EnumNode(,!EnumNode)*/};)"
// Method definition
// Three variants share one shape: SMethodDef (no modifier), MMethodDef
// (optional !Static or !Const prefix) and IMethodDef (optional !Const prefix,
// ends in `/;` instead of a !Block).
"TID:(&(/i)@~^(/i)(=!Expr)?;)"
"TParam:((`/:var)@~(&(in|out|inout)@~;)?!TypeExpr!TID(,!TID)*(,;!TParam)?)"
"TParens:(/((!TParam;)?/))"
"Method:(/%|method)"
"SMethodDef:((`/%)@~!Method!TypeExpr&(/i)@@~;!TParens!Block;)"
"MMethodDef:((`/%)@~(!Static|!Const)?!Method!TypeExpr&(/i)@@~;!TParens!Block;)"
"IMethodDef:((`/%)@~(!Const)?!Method!TypeExpr&(/i)@@~;!TParens/;)"
// The following rules are commented out in C++ and therefore are not part of
// the grammar string.
//"Construct:(&(construct)@~!Block;)"
//"Destruct:(&(destruct)@~!Block;)"
//"Startup:..."
//"Terminate:..."
//"Set:(&(set)@~!TParens!Block;)"
//"Get:(!TypeExprget!Block;)"
// Operator overloading
// OpOverload (any of the four operator kinds) is the form listed in ClassSt;
// BinaryOverload (binary operators only) is the form listed in ModuleSt.
// NOTE(review): PrefixO writes `(/-/!)`, a two-character sequence, where
// PrefixOp above uses a character class -- possibly `[/-/!]` was intended;
// confirm.
"OpOverload:(&(operator)@~(!PrefixO|!PostfixO|!AssignO|!BinaryO)!Block;)"
"BinaryO:(&(^(/.)!BiOps)@~;!TypeExpr/(!TypeExpr&(/i)@~;,!TypeExpr&(/i)@~;/))"
"AssignO:(&(^(=)!AssOp)@~;void/(!TypeExpr&(/i)@~;/))"
"PrefixO:(&(/-/!)@~;!TypeExpr/(/))"
"PostfixO:(&(/+/+|/-/-)@~;void/(/))"
"BinaryOverload:(&(operator)@~!BinaryO!Block;)"
// Class definition
"ProtSt:(&(public|private|protected)@~;/:)"
"ClassSt:(!ProtSt|!Alias|!TypeDef|!VarSt|!MMethodDef|!ClassDef|!EnumDef|!InterfaceDef|!OpOverload)"
"ClassBlock:(&(/{)@~(!ClassSt)*/};)"
"TemplateParam:((&(int)@~;)?(&(/i)@~;))"
"TemplateParams:(!TemplateParam(,!TemplateParam)*)"
"ClassDef:(&(class)@~&(/i)@~(/(!TemplateParams/))?;(/:!IdExpr(,!IdExpr)*)?!ClassBlock;)"
// Interface definition
"InterfaceSt:(!ProtSt|!Alias|!TypeDef|!IMethodDef|!ClassDef|!EnumDef|!InterfaceDef)"
"InterfaceBlock:(&(/{)@~(!InterfaceSt)*/};)"
"InterfaceDef:(&(interface)@~&(/i)@~;(/:!IdExpr(,!IdExpr)*)?!InterfaceBlock;)"
"Alias:(&(alias)@~&(/i)@~;=!TypeExpr;)"
"TypeDef:(&(typedef)@~&(/i)@~;=!TypeExpr;)"
// import
"ImportNode:(&(/i)@~(/.&(/i)@@~;)*;)"
"ImportSt:(&(import)@~!ImportNode(,!ImportNode)*;/;)"
// Author's notes on topics still to be covered:
// error checking, exceptions
// in, out, inout params
// in, out code blocks
// Module: a `module <name>:` header (ModuleInit) followed by any number of
// module-level statements. The ModuleSt rule is split across two literals.
"ModuleInit:(&(module)@~&(/i)@@~;/:)"
"ModuleSt:(!ProtSt|!Alias|!TypeDef|!ImportSt|!SVarSt|!SMethodDef|!ClassDef|"
"!EnumDef|!InterfaceDef|!BinaryOverload)"
"ModuleDef:(!ModuleInit(!ModuleSt)*)"
// A statement (the rule is split across the next four literals)
"Statement:(!IfSt|!ForSt|!ForeachSt|(!PostfixSt/;)|(!AssSt/;)|!MethodSt|"
"!VarSt|!InBlock|!OutBlock|!TraceBlock|!TryBlock|!ThrowSt|!EchoSt|"
"!AssertSt|!SwitchSt|!WhileSt|!DoSt|!ReturnSt|!BreakSt|!ContinueSt|"
"!DeleteSt|!SIfSt|!EnumDef|!SMethodDef|!ClassDef)"
// A statement block: either a brace-delimited statement list or one statement
"Block:((`/{)@~(/{(!Statement)*/}|!Statement);)";
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.puremagic.com/pipermail/digitalmars-d/attachments/20060221/9a2788bf/attachment.html>
More information about the Digitalmars-d
mailing list