import std.metastrings;

/*
    Compile-time tokenizer built from recursive templates (D1-era
    metaprogramming; uses std.metastrings.Format for diagnostics).
    All lexing happens at compile time; the result is a tuple of
    Token!() instances.
*/

/// Groups its arguments into an alias tuple.
template Tuple(V...){
    alias V Tuple;
}

/// Token type codes stored in Token.Type.
enum: uint{
    TOK_EOF,
    TOK_TEXT,
    TOK_IDENT,
    TOK_STRING,
    TOK_NUMBER,
    //TOK_SPACE, //not used (see TokenizeWhitespace below)
    TOK_UNUSED
}

/*
    Use on a token tuple set to display its contents.

    template MyTokenizer(char[] input){
        alias Tokenize!(Tokenize,input) MyTokenizer;
    }
    pragma(msg,TokenPrinter!(MyTokenizer!("hello world")));
*/
/// Base case: no tokens left to print.
template TokenPrinter(){
    const char[] TokenPrinter = "";
}

/// Renders one Token!() as "(col,row) type [text]" and recurses on the rest.
template TokenPrinter(alias T,V...){
    const char[] TokenPrinter =
        Format!("(%s,%s) %s [%s]\n",T.Column,T.Row,T.Type,T.Tok) ~ TokenPrinter!(V);
}

/// One lexed token: source position (x = column, y = row, both 1-based),
/// a TOK_* type code, and the token text.
template Token(size_t x,size_t y,uint type,char[] tok){
    // Debug builds echo every token as it is created (printed as (row,col)).
    debug pragma(msg,Format!("(%s,%s) %s [%s]",y,x,type,tok));
    alias x Column;
    alias y Row;
    alias type Type;
    alias tok Tok;
}

/*
    Consumes the run of whitespace at the front of input.
    Exposes:
        Token    - the whitespace text consumed
        Lines    - number of '\n' characters in the run
        TrailCol - number of whitespace characters after the last '\n'
                   (equals Token.length when Lines == 0)
*/
template ParseWhitespace(char[] input,char[] result="",uint lines=0,uint col=0){
    static if(input.length == 0){
        const char[] Token = result;
        const uint Lines = lines;
        const uint TrailCol = col;
    }
    else static if(input[0] == '\n'){
        // Newline: bump the line count and restart the trailing-column count.
        alias ParseWhitespace!(input[1..$],result ~ input[0],lines+1,0) ParseWhitespace;
    }
    else static if(
        input[0] == ' ' ||
        input[0] == '\r' ||
        input[0] == '\t'){
        alias ParseWhitespace!(input[1..$],result ~ input[0],lines,col+1) ParseWhitespace;
    }
    else{
        const char[] Token = result;
        const uint Lines = lines;
        const uint TrailCol = col;
    }
}

// Consumes whitespace and recurses WITHOUT emitting a token.
// (Uncomment the nextTok lines, and TOK_SPACE above, to keep whitespace tokens.)
template TokenizeWhitespace(alias Recurse,char[] input,size_t x,size_t y,V...){
    alias ParseWhitespace!(input) ws;
    //alias Token!(x,y,TOK_SPACE,ws) nextTok;
    static if(ws.Lines == 0){
        const uint newx = x+ws.Token.length;
    }
    else{
        // BUGFIX: after a newline the column is 1 plus any whitespace that
        // followed the last '\n' (previously it was reset to 1 unconditionally,
        // so indented tokens were reported at column 1).
        const uint newx = 1+ws.TrailCol;
    }
    const uint newy = y+ws.Lines;
    alias Recurse!(Recurse, input[ws.Token.length..$], newx, newy, V /*,nextTok*/) Next;
}

// Assumes leading " has already been parsed.
// Returns string contents up to, but not including, the closing ".
// A backslash escapes the following character (so \" does not terminate).
// A missing closing quote is a compile-time error.
template ParseString(char[] input,char[] result=""){
    static if(input.length == 0){
        static assert(false,"expected closing \"");
    }
    else static if(input[0] == '"'){
        const char[] ParseString = result;
    }
    else static if(input[0] == '\\'){
        static if(input.length == 1){
            static assert(false,"expected closing \"");
        }
        else{
            // skip single-char escape sequence
            alias ParseString!(input[2..$],result ~ input[0..2]) ParseString;
        }
    }
    else{
        alias ParseString!(input[1..$],result ~ input[0]) ParseString;
    }
}

// Emits a TOK_STRING token; the +2 accounts for the two quote characters.
// NOTE(review): a string literal containing '\n' does not advance the row
// counter, so positions after a multi-line string are off - TODO fix if
// multi-line strings matter for this grammar.
template TokenizeString(alias Recurse,char[] input,size_t x,size_t y,V...){
    alias ParseString!(input[1..$]) tok;
    alias Token!(x,y,TOK_STRING,tok) nextTok;
    alias Recurse!(Recurse, input[tok.length+2..$], x+tok.length+2 , y, V, nextTok) Next;
}

/// Consumes a run of decimal digits from the front of input.
template ParseNumber(char[] input,char[] result=""){
    static if(input.length == 0){
        const char[] ParseNumber = result;
    }
    else static if(input[0] >= '0' && input[0] <= '9'){
        alias ParseNumber!(input[1..$],result ~ input[0]) ParseNumber;
    }
    else{
        const char[] ParseNumber = result;
    }
}

// Emits a TOK_NUMBER token for a digit run.
template TokenizeNumber(alias Recurse,char[] input,size_t x,size_t y,V...){
    alias ParseNumber!(input) tok;
    alias Token!(x,y,TOK_NUMBER,tok) nextTok;
    alias Recurse!(Recurse, input[tok.length..$], x+tok.length, y, V, nextTok) Next;
}

/// Consumes identifier continuation characters: [A-Za-z0-9_]*.
template ParseIdentifier(char[] input,char[] result=""){
    static if(input.length == 0){
        const char[] ParseIdentifier = result;
    }
    else static if(
        (input[0] >= 'a' && input[0] <= 'z') ||
        (input[0] >= 'A' && input[0] <= 'Z') ||
        (input[0] >= '0' && input[0] <= '9') ||
        input[0] == '_'){
        alias ParseIdentifier!(input[1..$],result ~ input[0]) ParseIdentifier;
    }
    else{
        const char[] ParseIdentifier = result;
    }
}

/// Consumes a full identifier: [A-Za-z_] then [A-Za-z0-9_]*.
/// Checks only the first character here, then delegates to ParseIdentifier.
template ParseStartIdentifier(char[] input,char[] result=""){
    static if(input.length == 0){
        const char[] ParseStartIdentifier = result;
    }
    else static if(
        (input[0] >= 'a' && input[0] <= 'z') ||
        (input[0] >= 'A' && input[0] <= 'Z') ||
        input[0] == '_'){
        alias ParseIdentifier!(input[1..$],result ~ input[0]) ParseStartIdentifier;
    }
    else{
        const char[] ParseStartIdentifier = result;
    }
}

// Emits a TOK_IDENT token for an identifier.
template TokenizeIdentifier(alias Recurse,char[] input,size_t x,size_t y,V...){
    alias ParseStartIdentifier!(input[0..$]) tok;
    alias Token!(x,y,TOK_IDENT,tok) nextTok;
    alias Recurse!(Recurse, input[tok.length..$], x+tok.length , y, V, nextTok) Next;
}

/*
    Places all tokens parsed from input into a tuple of Token!() aliases.
    The tuple will contain all tokens and, at the very least, an EOF token
    at the end.  Note that whitespace is thrown out, but can be enabled by
    altering the comments above.

    Recurse is provided as an "extension point" for tokenizing.  Simply roll
    your own tokenizer rules into another template with the same signature
    as Tokenize(), and pass the template name as Recurse.
*/
template Tokenize(alias Recurse,char[] input,size_t x=1,size_t y=1,V...){
    static if(input.length == 0){
        alias Tuple!(V,Token!(x,y,TOK_EOF,"(EOF)")) Tokenize;
    }
    // WHITESPACE
    else static if(
        input[0] == ' ' ||
        input[0] == '\n' ||
        input[0] == '\r' ||
        input[0] == '\t'){
        alias TokenizeWhitespace!(Recurse,input,x,y,V).Next Tokenize;
    }
    // STRING
    else static if(input[0] == '"'){
        alias TokenizeString!(Recurse,input,x,y,V).Next Tokenize;
    }
    // NUMBER
    else static if(input[0] >= '0' && input[0] <= '9'){
        alias TokenizeNumber!(Recurse,input,x,y,V).Next Tokenize;
    }
    // IDENT
    else static if(
        (input[0] >= 'a' && input[0] <= 'z') ||
        (input[0] >= 'A' && input[0] <= 'Z') ||
        input[0] == '_'){
        alias TokenizeIdentifier!(Recurse,input,x,y,V).Next Tokenize;
    }
    // OTHER SPECIAL CHARS: emit each as a one-character TOK_TEXT token.
    else{
        alias Recurse!(Recurse,input[1..$],x+1,y,V,Token!(x,y,TOK_TEXT,""~input[0])) Tokenize;
    }
}

/*
    Use:
    template MyTokenizer(char[] input){
        alias Tokenize!(Tokenize,input) MyTokenizer;
    }
*/