suggestion: clean white space / end of line definition
Thomas Kuehne
thomas-dloop at kuehne.cn
Wed Nov 1 00:20:33 PST 2006
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Walter Bright schrieb am 2006-11-01:
> There is a problem though with replacing it all with a function - lexing
> speed. Lexing speed is critically dependent on being able to consume
> whitespace fast, hence all the inline code to do it. Running the source
> through two passes makes it half as fast.
Here is a faster mock-up (untested!) using functions.
Use of macros is certainly possible too.
Thomas
/*
 * Tests whether the bytes at `input` start an end-of-line sequence other
 * than a bare LINE FEED (0x0A), which the lexer consumes inline.
 *
 * Recognised sequences (UTF-8 encoded):
 *   0x0B            LINE TABULATION
 *   0x0C            FORM FEED
 *   0x0D            CARRIAGE RETURN (a directly following 0x0A is absorbed)
 *   0xC2 0x85       NEXT LINE
 *   0xE2 0x80 0xA8  LINE SEPARATOR
 *   0xE2 0x80 0xA9  PARAGRAPH SEPARATOR
 *
 * Returns a pointer to the LAST byte of the matched sequence, so that the
 * caller's ordinary `p++` steps past it, or 0 if no sequence starts here.
 *
 * input[1] is read when input[0] is 0x0D, 0xC2 or 0xE2; input[2] is read
 * only after 0xE2 0x80. The caller must make those bytes readable.
 */
unsigned char* isEndOfLine(unsigned char* input){
    switch(input[0]){
        /* 0x0A (LINE FEED) is deliberately absent: covered by the lexer. */
        case 0x0B: // LINE TABULATION
        case 0x0C: // FORM FEED
            return input;

        case 0x0D: // CARRIAGE RETURN, optionally followed by LINE FEED
            if(input[1] == 0x0A){
                return input + 1;
            }
            return input;

        case 0xC2: // NEXT LINE (U+0085)
            if(input[1] == 0x85){
                return input + 1;
            }
            break;

        case 0xE2: // LINE SEPARATOR (U+2028) || PARAGRAPH SEPARATOR (U+2029)
            if((input[1] == 0x80) && ((input[2] == 0xA8) || (input[2] == 0xA9))){
                return input + 2;
            }
            break;

        default:
            break;
    }

    return 0;
}
#
/*
 * Tests whether the bytes at `input` start a white-space character other
 * than a plain SPACE (0x20), which the lexer consumes inline.
 *
 * Recognised characters (UTF-8 encoded):
 *   0x09                 CHARACTER TABULATION
 *   0x1F                 INFORMATION SEPARATOR ONE
 *   0xC2 0xA0            NO-BREAK SPACE             (U+00A0)
 *   0xE1 0x9A 0x80       OGHAM SPACE MARK           (U+1680)
 *   0xE1 0xA0 0x8E       MONGOLIAN VOWEL SEPARATOR  (U+180E)
 *   0xE2 0x80 0x80..0x8A EN QUAD..HAIR SPACE        (U+2000..U+200A)
 *   0xE2 0x80 0xAF       NARROW NO-BREAK SPACE      (U+202F)
 *   0xE2 0x81 0x9F       MEDIUM MATHEMATICAL SPACE  (U+205F)
 *   0xE3 0x80 0x80       IDEOGRAPHIC SPACE          (U+3000)
 *
 * Returns a pointer to the LAST byte of the matched character, so that the
 * caller's ordinary `p++` steps past it, or 0 if no such character starts
 * here.
 *
 * input[1] is read after a 0xC2, 0xE1, 0xE2 or 0xE3 lead byte; input[2] is
 * read only once input[1] has matched. The caller must make those bytes
 * readable.
 */
unsigned char* isSpace(unsigned char* input){
    switch(input[0]){
        /* 0x20 (SPACE) is deliberately absent: covered by the lexer. */
        case 0x09: // CHARACTER TABULATION
        case 0x1F: // INFORMATION SEPARATOR ONE
            return input;

        case 0xC2:
            if(input[1] == 0xA0){
                // NO-BREAK SPACE
                return input + 1;
            }
            break;

        case 0xE1:
            switch(input[1]){
                case 0x9A:
                    // U+1680 encodes as E1 9A 80; second byte 0xA9 would be
                    // U+1A40, which is not a space.
                    if(input[2] == 0x80){
                        // OGHAM SPACE MARK
                        return input + 2;
                    }
                    break;
                case 0xA0:
                    if(input[2] == 0x8E){
                        // MONGOLIAN VOWEL SEPARATOR
                        return input + 2;
                    }
                    break;
                default:
                    break;
            }
            break;

        case 0xE2:
            switch(input[1]){
                case 0x80:
                    if((0x80 <= input[2]) && (input[2] <= 0x8A)){
                        // EN QUAD..HAIR SPACE
                        return input + 2;
                    }else if(input[2] == 0xAF){
                        // NARROW NO-BREAK SPACE
                        return input + 2;
                    }
                    break;
                case 0x81:
                    if(input[2] == 0x9F){
                        // MEDIUM MATHEMATICAL SPACE
                        return input + 2;
                    }
                    break;
                default:
                    break;
            }
            break;

        case 0xE3:
            if((input[1] == 0x80) && (input[2] == 0x80)){
                // IDEOGRAPHIC SPACE
                return input + 2;
            }
            break;

        default:
            break;
    }
    return 0;
}
#
# void lexer(){ /* Mock-up of the lexer's main loop. Plain SPACE and LINE
#  * FEED are consumed inline by their own cases; any other byte reaches
#  * the default case, where isEndOfLine() and isSpace() are tried in that
#  * order. Both helpers return the last byte of what they matched, so the
#  * code jumps back to the inline case and lets its p++ step past it. */
# unsigned char* p; // NOTE(review): never initialised in this mock-up - a real lexer must point it at the source text
# unsigned char* tmp; // holds the helper result while it is tested
# while (1)
# {
# switch (*p)
# {
# Lspace:
# case ' ':
# p++; // also steps past the last byte of a character matched by isSpace()
# continue; // skip white space
#
# Lnew_line:
# case '\n':
# p++; // also steps past the last byte of a sequence matched by isEndOfLine()
# //loc.linnum++;
# continue; // skip white space
#
# /* a lot more code goes here */
#
# default:
# if((tmp = isEndOfLine(p))){
# p = tmp; // now at the last byte of the line terminator
# goto Lnew_line;
# }
# if((tmp = isSpace(p))){
# p = tmp; // now at the last byte of the space character
# goto Lspace;
# }
#
# /* a lot more code goes here */
# }
# }
# }
-----BEGIN PGP SIGNATURE-----
iD8DBQFFSGXwLK5blCcjpWoRAvzLAKCO0gfLsLKj0nLykQoYOobQ1TKJXwCfUwg+
mSqDFqxJiaBcbCh5LR1Cae4=
=mpgd
-----END PGP SIGNATURE-----
More information about the Digitalmars-d
mailing list