/// Basic rcdata.parser usage with arrays.
module tests.parser.basic_lexer;

pure @safe:

import std.algorithm;
import rcdata.parser;
import tests.parser.base;


// Create the parser
struct Lexer {

    mixin makeParser!(string, supply);

    static:

    // Merge a token list into a single token of the given type
    Match merge(TokenType type, funs...)(string text) {

        import std.array;

        auto matched = match!funs(text);

        return matched
            ? Match(
                matched.matched,
                [Token(type, matched.data.map!"a.content".join)]
            )
            : matched;

    }

    // A simple grammar
    Match lex(string input) @safe pure {

        return matchUntil!(
            "",  // Match until end of file
            token,
        )(input);

    }

    /// Match any token
    Match token(string input) pure @safe {

        return matchOr!(
            merge!(
                TokenType.keyword,
                matchOr!("if", "else", "end", "set", "equals", "echo"),
            ),
            merge!(
                TokenType.number,
                matchRepeatMinOnce!(
                    matchAny!(a => a >= '0' && a <= '9')
                )
            ),
            merge!(
                TokenType.identifier,
                matchAny!(a => a >= 'a' && a <= 'z' || a >= 'A' && a <= 'Z')
            ),
            merge!(
                TokenType.eof,
                "",  // basicMatcher special-cases the empty string as end of file
            ),

            // Also match "insignificant" tokens such as whitespace or comments
            insignificant,
        )(input);

    }

    /// Insignificant tokens: whitespace and comments
    Match insignificant(string input) pure @safe {

        return matchOr!(

            endOfLine,

            merge!(
                TokenType.comment,
                "//",
                matchUntil!endOfLine,
                endOfLine,
            ),

            merge!(
                TokenType.whitespace,
                matchRepeatMinOnce!(
                    matchOr!(" ", "\t")
                )
            ),

        )(input);

    }

    /// Match an end-of-line sequence: CRLF, CR or LF
    Match endOfLine(string input) pure @safe {

        return merge!(
            TokenType.eol,
            matchOr!(
                "\r\n", "\r", "\n"
            )
        )(input);

    }

}

// Simple lexer.
unittest {

    // Note: see tests.parser.base for the definitions of TokenList, Token, etc.

    Lexer.Match result = Lexer.lex(`
        set A 15
        if A equals 15
            echo A
        end
    `);

    // Check the tokens, skipping whitespace
    with (TokenType)
    assert(result.data.allButWhitespace.equal([
        Token(eol, "\n"),
        Token(keyword, "set"),
        Token(identifier, "A"),
        Token(number, "15"),
        Token(eol, "\n"),

        Token(keyword, "if"),
        Token(identifier, "A"),
        Token(keyword, "equals"),
        Token(number, "15"),
        Token(eol, "\n"),

        Token(keyword, "echo"),
        Token(identifier, "A"),
        Token(eol, "\n"),

        Token(keyword, "end"),
        Token(eol, "\n"),
    ]));

}

// If a match fails, we can find out where it happened
unittest {

    string source = `
        set A 1
        set B # This isn't the correct syntax for comments!
    `;

    Lexer.Match result = Lexer.lex(source);

    assert(!result);

    // matched.source should point at the character that broke the match: the "#"
    assert(result.matched.source == source.find("#"));

    // Match data should include all tokens matched before the failure
    with (TokenType)
    assert(result.data.allButWhitespace.equal([
        Token(eol, "\n"),
        Token(keyword, "set"),
        Token(identifier, "A"),
        Token(number, "1"),
        Token(eol, "\n"),

        Token(keyword, "set"),
        Token(identifier, "B"),
    ]));

}
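
// For reference, a minimal sketch of what tests.parser.base might contain.
// The real module is not shown in this file, so every name below is an
// assumption inferred from how it is used above; the block is compiled out
// with version (none) so it cannot conflict with the actual definitions.
version (none) {

    /// Kinds of tokens the lexer produces.
    enum TokenType {
        eof, eol, whitespace, comment, keyword, identifier, number
    }

    /// A single lexed token: its kind and the source text it covers.
    struct Token {
        TokenType type;
        string content;
    }

    /// The token array type produced by the lexer.
    alias TokenList = Token[];

    /// Hypothetical helper used in the assertions above: drops whitespace
    /// tokens so the expected lists stay short.
    auto allButWhitespace(Range)(Range tokens) {
        import std.algorithm : filter;
        return tokens.filter!(a => a.type != TokenType.whitespace);
    }

    // tests.parser.base is also expected to provide the `supply` function
    // passed to makeParser; its definition is omitted here.

}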