/// Basic rcdata.parser usage with arrays.
module tests.parser.basic_lexer;

pure @safe:

import std.algorithm;
import rcdata.parser;
import tests.parser.base;


// Create the parser
struct Lexer {

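    // makeParser appears to provide the Match type and the matcher
    // primitives used below (matchOr, matchAny, matchUntil,
    // matchRepeatMinOnce, ...), while supply (see tests.parser.base)
    // builds the Token data out of the consumed input.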
    mixin makeParser!(string, supply);

    static:

    // Merge a token list into a single token of a given type
    Match merge(TokenType type, funs...)(string text) {

        import std.array;

        auto matched = match!funs(text);

        return matched
            ? Match(
                matched.matched,
                [Token(type, matched.data.map!"a.content".join)]
            )
            : matched;

    }
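
    // For example, merge!(TokenType.eol, "\n") consumes a single line feed
    // and yields the one token Token(TokenType.eol, "\n"); endOfLine below
    // extends this pattern to all three line break styles.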

    // A simple grammar
    Match lex(string input) pure @safe {

        return matchUntil!(
            "",  // Match until end of file
            token,
        )(input);

    }

    /// Match any token
    Match token(string input) pure @safe {

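        // matchOr is an ordered choice: rules are tried in sequence and the
        // first one to succeed wins. Keywords therefore take precedence over
        // identifiers, and since the identifier rule is a single matchAny,
        // each identifier token is exactly one character long.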
        return matchOr!(
            merge!(
                TokenType.keyword,
                matchOr!("if", "else", "end", "set", "equals", "echo"),
            ),
            merge!(
                TokenType.number,
                matchRepeatMinOnce!(
                    matchAny!(a => a >= '0' && a <= '9')
                )
            ),
            merge!(
                TokenType.identifier,
                matchAny!(a => a >= 'a' && a <= 'z' || a >= 'A' && a <= 'Z')
            ),
            merge!(
                TokenType.eof,
                "",  // basicMatcher special-cases empty string as end of file
            ),

            // Also match "insignificant" tokens such as whitespace or comments
            insignificant,
        )(input);

    }

    /// Insignificant tokens: whitespace and comments
    Match insignificant(string input) pure @safe {

        return matchOr!(

            endOfLine,

            merge!(
                TokenType.comment,
                "//",
                matchUntil!endOfLine,
                endOfLine,
            ),

            merge!(
                TokenType.whitespace,
                matchRepeatMinOnce!(
                    matchOr!(" ", "\t")
                )
            ),

        )(input);

    }

    Match endOfLine(string input) pure @safe {

        return merge!(
            TokenType.eol,
            matchOr!(
                "\r\n", "\r", "\n"
            )
        )(input);

    }

}
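
// The rules are plain functions, so each can also be exercised on its own.
// A minimal extra check of endOfLine, using only names defined above.
unittest {

    auto result = Lexer.endOfLine("\r\nrest");

    // "\r\n" is listed first in matchOr, so it is not split into two tokens
    assert(result);
    assert(result.data.equal([Token(TokenType.eol, "\r\n")]));

    // No line break at the start of the input: the match fails
    assert(!Lexer.endOfLine("rest"));

}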

// Simple lexer.
unittest {

    // Note: see tests.parser.base for definitions of TokenList, Token, etc.

    Lexer.Match result = Lexer.lex(`
        set A 15
        if A equals 15
            echo A
        end
    `);

    // Check the tokens, skipping whitespace
    with (TokenType)
    assert(result.data.allButWhitespace.equal([
        Token(eol,        "\n"),
        Token(keyword,    "set"),
        Token(identifier, "A"),
        Token(number,     "15"),
        Token(eol,        "\n"),

        Token(keyword,    "if"),
        Token(identifier, "A"),
        Token(keyword,    "equals"),
        Token(number,     "15"),
        Token(eol,        "\n"),

        Token(keyword,    "echo"),
        Token(identifier, "A"),
        Token(eol,        "\n"),

        Token(keyword,    "end"),
        Token(eol,        "\n"),
    ]));

}
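
// A few extra spot checks of the token rule, exercising the ordering notes
// made inside Lexer.token above.
unittest {

    with (TokenType) {

        // Keywords take precedence over identifiers
        assert(Lexer.token("set A").data.equal([Token(keyword, "set")]));

        // Numbers consume every consecutive digit
        assert(Lexer.token("15 ").data.equal([Token(number, "15")]));

        // Identifiers are single characters
        assert(Lexer.token("AB").data.equal([Token(identifier, "A")]));

    }

}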

// If a match fails, we can find out where it happened
unittest {

    string source = `
        set A 1
        set B  # This isn't the correct syntax for comments!
    `;

    Lexer.Match result = Lexer.lex(source);

    assert(!result);

    // matched.source should point at the comment
    assert(result.matched.source == source.find("#"));

    // Match data should include all tokens matched before the failure
    with (TokenType)
    assert(result.data.allButWhitespace.equal([
        Token(eol,        "\n"),
        Token(keyword,    "set"),
        Token(identifier, "A"),
        Token(number,     "1"),
        Token(eol,        "\n"),

        Token(keyword,    "set"),
        Token(identifier, "B"),
    ]));

}