Project Alice
Loading...
Searching...
No Matches
parser_generator.cpp
Go to the documentation of this file.
1#include <cstdint>
2#include <memory>
3#include <vector>
4#include <string_view>
5#include <string>
6#include <locale>
7#include <fstream>
8#include <functional>
9#include <cstdarg>
10#include <iostream>
11#include <optional>
12#include <sstream>
13#include <cassert>
14#include <algorithm>
15
16// Objects
18 std::string value;
19 std::string opt;
20};
22 std::string key;
23 std::string type;
25};
27 std::string key;
28 std::string type_or_function;
30 bool is_extern = false;
31};
33 std::string function;
34};
36 std::string group_context_type;
37 std::string group_object_type;
41 std::vector<group_association> groups;
42 std::vector<value_association> values;
45};
46
47// Diagnostics
49 location_info() = default;
50 location_info(int _row, int _column) : row{ _row }, column{ _column } {}
51 int row = 0;
52 int column = 0;
53};
54
55// Tokenizer and lexer stage
64};
65struct token {
66 token_type type = token_type::none;
67 std::string data;
69
70static std::string_view get_type_name(token_type const& type) {
71 switch(type) {
72 case token_type::none: return "none";
73 case token_type::group_ident: return "group_ident";
74 case token_type::group_item_ident: return "group_item_ident";
75 case token_type::ident: return "ident";
76 case token_type::lparen: return "(";
77 case token_type::rparen: return ")";
78 case token_type::newline: return "newline";
79 }
80 std::abort();
81}
82};
83
85 std::ctype<char> const* char_facet = nullptr;
86 std::vector<token> tokens;
88 std::vector<group_contents> groups;
89 int error_count = 0;
91 std::stringstream console_stream;
92 std::string_view file_name;
93
94parser_state(std::string_view const _file_name)
95 : file_name{ _file_name }
96{
97 char_facet = &std::use_facet<std::ctype<char>>(std::locale("C"));
98}
99
100void report_any(std::string_view const severity, int code, location_info local_loc_info, std::string_view const fmt) {
102 if(local_loc_info.row > 0) {
103 if(local_loc_info.column > 0)
104 console_stream << "(" << std::to_string(local_loc_info.row) << "," << std::to_string(local_loc_info.column) << ")";
105 else
106 console_stream << "(" << std::to_string(local_loc_info.row) << ")";
107 }
108 console_stream << ": " << severity << " " << std::to_string(code) << ": " << fmt.data() << "\n";
109}
110
111void report_error(int code, location_info local_loc_info, std::string_view const fmt) {
112 report_any("error", code, local_loc_info, fmt);
113 error_count++;
114}
115
// Returns the zero-based offset of `it` from the start of `s`.
size_t get_column(std::string_view const s, std::string_view::iterator const it) {
	auto const offset = std::distance(s.begin(), it);
	return static_cast<size_t>(offset);
}
119
// Returns true when `c` may appear in an identifier: an alphanumeric
// character, '#', '@' or '_'.
bool is_ident(char c) {
	// <cctype> classifiers require a value representable as unsigned char (or
	// EOF); passing a negative plain char is undefined behaviour.
	auto const uc = static_cast<unsigned char>(c);
	return std::isalnum(uc) != 0 || c == '#' || c == '@' || c == '_';
}
123
124void tokenize_line(std::string_view const line) {
125 if(line.empty())
126 return;
127
128 auto it = line.begin();
129
130 while(it != line.cend()) {
131 token tok{};
132 // If a line starts with a space - it's a group identifier
133 if(it == line.begin() && (std::isspace(*it) || *it == ',')) {
134 tok.type = token_type::group_item_ident;
135 ++it;
136 } else if(it == line.begin()) {
137 tok.type = token_type::group_ident;
138 }
139
140 // Skip a single spacing character
141 if(std::isspace(*it) || *it == ',')
142 ++it;
143 if(it == line.cend())
144 break;
145
146 // Otherwise skip the rest...
147 while(std::isspace(*it) || *it == ',')
148 ++it;
149 if(it == line.cend()) {
150 report_error(101, location_info(loc_info.row, int(get_column(line, it))), "Trailing spaces\n");
151 break;
152 }
153
154 if(*it == '(') {
155 tok.type = token_type::lparen;
156 ++it;
157 } else if(*it == ')') {
158 tok.type = token_type::rparen;
159 ++it;
160 } else if(is_ident(*it)) {
161 if(tok.type != token_type::group_ident && tok.type != token_type::group_item_ident)
162 tok.type = token_type::ident;
163 auto start_ident = it;
164 while(it != line.cend() && is_ident(*it))
165 ++it;
166 auto start_idx = std::distance(line.begin(), start_ident);
167 auto end_idx = std::distance(line.begin(), it);
168 tok.data = line.substr(start_idx, end_idx - start_idx);
169
170 // Verify naming constraints for all identifiers
171 bool violated_casing = false;
172 for(auto const c : tok.data)
173 if(std::isalpha(c) && !std::islower(c)) {
174 violated_casing = true;
175 break;
176 }
177
178 if(violated_casing)
179 report_error(120, location_info(loc_info.row, int(get_column(line, it))), "Naming constraints violated '" + tok.data + "\n");
180 } else {
181 report_error(100, location_info(loc_info.row, int(get_column(line, it))), std::string() + "Unexpected token '" + *it + "'\n");
182 break;
183 }
184 tok.loc_info = loc_info;
185 tokens.push_back(tok);
186 }
187}
188
189void tokenize_file(std::stringstream& stream) {
190 for(std::string line; std::getline(stream, line); ) {
191 tokenize_line(line);
192
193 token tok{};
194 tok.loc_info = loc_info;
195 tok.type = token_type::newline;
196 tokens.push_back(tok);
197
198 ++loc_info.row;
199 }
200}
201
202std::optional<token> get_token(auto& it) {
203 return std::optional(*it++);
204}
205
206token get_specific_token(auto& it, bool& err_cond, token_type const& type) {
207 auto o = get_token(it);
208 if(!o.has_value()) {
209 // TODO: This is risky - but allows for accurate-ish reporting
210 report_error(109, it[-1].loc_info, std::string() + "Expected a '" + token::get_type_name(type).data() + "' token\n");
211 err_cond = true;
212 return token{};
213 }
214 token tok{o.value()};
215 if(tok.type != type) {
216 report_error(110, tok.loc_info, std::string() + "Expected a '" + token::get_type_name(type).data() + "' token, but found '" + token::get_type_name(tok.type).data() + "'\n");
217 err_cond = true;
218 return token{};
219 }
220 return tok;
221}
222
223void parse() {
224 auto it = tokens.begin();
225 while(it != tokens.end()) {
226 auto const key = get_token(it).value();
227 if(key.type == token_type::group_ident) {
228 if(key.data[0] == '#') {
229 groups.back().group_context_type = key.data.substr(1);
230 } else {
231 groups.emplace_back();
232 groups.back().group_object_type = key.data;
233 }
234 } else if(key.type == token_type::group_item_ident) {
235 if(groups.empty()) {
236 report_error(120, key.loc_info, "Item '" + key.data + "' defined before first group\n");
237 continue;
238 }
239
240 if(key.data == "#free") {
241 auto err_cond = false;
242 auto const type = get_specific_token(it, err_cond, token_type::ident);
243 auto const opt = get_specific_token(it, err_cond, token_type::ident);
244 auto const handler_type = get_specific_token(it, err_cond, token_type::ident);
245
246 auto const tmp = get_token(it).value_or(token{});
247 auto const handler_opt = tmp.type == token_type::lparen
248 ? get_specific_token(it, err_cond, token_type::ident)
249 : token{};
250 if(tmp.type == token_type::lparen)
251 get_specific_token(it, err_cond, token_type::rparen);
252
253 if(err_cond)
254 continue;
255
256 if(type.data == "parser" || type.data == "group") {
257 groups.back().set_handler = group_association{ key.data, opt.data, value_and_optional{handler_type.data, handler_opt.data}, false };
258 } else if(type.data == "extern") {
259 groups.back().set_handler = group_association{ key.data, opt.data, value_and_optional{handler_type.data, handler_opt.data}, true };
260 } else if(type.data == "value") {
261 groups.back().single_value_handler_type = opt.data;
262 groups.back().single_value_handler_result = value_and_optional{ handler_type.data, handler_opt.data };
263 } else {
264 report_error(102, type.loc_info, "Invalid #free type '" + type.data + "'\n");
265 }
266 } else if(key.data == "#base") {
267 auto err_cond = false;
268 auto const base_class_name = get_specific_token(it, err_cond, token_type::ident);
269 if(err_cond)
270 continue;
271
272 for(auto& g : groups) {
273 if(g.group_object_type == base_class_name.data) {
274 for(auto& v : g.groups)
275 groups.back().groups.push_back(v);
276 for(auto& v : g.values)
277 groups.back().values.push_back(v);
278 groups.back().any_group_handler = g.any_group_handler;
279 groups.back().any_value_handler = g.any_value_handler;
280 groups.back().single_value_handler_result = g.single_value_handler_result;
281 groups.back().single_value_handler_type = g.single_value_handler_type;
282 groups.back().any_value_handler = g.any_value_handler;
283 groups.back().set_handler = g.set_handler;
284 }
285 }
286 } else if(key.data == "#any") {
287 /* #any: type, opt, handler_type (handler_opt) */
288 auto err_cond = false;
289 auto const type = get_specific_token(it, err_cond, token_type::ident);
290 auto const opt = get_specific_token(it, err_cond, token_type::ident);
291 auto const handler_type = get_specific_token(it, err_cond, token_type::ident);
292
293 auto const tmp = get_token(it).value_or(token{});
294 auto const handler_opt = tmp.type == token_type::lparen
295 ? get_specific_token(it, err_cond, token_type::ident)
296 : token{};
297 if(tmp.type == token_type::lparen)
298 get_specific_token(it, err_cond, token_type::rparen);
299
300 if(err_cond)
301 continue;
302
303 static std::vector<std::string_view> const valid_group_handler_values = {
304 "discard", "member", "member_fn", "function"
305 };
306
307 if(type.data == "parser" || type.data == "group") {
308 groups.back().any_group_handler = group_association{ "", opt.data, value_and_optional{handler_type.data, handler_opt.data}, false };
309 } else if(type.data == "value") {
310 groups.back().any_value_handler = value_association{ "", opt.data, value_and_optional{handler_type.data, handler_opt.data} };
311 } else if(type.data == "extern") {
312 groups.back().any_group_handler = group_association{ "", opt.data, value_and_optional{handler_type.data, handler_opt.data}, true };
313
314 if(std::find(valid_group_handler_values.begin(), valid_group_handler_values.end(), groups.back().any_group_handler.handler.value) == valid_group_handler_values.end())
315 report_error(104, type.loc_info, "Unhandled #any group '" + type.data + "' with invalid handler_type '" + handler_type.data + "'\n");
316 } else {
317 report_error(103, type.loc_info, "Invalid #any type '" + type.data + "'\n");
318 }
319 } else {
320 /* key: type opt handler_type (handler_opt) */
321 auto err_cond = false;
322 auto const type = get_specific_token(it, err_cond, token_type::ident);
323 auto const opt = get_specific_token(it, err_cond, token_type::ident);
324 auto const handler_type = get_specific_token(it, err_cond, token_type::ident);
325
326 auto const tmp = get_token(it).value_or(token{});
327 auto const handler_opt = tmp.type == token_type::lparen
328 ? get_specific_token(it, err_cond, token_type::ident)
329 : token{};
330 if(tmp.type == token_type::lparen)
331 get_specific_token(it, err_cond, token_type::rparen);
332
333 if(std::find_if(groups.back().groups.begin(), groups.back().groups.end(), [&](auto const& g) { return g.key == key.data; }) != groups.back().groups.end()) {
334 report_error(116, type.loc_info, "Duplicate key '" + key.data + "' in group '" + groups.back().group_object_type + "'\n");
335 err_cond = true;
336 }
337
338 if(err_cond)
339 continue;
340
341 if(key.data.empty()) {
342 report_error(118, type.loc_info, "Empty member '" + type.data + "' in group '" + groups.back().group_object_type + "'\n");
343 continue;
344 }
345 if(type.data == "parser" || type.data == "group") {
346 groups.back().groups.push_back(group_association{ key.data, opt.data, value_and_optional{handler_type.data, handler_opt.data}, false });
347 } else if(type.data == "value") {
348 groups.back().values.push_back(value_association{ key.data, opt.data, value_and_optional{handler_type.data, handler_opt.data} });
349 } else if(type.data == "extern") {
350 groups.back().groups.push_back(group_association{ key.data, opt.data, value_and_optional{handler_type.data, handler_opt.data}, true });
351 } else {
352 report_error(104, type.loc_info, "Invalid #free type '" + type.data + "'\n");
353 }
354 }
355 } else if(key.type == token_type::newline) {
356 // ignore newline
357 } else {
358 report_error(120, key.loc_info, std::string() + "Unexpected token '" + token::get_type_name(key.type).data() + "'\n");
359 }
360 }
361}
362};
363
// Returns the two uppercase hex digits of `c` after bit 0x20 has been set
// (so 'A' and 'a' both give "61").
std::string char_to_hex(char c) {
	constexpr char digits[] = "0123456789ABCDEF";
	uint32_t const folded = c | 0x20;
	std::string hex;
	hex += digits[(folded >> 4) & 0xF];
	hex += digits[folded & 0xF];
	return hex;
}
372
373std::string string_to_hex(std::string_view const s, int32_t start, int32_t count) {
374 std::string res = "0x";
375 for(int32_t i = count - 1; i >= 0; --i)
376 res += char_to_hex(s[start + i]);
377 return res;
378}
379
380std::string final_match_condition_internal(std::string_view const key, int32_t starting_position, int32_t ending_position) {
381 if(starting_position >= ending_position)
382 return "";
383
384 if(ending_position - starting_position >= 8) {
385 return
386 " && (*(uint64_t const*)(&cur.content[" + std::to_string(starting_position) + "]) | uint64_t(0x2020202020202020) ) == uint64_t(" + string_to_hex(key, starting_position, 8) + ")"
387 + final_match_condition_internal(key, starting_position + 8, ending_position);
388 } else if(ending_position - starting_position >= 4) {
389 return
390 " && (*(uint32_t const*)(&cur.content[" + std::to_string(starting_position) + "]) | uint32_t(0x20202020) ) == uint32_t(" + string_to_hex(key, starting_position, 4) + ")"
391 + final_match_condition_internal(key, starting_position + 4, ending_position);
392 } else if(ending_position - starting_position >= 2) {
393 return
394 " && (*(uint16_t const*)(&cur.content[" + std::to_string(starting_position) + "]) | 0x2020 ) == " + string_to_hex(key, starting_position, 2)
395 + final_match_condition_internal(key, starting_position + 2, ending_position);
396 } else {
397 return " && (cur.content[" + std::to_string(starting_position) + "] | 0x20 ) == " + string_to_hex(key, starting_position, 1);
398 }
399}
400
401std::string final_match_condition(std::string_view const key, size_t starting_position, size_t ending_position) {
402 if(!ending_position)
403 ending_position = key.length();
404 assert(ending_position <= key.length());
405 assert(starting_position <= ending_position);
406 return std::string("(true") + final_match_condition_internal(key, int32_t(starting_position), int32_t(ending_position)) + ")";
407}
408
// Calls `fn(entry)` for every entry of `vector` whose `key` is exactly
// `length` characters long and starts with `prefix`. Characters are compared
// after OR-ing both sides with 0x20.
//
// vector - indexable container of objects with a std::string-like `key`
// prefix - leading characters the key must start with (may be empty)
// length - required key length
// fn     - callable invoked with each matching entry, in container order
template<typename V, typename F>
void enum_with_prefix(V const& vector, std::string_view const prefix, int32_t length, F const& fn) {
	for(int32_t i = 0; i < int32_t(vector.size()); ++i) {
		auto const& key = vector[i].key;
		if(int32_t(key.length()) != length)
			continue;
		// A prefix longer than the key can never match. Without this check the
		// loop below would index past the key (and a trailing ' ' would compare
		// equal to the string terminator once 0x20 is OR-ed in).
		if(prefix.length() > key.length())
			continue;

		bool match = true;
		for(int32_t j = 0; j < int32_t(prefix.length()); ++j) {
			if((key[j] | 0x20) != (prefix[j] | 0x20)) {
				match = false;
				break;
			}
		}

		if(match)
			fn(vector[i]);
	}
}
428
// Returns how many entries enum_with_prefix visits for the given `prefix`
// and `length`.
template<typename V>
int32_t count_with_prefix(V const& vector, std::string_view const prefix, int32_t length) {
	int32_t matches = 0;
	auto const tally = [&matches](auto const&) {
		matches += 1;
	};
	enum_with_prefix(vector, prefix, length, tally);
	return matches;
}
437
// Returns the length of the longest `key` among the entries of `vector`,
// or 0 when the container is empty.
template<typename V>
int32_t max_length(V const& vector) {
	int32_t longest = 0;
	for(auto const& entry : vector) {
		auto const current = int32_t(entry.key.length());
		if(current > longest)
			longest = current;
	}
	return longest;
}
445
447 std::string tabs;
448
450 tabs.push_back('\t');
451}
452
454 tabs.pop_back();
455}
456
457std::string tabulate(std::string_view const s) const {
458 return tabs + s.data();
459}
460
// Extends `prefix` one character at a time for as long as some character in
// the range 32..95 can be appended without changing the number of matching
// keys of the given `length`; returns the extended prefix.
template<typename V>
std::string get_match_tree_running_prefix(V const& vector, std::string prefix, int32_t length) {
	int32_t const expected = count_with_prefix(vector, prefix, length);
	int32_t candidate = 32;
	while(candidate <= 95) {
		std::string const extended = prefix + char(candidate);
		if(count_with_prefix(vector, extended, length) == expected) {
			// Every key that matched still matches: keep extending from here
			return get_match_tree_running_prefix(vector, extended, length);
		}
		++candidate;
	}
	return prefix;
}
473
// Builds the source text that dispatches on cur.content for the keys of
// `vector` that are exactly `length` characters long and start with `prefix`.
//
// vector          - entries with a `key` member
// generator_match - called with a matched entry; its returned text is emitted
//                   inside the "key fully matched" branch
// no_match        - text emitted in every else / default branch
// prefix          - characters already matched by the enclosing code
// length          - key length being handled
//
// For each candidate next character (32..95) one of three shapes is emitted:
// a shared-prefix `if` when every remaining key continues the same way, a
// `case` with a full comparison when exactly one key continues that way, or a
// `case` that recurses when several do.
//
// NOTE(review): the listing numbers embedded in this block skip values (for
// example 497, 499, 502 and 505), so statements may be missing from this view
// - confirm against the original file before relying on it.
474template<typename V, typename F>
475std::string construct_match_tree_internal(V const& vector, F const& generator_match, std::string_view const no_match, std::string_view const prefix, int32_t length) {
476 int32_t top_count = count_with_prefix(vector, prefix, length);
477 std::string output;
// Set once the opening "switch(...) {" has been emitted, so it is written at
// most once and closed at the end only when it was opened
478 bool has_switch = false;
479 for(int32_t c = 32; c <= 95; ++c) {
480 int32_t count = count_with_prefix(vector, std::string(prefix) + char(c), length);
481 if(count == 0) {
482 // skip
483 } else if(top_count == count) {
484 // Obtain the prefix that is equal on all the branches, for example if the branching options were
485 // namefoo
486 // namebar
487 // nameowa
488 //
489 // Then our running prefix would be [name] - instead of checking every character at a time
490 auto running_prefix = get_match_tree_running_prefix(vector, std::string(prefix), length);
491 assert(!running_prefix.empty());
492 output += tabulate("// " + running_prefix + "\n");
493 assert(running_prefix.length() > prefix.length() && running_prefix != prefix);
494
495 output += tabulate("// running - " + running_prefix.substr(prefix.length()) + "\n");
// Compare only the part of the running prefix that has not been matched yet
496 output += tabulate("if(" + final_match_condition(running_prefix, prefix.length(), running_prefix.length()) + ") {\n");
498 output += construct_match_tree_internal(vector, generator_match, no_match, running_prefix, length);
500
501 output += tabulate("} else {\n");
503 output += tabulate(std::string(no_match) + "\n");
504 output += tabulate("}\n");
506 } else if(count == 1) {
507 if(!has_switch) {
508 output += tabulate("switch(0x20 | int32_t(cur.content[" + std::to_string(prefix.length()) + "])) {\n");
509 has_switch = true;
510 }
511
512 output += tabulate("case 0x" + char_to_hex(char(c)) + ":\n");
// Exactly one key continues with this character: compare the rest of it
514 enum_with_prefix(vector, std::string(prefix) + char(c), length, [&](auto& v) {
515 output += tabulate("// " + v.key + "\n");
516 output += tabulate("if(" + final_match_condition(v.key, prefix.length() + 1, 0) + ") {\n");
518 output += tabulate(generator_match(v) + "\n");
520 output += tabulate("} else {\n");
522 output += tabulate(std::string(no_match) + "\n");
524 output += tabulate("}\n");
525 });
526 output += tabulate("break;\n");
528 } else {
529 if(!has_switch) {
530 output += tabulate("switch(0x20 | int32_t(cur.content[" + std::to_string(prefix.length()) + "])) {\n");
531 has_switch = true;
532 }
533
534 output += tabulate("case 0x" + char_to_hex(char(c)) + ":\n");
// Several keys continue with this character: recurse with the longer prefix
536 output += construct_match_tree_internal(vector, generator_match, no_match, std::string(prefix) + char(c), length);
537 output += tabulate("break;\n");
539 }
540 }
541
542 if(has_switch) {
543 output += tabulate("default:\n");
545 output += tabulate(std::string(no_match) + "\n");
546 output += tabulate("break;\n");
548 output += tabulate("}\n");
549 }
550 return output;
551}
552
// Builds the source text of the outer "switch(int32_t(cur.content.length()))"
// that dispatches on key length, with one case per length that at least one
// key of `vector` has.
//
// vector          - entries with a `key` member
// generator_match - called with a matched entry; its returned text is emitted
//                   in the matching branch
// no_match        - text emitted in every else / default branch
//
// NOTE(review): the listing numbers embedded in this block skip values (for
// example 562, 566 and 568), so statements may be missing from this view -
// confirm against the original file before relying on it.
553std::string construct_match_tree_outer(auto const& vector, auto const& generator_match, std::string_view const no_match) {
554 auto const maxlen = max_length(vector);
555 std::string output = tabulate("switch(int32_t(cur.content.length())) {\n");
556 for(int32_t l = 1; l <= maxlen; ++l) {
// An empty prefix matches every key, so this counts the keys of length l
557 int32_t count = count_with_prefix(vector, "", l);
558 if(count == 0) {
559 // skip
560 } else if(count == 1) {
// A single key has this length: compare the whole key in one condition
561 output += tabulate("case " + std::to_string(l) + ":\n");
563 enum_with_prefix(vector, "", l, [&](auto& v) {
564 output += tabulate("// " + v.key + "\n");
565 output += tabulate("if(" + final_match_condition(v.key, 0, 0) + ") {\n");
567 output += tabulate(generator_match(v) + "\n");
569 output += tabulate("} else {\n");
571 output += tabulate(std::string(no_match) + "\n");
573 output += tabulate("}\n");
574 });
575 output += tabulate("break;\n");
577 } else {
// Several keys share this length: emit a per-character decision tree
578 output += tabulate("case " + std::to_string(l) + ":\n");
580 output += construct_match_tree_internal(vector, generator_match, no_match, "", l);
581 output += tabulate("break;\n");
583 }
584 }
585 output += tabulate("default:\n");
587 output += tabulate(std::string(no_match) + "\n");
588 output += tabulate("break;\n");
590 output += tabulate("}\n");
591 return output;
592}
593
// Generates the C++ source text for all parsed groups and writes it to
// `stream` in a single write.
//
// The output is `#include "parsers.hpp"` followed by `namespace parsers`
// containing one `parse_<group_object_type>` function per group. A group with
// an empty group_context_type becomes a template over the context type C;
// otherwise the context parameter has the stated type.
//
// stream - destination for the generated text
// groups - group descriptions to generate functions for
//
// NOTE(review): the listing numbers embedded in this block skip values (for
// example 788-791, 793-796, 848-851 and 853-856), so statements may be missing
// from this view - confirm against the original file before relying on it.
594void file_write_out(std::fstream& stream, std::vector<group_contents>& groups) {
595 // process the parsed content into the generated file
596 std::string output;
597 output += "#include \"parsers.hpp\"\n";
598 // output += "#pragma warning( push )\n";
599 // output += "#pragma warning( disable : 4065 )\n";
600 // output += "#pragma warning( disable : 4189 )\n";
601 output += "\n";
602 output += "namespace parsers {\n";
603 // fn bodies
// declared_groups[k] is set once a declaration or the definition for
// groups[k] has been emitted
604 std::vector<bool> declared_groups(groups.size(), false);
605 for(size_t i = 0; i < groups.size(); i++) {
606 auto const& g = groups[i];
607 // declare fns only when needed
608 for(size_t j = 0; j < groups.size(); j++)
609 if(!declared_groups[j]) {
610 bool fwd_decl = false;
611 auto const& f = groups[j];
// A group is treated as referenced when its object type equals the
// handler.opt of one of g's handlers or associations
612 fwd_decl = f.group_object_type == g.any_group_handler.handler.opt
613 || f.group_object_type == g.set_handler.handler.opt
614 || f.group_object_type == g.any_value_handler.handler.opt;
615 for(const auto& e : g.groups)
616 fwd_decl = fwd_decl || f.group_object_type == e.handler.opt;
617 for(const auto& e : g.values)
618 fwd_decl = fwd_decl || f.group_object_type == e.handler.opt;
619 if(fwd_decl) {
620 declared_groups[j] = fwd_decl;
// NOTE(review): the declaration emitted below names `g` (the group being
// generated), not `f` (the group found to be referenced) - confirm that this
// is intended.
621 if(g.group_context_type.empty()) {
622 output += "template<typename C>\n";
623 output += g.group_object_type + " parse_" + g.group_object_type + "(token_generator& gen, error_handler& err, C&& context);\n";
624 } else {
625 output += g.group_object_type + " parse_" + g.group_object_type + "(token_generator& gen, error_handler& err, " + g.group_context_type + "&& context);\n";
626 }
627 }
628 }
629 declared_groups[i] = true;
630
// Function header: templated on the context type unless the group names one
631 if(g.group_context_type.empty()) {
632 output += "template<typename C>\n";
633 output += g.group_object_type + " parse_" + g.group_object_type + "(token_generator& gen, error_handler& err, C&& context) {\n";
634 } else {
635 output += g.group_object_type + " parse_" + g.group_object_type + "(token_generator& gen, error_handler& err, " + g.group_context_type + "&& context) {\n";
636 }
637
638 output += "\t" + g.group_object_type + " cobj;\n";
639 output += "\tfor(token_and_type cur = gen.get(); cur.type != token_type::unknown && cur.type != token_type::close_brace; cur = gen.get()) {\n";
640 // case: free group
641 output += "\t\tif(cur.type == token_type::open_brace) { \n";
642 {
643 // set_handler.handler.value = discard | member | ...
644 // set_handler.handler.opt = name of non default member target / fn name to call w/ results
645 // set_handler.type_or_function = function to call on "gen" to process it (with extern == true, wont prepend parse)
646 std::string set_effect;
647 if(g.set_handler.is_extern == false) {
648 if(g.set_handler.handler.value == "discard") {
649 set_effect = "gen.discard_group();";
650 } else if(g.set_handler.handler.value == "member") {
651 set_effect = "cobj." +
652 (g.set_handler.handler.opt.length() > 0 ? g.set_handler.handler.opt : std::string("free_group")) +
653 " = parse_" + g.set_handler.type_or_function + "(gen, err, context);";
654 } else if(g.set_handler.handler.value == "member_fn") {
655 set_effect = "cobj." +
656 (g.set_handler.handler.opt.length() > 0 ? g.set_handler.handler.opt : std::string("free_group")) +
657 "(parse_" + g.set_handler.type_or_function + "(gen, err, context), err, cur.line, context);";
658 } else if(g.set_handler.handler.value == "function") {
659 set_effect =
660 (g.set_handler.handler.opt.length() > 0 ? g.set_handler.handler.opt : std::string("free_group")) +
661 "(cobj, parse_" + g.set_handler.type_or_function + "(gen, err, context), err, cur.line, context);";
662 } else {
663 set_effect = "err.unhandled_free_group(cur); gen.discard_group();";
664 }
665 } else {
666 if(g.set_handler.handler.value == "discard") {
667 set_effect = "gen.discard_group();";
668 } else if(g.set_handler.handler.value == "member") {
669 set_effect = "cobj." +
670 (g.set_handler.handler.opt.length() > 0 ? g.set_handler.handler.opt : std::string("free_group")) +
671 " = " + g.set_handler.type_or_function + "(gen, err, context);";
672 } else if(g.set_handler.handler.value == "member_fn") {
673 set_effect = "cobj." +
674 (g.set_handler.handler.opt.length() > 0 ? g.set_handler.handler.opt : std::string("free_group")) +
675 "(" + g.set_handler.type_or_function + "(gen, err, context), err, cur.line, context);";
676 } else if(g.set_handler.handler.value == "function") {
677 set_effect =
678 (g.set_handler.handler.opt.length() > 0 ? g.set_handler.handler.opt : std::string("free_group")) +
679 "(cobj, " + g.set_handler.type_or_function + "(gen, err, context), err, cur.line, context);";
680 } else {
681 set_effect = "err.unhandled_free_group(cur); gen.discard_group();";
682 }
683 }
684 output += "\t\t\t" + set_effect + "\n";
685 output += "\t\t\tcontinue;\n";
686 output += "\t\t}\n";
687 }
688 output += "\t\tauto peek_result = gen.next();\n";
689 output += "\t\tif(peek_result.type == token_type::special_identifier) {\n"; // start next token if
690 output += "\t\t\tauto peek2_result = gen.next_next();\n";
691 output += "\t\t\tif(peek2_result.type == token_type::open_brace) {\n";
692 // match groups
693 output += "\t\t\t\tgen.get(); gen.get();\n";
694 {
695 /*
696 #any: type, opt, handler_type (handler_opt)
697
698 type = (parser, extern -> groups, value -> values)
699
700 key = association.key
701 opt = association.type_or_function
702 handler_type = handler.value
703 handler_opt = handler.opt
704 */
// Text emitted when a keyed group does not match any known key
705 std::string no_match_effect;
706 if(g.any_group_handler.is_extern == false) {
707 if(g.any_group_handler.handler.value == "discard") {
708 no_match_effect = "gen.discard_group();";
709 } else if(g.any_group_handler.handler.value == "member") {
710 no_match_effect = "cobj." +
711 (g.any_group_handler.handler.opt.length() > 0 ? g.any_group_handler.handler.opt : std::string("any_group")) +
712 " = parse_" + g.any_group_handler.type_or_function + "(gen, err, context);";
713 } else if(g.any_group_handler.handler.value == "member_fn") {
714 no_match_effect = "cobj." +
715 (g.any_group_handler.handler.opt.length() > 0 ? g.any_group_handler.handler.opt : std::string("any_group")) +
716 "(cur.content, parse_" + g.any_group_handler.type_or_function + "(gen, err, context), err, cur.line, context);";
717 } else if(g.any_group_handler.handler.value == "function") {
718 no_match_effect =
719 (g.any_group_handler.handler.opt.length() > 0 ? g.any_group_handler.handler.opt : std::string("any_group")) +
720 "(cobj, cur.content, parse_" + g.any_group_handler.type_or_function + "(gen, err, context), err, cur.line, context);";
721 } else {
722 no_match_effect = "err.unhandled_group_key(cur); gen.discard_group();";
723 }
724 } else {
725 if(g.any_group_handler.handler.value == "discard") {
726 no_match_effect = g.any_group_handler.type_or_function + "(cur.content, gen, err, context);";
727 } else if(g.any_group_handler.handler.value == "member") {
728 no_match_effect = "cobj." +
729 (g.any_group_handler.handler.opt.length() > 0 ? g.any_group_handler.handler.opt : std::string("any_group")) +
730 " = " + g.any_group_handler.type_or_function + "(cur.content, gen, err, context);";
731 } else if(g.any_group_handler.handler.value == "member_fn") {
732 no_match_effect = "cobj." +
733 (g.any_group_handler.handler.opt.length() > 0 ? g.any_group_handler.handler.opt : std::string("any_group")) +
734 "(cur.content, " + g.any_group_handler.type_or_function + "(cur.content, gen, err, context), err, cur.line, context);";
735 } else if(g.any_group_handler.handler.value == "function") {
736 no_match_effect =
737 (g.any_group_handler.handler.opt.length() > 0 ? g.any_group_handler.handler.opt : std::string("any_group")) +
738 "(cobj, cur.content, " + g.any_group_handler.type_or_function + "(cur.content, gen, err, context), err, cur.line, context);";
739 } else {
740 no_match_effect = "err.unhandled_group_key(cur); gen.discard_group();";
741 }
742 }
743 /*
744 key: type, opt, handler_type (handler_opt)
745
746 type = (group, extern -> groups, value -> values)
747
748 key = association.key
749 opt = association.type_or_function
750 handler_type = handler.value
751 handler_opt = handler.opt
752 */
// Returns the statement emitted when a keyed group matches `v`; the target
// name falls back to the key when handler.opt is empty
753 auto match_handler = [](group_association const& v) {
754 std::string out;
755 if(v.is_extern) {
756 if(v.handler.value == "discard") {
757 out = v.type_or_function + "(gen, err, context);";
758 } else if(v.handler.value == "member") {
759 out = "cobj." + (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
760 " = " + v.type_or_function + "(gen, err, context);";
761 } else if(v.handler.value == "member_fn") {
762 out = "cobj." + (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
763 "(" + v.type_or_function + "(gen, err, context), err, cur.line, context);";
764 } else if(v.handler.value == "function") {
765 out = (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
766 "(cobj, " + v.type_or_function + "(gen, err, context), err, cur.line, context);";
767 } else {
768 out = "err.unhandled_group_key(cur);";
769 }
770 } else {
771 if(v.handler.value == "discard") {
772 out = "gen.discard_group();";
773 } else if(v.handler.value == "member") {
774 out = "cobj." + (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
775 " = parse_" + v.type_or_function + "(gen, err, context);";
776 } else if(v.handler.value == "member_fn") {
777 out = "cobj." + (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
778 "(parse_" + v.type_or_function + "(gen, err, context), err, cur.line, context);";
779 } else if(v.handler.value == "function") {
780 out = (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
781 "(cobj, parse_" + v.type_or_function + "(gen, err, context), err, cur.line, context);";
782 } else {
783 out = "err.unhandled_group_key(cur);";
784 }
785 }
786 return out;
787 };
792 output += construct_match_tree_outer(g.groups, match_handler, no_match_effect);
797 }
798 output += "\t\t\t} else {\n"; // next next != open brace
799 output += "\t\t\t\tauto const assoc_token = gen.get();\n";
800 output += "\t\t\t\tauto const assoc_type = parse_association_type(assoc_token.content, assoc_token.line, err);\n";
801 output += "\t\t\t\tauto const rh_token = gen.get();\n";
802 // match values
803 {
// Text emitted when a keyed value does not match any known key; left empty
// for "discard"
804 std::string no_match_effect;
805 if(g.any_value_handler.handler.value == "discard") {
806 } else if(g.any_value_handler.handler.value == "member") {
807 no_match_effect = "cobj." +
808 (g.any_value_handler.handler.opt.length() > 0 ? g.any_value_handler.handler.opt : std::string("any_value")) +
809 " = parse_" + g.any_value_handler.type + "(rh_token.content, rh_token.line, err);";
810 } else if(g.any_value_handler.handler.value == "member_fn") {
811 no_match_effect = "cobj." +
812 (g.any_value_handler.handler.opt.length() > 0 ? g.any_value_handler.handler.opt : std::string("any_value")) +
813 "(cur.content, assoc_type, parse_" + g.any_value_handler.type + "(rh_token.content, rh_token.line, err), err, cur.line, context);";
814 } else if(g.any_value_handler.handler.value == "function") {
815 no_match_effect =
816 (g.any_value_handler.handler.opt.length() > 0 ? g.any_value_handler.handler.opt : std::string("any_value")) +
817 "(cobj, cur.content, assoc_type, parse_" + g.any_value_handler.type + "(rh_token.content, rh_token.line, err), err, cur.line, context);";
818 } else {
819 no_match_effect = "err.unhandled_association_key(cur);";
820 }
821 /*
822 key: type, opt, handler_type (handler_opt)
823
824 type = (group, extern -> groups, value -> values)
825
826 key = association.key
827 opt = association.type
828 handler_type = handler.value
829 handler_opt = handler.opt
830 */
// Returns the statement emitted when a keyed value matches `v`; empty for
// "discard"
831 auto match_handler = [](value_association const& v) {
832 std::string out;
833 if(v.handler.value == "discard") {
834 } else if(v.handler.value == "member") {
835 out = "cobj." + (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
836 " = parse_" + v.type + "(rh_token.content, rh_token.line, err);";
837 } else if(v.handler.value == "member_fn") {
838 out = "cobj." + (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
839 "(assoc_type, parse_" + v.type + "(rh_token.content, rh_token.line, err), err, cur.line, context);";
840 } else if(v.handler.value == "function") {
841 out = (v.handler.opt.length() > 0 ? v.handler.opt : v.key) +
842 "(cobj, assoc_type, parse_" + v.type + "(rh_token.content, rh_token.line, err), err, cur.line, context);";
843 } else {
844 out = "err.unhandled_association_key(cur);";
845 }
846 return out;
847 };
852 output += construct_match_tree_outer(g.values, match_handler, no_match_effect);
857 }
858 output += "\t\t\t}\n"; // end next next
859 output += "\t\t} else {\n"; // next != special identifier
860 // case: free value;
// With no free-value handler configured the generated code reports the value
// as unhandled; "discard" emits nothing
861 if(g.single_value_handler_result.value.length() > 0) {
862 if(g.single_value_handler_result.value == "discard") {
863 // do nothing
864 } else if(g.single_value_handler_result.value == "member") {
865 output += "\t\t\tcobj." +
866 (g.single_value_handler_result.opt.length() > 0 ? g.single_value_handler_result.opt : std::string("free_value")) +
867 "= parse_" + g.single_value_handler_type + "(cur.content, cur.line, err);\n";
868 } else if(g.single_value_handler_result.value == "member_fn") {
869 output += "\t\t\tcobj." +
870 (g.single_value_handler_result.opt.length() > 0 ? g.single_value_handler_result.opt : std::string("free_value")) +
871 "(parse_" + g.single_value_handler_type + "(cur.content, cur.line, err), err, cur.line, context);\n";
872 } else if(g.single_value_handler_result.value == "function") {
873 output += "\t\t\t" +
874 (g.single_value_handler_result.opt.length() > 0 ? g.single_value_handler_result.opt : std::string("free_value")) +
875 "(cobj, parse_" + g.single_value_handler_type + "(cur.content, cur.line, err), err, cur.line, context);\n";
876 }
877 } else {
878 output += "\t\t\terr.unhandled_free_value(cur);\n"; // end next token if
879 }
880 output += "\t\t}\n"; // end next token if
881 output += "\t}\n"; // end token loop
882 output += "\tcobj.finish(context);\n";
883 output += "\treturn cobj;\n";
884 output += "}\n"; // end fn
885 }
886 output += "}\n"; // end namespace
887 // output += "#pragma warning( pop )\n";
888
889 //newline at end of file
890 output += "\n";
891 stream.write(output.data(), output.size());
892}
893};
894
895int main(int argc, char *argv[]) {
896 if(argc > 1) {
897 auto const input_filename = std::string(argv[1]);
898 std::string output_filename;
899 if(argc > 2) {
900 output_filename = std::string(argv[2]);
901 } else {
902 output_filename = std::string(argv[1]);
903 if(output_filename.length() >= 4 && output_filename[output_filename.length() - 4] == '.') {
904 output_filename.pop_back();
905 output_filename.pop_back();
906 output_filename.pop_back();
907 output_filename.pop_back();
908 }
909 output_filename += "_generated.hpp";
910 }
911
912 std::fstream input_file;
913 input_file.open(input_filename, std::ios::in);
914
915 std::fstream output_file;
916 output_file.open(output_filename, std::ios::out);
917
918 std::stringstream file_contents_stream{std::string((std::istreambuf_iterator<char>(input_file)), std::istreambuf_iterator<char>{})};
919 parser_state state(input_filename);
920 state.tokenize_file(file_contents_stream);
921 state.parse();
922
923 std::cout << state.console_stream.str() << std::endl;
924 if(state.error_count > 0)
925 std::exit(EXIT_FAILURE);
926
927 cxx_tree_builder tree_builder{};
928 tree_builder.file_write_out(output_file, state.groups);
929 } else {
930 fprintf(stderr, "Usage: %s <input> [output]\n", argv[0]);
931 }
932 return 0;
933}
#define assert(condition)
Definition: debug.h:74
uint uint32_t
int main(int argc, char *argv[])
std::string final_match_condition(std::string_view const key, size_t starting_position, size_t ending_position)
std::string final_match_condition_internal(std::string_view const key, int32_t starting_position, int32_t ending_position)
std::string string_to_hex(std::string_view const s, int32_t start, int32_t count)
std::string char_to_hex(char c)
int32_t max_length(V const &vector)
void enum_with_prefix(V const &vector, std::string_view const prefix, int32_t length, F const &fn)
int32_t count_with_prefix(V const &vector, std::string_view const prefix, int32_t length)
token_type
@ group_item_ident
@ group_ident
@ newline
@ none
@ lparen
@ rparen
@ ident
std::string construct_match_tree_internal(V const &vector, F const &generator_match, std::string_view const no_match, std::string_view const prefix, int32_t length)
std::string construct_match_tree_outer(auto const &vector, auto const &generator_match, std::string_view const no_match)
std::string tabulate(std::string_view const s) const
std::string get_match_tree_running_prefix(V const &vector, std::string prefix, int32_t length)
void file_write_out(std::fstream &stream, std::vector< group_contents > &groups)
std::string function
std::string type_or_function
value_and_optional handler
std::string single_value_handler_type
std::string group_context_type
group_association set_handler
std::vector< value_association > values
std::string group_object_type
value_association any_value_handler
value_and_optional single_value_handler_result
std::vector< group_association > groups
group_association any_group_handler
location_info(int _row, int _column)
location_info()=default
size_t get_column(std::string_view const s, std::string_view::iterator const it)
void tokenize_file(std::stringstream &stream)
bool is_ident(char c)
void report_any(std::string_view const severity, int code, location_info local_loc_info, std::string_view const fmt)
void tokenize_line(std::string_view const line)
location_info loc_info
std::string_view file_name
std::ctype< char > const * char_facet
void report_error(int code, location_info local_loc_info, std::string_view const fmt)
std::vector< group_contents > groups
token get_specific_token(auto &it, bool &err_cond, token_type const &type)
parser_state(std::string_view const _file_name)
std::optional< token > get_token(auto &it)
std::stringstream console_stream
std::vector< token > tokens
location_info loc_info
static std::string_view get_type_name(token_type const &type)
std::string data
token_type type
value_and_optional handler