Project Alice
Loading...
Searching...
No Matches
parsers.cpp
Go to the documentation of this file.
1#include "parsers.hpp"
2#include "nations.hpp"
3#include <charconv>
4#include <algorithm>
5
6namespace parsers {
// Reports whether `c` is filler that may be skipped between tokens:
// space, carriage return, form feed, newline, tab, comma or semicolon.
bool ignorable_char(char c) {
	switch(c) {
	case ' ':
	case '\r':
	case '\f':
	case '\n':
	case '\t':
	case ',':
	case ';':
		return true;
	default:
		return false;
	}
}
10
// Reports whether `c` is one of the operator characters '!', '=', '<' or '>'.
bool special_identifier_char(char c) {
	return (c == '!') || (c == '=') || (c == '<') || (c == '>');
}
14
15bool breaking_char(char c) {
16 return ignorable_char(c) || (c == '{') || (c == '}') || special_identifier_char(c) || (c == '#');
17}
18
20 return !special_identifier_char(c);
21}
22
// Reports whether `c` is a carriage return or a newline.
bool line_termination(char c) {
	switch(c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
26
// Reports whether `c` ends a double-quoted string: the closing '"' itself,
// or a carriage return / newline (quoted strings do not span lines).
bool double_quote_termination(char c) {
	return (c == '\r') || (c == '\n') || (c == '\"');
}
30
// Reports whether `c` ends a single-quoted string: the closing '\'' itself,
// or a carriage return / newline (quoted strings do not span lines).
bool single_quote_termination(char c) {
	return (c == '\r') || (c == '\n') || (c == '\'');
}
34
// Reports whether [start, end) is a non-empty run consisting solely of the
// ASCII digits '0'..'9'. Returns false for an empty range.
//
// The digits are compared directly instead of going through isdigit():
// passing a negative plain `char` (any byte >= 0x80 where char is signed)
// to isdigit() is undefined behaviour, and its result depends on the locale.
bool is_positive_integer(char const* start, char const* end) {
	if(start == end)
		return false;
	for(; start < end; ++start) {
		if(*start < '0' || *start > '9')
			return false;
	}
	return true;
}
45
46bool is_integer(char const* start, char const* end) {
47 if(start == end)
48 return false;
49 if(*start == '-')
50 return is_positive_integer(start + 1, end);
51 else
52 return is_positive_integer(start, end);
53}
54
55bool is_positive_fp(char const* start, char const* end) {
56 auto const decimal = std::find(start, end, '.');
57 if(decimal == end) {
58 return is_positive_integer(start, end);
59 } else if(decimal == start) {
60 return is_positive_integer(decimal + 1, end);
61 } else {
62 return is_positive_integer(start, decimal) && (decimal + 1 == end || is_positive_integer(decimal + 1, end));
63 }
64}
65
66bool is_fp(char const* start, char const* end) {
67 if(start == end)
68 return false;
69 if(*start == '-')
70 return is_positive_fp(start + 1, end);
71 else
72 return is_positive_fp(start, end);
73}
74
// Walks forward from `start` and returns the first position in [start, end)
// whose character satisfies `condition`, or the stopping position if none does.
// `current_line` is incremented once for every '\n' that is stepped over; a
// '\n' that itself satisfies `condition` is returned without being counted.
template<typename T>
char const* scan_for_match(char const* start, char const* end, int32_t& current_line, T&& condition) {
	char const* cursor = start;
	for(; cursor < end; ++cursor) {
		if(condition(*cursor))
			break;
		if(*cursor == '\n')
			++current_line;
	}
	return cursor;
}
// Walks forward from `start` and returns the first position in [start, end)
// whose character does NOT satisfy `condition`, or the stopping position if
// every character satisfies it. `current_line` is incremented once for every
// '\n' that is stepped over.
template<typename T>
char const* scan_for_not_match(char const* start, char const* end, int32_t& current_line, T&& condition) {
	char const* cursor = start;
	for(; cursor < end; ++cursor) {
		if(!condition(*cursor))
			break;
		if(*cursor == '\n')
			++current_line;
	}
	return cursor;
}
97
98char const* advance_position_to_next_line(char const* start, char const* end, int32_t& current_line) {
99 auto const start_lterm = scan_for_match(start, end, current_line, line_termination);
100 return scan_for_not_match(start_lterm, end, current_line, line_termination);
101}
102
103char const* advance_position_to_non_whitespace(char const* start, char const* end, int32_t& current_line) {
104 return scan_for_not_match(start, end, current_line, ignorable_char);
105}
106
107char const* advance_position_to_non_comment(char const* start, char const* end, int32_t& current_line) {
108 auto position = advance_position_to_non_whitespace(start, end, current_line);
109 while(position < end && *position == '#') {
110 auto start_of_new_line = advance_position_to_next_line(position, end, current_line);
111 position = advance_position_to_non_whitespace(start_of_new_line, end, current_line);
112 }
113 return position;
114}
115
116char const* advance_position_to_breaking_char(char const* start, char const* end, int32_t& current_line) {
117 return scan_for_match(start, end, current_line, breaking_char);
118}
119
120token_and_type token_generator::internal_next() {
121 if(position >= file_end)
122 return token_and_type{std::string_view(), current_line, token_type::unknown};
123
124 auto non_ws = advance_position_to_non_comment(position, file_end, current_line);
125 if(non_ws < file_end) {
126 if(*non_ws == '{') {
127 position = non_ws + 1;
128 return token_and_type{std::string_view(non_ws, 1), current_line, token_type::open_brace};
129 } else if(*non_ws == '}') {
130 position = non_ws + 1;
131 return token_and_type{std::string_view(non_ws, 1), current_line, token_type::close_brace};
132 } else if(*non_ws == '\"') {
133 auto const close = scan_for_match(non_ws + 1, file_end, current_line, double_quote_termination);
134 position = close + 1;
135 return token_and_type{std::string_view(non_ws + 1, close - (non_ws + 1)), current_line, token_type::quoted_string};
136 } else if(*non_ws == '\'') {
137 auto const close = scan_for_match(non_ws + 1, file_end, current_line, single_quote_termination);
138 position = close + 1;
139 return token_and_type{std::string_view(non_ws + 1, close - (non_ws + 1)), current_line, token_type::quoted_string};
140 } else if(has_fixed_prefix(non_ws, file_end, "==") || has_fixed_prefix(non_ws, file_end, "<=") ||
141 has_fixed_prefix(non_ws, file_end, ">=") || has_fixed_prefix(non_ws, file_end, "<>") ||
142 has_fixed_prefix(non_ws, file_end, "!=")) {
143
144 position = non_ws + 2;
145 return token_and_type{std::string_view(non_ws, 2), current_line, token_type::special_identifier};
146 } else if(*non_ws == '<' || *non_ws == '>' || *non_ws == '=') {
147
148 position = non_ws + 1;
149 return token_and_type{std::string_view(non_ws, 1), current_line, token_type::special_identifier};
150 } else {
151 position = advance_position_to_breaking_char(non_ws + 1, file_end, current_line);
152 return token_and_type{std::string_view(non_ws, position - non_ws), current_line, token_type::identifier};
153 }
154 } else {
155 position = file_end;
156 return token_and_type{std::string_view(), current_line, token_type::unknown};
157 }
158}
159
161 if(peek_1.type != token_type::unknown) {
162 auto const temp = peek_1;
163 peek_1 = peek_2;
164 peek_2.type = token_type::unknown;
165 return temp;
166 }
167
168 return internal_next();
169}
170
172 if(peek_1.type == token_type::unknown) {
173 peek_1 = internal_next();
174 }
175 return peek_1;
176}
177
179 if(peek_1.type == token_type::unknown) {
180 peek_1 = internal_next();
181 }
182 if(peek_2.type == token_type::unknown) {
183 peek_2 = internal_next();
184 }
185 return peek_2;
186}
187
// NOTE(review): the signature line of this definition is absent from this listing
// (the embedded numbering jumps from 187 to 189) — restore it from the upstream file.
// Body: consumes tokens with get() until the brace depth drops below zero
// (i.e. an unmatched close brace has been consumed) or at_end() reports true.
189 int32_t brace_count = 0;
190
// brace_count is the nesting depth relative to the starting point.
191 while(brace_count >= 0 && !at_end()) {
192 auto gotten = get();
193 if(gotten.type == token_type::open_brace) {
194 brace_count++;
195 } else if(gotten.type == token_type::close_brace) {
196 brace_count--;
197 }
198 }
199}
200
201bool parse_bool(std::string_view content, int32_t, error_handler&) {
202 if(content.length() == 0)
203 return false;
204 else
205 return (content[0] == 'Y') || (content[0] == 'y') || (content[0] == '1');
206}
207
208float parse_float(std::string_view content, int32_t line, error_handler& err) {
209 float rvalue = 0.0f;
210
211 if(!float_from_chars(content.data(), content.data() + content.length(), rvalue)) {
212 err.bad_float(content, line);
213 }
214
215 return rvalue;
216}
217
218double parse_double(std::string_view content, int32_t line, error_handler& err) {
219 double rvalue = 0.0;
220 if(!double_from_chars(content.data(), content.data() + content.length(), rvalue)) {
221 err.bad_float(content, line);
222 }
223 return rvalue;
224}
225
226int32_t parse_int(std::string_view content, int32_t line, error_handler& err) {
227 int32_t rvalue = 0;
228 auto result = std::from_chars(content.data(), content.data() + content.length(), rvalue);
229 if(result.ec == std::errc::invalid_argument) {
230 err.bad_int(content, line);
231 }
232 return rvalue;
233}
234
235uint32_t parse_uint(std::string_view content, int32_t line, error_handler& err) {
236 uint32_t rvalue = 0;
237 auto result = std::from_chars(content.data(), content.data() + content.length(), rvalue);
238 if(result.ec == std::errc::invalid_argument) {
239 err.bad_unsigned_int(content, line);
240 }
241 return rvalue;
242}
243
244uint32_t parse_tag(std::string_view tag, int32_t line, error_handler& err) {
245 if(tag.length() != 3) {
246 err.accumulated_errors +=
247 err.file_name + " line " + std::to_string(line) + ": encountered a tag that was not three characters\n";
248 return 0;
249 }
250 return nations::tag_to_int(tag[0], tag[1], tag[2]);
251}
252
253sys::year_month_day parse_date(std::string_view content, int32_t line, error_handler& err) {
254 auto position = content.data();
255 auto value_end = position + content.length();
256
257 for(; position < value_end && !isdigit(*position); ++position) // advance to year
258 ;
259 auto year_start = position;
260 for(; position < value_end && isdigit(*position); ++position) // advance to year end
261 ;
262 auto year_end = position;
263
264 for(; position < value_end && !isdigit(*position); ++position) // advance to month
265 ;
266 auto month_start = position;
267 for(; position < value_end && isdigit(*position); ++position) // advance to month end
268 ;
269 auto month_end = position;
270
271 for(; position < value_end && !isdigit(*position); ++position) // advance to day
272 ;
273 auto day_start = position;
274 for(; position < value_end && isdigit(*position); ++position) // advance to day end
275 ;
276 auto day_end = position;
277
278 return sys::year_month_day{parsers::parse_int(std::string_view(year_start, year_end - year_start), line, err),
279 uint16_t(parsers::parse_uint(std::string_view(month_start, month_end - month_start), line, err)),
280 uint16_t(parsers::parse_uint(std::string_view(day_start, day_end - day_start), line, err))};
281}
282
// Reports whether `content` is non-empty and its first character equals `v`.
bool starts_with(std::string_view content, char v) {
	if(content.empty())
		return false;
	return content.front() == v;
}
286
// Classifies a one- or two-character operator token (">", "<", "=", "==", "<=",
// ">=", "!=", "<>") as an association_type. When no pattern matches,
// err.bad_association_token(content, line) is called.
// NOTE(review): the statement following each condition and the final return are
// absent from this listing (the embedded numbering skips 290, 292, 294, 297, ...,
// 308) — restore them from the upstream file; the values returned cannot be
// determined from what is visible here.
287association_type parse_association_type(std::string_view content, int32_t line, error_handler& err) {
288 if(content.length() == 1) {
289 if(content[0] == '>')
291 else if(content[0] == '<')
293 else if(content[0] == '=')
295 } else if(content.length() == 2) {
296 if(content[0] == '=' && content[1] == '=')
298 else if(content[0] == '<' && content[1] == '=')
300 else if(content[0] == '>' && content[1] == '=')
302 else if(content[0] == '!' && content[1] == '=')
304 else if(content[0] == '<' && content[1] == '>')
306 }
// Reached only when none of the patterns above matched.
307 err.bad_association_token(content, line);
309}
310
311/*
312date_tag parse_date(std::string_view content, int32_t line, error_handler& err) {
313 const auto first_dot = std::find(start, end, '.');
314 const auto second_dot = std::find(first_dot + 1, end, '.');
315
316 const auto year = static_cast<uint16_t>(parse_uint(start, first_dot));
317 const auto month = static_cast<uint16_t>(parse_uint(first_dot + 1, second_dot));
318 const auto day = static_cast<uint16_t>(parse_uint(second_dot + 1, end));
319
320 return date_to_tag(boost::gregorian::date(year, month, day));
321}
322*/
323
324separator_scan_result csv_find_separator_token(char const* start, char const* end, char seperator) {
325 while(start != end) {
326 if(line_termination(*start))
327 return separator_scan_result{start, false};
328 else if(*start == seperator)
329 return separator_scan_result{start, true};
330 else
331 ++start;
332 }
333 return separator_scan_result{start, false};
334}
335
336char const* csv_advance(char const* start, char const* end, char seperator) {
337 while(start != end) {
338 if(line_termination(*start))
339 return start;
340 else if(*start == seperator)
341 return start + 1;
342 else
343 ++start;
344 }
345 return start;
346}
347
348char const* csv_advance_n(uint32_t n, char const* start, char const* end, char seperator) {
349
350 if(n == 0)
351 return start;
352 --n;
353
354 while(start != end) {
355 if(line_termination(*start))
356 return start;
357 else if(*start == seperator) {
358 if(n == 0)
359 return start + 1;
360 else
361 --n;
362 }
363 ++start;
364 }
365 return start;
366}
367
368char const* csv_advance_to_next_line(char const* start, char const* end) {
369
370 while(start != end && !line_termination(*start)) {
371 ++start;
372 }
373 while(start != end && line_termination(*start))
374 ++start;
375 if(start == end || *start != '#')
376 return start;
377 else
378 return csv_advance_to_next_line(start, end);
379}
380
// Returns the sub-view of `txt` with leading and trailing spaces, tabs,
// carriage returns and newlines removed. Text made only of those characters
// yields an empty view.
std::string_view remove_surrounding_whitespace(std::string_view txt) {
	auto const is_blank = [](char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; };

	char const* first = txt.data();
	char const* last = first + txt.length();

	while(first < last && is_blank(*first))
		++first;
	while(first < last && is_blank(*(last - 1)))
		--last;

	return std::string_view(first, last - first);
}
394} // namespace parsers
395
396#include "defines.cpp"
398#include "nations_parsing.cpp"
399#include "cultures_parsing.cpp"
400#include "econ_parsing.cpp"
401#include "military_parsing.cpp"
402#include "provinces_parsing.cpp"
403#include "trigger_parsing.cpp"
404#include "effect_parsing.cpp"
std::string accumulated_errors
Definition: parsers.hpp:62
void bad_int(std::string_view s, int32_t l)
Definition: parsers.hpp:100
void bad_association_token(std::string_view s, int32_t l)
Definition: parsers.hpp:108
std::string file_name
Definition: parsers.hpp:61
void bad_float(std::string_view s, int32_t l)
Definition: parsers.hpp:88
void bad_unsigned_int(std::string_view s, int32_t l)
Definition: parsers.hpp:104
token_and_type get()
Definition: parsers.cpp:160
bool at_end() const
Definition: parsers.hpp:50
token_and_type next()
Definition: parsers.cpp:171
token_and_type next_next()
Definition: parsers.cpp:178
uint32_t tag_to_int(char first, char second, char third)
Definition: nations.hpp:11
bool float_from_chars(char const *start, char const *end, float &float_out)
char const * advance_position_to_next_line(char const *start, char const *end, int32_t &current_line)
Definition: parsers.cpp:98
bool is_integer(char const *start, char const *end)
Definition: parsers.cpp:46
char const * scan_for_match(char const *start, char const *end, int32_t &current_line, T &&condition)
Definition: parsers.cpp:76
char const * csv_advance_n(uint32_t n, char const *start, char const *end, char seperator)
Definition: parsers.cpp:348
separator_scan_result csv_find_separator_token(char const *start, char const *end, char seperator)
Definition: parsers.cpp:324
bool special_identifier_char(char c)
Definition: parsers.cpp:11
association_type
Definition: parsers.hpp:28
bool breaking_char(char c)
Definition: parsers.cpp:15
bool is_positive_integer(char const *start, char const *end)
Definition: parsers.cpp:35
char const * csv_advance(char const *start, char const *end, char seperator)
Definition: parsers.cpp:336
bool ignorable_char(char c)
Definition: parsers.cpp:7
char const * advance_position_to_breaking_char(char const *start, char const *end, int32_t &current_line)
Definition: parsers.cpp:116
bool not_special_identifier_char(char c)
Definition: parsers.cpp:19
bool double_quote_termination(char c)
Definition: parsers.cpp:27
bool double_from_chars(char const *start, char const *end, double &dbl_out)
char const * advance_position_to_non_comment(char const *start, char const *end, int32_t &current_line)
Definition: parsers.cpp:107
char const * scan_for_not_match(char const *start, char const *end, int32_t &current_line, T &&condition)
Definition: parsers.cpp:87
sys::year_month_day parse_date(std::string_view content, int32_t line, error_handler &err)
Definition: parsers.cpp:253
bool line_termination(char c)
Definition: parsers.cpp:23
int32_t parse_int(std::string_view content, int32_t line, error_handler &err)
Definition: parsers.cpp:226
std::string_view remove_surrounding_whitespace(std::string_view txt)
Definition: parsers.cpp:381
bool parse_bool(std::string_view content, int32_t, error_handler &)
Definition: parsers.cpp:201
uint32_t parse_tag(std::string_view tag, int32_t line, error_handler &err)
Definition: parsers.cpp:244
float parse_float(std::string_view content, int32_t line, error_handler &err)
Definition: parsers.cpp:208
bool starts_with(std::string_view content, char v)
Definition: parsers.cpp:283
bool is_positive_fp(char const *start, char const *end)
Definition: parsers.cpp:55
uint32_t parse_uint(std::string_view content, int32_t line, error_handler &err)
Definition: parsers.cpp:235
bool single_quote_termination(char c)
Definition: parsers.cpp:31
bool is_fp(char const *start, char const *end)
Definition: parsers.cpp:66
association_type parse_association_type(std::string_view content, int32_t line, error_handler &err)
Definition: parsers.cpp:287
double parse_double(std::string_view content, int32_t line, error_handler &err)
Definition: parsers.cpp:218
char const * advance_position_to_non_whitespace(char const *start, char const *end, int32_t &current_line)
Definition: parsers.cpp:103
char const * csv_advance_to_next_line(char const *start, char const *end)
Definition: parsers.cpp:368
bool has_fixed_prefix(char const *start, char const *end, char const (&t)[N])
Definition: parsers.hpp:196
uint uint32_t