Yume
parser.cpp
Go to the documentation of this file.
1#include "parser.hpp"
2
3#include "ast/ast.hpp"
5#include "qualifier.hpp"
6#include <algorithm>
7#include <llvm/Support/ErrorHandling.h>
8#include <llvm/Support/raw_ostream.h>
9
11auto Parser::ignore_separator([[maybe_unused]] const source_location location) -> bool {
12 bool found_separator = false;
13 while (!tokens.at_end() && tokens->type == Separator) {
14#ifdef YUME_SPEW_CONSUMED_TOKENS
15 errs() << "consumed " << *tokens << " at " << at(location) << "\n";
16#endif
17 ++tokens;
18 found_separator = true;
19 }
20 return found_separator;
21}
22
23void Parser::expect(Token::Type token_type, const source_location location) const {
24 if (tokens.at_end())
25 emit_fatal_and_terminate() << "Expected token type " << Token::type_name(token_type) << ", got the end of the file";
26
27 if (tokens->type != token_type) {
28 emit_fatal_and_terminate() << "Expected token type " << Token::type_name(token_type) << ", got "
29 << to_string(*tokens) << " at " + at(location);
30 }
31}
32
34 if (!tokens.at_end() && tokens->type != Token::Type::EndOfFile)
35 expect(Separator, location);
36 ignore_separator(location);
37}
38
39auto Parser::to_string(Token token) -> string {
40 string str{};
41 llvm::raw_string_ostream(str) << token;
42 return str;
43}
44
45void Parser::consume(TokenAtom token_atom, const source_location location) {
46 auto [token_type, payload] = token_atom;
48 if (tokens.at_end()) {
49 emit_fatal_and_terminate() << "Expected token type " << Token::type_name(token_type)
50 << " for payload " + string(payload) << ", got the end of the file at " << at(location);
51 }
52
53 if (tokens->type != token_type) {
54 emit_fatal_and_terminate() << "Expected token type " << Token::type_name(token_type) << " for payload "
55 << string(payload) << ", got " << to_string(*tokens) << " at " << at(location);
56 }
57 if (tokens->payload != payload) {
58 emit_fatal_and_terminate() << "Expected payload atom " << string(payload) << ", got " << to_string(*tokens)
59 << " at " << at(location);
60 }
61
62#ifdef YUME_SPEW_CONSUMED_TOKENS
63 errs() << "consume: " << *tokens << " at " << at(location) << "\n";
64#endif
65
66 tokens++;
67}
68
69auto Parser::try_consume(TokenAtom token_atom, [[maybe_unused]] const source_location location) -> bool {
70 auto [token_type, payload] = token_atom;
71 if (tokens.at_end() || tokens->type != token_type || tokens->payload != payload)
72 return false;
73
74#ifdef YUME_SPEW_CONSUMED_TOKENS
75 errs() << "try_consume: " << *tokens << " at " << at(location) << "\n";
76#endif
77
78 tokens++;
79 return true;
80}
81
82auto Parser::try_peek(int ahead, TokenAtom token_atom, [[maybe_unused]] const source_location location) const -> bool {
83 auto [token_type, payload] = token_atom;
84 if (tokens.at_end())
85 return false;
86
87 auto token = tokens + ahead;
88 if (token.at_end())
89 return false;
90
91#ifdef YUME_SPEW_CONSUMED_TOKENS
92 errs() << "try_peek ahead by " << ahead << ": expected " << Token::type_name(token_type) << " " << string(payload)
93 << ", got " << *token << " at " << at(location) << "\n";
94#endif
95
96 return (token->type == token_type) && (token->payload == payload);
97}
98
99auto Parser::try_peek(int ahead, Token::Type token_type, [[maybe_unused]] const source_location location) const
100 -> bool {
101 if (tokens.at_end())
102 return false;
103
104 auto token = tokens + ahead;
105 if (token.at_end())
106 return false;
107
108#ifdef YUME_SPEW_CONSUMED_TOKENS
109 errs() << "try_peek ahead by " << ahead << ": expected " << Token::type_name(token_type) << ", got " << *token
110 << " at " << at(location) << "\n";
111#endif
112
113 return token->type == token_type;
114}
115
116auto Parser::next([[maybe_unused]] const source_location location) -> Token {
117 auto tok = *tokens++;
118#ifdef YUME_SPEW_CONSUMED_TOKENS
119 errs() << "next: " << tok << " at " << at(location) << "\n";
120#endif
121 return tok;
122}
123
124auto Parser::assert_payload_next([[maybe_unused]] const source_location location) -> Atom {
125 auto payload = tokens->payload;
126 if (!payload) {
127 emit_fatal_and_terminate() << "Expected a payload, but wasn't found: " << to_string(*tokens) << " at "
128 << at(location);
129 llvm_unreachable("Fatal error should have terminated by now");
130 }
131
132 next(location);
133 return payload.value();
134}
135
136auto Parser::consume_word(const source_location location) -> string {
137 ignore_separator();
138 if (tokens.at_end())
139 emit_fatal_and_terminate() << "Expected word, got the end of the file";
140 if (tokens->type != Word)
141 emit_fatal_and_terminate() << "Expected word, got " << to_string(*tokens) << " at " << at(location);
142
143 return string(assert_payload_next());
144}
145
146auto Parser::try_peek_uword(int ahead, [[maybe_unused]] const source_location location) const -> bool {
147 auto token = tokens + ahead;
148
149#ifdef YUME_SPEW_CONSUMED_TOKENS
150 errs() << "try_peek ahead by " << ahead << ": expected uword, got " << *token << " at " << at(location) << "\n";
151#endif
152
153 auto payload = token->payload;
154 return token->type == Word && payload.has_value() && is_uword(payload.value());
155}
156
157auto Parser::parse_stmt(bool require_sep) -> unique_ptr<Stmt> {
158 auto stat = unique_ptr<Stmt>();
159
160 if (tokens->is_a(KWD_DEF))
161 stat = parse_fn_or_ctor_decl();
162 else if (tokens->is_a(KWD_STRUCT) || tokens->is_a(KWD_INTERFACE))
163 stat = parse_struct_decl();
164 else if (tokens->is_a(KWD_LET))
165 stat = parse_var_decl();
166 else if (tokens->is_a(KWD_CONST))
167 stat = parse_const_decl();
168 else if (tokens->is_a(KWD_WHILE))
169 stat = parse_while_stmt();
170 else if (tokens->is_a(KWD_IF))
171 stat = parse_if_stmt();
172 else if (tokens->is_a(KWD_RETURN))
173 stat = parse_return_stmt();
174 else
175 stat = parse_expr();
176
177 if (require_sep)
178 require_separator();
179 return stat;
180}
181
182auto Parser::try_parse_function_type() -> optional<unique_ptr<FunctionType>> {
183 auto entry = tokens.begin();
184 auto result = [&]() -> optional<unique_ptr<FunctionType>> {
185 if (try_consume(SYM_LPAREN)) {
186 auto args = vector<AnyType>{};
187 int i = 0;
188 while (!try_consume(SYM_ARROW)) {
189 if (i++ > 0) {
190 if (!try_consume(SYM_COMMA))
191 return {};
192 }
193 auto t = try_parse_type();
194 if (!t.has_value())
195 return {};
196 args.emplace_back(move(*t));
197 }
198 auto fn_ptr = try_consume(KWD_PTR);
199 auto ret = OptionalType{try_parse_type()};
200 consume(SYM_RPAREN);
201 return ast_ptr<FunctionType>(entry, move(ret), move(args), fn_ptr);
202 }
203 return {};
204 }();
205 if (!result.has_value())
206 tokens = {entry, tokens.end()}; // Rewind
207 return result;
208}
209
210auto Parser::parse_type(bool implicit_self) -> unique_ptr<Type> {
211 if (!implicit_self) {
212 if (auto maybe_fn_type = try_parse_function_type(); maybe_fn_type.has_value())
213 return move(*maybe_fn_type);
214 }
215
216 auto entry = tokens.begin();
217 auto base = [&]() -> unique_ptr<Type> {
218 if (implicit_self || try_consume(KWD_SELF_TYPE))
219 return ast_ptr<SelfType>(entry);
220
221 const string name = consume_word();
222 if (!is_uword(name))
223 emit_fatal_and_terminate() << "Expected capitalized payload for simple type";
224
225 return ast_ptr<SimpleType>(entry, name);
226 }();
227 while (true) {
228 if (try_consume(KWD_PTR)) {
229 base = ast_ptr<QualType>(entry, move(base), Qualifier::Ptr);
230 } else if (try_consume(KWD_MUT)) {
231 base = ast_ptr<QualType>(entry, move(base), Qualifier::Mut);
232 } else if (try_consume(KWD_REF)) {
233 base = ast_ptr<QualType>(entry, move(base), Qualifier::Ref);
234 } else if (try_consume(KWD_TYPE)) {
235 base = ast_ptr<QualType>(entry, move(base), Qualifier::Type);
236 } else if (try_peek(0, SYM_LBRACKET) && try_peek(1, SYM_RBRACKET)) {
237 // Don't consume the `[` unless the `]` is directly after; it might be a slice literal.
238 consume(SYM_LBRACKET);
239 consume(SYM_RBRACKET);
240
241 auto slice_ty = ast_ptr<SimpleType>(entry, "Slice");
242 auto generic_args = vector<AnyTypeOrExpr>{};
243 generic_args.emplace_back(move(base));
244 base = ast_ptr<TemplatedType>(entry, move(slice_ty), move(generic_args));
245 } else if (try_consume(SYM_LBRACE)) {
246 auto generic_args = vector<AnyTypeOrExpr>{};
247 consume_with_commas_until(SYM_RBRACE, [&] {
248 auto expr = parse_expr();
249 if (auto* type_expr = dyn_cast<ast::TypeExpr>(expr.get()))
250 generic_args.emplace_back(move(type_expr->type));
251 else
252 generic_args.emplace_back(move(expr));
253 });
254
255 base = ast_ptr<TemplatedType>(entry, move(base), move(generic_args));
256 } else {
257 break;
258 }
259 }
260
261 return base;
262}
263
264auto Parser::try_parse_type() -> optional<unique_ptr<Type>> {
265 if (auto maybe_fn_type = try_parse_function_type(); maybe_fn_type.has_value())
266 return maybe_fn_type;
267
268 auto entry = tokens.begin();
269 if (tokens->type != Word || !tokens->payload.has_value())
270 return {};
271
272 if (!try_peek_uword(0))
273 return {};
274
275 const string name = consume_word();
276 if (make_atom(name) != std::get<Atom>(KWD_SELF_TYPE) && !is_uword(name))
277 return {};
278
279 unique_ptr<Type> base{};
280 if (make_atom(name) == std::get<Atom>(KWD_SELF_TYPE))
281 base = ast_ptr<SelfType>(entry);
282 else
283 base = ast_ptr<SimpleType>(entry, name);
284
285 while (true) {
286 if (try_consume(KWD_PTR)) {
287 base = ast_ptr<QualType>(entry, move(base), Qualifier::Ptr);
288 } else if (try_consume(KWD_MUT)) {
289 base = ast_ptr<QualType>(entry, move(base), Qualifier::Mut);
290 } else if (try_consume(KWD_REF)) {
291 base = ast_ptr<QualType>(entry, move(base), Qualifier::Ref);
292 } else if (try_consume(KWD_TYPE)) {
293 base = ast_ptr<QualType>(entry, move(base), Qualifier::Type);
294 } else if (try_peek(0, SYM_LBRACKET) && try_peek(1, SYM_RBRACKET)) {
295 // Don't consume the `[` unless the `]` is directly after; it might be a slice literal.
296 consume(SYM_LBRACKET);
297 consume(SYM_RBRACKET);
298
299 auto slice_ty = ast_ptr<SimpleType>(entry, "Slice");
300 auto generic_args = vector<AnyTypeOrExpr>{};
301 generic_args.emplace_back(move(base));
302 base = ast_ptr<TemplatedType>(entry, move(slice_ty), move(generic_args));
303 } else if (try_consume(SYM_LBRACE)) {
304 auto generic_args = vector<AnyTypeOrExpr>{};
305 consume_with_commas_until(SYM_RBRACE, [&] {
306 if (auto type = try_parse_type(); type.has_value())
307 generic_args.emplace_back(move(*type));
308 else
309 generic_args.emplace_back(parse_expr());
310 });
311
312 base = ast_ptr<TemplatedType>(entry, move(base), move(generic_args));
313 } else {
314 break;
315 }
316 }
317
318 return base;
319}
320
321auto Parser::parse_type_name() -> unique_ptr<TypeName> {
322 auto entry = tokens.begin();
323 if (try_consume(KWD_SELF_ITEM)) {
324 unique_ptr<Type> type = parse_type(/* implicit_self= */ true);
325 return ast_ptr<TypeName>(entry, move(type), "self");
326 }
327 const string name = consume_word();
328 unique_ptr<Type> type = parse_type();
329 return ast_ptr<TypeName>(entry, move(type), name);
330}
331
333 auto entry = tokens.begin();
334 auto left = parse_logical_and();
335 if (try_consume(SYM_OR_OR)) {
336 auto right = parse_logical_or();
337 left = ast_ptr<BinaryLogicExpr>(entry, SYM_OR_OR.second, move(left), move(right));
338 }
339 return left;
340}
341
343 auto entry = tokens.begin();
344 auto left = parse_operator();
345 if (try_consume(SYM_AND_AND)) {
346 auto right = parse_logical_and();
347 left = ast_ptr<BinaryLogicExpr>(entry, SYM_AND_AND.second, move(left), move(right));
348 }
349 return left;
350}
351
352auto Parser::parse_expr() -> unique_ptr<Expr> { return parse_logical_or(); }
353
354auto Parser::parse_fn_name() -> string {
355 string name{};
356 if (tokens->type == Word) {
357 name = consume_word();
358 } else if (tokens->type == Symbol) {
359 // Try to parse an operator name, as in `def +()`
360 bool found_op = false;
361 for (const auto& op_row : operators()) {
362 for (const auto& op : op_row) {
363 if (try_consume(op)) {
364 found_op = true;
365 name = std::get<Atom>(op);
366 break;
367 }
368 }
369 if (found_op)
370 break;
371 }
372
373 // If an operator wasn't found, try parse the operator []
374 if (try_consume(SYM_LBRACKET)) {
375 consume(SYM_RBRACKET);
376 name = "[]";
377 } else if (try_consume(SYM_BANG)) {
378 name = "!"; // ! is unary, but the above operator check only checked binary ones
379 }
380 }
381
382 // Check if an equal sign follows, for fused assignment operators such as `+=` or `[]=`
383 if (try_consume(SYM_EQ))
384 name += "=";
385
386 return name;
387}
388
389auto Parser::parse_struct_decl() -> unique_ptr<StructDecl> {
390 auto entry = tokens.begin();
391
392 bool interface = try_consume(KWD_INTERFACE);
393 if (!interface)
394 consume(KWD_STRUCT);
395
396 auto annotations = std::unordered_set<string>{};
397 while (try_consume(SYM_AT))
398 annotations.emplace(consume_word());
399
400 const string name = consume_word();
401 if (!is_uword(name))
402 emit_fatal_and_terminate() << "Expected capitalized name for struct decl";
403
404 auto type_args = parse_generic_type_params();
405
406 auto fields = vector<TypeName>{};
407 if (try_consume(SYM_LPAREN))
408 consume_with_commas_until(SYM_RPAREN, [&] { fields.push_back(move(*parse_type_name())); });
409
410 auto implements = OptionalType{};
411 if (try_consume(KWD_IS))
412 implements = parse_type();
413
414 require_separator();
415
416 auto body = vector<AnyStmt>{};
417 auto body_begin = tokens.begin();
418 while (!try_consume(KWD_END)) {
419 body.emplace_back(parse_stmt());
420 ignore_separator();
421 }
422
423 return ast_ptr<StructDecl>(entry, name, move(fields), move(type_args), make_ast<Compound>(body_begin, move(body)),
424 move(implements), move(annotations), interface);
425}
426
428 auto entry = tokens.begin();
429 if (try_consume(SYM_COLON_COLON)) {
430 auto field_name = consume_word();
431 AnyType proxy_type = ast_ptr<ProxyType>(entry, field_name);
432 auto proxied_arg = ast_ptr<TypeName>(entry, move(proxy_type), field_name);
433
434 auto implicit_field = ast_ptr<FieldAccessExpr>(entry, std::nullopt, field_name);
435 auto arg_var = ast_ptr<VarExpr>(entry, field_name);
436 auto extra_assign = ast_ptr<AssignExpr>(entry, move(implicit_field), move(arg_var));
437
438 return {move(proxied_arg), move(extra_assign)};
439 }
440
441 return {parse_type_name(), std::nullopt};
442}
443
444auto Parser::parse_generic_type_params() -> vector<GenericParam> {
445 auto type_args = vector<GenericParam>{};
446 if (try_consume(SYM_LBRACE)) {
447 consume_with_commas_until(SYM_RBRACE, [&] {
448 auto entry = tokens.begin();
449 auto name = consume_word();
450 if (is_uword(name)) {
451 if (!try_consume(KWD_TYPE))
452 emit_note(-1, diagnostic::Severity::Warn) << "Type parameter with no specifier will be deprecated";
453 type_args.push_back(make_ast<GenericParam>(entry, std::nullopt, name));
454 } else {
455 auto type = parse_type();
456 type_args.push_back(make_ast<GenericParam>(entry, move(type), name));
457 }
458 });
459 }
460
461 return type_args;
462}
463
465 if (try_peek(1, SYM_COLON))
466 return parse_ctor_decl();
467 return parse_fn_decl();
468}
469
// Parse a function declaration that begins with KWD_DEF.
//
// Consumed in order: KWD_DEF, any number of SYM_AT-prefixed annotation words, the function name (parse_fn_name),
// optional generic parameters, a parenthesized comma-separated argument list, and an optional return type.
// What follows decides which kind of FnDecl is built:
//  - SYM_EQ, KWD_PRIMITIVE, `(` word `)`: a declaration carrying that word as its primitive;
//  - SYM_EQ, KWD_EXTERN, optionally KWD_VARARGS: an extern declaration named after the function itself;
//  - SYM_EQ, KWD_ABSTRACT: an abstract declaration;
//  - SYM_EQ followed by an expression: a body holding a single ReturnStmt that wraps the expression;
//  - anything else: a body of statements running up to KWD_END.
470auto Parser::parse_fn_decl() -> unique_ptr<FnDecl> {
471 auto entry = tokens.begin();
472
473 consume(KWD_DEF);
474
475 auto annotations = std::unordered_set<string>{};
476 while (try_consume(SYM_AT))
477 annotations.emplace(consume_word());
478
479 const string name = parse_fn_name();
480 auto type_args = parse_generic_type_params();
481
482 consume(SYM_LPAREN);
483
484 auto args = vector<TypeName>{};
485 auto body = vector<AnyStmt>{};
486
487 consume_with_commas_until(SYM_RPAREN, [&] {
488 auto arg = parse_fn_arg();
489 args.emplace_back(move(*arg.type_name));
 // An argument may come with an extra statement; it is appended to the body here, i.e. before any statement
 // parsed from the function's own body further down.
490 if (arg.extra_body)
491 body.emplace_back(move(arg.extra_body));
492 });
493
494 auto ret_type = OptionalType{try_parse_type()};
 // Placeholder value; reassigned below on both paths that build a Compound body.
495 auto body_begin = entry;
496
497 if (try_consume(SYM_EQ)) { // A "short" function definition, consists of a single expression
498 if (try_consume(KWD_PRIMITIVE)) {
499 consume(SYM_LPAREN);
500 auto primitive = consume_word();
501 consume(SYM_RPAREN);
502 return ast_ptr<FnDecl>(entry, name, move(args), move(type_args), move(ret_type), primitive, move(annotations));
503 }
504 if (try_consume(KWD_EXTERN)) {
505 // consume(SYM_LPAREN);
506 // auto primitive = consume_word();
507 // consume(SYM_RPAREN);
508 auto varargs = try_consume(KWD_VARARGS);
509 return ast_ptr<FnDecl>(entry, name, move(args), move(type_args), move(ret_type),
510 FnDecl::extern_decl_t{name, varargs}, move(annotations));
511 }
512 if (try_consume(KWD_ABSTRACT)) {
513 if (args.empty()) {
514 // A no-arg abstract method wouldn't be able to be called, because the self type is required to perform
515 // dispatch. Add it here silently. It cannot actually be used, so it will be discarded when passed to the impl
516 // method.
517 // HACK: This currently basically only works by chance, as there is no logic in place to verify type
518 // compatibility between interface methods and their implementations. As such, this invented self pointer just
519 // magically vanishes, where as if it would be properly type checked, they wouldn't match.
520 // HACK: This implicit variable is a hack itself, as it makes it indistinguishable from an abstract method
521 // with an explicit self parameter.
522 // HACK: As per the hack mentioned above, the arg is given an empty name "", which is checked for later, under
523 // the assumption that regular code can't write an empty name. Relying on a specific, special name is still
524 // dirty, though.
525 args.emplace_back(make_ast<TypeName>(entry, ast_ptr<SelfType>(entry), ""));
526 }
527 return ast_ptr<FnDecl>(entry, name, move(args), move(type_args), move(ret_type), FnDecl::abstract_decl_t{},
528 move(annotations));
529 }
 // None of the special forms matched: the expression after `=` becomes the returned value.
530 body_begin = tokens.begin();
531 auto expr = parse_expr();
532 body.emplace_back(ast_ptr<ReturnStmt>(entry, move(expr)));
533 } else {
534 if (!try_peek(0, KWD_END)) // Allow `end` to be on the same line
535 require_separator();
536
537 body_begin = tokens.begin();
538 while (!try_consume(KWD_END)) {
539 body.emplace_back(parse_stmt());
540 ignore_separator();
541 }
542 }
543
544 return ast_ptr<FnDecl>(entry, name, move(args), move(type_args), move(ret_type),
545 make_ast<Compound>(body_begin, move(body)), move(annotations));
546}
547
548auto Parser::parse_ctor_decl() -> unique_ptr<CtorDecl> {
549 auto entry = tokens.begin();
550
551 consume(KWD_DEF);
552 consume(SYM_COLON);
553 consume(KWD_NEW);
554 consume(SYM_LPAREN);
555
556 auto args = vector<TypeName>{};
557 auto body = vector<AnyStmt>{};
558
559 consume_with_commas_until(SYM_RPAREN, [&] {
560 auto arg = parse_fn_arg();
561 args.emplace_back(move(*arg.type_name));
562 if (arg.extra_body)
563 body.emplace_back(move(arg.extra_body));
564 });
565
566 if (!try_peek(0, KWD_END)) // Allow `end` to be on the same line
567 require_separator();
568
569 auto body_begin = tokens.begin();
570 while (!try_consume(KWD_END)) {
571 body.emplace_back(parse_stmt());
572 ignore_separator();
573 }
574
575 return ast_ptr<CtorDecl>(entry, move(args), make_ast<Compound>(body_begin, move(body)));
576}
577
578auto Parser::parse_var_decl() -> unique_ptr<VarDecl> {
579 auto entry = tokens.begin();
580
581 consume(KWD_LET);
582 const string name = consume_word();
583 auto type = OptionalType{try_parse_type()};
584
585 consume(SYM_EQ);
586
587 auto init = parse_expr();
588
589 return ast_ptr<VarDecl>(entry, name, move(type), move(init));
590}
591
592auto Parser::parse_const_decl() -> unique_ptr<ConstDecl> {
593 auto entry = tokens.begin();
594
595 consume(KWD_CONST);
596 const string name = consume_word();
597 auto type = AnyType{parse_type()};
598
599 consume(SYM_EQ);
600
601 auto init = parse_expr();
602
603 return ast_ptr<ConstDecl>(entry, name, move(type), move(init));
604}
605
606auto Parser::parse_while_stmt() -> unique_ptr<WhileStmt> {
607 auto entry = tokens.begin();
608
609 consume(KWD_WHILE);
610 auto cond = parse_expr();
611
612 ignore_separator();
613
614 auto body_begin = tokens.begin();
615 auto body = vector<AnyStmt>{};
616 while (!try_consume(KWD_END)) {
617 body.emplace_back(parse_stmt());
618 ignore_separator();
619 }
620
621 auto compound = make_ast<Compound>(body_begin, move(body));
622
623 return ast_ptr<WhileStmt>(entry, move(cond), move(compound));
624}
625
626auto Parser::parse_return_stmt() -> unique_ptr<ReturnStmt> {
627 auto entry = tokens.begin();
628
629 consume(KWD_RETURN);
630 if (!tokens.at_end() && try_peek(0, Separator))
631 return ast_ptr<ReturnStmt>(entry, std::nullopt);
632
633 auto expr = parse_expr();
634
635 return ast_ptr<ReturnStmt>(entry, move(expr));
636}
637
638auto Parser::parse_if_stmt() -> unique_ptr<IfStmt> {
639 auto entry = tokens.begin();
640 auto clause_begin = entry;
641 consume(KWD_IF);
642 auto cond = parse_expr();
643 if (!try_consume(KWD_THEN))
644 require_separator();
645
646 auto current_entry = tokens.begin();
647 auto else_entry = tokens.begin();
648 auto clauses = vector<IfClause>{};
649 auto current_body = vector<AnyStmt>{};
650 auto else_body = vector<AnyStmt>{};
651 bool in_else = false;
652
653 while (true) {
654 auto current_clause_begin = tokens.begin();
655 if (try_consume(KWD_END))
656 break;
657 if (try_consume(KWD_ELSE)) {
658 // An `else` followed by an `if` begins a new clause of the same if statement.
659 if (!in_else && try_consume(KWD_IF)) {
660 clauses.emplace_back(ts(clause_begin), move(cond), make_ast<Compound>(current_entry, move(current_body)));
661 current_body = vector<AnyStmt>{};
662 cond = parse_expr();
663 current_entry = tokens.begin();
664 clause_begin = current_clause_begin;
665 } else {
666 in_else = true;
667 else_entry = tokens.begin();
668 }
669 if (!try_consume(KWD_THEN))
670 require_separator();
671 }
672 auto st = parse_stmt();
673 if (in_else)
674 else_body.emplace_back(move(st));
675 else
676 current_body.emplace_back(move(st));
677 }
678
679 if (else_body.empty())
680 else_entry = tokens.begin();
681
682 clauses.emplace_back(ts(clause_begin, else_entry - 1), move(cond),
683 Compound(ts(current_entry, else_entry - 1), move(current_body)));
684
685 auto else_clause = optional<Compound>{};
686 if (!else_body.empty())
687 else_clause.emplace(ts(else_entry), move(else_body));
688
689 return ast_ptr<IfStmt>(entry, move(clauses), move(else_clause));
690}
691
692auto Parser::parse_number_expr() -> unique_ptr<NumberExpr> {
693 static constexpr int BASE_16 = 16;
694 static constexpr int BASE_10 = 10;
695 auto entry = tokens.begin();
696 expect(Number);
697
698 auto literal = string(assert_payload_next());
699 int64_t value = literal.starts_with("0x"sv) ? stoll(literal, nullptr, BASE_16) : stoll(literal, nullptr, BASE_10);
700
701 return ast_ptr<NumberExpr>({entry, 1}, value);
702}
703
704auto Parser::parse_string_expr() -> unique_ptr<StringExpr> {
705 auto entry = tokens.begin();
706 expect(Token::Type::Literal);
707
708 auto value = string(assert_payload_next());
709
710 return ast_ptr<StringExpr>({entry, 1}, value);
711}
712
713auto Parser::parse_char_expr() -> unique_ptr<CharExpr> {
714 auto entry = tokens.begin();
715 expect(Token::Type::Char);
716
717 auto value = string(assert_payload_next())[0];
718
719 return ast_ptr<CharExpr>({entry, 1}, value);
720}
721
723 const auto guard = make_guard("Parsing primary expression");
724
725 auto entry = tokens.begin();
726
727 // Need to distinguish if this is a function type expr, or a parenthesized expression, as both start with '('.
728 if (auto maybe_fn_type = try_parse_function_type(); maybe_fn_type.has_value())
729 return ast_ptr<TypeExpr>(entry, move(*maybe_fn_type));
730
731 if (try_consume(SYM_LPAREN)) {
732 auto val = parse_expr();
733 consume(SYM_RPAREN);
734 return val;
735 }
736
737 if (tokens->type == Number)
738 return parse_number_expr();
739 if (tokens->type == Token::Type::Literal)
740 return parse_string_expr();
741 if (tokens->type == Token::Type::Char)
742 return parse_char_expr();
743 if (try_consume(KWD_TRUE))
744 return ast_ptr<BoolExpr>(entry, true);
745 if (try_consume(KWD_FALSE))
746 return ast_ptr<BoolExpr>(entry, false);
747 if (try_consume(SYM_COLON_COLON))
748 return ast_ptr<FieldAccessExpr>(entry, std::nullopt, consume_word());
749 if (try_consume(SYM_DOLLAR))
750 return ast_ptr<ConstExpr>(entry, consume_word(), std::nullopt);
751
752 if (tokens->type == Word) {
753 if (try_peek_uword(0)) {
754 if (try_peek(1, SYM_COLON_COLON)) {
755 auto parent = consume_word();
756 consume(SYM_COLON_COLON);
757 return ast_ptr<ConstExpr>(entry, consume_word(), parent);
758 }
759
760 auto type = parse_type();
761 if (try_consume(SYM_LPAREN)) {
762 auto call_args = collect_with_commas_until<AnyExpr>(SYM_RPAREN, &Parser::parse_expr);
763 return ast_ptr<CtorExpr>(entry, move(type), move(call_args));
764 }
765 if (try_consume(SYM_COLON)) {
766 if (try_consume(SYM_LBRACKET)) {
767 auto slice_members = collect_with_commas_until<AnyExpr>(SYM_RBRACKET, &Parser::parse_expr);
768 return ast_ptr<SliceExpr>(entry, move(type), move(slice_members));
769 }
770 }
771 if (try_consume(SYM_DOT)) {
772 if (try_peek_uword(0))
773 emit_fatal_and_terminate() << "Nested types aren't yet implemented";
774
775 auto name = consume_word();
776 auto call_args = vector<AnyExpr>{};
777 if (try_consume(SYM_LPAREN))
778 collect_with_commas_until(SYM_RPAREN, &Parser::parse_expr, call_args);
779 return ast_ptr<CallExpr>(entry, name, move(type), move(call_args));
780 }
781
782 return ast_ptr<TypeExpr>(entry, move(type));
783 }
784 auto name = consume_word();
785 if (try_consume(SYM_LPAREN)) {
786 auto call_args = collect_with_commas_until<AnyExpr>(SYM_RPAREN, &Parser::parse_expr);
787 return ast_ptr<CallExpr>(entry, name, std::nullopt, move(call_args));
788 }
789 return ast_ptr<VarExpr>({entry, 1}, name);
790 }
791 emit_fatal_and_terminate() << "Couldn't make an expression from here";
792 llvm_unreachable("Fatal error encountered");
793}
794
796 auto entry = tokens.begin();
797 if (try_consume(SYM_DOT)) {
798 auto name = consume_word();
799 auto call_args = vector<AnyExpr>{};
800 call_args.emplace_back(move(receiver));
801 if (try_consume(SYM_LPAREN)) { // A call with a dot `a.b(...)`
802 collect_with_commas_until(SYM_RPAREN, &Parser::parse_expr, call_args);
803 auto call = ast_ptr<CallExpr>(entry + 1, name, std::nullopt, move(call_args));
804 return parse_receiver(move(call), receiver_entry);
805 }
806 if (try_consume(SYM_EQ)) { // A setter `a.b = ...`
807 auto value = parse_expr();
808 call_args.emplace_back(move(value));
809 auto call = ast_ptr<CallExpr>(entry + 1, name + '=', std::nullopt, move(call_args));
810 return parse_receiver(move(call), receiver_entry);
811 }
812 auto noarg_call = ast_ptr<CallExpr>(receiver_entry, name, std::nullopt, move(call_args));
813 return parse_receiver(move(noarg_call), receiver_entry);
814 }
815 if (try_consume(SYM_EQ)) {
816 auto value = parse_expr();
817 auto assign = ast_ptr<AssignExpr>(receiver_entry, move(receiver), move(value));
818 return parse_receiver(move(assign), receiver_entry);
819 }
820 if (try_consume(SYM_LBRACKET)) {
821 auto args = vector<AnyExpr>{};
822 args.emplace_back(move(receiver));
823 args.emplace_back(parse_expr());
824 consume(SYM_RBRACKET);
825 if (try_consume(SYM_EQ)) {
826 auto value = parse_expr();
827 args.emplace_back(move(value));
828 auto call = ast_ptr<CallExpr>(entry, "[]=", std::nullopt, move(args));
829 return parse_receiver(move(call), receiver_entry);
830 }
831 auto call = ast_ptr<CallExpr>(entry, "[]", std::nullopt, move(args));
832 return parse_receiver(move(call), receiver_entry);
833 }
834 if (try_consume(SYM_COLON_COLON)) {
835 auto field = consume_word();
836 auto access = ast_ptr<FieldAccessExpr>(receiver_entry, move(receiver), field);
837 return parse_receiver(move(access), receiver_entry);
838 }
839 if (try_consume(SYM_ARROW)) {
840 consume(SYM_LPAREN);
841 auto call_args = vector<AnyExpr>{};
842 call_args.emplace_back(move(receiver));
843 collect_with_commas_until(SYM_RPAREN, &Parser::parse_expr, call_args);
844 auto call = ast_ptr<CallExpr>(entry, "->", std::nullopt, move(call_args));
845 return parse_receiver(move(call), receiver_entry);
846 }
847 return receiver;
848}
849
850auto Parser::parse_lambda() -> unique_ptr<LambdaExpr> {
851 auto entry = tokens.begin();
852
853 consume(KWD_DEF);
854
855 auto annotations = std::set<string>{};
856 while (try_consume(SYM_AT))
857 annotations.emplace(consume_word());
858
859 consume(SYM_LPAREN);
860
861 auto args = vector<TypeName>{};
862 consume_with_commas_until(SYM_RPAREN, [&] {
863 auto arg = parse_type_name();
864 args.emplace_back(move(*arg));
865 });
866
867 auto ret_type = OptionalType{try_parse_type()};
868 auto body_begin = entry;
869 auto body = vector<AnyStmt>{};
870
871 if (try_consume(SYM_EQ)) { // A "short" function definition, consists of a single expression
872 body_begin = tokens.begin();
873 auto expr = parse_expr();
874 body.emplace_back(ast_ptr<ReturnStmt>(entry, move(expr)));
875 } else {
876 if (!try_peek(0, KWD_END)) // Allow `end` to be on the same line
877 require_separator();
878
879 body_begin = tokens.begin();
880 while (!try_consume(KWD_END)) {
881 body.emplace_back(parse_stmt());
882 ignore_separator();
883 }
884 }
885
886 return ast_ptr<LambdaExpr>(entry, move(args), move(ret_type), make_ast<Compound>(body_begin, move(body)),
887 move(annotations));
888}
889
891 if (tokens->is_a(KWD_DEF))
892 return parse_lambda();
893
894 auto entry = tokens.begin();
895 return parse_receiver(parse_primary(), entry);
896}
897
899 auto entry = tokens.begin();
900 for (const auto& un_op : unary_operators()) {
901 if (try_consume(un_op)) {
902 auto value = parse_receiver();
903 auto args = vector<AnyExpr>{};
904 args.emplace_back(move(value));
905 return ast_ptr<CallExpr>(entry, string(std::get<Atom>(un_op)), std::nullopt, move(args));
906 }
907 }
908 return parse_receiver();
909}
910} // namespace yume::ast::parser
911
912namespace yume::ast {
913auto Program::parse(TokenIterator& tokens, diagnostic::NotesHolder& notes) -> unique_ptr<Program> {
914 auto parser = parser::Parser{tokens, notes};
915 parser.ignore_separator();
916 auto entry = tokens.begin();
917
918 auto statements = vector<AnyStmt>{};
919 while (tokens->type != Token::Type::EndOfFile)
920 statements.emplace_back(parser.parse_stmt());
921 tokens++; // Consume the EOF token
922
923 return make_unique<Program>(parser.ts(entry), move(statements));
924}
925} // namespace yume::ast
Atoms represent strings in a string pool.
Definition: atom.hpp:12
An iterator-like holding Tokens, used when parsing.
Definition: parser.hpp:31
auto at_end() const noexcept -> bool
Check if the iterator is at the end and no more Tokens could possibly be read.
Definition: parser.hpp:40
static const TokenAtom KWD_TYPE
Definition: parser.hpp:93
static const TokenAtom KWD_DEF
Definition: parser.hpp:81
static const TokenAtom SYM_RBRACKET
Definition: parser.hpp:113
static const TokenAtom SYM_DOLLAR
Definition: parser.hpp:132
static const TokenAtom KWD_SELF_ITEM
Definition: parser.hpp:89
static const TokenAtom KWD_IF
Definition: parser.hpp:79
static const TokenAtom SYM_COMMA
Definition: parser.hpp:106
static const TokenAtom SYM_COLON
Definition: parser.hpp:127
static const TokenAtom KWD_INTERFACE
Definition: parser.hpp:100
static const TokenAtom KWD_LET
Definition: parser.hpp:83
static const TokenAtom KWD_STRUCT
Definition: parser.hpp:97
static const TokenAtom KWD_VARARGS
Definition: parser.hpp:103
static const TokenAtom SYM_LBRACE
Definition: parser.hpp:114
static const TokenAtom KWD_PRIMITIVE
Definition: parser.hpp:104
static const TokenAtom SYM_AT
Definition: parser.hpp:109
static constexpr auto Word
Definition: parser.hpp:76
static const TokenAtom KWD_THEN
Definition: parser.hpp:91
static const TokenAtom SYM_RPAREN
Definition: parser.hpp:111
static const TokenAtom KWD_CONST
Definition: parser.hpp:96
static const TokenAtom SYM_DOT
Definition: parser.hpp:107
static const TokenAtom SYM_LPAREN
Definition: parser.hpp:110
static const TokenAtom KWD_PTR
Definition: parser.hpp:84
static const TokenAtom KWD_NEW
Definition: parser.hpp:87
static const TokenAtom SYM_COLON_COLON
Definition: parser.hpp:128
static const TokenAtom SYM_EQ
Definition: parser.hpp:108
static const TokenAtom KWD_SELF_TYPE
Definition: parser.hpp:90
static const TokenAtom KWD_WHILE
Definition: parser.hpp:95
static const TokenAtom KWD_TRUE
Definition: parser.hpp:92
static const TokenAtom SYM_BANG
Definition: parser.hpp:126
static const TokenAtom SYM_ARROW
Definition: parser.hpp:131
static const TokenAtom KWD_ABSTRACT
Definition: parser.hpp:99
static const TokenAtom KWD_END
Definition: parser.hpp:82
static const TokenAtom KWD_IS
Definition: parser.hpp:80
static const TokenAtom KWD_EXTERN
Definition: parser.hpp:102
static const TokenAtom SYM_AND_AND
Definition: parser.hpp:130
static const TokenAtom KWD_ELSE
Definition: parser.hpp:88
static const TokenAtom KWD_RETURN
Definition: parser.hpp:98
static const TokenAtom KWD_MUT
Definition: parser.hpp:85
static const TokenAtom SYM_RBRACE
Definition: parser.hpp:115
static const TokenAtom KWD_REF
Definition: parser.hpp:86
static const TokenAtom KWD_FALSE
Definition: parser.hpp:94
static const TokenAtom SYM_LBRACKET
Definition: parser.hpp:112
std::pair< Token::Type, Atom > TokenAtom
Definition: parser.hpp:74
static const TokenAtom SYM_OR_OR
Definition: parser.hpp:129
static constexpr auto Symbol
Definition: parser.hpp:77
Definition: ast.cpp:8
vector< Token >::iterator VectorTokenIterator
Definition: parser.hpp:23
auto make_atom(std::string_view value) noexcept -> Atom
Create an Atom with the given string content.
Definition: atom.hpp:34
auto at(const source_location location=source_location::current()) -> std::string
A categorized token in source code, created by the tokenizer. These tokens are consumed by the parser.
Definition: token.hpp:80
@ Char
A character literal, beginning with ?
@ EndOfFile
A token added at the very end of the file.
@ Literal
A string literal, enclosed in quotes.
static auto constexpr type_name(Type type) -> const char *
Definition: token.hpp:91
A statement consisting of multiple other statements, i.e. the body of a function.
Definition: ast.hpp:712
{ string name extern_decl_t
Definition: ast.hpp:765
{} abstract_decl_t
Definition: ast.hpp:768
static auto parse(TokenIterator &tokens, diagnostic::NotesHolder &notes) -> unique_ptr< Program >
Definition: parser.cpp:913
auto parse_generic_type_params() -> vector< GenericParam >
Definition: parser.cpp:444
auto parse_string_expr() -> unique_ptr< StringExpr >
Definition: parser.cpp:704
void expect(Token::Type token_type, source_location location=source_location::current()) const
If the next token doesn't have the type, token_type, throw a runtime exception.
Definition: parser.cpp:23
auto consume_word(source_location location=source_location::current()) -> string
Return the payload of the next token. Throws if the next token isn't a Word.
Definition: parser.cpp:136
auto parse_number_expr() -> unique_ptr< NumberExpr >
Definition: parser.cpp:692
void consume(TokenAtom token_atom, source_location location=source_location::current())
Consume a token of the given type and payload. Throws if it wasn't encountered.
Definition: parser.cpp:45
auto parse_logical_and() -> unique_ptr< Expr >
Definition: parser.cpp:342
auto parse_receiver() -> unique_ptr< Expr >
Definition: parser.cpp:890
auto try_parse_function_type() -> optional< unique_ptr< FunctionType > >
Definition: parser.cpp:182
TokenIterator & tokens
Definition: parser.hpp:145
auto emit_fatal_and_terminate(int offset=0) const noexcept(false) -> diagnostic::Note
Definition: parser.hpp:159
auto parse_fn_or_ctor_decl() -> unique_ptr< Stmt >
Definition: parser.cpp:464
auto try_parse_type() -> optional< unique_ptr< Type > >
Definition: parser.cpp:264
static auto to_string(Token token) -> string
Definition: parser.cpp:39
auto parse_unary() -> unique_ptr< Expr >
Definition: parser.cpp:898
auto try_consume(TokenAtom token_atom, source_location location=source_location::current()) -> bool
Attempt to consume a token of the given type and payload. Returns false if it wasn't encountered.
Definition: parser.cpp:69
auto parse_if_stmt() -> unique_ptr< IfStmt >
Definition: parser.cpp:638
auto parse_struct_decl() -> unique_ptr< StructDecl >
Definition: parser.cpp:389
auto next(source_location location=source_location::current()) -> Token
Return the next token and increment the iterator.
Definition: parser.cpp:116
auto parse_while_stmt() -> unique_ptr< WhileStmt >
Definition: parser.cpp:606
static constexpr auto Separator
Definition: parser.hpp:190
auto parse_var_decl() -> unique_ptr< VarDecl >
Definition: parser.cpp:578
auto assert_payload_next(source_location location=source_location::current()) -> Atom
Return the payload of the next token and increment the iterator.
Definition: parser.cpp:124
auto parse_fn_arg() -> FnArg
Definition: parser.cpp:427
auto parse_type_name() -> unique_ptr< TypeName >
Definition: parser.cpp:321
auto try_peek(int ahead, TokenAtom token_atom, source_location location=source_location::current()) const -> bool
Check if the token ahead by ahead is of the given type and payload.
Definition: parser.cpp:82
auto parse_stmt(bool require_sep=true) -> unique_ptr< Stmt >
Definition: parser.cpp:157
auto parse_fn_name() -> string
Definition: parser.cpp:354
void require_separator(source_location location=source_location::current())
Consume all subsequent Separator tokens. Throws if none were found.
Definition: parser.cpp:33
auto parse_const_decl() -> unique_ptr< ConstDecl >
Definition: parser.cpp:592
auto parse_lambda() -> unique_ptr< LambdaExpr >
Definition: parser.cpp:850
auto ignore_separator(source_location location=source_location::current()) -> bool
Ignore any Separator tokens if any are present.
Definition: parser.cpp:11
auto parse_expr() -> unique_ptr< Expr >
Definition: parser.cpp:352
auto parse_type(bool implicit_self=false) -> unique_ptr< Type >
Definition: parser.cpp:210
auto parse_ctor_decl() -> unique_ptr< CtorDecl >
Definition: parser.cpp:548
auto parse_fn_decl() -> unique_ptr< FnDecl >
Definition: parser.cpp:470
auto parse_logical_or() -> unique_ptr< Expr >
Definition: parser.cpp:332
auto parse_primary() -> unique_ptr< Expr >
Definition: parser.cpp:722
auto parse_return_stmt() -> unique_ptr< ReturnStmt >
Definition: parser.cpp:626
auto try_peek_uword(int ahead, source_location location=source_location::current()) const -> bool
Check if the token ahead by ahead is a capitalized word.
Definition: parser.cpp:146
auto parse_char_expr() -> unique_ptr< CharExpr >
Definition: parser.cpp:713