#include #include #include #include #include "ast.h" #include "io.h" #include "limits.h" #include "parser.h" #include "scanner.h" #include "strings.h" #define error(...) __error(__VA_ARGS__, NULL) static node_t *parse_expr(parser_t *p); static node_t *parse_stmt_or_block(parser_t *p); static node_t *parse_cond(parser_t *p); static node_t *parse_if(parser_t *p); static node_t *parse_if_let(parser_t *p); static node_t *parse_if_case(parser_t *p); static node_t *parse_block(parser_t *p); static node_t *parse_stmt(parser_t *p); static node_t *parse_type(parser_t *p); static node_t *parse_union(parser_t *p, node_t *attrs); static node_t *parse_record(parser_t *p, node_t *attrs); static node_t *parse_record_type(parser_t *p); static node_t *parse_record_lit(parser_t *p, node_t *type_name); static node_t *parse_postfix(parser_t *p, node_t *expr); static node_t *parse_as_cast(parser_t *p, node_t *expr); static node_t *parse_name_type_value(parser_t *p, nodeclass_t cls); static node_t *parse_static(parser_t *p); static node_t *parse_ident(parser_t *p, const char *error); static node_t *parse_ident_or_placeholder(parser_t *p, const char *error); static node_t *parse_scope_segment(parser_t *p, const char *error); static node_t *parse_label(parser_t *p, const char *error); static node_t *parse_assignment(parser_t *p, node_t *lval); static node_t *parse_fn_call_arg(parser_t *p); static node_t *parse_match(parser_t *p); static node_t *parse_match_case(parser_t *p); static node_t *parse_builtin(parser_t *p); static node_t *parse_throw(parser_t *p); static node_t *parse_try(parser_t *p, bool panic, bool optional); static node_t *parse_panic(parser_t *p); static bool token_is_stmt_terminator(tokenclass_t cls); static bool stmt_requires_semicolon(const node_t *stmt); static bool consume_statement_separator( parser_t *p, node_t *stmt, bool require ); /* Initialize parser. 
*/ void parser_init(parser_t *p) { p->root = NULL; p->errors = 0; p->nnodes = 0; p->nptrs = 0; p->context = PARSE_CTX_NORMAL; } /* Report an error with optional format string. */ static void __error(parser_t *p, const char *fmt, ...) { va_list ap; va_start(ap, fmt); location_t loc = scanner_get_location(&p->scanner, p->current.position); fprintf(stderr, "%s:%u:%u: error: ", loc.file, loc.line, loc.col); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); p->errors++; } /* Check that the current token is equal to the given type. */ static bool check(parser_t *p, tokenclass_t cls) { return p->current.cls == cls; } /* Advance the parser by one token. */ static void advance(parser_t *p) { p->previous = p->current; p->current = scanner_next(&p->scanner); } /* Like `check`, but also advances the parser if it matches. */ static bool consume(parser_t *p, tokenclass_t cls) { if (check(p, cls)) { advance(p); return true; } return false; } /* Like `consume`, but report an error if it doesn't match. */ __nodiscard static bool expect( parser_t *p, tokenclass_t cls, const char *message ) { if (consume(p, cls)) { return true; } error(p, message); return false; } /* Allocate a new AST node. */ static node_t *node(parser_t *p, nodeclass_t cls) { if (p->nnodes >= MAX_NODES) { abort(); } node_t *n = &p->nodes[p->nnodes++]; n->cls = cls; n->type = NULL; n->sym = NULL; n->offset = p->current.position; n->length = p->current.length; n->file = p->scanner.file; return n; } /* Parse a type annotation. * Eg. `i32` or `[i32; 12]` */ static node_t *parse_type(parser_t *p) { /* Parse optional types. */ if (p->current.cls == T_QUESTION) { node_t *opt = node(p, NODE_TYPE); advance(p); /* Consume `?`. */ node_t *elem_type = parse_type(p); if (!elem_type) return NULL; opt->val.type.tclass = TYPE_OPT; opt->val.type.elem_type = elem_type; return opt; } /* Parse pointer types and slice types. */ if (p->current.cls == T_STAR) { advance(p); /* Consume `*`. 
*/ /* Consume `mut` */ bool mut = consume(p, T_MUT); /* Parse slice types like `*[i32]` or `*mut [i32]` */ if (p->current.cls == T_LBRACKET) { node_t *slice = node(p, NODE_TYPE); advance(p); /* Consume `[`. */ node_t *elem_type = parse_type(p); if (!elem_type) return NULL; if (!expect(p, T_RBRACKET, "expected `]` after slice element type")) return NULL; slice->val.type.tclass = TYPE_SLICE; slice->val.type.elem_type = elem_type; slice->val.type.info.slice.mut = mut; return slice; } /* Otherwise it's a pointer type like `*i32` or `*mut i32` */ node_t *ptr = node(p, NODE_TYPE); node_t *elem_type = parse_type(p); if (!elem_type) return NULL; ptr->val.type.tclass = TYPE_PTR; ptr->val.type.elem_type = elem_type; ptr->val.type.info.ptr.mut = mut; return ptr; } /* Parse array types. */ if (p->current.cls == T_LBRACKET) { advance(p); /* Consume `[`. */ /* Get the element type. */ node_t *elem_type = parse_type(p); if (!elem_type) return NULL; /* Expect a semicolon separator. */ if (!expect(p, T_SEMICOLON, "expected `;` in array type")) return NULL; /* Parse the array length. */ node_t *length = parse_expr(p); if (!length) { error(p, "expected array size expression"); return NULL; } /* Expect the closing bracket */ if (!expect(p, T_RBRACKET, "expected `]` after array size")) return NULL; node_t *ary = node(p, NODE_TYPE); ary->val.type.tclass = TYPE_ARRAY; ary->val.type.elem_type = elem_type; ary->val.type.info.array.length = length; return ary; } /* Type identifiers are treated differently, as a concrete type cannot * yet be assigned. 
*/ if (p->current.cls == T_IDENT || p->current.cls == T_SUPER) { node_t *path = parse_scope_segment(p, "expected type identifier or `super`"); if (!path) return NULL; while (consume(p, T_COLON_COLON)) { node_t *next = parse_scope_segment(p, "expected identifier name after `::`"); if (!next) return NULL; node_t *scope = node(p, NODE_SCOPE); scope->val.access.lval = path; scope->val.access.rval = next; path = scope; } return path; } node_t *n = node(p, NODE_TYPE); switch (p->current.cls) { case T_I8: advance(p); n->val.type.tclass = TYPE_I8; return n; case T_I16: advance(p); n->val.type.tclass = TYPE_I16; return n; case T_I32: advance(p); n->val.type.tclass = TYPE_I32; return n; case T_U8: advance(p); n->val.type.tclass = TYPE_U8; return n; case T_U16: advance(p); n->val.type.tclass = TYPE_U16; return n; case T_U32: advance(p); n->val.type.tclass = TYPE_U32; return n; case T_BOOL: advance(p); n->val.type.tclass = TYPE_BOOL; return n; case T_VOID: advance(p); n->val.type.tclass = TYPE_VOID; return n; case T_OPAQUE: advance(p); n->val.type.tclass = TYPE_OPAQUE; return n; case T_FN: { advance(p); /* consume `fn` */ if (!expect(p, T_LPAREN, "expected `(` after `fn`")) return NULL; n->val.type.tclass = TYPE_FN; n->val.type.info.fn.params = nodespan_alloc(p, MAX_FN_PARAMS); n->val.type.info.fn.ret = NULL; n->val.type.info.fn.throws = nodespan_alloc(p, MAX_FN_THROWS); /* Parse parameter types */ if (!check(p, T_RPAREN)) { node_t *param = NULL; do { if (n->val.type.info.fn.params.len >= MAX_FN_PARAMS) { error(p, "too many function pointer parameters"); return NULL; } if (!(param = parse_type(p))) { return NULL; } nodespan_push(p, &n->val.type.info.fn.params, param); } while (consume(p, T_COMMA)); } if (!expect( p, T_RPAREN, "expected `)` after function pointer parameters" )) return NULL; /* Parse return type */ if (consume(p, T_ARROW)) { if (!(n->val.type.info.fn.ret = parse_type(p))) { return NULL; } } if (consume(p, T_THROWS)) { if (!expect(p, T_LPAREN, "expected `(` after 
`throws`")) return NULL; if (!check(p, T_RPAREN)) { do { if (n->val.type.info.fn.throws.len >= MAX_FN_THROWS) { error(p, "maximum number of thrown types exceeded"); return NULL; } node_t *thrown = parse_type(p); if (!thrown) return NULL; nodespan_push(p, &n->val.type.info.fn.throws, thrown); } while (consume(p, T_COMMA)); } if (!expect(p, T_RPAREN, "expected `)` after throws clause")) return NULL; } return n; } default: error(p, "expected type annotation, eg. `i32`, `bool`, etc."); return NULL; } } /* Parse primary expressions. */ static node_t *parse_array_literal(parser_t *p) { node_t *n = NULL; if (check(p, T_RBRACKET)) { /* Empty array `[]` */ n = node(p, NODE_ARRAY_LIT); n->val.array_lit.elems = (nodespan_t){ 0 }; } else { node_t *expr = parse_expr(p); if (!expr) return NULL; /* Check if this is a repeat array [value; count] */ if (consume(p, T_SEMICOLON)) { n = node(p, NODE_ARRAY_REPEAT_LIT); n->val.array_repeat_lit.value = expr; n->val.array_repeat_lit.count = parse_expr(p); if (!n->val.array_repeat_lit.count) return NULL; } else { /* Regular array literal [a, b, ...] */ n = node(p, NODE_ARRAY_LIT); n->val.array_lit.elems = (nodespan_t){ 0 }; nodespan_push(p, &n->val.array_lit.elems, expr); /* Continue parsing remaining elements */ while (consume(p, T_COMMA) && !check(p, T_RBRACKET)) { node_t *elem = parse_expr(p); if (!elem) return NULL; nodespan_push(p, &n->val.array_lit.elems, elem); } } } if (!expect(p, T_RBRACKET, "expected `]` after array elements")) return NULL; return n; } static node_t *parse_builtin(parser_t *p) { node_t *n = node(p, NODE_BUILTIN); /* Token is @identifier, skip the '@' to get the name. 
*/ const char *name = p->current.start + 1; usize length = p->current.length - 1; advance(p); /* consume `@identifier` */ builtin_kind_t kind; if (!strncmp(name, "sizeOf", 6)) { kind = BUILTIN_SIZE_OF; } else if (!strncmp(name, "alignOf", 7)) { kind = BUILTIN_ALIGN_OF; } else if (!strncmp(name, "sliceOf", 7)) { kind = BUILTIN_SLICE_OF; } else { error(p, "unknown builtin `@%.*s`", (int)length, name); return NULL; } if (!expect(p, T_LPAREN, "expected `(` after builtin name")) return NULL; n->val.builtin.kind = kind; n->val.builtin.args = (nodespan_t){ 0 }; /* @sliceOf takes two expression arguments: @sliceOf(ptr, len) */ if (kind == BUILTIN_SLICE_OF) { parse_ctx_t prev = p->context; p->context = PARSE_CTX_NORMAL; node_t *ptr_expr = parse_expr(p); if (!ptr_expr) return NULL; nodespan_push(p, &n->val.builtin.args, ptr_expr); if (!expect( p, T_COMMA, "expected `,` after first argument to @sliceOf" )) return NULL; node_t *len_expr = parse_expr(p); if (!len_expr) return NULL; nodespan_push(p, &n->val.builtin.args, len_expr); p->context = prev; } else { /* @sizeOf and @alignOf take type arguments only. 
*/ node_t *type_arg = parse_type(p); if (!type_arg) return NULL; nodespan_push(p, &n->val.builtin.args, type_arg); } if (!expect(p, T_RPAREN, "expected `)` after builtin argument")) return NULL; return n; } static node_t *parse_primary(parser_t *p) { node_t *n; switch (p->current.cls) { case T_LBRACKET: /* Array literal [a, b, c] */ advance(p); return parse_array_literal(p); case T_NOT: /* Unary not operator */ n = node(p, NODE_UNOP); n->val.unop.op = OP_NOT; advance(p); if (!(n->val.unop.expr = parse_primary(p))) return NULL; return n; case T_RECORD: { advance(p); /* consume `record` */ node_t *rtype = parse_record_type(p); if (!rtype) return NULL; if (p->context == PARSE_CTX_NORMAL && consume(p, T_LBRACE)) { return parse_record_lit(p, rtype); } if (p->context == PARSE_CTX_NORMAL) { error(p, "expected `{` after anonymous record type"); return NULL; } return rtype; } case T_LBRACE: if (p->context == PARSE_CTX_CONDITION) { error(p, "unexpected `{` in this context"); return NULL; } advance(p); /* consume `{` */ return parse_record_lit(p, NULL); case T_MINUS: /* Unary negation operator */ n = node(p, NODE_UNOP); n->val.unop.op = OP_NEG; advance(p); if (!(n->val.unop.expr = parse_primary(p))) return NULL; return n; case T_TILDE: /* Bitwise NOT operator */ n = node(p, NODE_UNOP); n->val.unop.op = OP_BNOT; advance(p); if (!(n->val.unop.expr = parse_primary(p))) return NULL; return n; case T_AMP: n = node(p, NODE_REF); advance(p); n->val.ref.mut = consume(p, T_MUT); if (!(n->val.ref.target = parse_primary(p))) return NULL; return n; case T_STAR: n = node(p, NODE_UNOP); advance(p); n->val.unop.op = OP_DEREF; if (!(n->val.unop.expr = parse_primary(p))) return NULL; return n; case T_NUMBER: n = node(p, NODE_NUMBER); advance(p); n->val.number.text = p->previous.start; n->val.number.text_len = p->previous.length; if (check(p, T_DOT_DOT)) { return parse_postfix(p, n); } return n; case T_CHAR: n = node(p, NODE_CHAR); advance(p); if (p->previous.start[1] == '\\') { switch 
(p->previous.start[2]) { case 'n': n->val.char_lit = '\n'; break; case 't': n->val.char_lit = '\t'; break; case 'r': n->val.char_lit = '\r'; break; case '\'': n->val.char_lit = '\''; break; case '\\': n->val.char_lit = '\\'; break; default: abort(); } } else { n->val.char_lit = p->previous.start[1]; } if (check(p, T_DOT_DOT)) { return parse_postfix(p, n); } return n; case T_STRING: { n = node(p, NODE_STRING); advance(p); /* Account for quotes. */ const char *data = p->previous.start + 1; usize len = p->previous.length - 2; /* Intern string. This escapes the string properly and * NULL-terminates it. */ n->val.string_lit.data = strings_alloc_len(data, len); n->val.string_lit.length = strlen(n->val.string_lit.data); return n; } case T_AT_IDENT: return parse_builtin(p); case T_SUPER: n = node(p, NODE_SUPER); advance(p); if (check(p, T_COLON_COLON)) { return parse_postfix(p, n); } return n; case T_IDENT: n = node(p, NODE_IDENT); n->val.ident.name = p->current.start; n->val.ident.length = p->current.length; advance(p); /* Check for record initializer, eg. `{ x: 1, y: 2 }` */ if (p->context == PARSE_CTX_NORMAL && consume(p, T_LBRACE)) { return parse_record_lit(p, n); } /* Check for field access or array indexing. */ if (check(p, T_DOT) || check(p, T_LBRACKET) || check(p, T_COLON_COLON) || check(p, T_LPAREN) || check(p, T_DOT_DOT)) { return parse_postfix(p, n); } return n; case T_LPAREN: advance(p); /* Inside parentheses, we are in a normal parsing context */ parse_ctx_t prev = p->context; p->context = PARSE_CTX_NORMAL; n = parse_expr(p); p->context = prev; if (!expect(p, T_RPAREN, "expected closing `)` after expression")) return NULL; /* Check for field access or array indexing. 
*/ if (check(p, T_DOT) || check(p, T_LBRACKET) || check(p, T_DOT_DOT)) { return parse_postfix(p, n); } return n; case T_TRUE: n = node(p, NODE_BOOL); n->val.bool_lit = true; advance(p); return n; case T_FALSE: n = node(p, NODE_BOOL); n->val.bool_lit = false; advance(p); return n; case T_NIL: n = node(p, NODE_NIL); advance(p); return n; case T_UNDEF: n = node(p, NODE_UNDEF); advance(p); return n; case T_UNDERSCORE: n = node(p, NODE_PLACEHOLDER); advance(p); return n; case T_TRY: { advance(p); bool panic = consume(p, T_BANG); bool optional = consume(p, T_QUESTION); node_t *expr = parse_try(p, panic, optional); if (!expr) return NULL; if (check(p, T_DOT) || check(p, T_LBRACKET) || check(p, T_COLON_COLON) || check(p, T_LPAREN) || check(p, T_DOT_DOT)) { return parse_postfix(p, expr); } return expr; } default: error( p, "expected expression, got `%.*s`", p->current.length, p->current.start ); return NULL; } } /* Parse binary expressions with precedence climbing. */ static node_t *parse_binary(parser_t *p, node_t *left, int precedence) { /* Operator precedence table. */ static const struct { tokenclass_t tok; binop_t op; int prec; } ops[] = { /* Arithmetic operators (higher precedence). */ { T_PLUS, OP_ADD, 6 }, { T_MINUS, OP_SUB, 6 }, { T_STAR, OP_MUL, 7 }, { T_SLASH, OP_DIV, 7 }, { T_PERCENT, OP_MOD, 7 }, /* Shift operators. */ { T_LSHIFT, OP_SHL, 5 }, { T_RSHIFT, OP_SHR, 5 }, /* Bitwise operators. */ { T_AMP, OP_BAND, 4 }, { T_CARET, OP_XOR, 3 }, { T_PIPE, OP_BOR, 2 }, /* Comparison operators. */ { T_EQ_EQ, OP_EQ, 1 }, { T_BANG_EQ, OP_NE, 1 }, { T_LT, OP_LT, 1 }, { T_GT, OP_GT, 1 }, { T_LT_EQ, OP_LE, 1 }, { T_GT_EQ, OP_GE, 1 }, /* Logical operators (lowest precedence). */ { T_AND, OP_AND, 0 }, { T_OR, OP_OR, 0 }, }; for (;;) { int next = -1; binop_t op; /* Find matching operator and its precedence. 
*/ for (usize i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) { if (check(p, ops[i].tok) && ops[i].prec > precedence) { if (next == -1 || ops[i].prec < next) { next = ops[i].prec; op = ops[i].op; } } } if (next == -1) break; /* Consume the operator token. */ advance(p); /* Parse the right operand. */ node_t *right = parse_primary(p); if (!right) return NULL; /* Handle `as` casts on the right operand */ while (check(p, T_AS)) { right = parse_as_cast(p, right); if (!right) return NULL; } /* Look for higher precedence operators. */ for (usize i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) { if (check(p, ops[i].tok) && ops[i].prec > next) { right = parse_binary(p, right, next); break; } } /* Build binary expression node. */ node_t *binop = node(p, NODE_BINOP); binop->offset = left->offset; binop->length = right->offset + right->length - left->offset; binop->val.binop.op = op; binop->val.binop.left = left; binop->val.binop.right = right; left = binop; } return left; } /* Parse an `if let` statement. * Syntax: if let x in (expr) { ... } else { ... } */ static node_t *parse_if_let(parser_t *p) { /* Consume 'let' */ if (!expect(p, T_LET, "expected 'let'")) return NULL; /* Check for `if let case` syntax. */ if (check(p, T_CASE)) { return parse_if_case(p); } node_t *n = node(p, NODE_IF_LET); /* Parse identifier or placeholder */ if (consume(p, T_UNDERSCORE)) { n->val.if_let_stmt.var = node(p, NODE_PLACEHOLDER); } else if (expect(p, T_IDENT, "expected identifier or '_' after 'let'")) { n->val.if_let_stmt.var = node(p, NODE_IDENT); n->val.if_let_stmt.var->val.ident.name = p->previous.start; n->val.if_let_stmt.var->val.ident.length = p->previous.length; } else { return NULL; } n->val.if_let_stmt.guard = NULL; /* Expect '=' */ if (!expect(p, T_EQ, "expected `=` after identifier")) return NULL; /* Parse expression yielding an optional. */ n->val.if_let_stmt.expr = parse_cond(p); if (!n->val.if_let_stmt.expr) return NULL; /* Optional boolean guard. 
*/ if (consume(p, T_SEMICOLON)) { n->val.if_let_stmt.guard = parse_cond(p); } /* Parse the 'then' branch */ n->val.if_let_stmt.lbranch = parse_block(p); if (!n->val.if_let_stmt.lbranch) return NULL; /* Parse optional 'else' branch */ if (consume(p, T_ELSE)) { /* Check for `else if` construct. */ if (check(p, T_IF)) { advance(p); /* Consume the 'if' token. */ /* Create a block to hold the nested if statement. */ node_t *block = node(p, NODE_BLOCK); block->val.block.stmts = (nodespan_t){ 0 }; node_t *nested_if = parse_if(p); if (!nested_if) return NULL; /* Add the nested if as a statement in the block. */ nodespan_push(p, &block->val.block.stmts, nested_if); /* Set the block as the else branch. */ n->val.if_let_stmt.rbranch = block; } else { /* Regular else clause. */ n->val.if_let_stmt.rbranch = parse_block(p); } } else { n->val.if_let_stmt.rbranch = NULL; } return n; } /* Parse an `if let case` statement. Called after 'let' has been consumed. */ static node_t *parse_if_case(parser_t *p) { node_t *n = node(p, NODE_IF_CASE); if (!expect(p, T_CASE, "expected 'case'")) return NULL; parse_ctx_t pctx = p->context; p->context = PARSE_CTX_NORMAL; node_t *pattern = parse_primary(p); p->context = pctx; if (!pattern) return NULL; n->val.if_case_stmt.pattern = pattern; if (!expect(p, T_EQ, "expected `=` after pattern")) return NULL; n->val.if_case_stmt.expr = parse_cond(p); if (!n->val.if_case_stmt.expr) return NULL; n->val.if_case_stmt.guard = NULL; if (consume(p, T_SEMICOLON)) { n->val.if_case_stmt.guard = parse_cond(p); if (!n->val.if_case_stmt.guard) return NULL; } n->val.if_case_stmt.lbranch = parse_block(p); if (!n->val.if_case_stmt.lbranch) return NULL; if (consume(p, T_ELSE)) { if (check(p, T_IF)) { advance(p); node_t *block = node(p, NODE_BLOCK); block->val.block.stmts = (nodespan_t){ 0 }; node_t *nested_if = parse_if(p); if (!nested_if) return NULL; nodespan_push(p, &block->val.block.stmts, nested_if); n->val.if_case_stmt.rbranch = block; } else { 
n->val.if_case_stmt.rbranch = parse_block(p); if (!n->val.if_case_stmt.rbranch) return NULL; } } else { n->val.if_case_stmt.rbranch = NULL; } return n; } /* Parse a `let case` statement: * `let case PATTERN = EXPR [; GUARD] else { ... };` */ static node_t *parse_let_case(parser_t *p) { node_t *n = node(p, NODE_GUARD_CASE); usize start = p->previous.position; parse_ctx_t pctx = p->context; p->context = PARSE_CTX_NORMAL; node_t *pattern = parse_primary(p); p->context = pctx; if (!pattern) return NULL; n->val.guard_case_stmt.pattern = pattern; if (!expect(p, T_EQ, "expected `=` after pattern")) return NULL; if (!(n->val.guard_case_stmt.expr = parse_cond(p))) return NULL; n->val.guard_case_stmt.guard = NULL; if (consume(p, T_IF)) { if (!(n->val.guard_case_stmt.guard = parse_cond(p))) return NULL; } if (!expect(p, T_ELSE, "expected `else` after pattern")) return NULL; if (!(n->val.guard_case_stmt.rbranch = parse_stmt_or_block(p))) return NULL; n->offset = start; n->length = p->previous.position + p->previous.length - start; return n; } /* Parse an `if` expression, with optional `else` or `else if` clauses. * `else if` is desugared into a nested if inside a block. */ static node_t *parse_if(parser_t *p) { /* Check for `if let` or `if let case` syntax */ if (check(p, T_LET)) { return parse_if_let(p); } /* Regular if statement */ node_t *n = node(p, NODE_IF); n->val.if_stmt.cond = parse_cond(p); if (!n->val.if_stmt.cond) return NULL; n->val.if_stmt.lbranch = parse_block(p); if (!n->val.if_stmt.lbranch) return NULL; if (consume(p, T_ELSE)) { /* Check for `else if` construct. */ if (check(p, T_IF)) { advance(p); /* Consume the 'if' token. */ /* Create a block to hold the nested if statement. */ node_t *block = node(p, NODE_BLOCK); block->val.block.stmts = (nodespan_t){ 0 }; node_t *nested_if = parse_if(p); if (!nested_if) return NULL; /* Add the nested if as a statement in the block. 
*/ nodespan_push(p, &block->val.block.stmts, nested_if); /* Set the block as the else branch. */ n->val.if_stmt.rbranch = block; } else { /* Regular else clause. */ n->val.if_stmt.rbranch = parse_block(p); } } else { n->val.if_stmt.rbranch = NULL; } return n; } /* Parse a match statement. */ static node_t *parse_match(parser_t *p) { node_t *n = node(p, NODE_MATCH); n->val.match_stmt.cases = (nodespan_t){ 0 }; /* Parse the expression to match on */ if (!(n->val.match_stmt.expr = parse_cond(p))) return NULL; if (!expect(p, T_LBRACE, "expected '{' before match cases")) return NULL; /* Parse cases until we reach the end of the match block */ while (!check(p, T_RBRACE) && !check(p, T_EOF)) { node_t *case_node = parse_match_case(p); if (!case_node) return NULL; if (!nodespan_push(p, &n->val.match_stmt.cases, case_node)) { error(p, "too many cases in match statement"); return NULL; } /* Consume the comma separating cases if present */ bool consumed = consume(p, T_COMMA); (void)consumed; } if (!expect(p, T_RBRACE, "expected '}' after match cases")) return NULL; return n; } /* Parse a single match case. 
*/
/* Grammar handled here:
 *
 *     case PATTERN [, PATTERN ...] [if GUARD] => STMT
 *     else => STMT
 *
 * An `else` arm is represented by an empty pattern list. Patterns are
 * parsed in the normal context, whatever context the caller is in.
 * Returns NULL after reporting an error. */
static node_t *parse_match_case(parser_t *p) {
    node_t *arm = node(p, NODE_MATCH_CASE);

    arm->val.match_case.patterns = (nodespan_t){ 0 };
    arm->val.match_case.guard    = NULL;

    /* Anything other than `else` must be a `case` arm with patterns. */
    if (!consume(p, T_ELSE)) {
        if (!expect(p, T_CASE, "expected 'case' at start of match case"))
            return NULL;

        /* One pattern, then one more after every comma. */
        for (;;) {
            parse_ctx_t saved = p->context;

            p->context   = PARSE_CTX_NORMAL;
            node_t *pat  = parse_primary(p);
            p->context   = saved;

            if (pat == NULL)
                return NULL;

            if (!nodespan_push(p, &arm->val.match_case.patterns, pat)) {
                error(p, "too many patterns in case statement");
                return NULL;
            }
            if (!consume(p, T_COMMA))
                break;
        }

        /* Optional guard: `case X if cond => ...` */
        if (consume(p, T_IF)) {
            arm->val.match_case.guard = parse_cond(p);
            if (arm->val.match_case.guard == NULL)
                return NULL;
        }
    }

    if (!expect(p, T_FAT_ARROW, "expected `=>` after case pattern"))
        return NULL;

    arm->val.match_case.body = parse_stmt(p);

    return arm->val.match_case.body != NULL ? arm : NULL;
}

/* Parse a record declaration.
*/ static node_t *parse_record(parser_t *p, node_t *attrs) { node_t *n = node(p, NODE_RECORD); n->val.record_decl.attribs = attrs; n->val.record_decl.fields = (nodespan_t){ 0 }; n->val.record_decl.tuple = false; n->val.record_decl.name = parse_ident(p, "expected record name"); if (!n->val.record_decl.name) return NULL; if (consume(p, T_LPAREN)) { n->val.record_decl.tuple = true; if (!check(p, T_RPAREN)) { do { node_t *field = node(p, NODE_RECORD_FIELD); field->val.var.ident = NULL; /* No field name for tuples */ field->val.var.type = parse_type(p); field->val.var.value = NULL; field->val.var.align = NULL; if (!field->val.var.type) return NULL; if (!nodespan_push(p, &n->val.record_decl.fields, field)) { error(p, "too many record fields"); return NULL; } } while (consume(p, T_COMMA) && !check(p, T_RPAREN)); } if (!expect(p, T_RPAREN, "expected `)` after record fields")) return NULL; /* Unlabeled records must end with semicolon */ if (!expect(p, T_SEMICOLON, "expected `;` after record declaration")) return NULL; } else { /* Record with named fields */ if (!expect(p, T_LBRACE, "expected `{` before record body")) return NULL; node_t *field; do { if (!(field = parse_name_type_value(p, NODE_RECORD_FIELD))) return NULL; if (!nodespan_push(p, &n->val.record_decl.fields, field)) { error(p, "too many record fields"); return NULL; } } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); if (!expect(p, T_RBRACE, "expected `}`")) return NULL; } return n; } static node_t *parse_record_type(parser_t *p) { node_t *n = node(p, NODE_RECORD_TYPE); n->val.record_type.fields = (nodespan_t){ 0 }; if (!expect(p, T_LBRACE, "expected `{` after `record`")) return NULL; if (!check(p, T_RBRACE)) { do { node_t *field = parse_name_type_value(p, NODE_RECORD_FIELD); if (!field) return NULL; if (field->val.var.value) { error(p, "anonymous record fields cannot have initializers"); return NULL; } if (!nodespan_push(p, &n->val.record_type.fields, field)) { error(p, "too many record fields"); return 
NULL; } } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); } if (!expect(p, T_RBRACE, "expected `}` after record fields")) return NULL; return n; } /* Parse a single record literal field (labeled or shorthand). */ static node_t *parse_record_lit_field(parser_t *p) { node_t *n = node(p, NODE_RECORD_LIT_FIELD); usize start = p->current.position; record_lit_field_t *field = &n->val.record_lit_field; /* Field must start with an identifier. */ node_t *name = parse_ident(p, "expected field name"); if (!name) return NULL; if (consume(p, T_COLON)) { /* Labeled field: `name: value` */ field->name = name; field->value = parse_expr(p); if (!field->value) return NULL; } else { /* Shorthand syntax: `{ x }` is equivalent to `{ x: x }` */ field->name = name; field->value = name; } n->offset = start; n->length = p->previous.position + p->previous.length - start; return n; } /* Parse a record literal expression (e.g., Point { x: 1, y: 2 }) * Also handles pattern syntax: Variant { .. } to discard all fields */ static node_t *parse_record_lit(parser_t *p, node_t *type_name) { node_t *n = node(p, NODE_RECORD_LIT); n->val.record_lit.type = type_name; n->val.record_lit.fields = (nodespan_t){ 0 }; n->val.record_lit.etc = false; do { /* Check for `..` to discard remaining fields. */ if (consume(p, T_DOT_DOT)) { n->val.record_lit.etc = true; break; } node_t *field = parse_record_lit_field(p); if (!field) return NULL; if (!nodespan_push(p, &n->val.record_lit.fields, field)) { error(p, "too many record fields"); return NULL; } } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); if (!expect(p, T_RBRACE, "expected '}' to end record literal")) return NULL; return n; } /* Parse a union declaration. * Eg. 
`union Color { Red, Green, Blue = 5 }` */ static node_t *parse_union(parser_t *p, node_t *attrs) { node_t *n = node(p, NODE_UNION); n->val.union_decl.attribs = attrs; n->val.union_decl.variants = (nodespan_t){ 0 }; n->val.union_decl.name = parse_ident(p, "expected union name"); if (!n->val.union_decl.name) return NULL; /* Parse union body with { ... } */ if (!expect(p, T_LBRACE, "expected `{` before union body")) return NULL; /* Parse union variants. */ if (!check(p, T_RBRACE)) { do { /* Allow optional `case` keyword before variant name. */ consume(p, T_CASE); /* Parse variant name. */ node_t *variant_name = parse_ident(p, "expected variant name"); if (!variant_name) return NULL; node_t *v = node(p, NODE_UNION_VARIANT); union_variant_t *variant = &v->val.union_variant; variant->name = variant_name; variant->type = NULL; variant->value_expr = NULL; if (consume(p, T_LPAREN)) { /* Tuple-like variant: Foo(Type) */ node_t *payload = parse_type(p); if (!payload) return NULL; variant->type = payload; if (!expect(p, T_RPAREN, "expected `)` after variant type")) return NULL; } else if (check(p, T_LBRACE)) { /* Struct-like variant: Bar { x: i32, y: i32 } */ node_t *payload = parse_record_type(p); if (!payload) return NULL; variant->type = payload; } else { /* Check for explicit value assignment. */ if (consume(p, T_EQ)) { if (!expect( p, T_NUMBER, "expected integer literal after `=`" )) return NULL; token_t literal_tok = p->previous; node_t *literal = node(p, NODE_NUMBER); literal->offset = literal_tok.position; literal->length = literal_tok.length; literal->val.number.text = literal_tok.start; literal->val.number.text_len = literal_tok.length; literal->val.number.value = (imm_t){ 0 }; variant->value_expr = literal; } else { /* Auto-assign value. */ } } /* Add variant to declaration node. */ if (!nodespan_push(p, &n->val.union_decl.variants, v)) { error(p, "too many union variants"); return NULL; } /* Allow trailing comma. 
*/ } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); } if (!expect(p, T_RBRACE, "expected `}`")) return NULL; return n; } /* Parse a code block or an expression */ static node_t *parse_stmt_or_block(parser_t *p) { if (check(p, T_LBRACE)) { return parse_block(p); } node_t *stmt = parse_stmt(p); node_t *blk = node(p, NODE_BLOCK); blk->val.block.stmts = (nodespan_t){ 0 }; nodespan_push(p, &blk->val.block.stmts, stmt); return blk; } /* Parse a code block, enclosed by `{}`. */ static node_t *parse_block(parser_t *p) { if (!expect(p, T_LBRACE, "expected '{' before block")) { return NULL; } node_t *n = node(p, NODE_BLOCK); node_t *stmt; /* Parse statements. */ n->val.block.stmts = (nodespan_t){ 0 }; while (!check(p, T_RBRACE) && !check(p, T_EOF)) { usize start = p->current.position; if (!(stmt = parse_stmt(p))) return NULL; if (!consume_statement_separator(p, stmt, true)) return NULL; stmt->offset = start; stmt->length = p->current.position - start; if (!nodespan_push(p, &n->val.block.stmts, stmt)) { error(p, "too many statements in block"); return NULL; } } if (!expect(p, T_RBRACE, "expected matching '}' after block")) return NULL; return n; } /* Parse an expression. */ static node_t *parse_expr(parser_t *p) { node_t *lval; if ((lval = parse_primary(p)) == NULL) return NULL; /* Handle `as` casts before binary operators (higher precedence than * binary ops, lower than unary) */ while (check(p, T_AS)) { lval = parse_as_cast(p, lval); if (!lval) return NULL; } lval = parse_binary(p, lval, -1); return lval; } /* Parse an assignment statement. */ static node_t *parse_assignment(parser_t *p, node_t *lval) { /* We've already verified this is an assignment. 
*/ if (lval->cls != NODE_IDENT && lval->cls != NODE_ACCESS && lval->cls != NODE_ARRAY_INDEX && !(lval->cls == NODE_UNOP && lval->val.unop.op == OP_DEREF)) { error( p, "can't assign to `%.*s`", lval->length, &p->scanner.source[lval->offset] ); return NULL; } node_t *rval; if (!(rval = parse_expr(p))) return NULL; node_t *assign = node(p, NODE_ASSIGN); assign->val.assign.lval = lval; assign->val.assign.rval = rval; return assign; } /* Parse a condition. */ static node_t *parse_cond(parser_t *p) { parse_ctx_t prev = p->context; p->context = PARSE_CTX_CONDITION; node_t *cond = parse_expr(p); if (!cond) { p->context = prev; return NULL; } p->context = prev; return cond; } static bool token_is_stmt_terminator(tokenclass_t cls) { switch (cls) { case T_SEMICOLON: case T_RBRACE: case T_COMMA: case T_CASE: case T_ELSE: case T_EOF: return true; default: return false; } } static bool stmt_requires_semicolon(const node_t *stmt) { switch (stmt->cls) { case NODE_IF: case NODE_IF_LET: case NODE_IF_CASE: case NODE_WHILE: case NODE_WHILE_LET: case NODE_LOOP: case NODE_FOR: case NODE_MATCH: case NODE_BLOCK: case NODE_FN: case NODE_RECORD: case NODE_UNION: return false; default: return true; } } static bool consume_statement_separator( parser_t *p, node_t *stmt, bool require ) { if (stmt_requires_semicolon(stmt)) { return expect(p, T_SEMICOLON, "expected `;` after statement"); } if (require) consume(p, T_SEMICOLON); return true; } /* Parse a `return` statement. */ static node_t *parse_return(parser_t *p) { node_t *n = node(p, NODE_RETURN); if (!token_is_stmt_terminator(p->current.cls)) { n->val.return_stmt.value = parse_expr(p); if (!n->val.return_stmt.value) return NULL; } else { n->val.return_stmt.value = NULL; /* Return void. */ } return n; } static node_t *parse_throw(parser_t *p) { node_t *n = node(p, NODE_THROW); if (!(n->val.throw_stmt.expr = parse_expr(p))) return NULL; return n; } /* Parse a `break` statement. 
*/ static node_t *parse_break(parser_t *p) { node_t *n = node(p, NODE_BREAK); return n; } /* Parse a `for` statement. */ static node_t *parse_for(parser_t *p) { node_t *n = node(p, NODE_FOR); n->val.for_stmt.rbranch = NULL; n->val.for_stmt.idx = NULL; /* Parse the loop variable name or placeholder */ if (!(n->val.for_stmt.var = parse_ident_or_placeholder(p, "expected identifier or '_'"))) return NULL; /* Check for optional index variable: `for x, i in xs` */ if (consume(p, T_COMMA)) { /* Parse the index variable name or placeholder */ if (!(n->val.for_stmt.idx = parse_ident_or_placeholder( p, "expected index identifier or '_' after comma" ))) return NULL; } if (!expect(p, T_IN, "expected `in`")) return NULL; if (!(n->val.for_stmt.iter = parse_cond(p))) return NULL; if (!(n->val.for_stmt.body = parse_block(p))) return NULL; /* Parse optional `else` clause */ if (consume(p, T_ELSE)) { if (!(n->val.for_stmt.rbranch = parse_block(p))) return NULL; } return n; } /* Parse a `while let` statement. */ static node_t *parse_while_let(parser_t *p) { if (!expect(p, T_LET, "expected `let`")) return NULL; node_t *n = node(p, NODE_WHILE_LET); /* Parse identifier or placeholder */ if (consume(p, T_UNDERSCORE)) { n->val.while_let_stmt.var = node(p, NODE_PLACEHOLDER); } else if (expect(p, T_IDENT, "expected identifier or '_' after `let`")) { n->val.while_let_stmt.var = node(p, NODE_IDENT); n->val.while_let_stmt.var->val.ident.name = p->previous.start; n->val.while_let_stmt.var->val.ident.length = p->previous.length; } else { return NULL; } n->val.while_let_stmt.guard = NULL; n->val.while_let_stmt.rbranch = NULL; if (!expect(p, T_EQ, "expected `=` after identifier")) return NULL; /* Parse expression yielding an optional. */ n->val.while_let_stmt.expr = parse_cond(p); if (!n->val.while_let_stmt.expr) return NULL; /* Optional guard condition after semicolon. 
*/ if (consume(p, T_SEMICOLON)) { if (!(n->val.while_let_stmt.guard = parse_cond(p))) return NULL; } /* Parse the loop body and optional 'else' branch */ if (!(n->val.while_let_stmt.body = parse_block(p))) return NULL; if (consume(p, T_ELSE)) { if (!(n->val.while_let_stmt.rbranch = parse_block(p))) return NULL; } return n; } /* Parse a `while` statement. */ static node_t *parse_while(parser_t *p) { /* Check for `while let` syntax */ if (check(p, T_LET)) { return parse_while_let(p); } node_t *n = node(p, NODE_WHILE); n->val.while_stmt.rbranch = NULL; if (!(n->val.while_stmt.cond = parse_cond(p))) return NULL; if (!(n->val.while_stmt.body = parse_block(p))) return NULL; /* Parse optional else clause */ if (consume(p, T_ELSE)) { if (!(n->val.while_stmt.rbranch = parse_block(p))) return NULL; } return n; } /* Parse a `loop` statement. */ static node_t *parse_loop(parser_t *p) { node_t *n = node(p, NODE_LOOP); if (!(n->val.loop_stmt.body = parse_block(p))) return NULL; return n; } static node_t *parse_try(parser_t *p, bool panic, bool optional) { node_t *n = node(p, NODE_TRY); n->val.try_expr.expr = NULL; n->val.try_expr.catch_expr = NULL; n->val.try_expr.handlers = nodespan_alloc(p, MAX_TRY_CATCHES); n->val.try_expr.panic = panic; n->val.try_expr.optional = optional; if (!(n->val.try_expr.expr = parse_primary(p))) return NULL; /* Parse catch clause: `catch { ... }` or `catch e { ... }` */ if (consume(p, T_CATCH)) { node_t *catch_node = node(p, NODE_CATCH); catch_node->val.catch_clause.binding = NULL; catch_node->val.catch_clause.body = NULL; catch_node->val.catch_clause.scope = NULL; /* Check for error binding: `catch e { ... 
}` */ if (check(p, T_IDENT)) { node_t *binding = node(p, NODE_IDENT); binding->val.ident.name = p->current.start; binding->val.ident.length = p->current.length; catch_node->val.catch_clause.binding = binding; advance(p); } if (!check(p, T_LBRACE)) { error(p, "expected `{` after `catch`"); return NULL; } if (!(catch_node->val.catch_clause.body = parse_block(p))) return NULL; n->val.try_expr.catch_expr = catch_node; } return n; } static node_t *parse_panic(parser_t *p) { node_t *panic = node(p, NODE_PANIC); /* `panic { "Something's wrong!" }` */ if (consume(p, T_LBRACE)) { node_t *expr = parse_expr(p); if (!(panic->val.panic_stmt.message = expr)) return NULL; if (!expect(p, T_RBRACE, "expected closing `}` after expression")) return NULL; return panic; } if (token_is_stmt_terminator(p->current.cls)) { panic->val.panic_stmt.message = NULL; return panic; } node_t *expr = parse_expr(p); if (!(panic->val.panic_stmt.message = expr)) return NULL; return panic; } /* Parse a name, type, and optional value. * * Used for record field declarations, variable declarations, and record field * initializations. */ static node_t *parse_name_type_value(parser_t *p, nodeclass_t cls) { node_t *n = node(p, cls); usize start = p->current.position; node_t *type = NULL; bool is_typed = false; n->val.var.ident = parse_ident_or_placeholder(p, "expected identifier or '_'"); if (!n->val.var.ident) return NULL; if (cls == NODE_VAR) { /* Type annotation is optional for variable declarations. 
*/ if (consume(p, T_COLON)) is_typed = true; } else { if (!expect(p, T_COLON, "expected `:` after identifier")) return NULL; is_typed = true; } if (is_typed) { type = parse_type(p); if (!type) return NULL; if (cls == NODE_VAR) { n->val.var.align = NULL; if (consume(p, T_ALIGN)) { if (!expect(p, T_LPAREN, "expected `(` after `align`")) return NULL; n->val.var.align = node(p, NODE_ALIGN); n->val.var.align->val.align = parse_expr(p); if (!expect(p, T_RPAREN, "expected `)` after expression")) return NULL; } } } else if (cls == NODE_VAR) { n->val.var.align = NULL; } n->val.var.type = type; n->val.var.value = NULL; /* Parse the optional value. */ if (consume(p, T_EQ)) { node_t *value = parse_expr(p); if (!value) return NULL; n->val.var.value = value; } /* Set the node location. */ n->offset = start; n->length = p->previous.position + p->previous.length - start; return n; } /* Parse a variable declaration. */ static node_t *parse_var(parser_t *p, bool mutable) { node_t *var = parse_name_type_value(p, NODE_VAR); if (!var) return NULL; var->val.var.mutable = mutable; /* Parse optional `else` clause. */ if (consume(p, T_ELSE)) { if (mutable) { error(p, "let-else bindings cannot be mutable"); return NULL; } if (!var->val.var.value) { error(p, "let-else requires an initializer"); return NULL; } node_t *rbranch = parse_stmt_or_block(p); if (!rbranch) return NULL; var->cls = NODE_GUARD_LET; var->val.guard_let_stmt.var = var->val.var.ident; var->val.guard_let_stmt.expr = var->val.var.value; var->val.guard_let_stmt.rbranch = rbranch; var->length = p->previous.position + p->previous.length - var->offset; return var; } var->length = p->previous.position + p->previous.length - var->offset; return var; } /* Parse a static variable declaration. 
*/ static node_t *parse_static(parser_t *p) { node_t *n = node(p, NODE_STATIC); usize start = p->previous.position; node_t *ident = parse_label(p, "expected identifier in static declaration"); if (!ident) return NULL; node_t *type = parse_type(p); if (!type) return NULL; if (!expect(p, T_EQ, "expected `=` in static declaration")) return NULL; node_t *value = parse_expr(p); if (!value) return NULL; n->val.static_decl.ident = ident; n->val.static_decl.type = type; n->val.static_decl.value = value; n->offset = start; n->length = p->previous.position + p->previous.length - start; return n; } /* Parse a constant declaration. */ static node_t *parse_const(parser_t *p) { node_t *var = parse_name_type_value(p, NODE_CONST); if (!var) return NULL; return var; } /* Parse a module use declaration. */ static node_t *parse_use(parser_t *p, node_t *attrs) { usize start = p->current.position; /* Parse the first identifier in the path. */ node_t *path = parse_scope_segment(p, "expected module name after 'use'"); if (!path) return NULL; /* Track if this is a wildcard import. */ bool wildcard = false; /* Continue parsing the dotted path if present. */ while (consume(p, T_COLON_COLON)) { /* Check for wildcard import (e.g., `use foo::*`) */ if (consume(p, T_STAR)) { wildcard = true; break; } node_t *n = node(p, NODE_SCOPE); n->val.access.lval = path; /* Parse the sub-module name. */ node_t *mod = parse_scope_segment(p, "expected identifier or '*' after '::'"); if (!mod) return NULL; n->val.access.rval = mod; path = n; } /* Create a use node and wrap the path. */ node_t *use_node = node(p, NODE_USE); use_node->val.use_decl.path = path; use_node->val.use_decl.attribs = attrs; use_node->val.use_decl.wildcard = wildcard; /* Set position information. */ use_node->offset = start; use_node->length = p->previous.position + p->previous.length - start; return use_node; } /* Parse a module declaration. 
*/ static node_t *parse_mod(parser_t *p, node_t *attrs) { usize start = p->current.position; node_t *ident = parse_ident(p, "expected module name after 'mod'"); if (!ident) return NULL; node_t *mod_node = node(p, NODE_MOD); mod_node->val.mod_decl.ident = ident; mod_node->val.mod_decl.attribs = attrs; mod_node->offset = start; mod_node->length = p->previous.position + p->previous.length - start; return mod_node; } /* Parse a function parameter. */ static node_t *parse_fn_param(parser_t *p) { /* Create parameter node. */ node_t *param = node(p, NODE_PARAM); node_t *name = parse_label(p, "expected parameter name"); if (!name) return NULL; param->val.param.ident = name; /* Parse and store parameter type. */ if (!(param->val.param.type = parse_type(p))) return NULL; return param; } static node_t *parse_module_body(parser_t *p) { node_t *mod = node(p, NODE_MOD_BODY); mod->val.block.stmts = (nodespan_t){ 0 }; while (!check(p, T_EOF)) { node_t *stmt; usize start = p->current.position; if (!(stmt = parse_stmt(p))) return NULL; if (!consume_statement_separator(p, stmt, true)) return NULL; stmt->offset = start; stmt->length = p->current.position - start; if (!nodespan_push(p, &mod->val.block.stmts, stmt)) { error(p, "too many statements in module"); return NULL; } } return mod; } /* Parse a function definition. */ static node_t *parse_fn(parser_t *p, node_t *attrs) { node_t *n = node(p, NODE_FN); node_t *param = NULL; /* Parse the function name. 
*/ node_t *name = parse_ident(p, "expected function name"); if (!name) return NULL; n->val.fn_decl.ident = name; n->val.fn_decl.params = nodespan_alloc(p, MAX_FN_PARAMS); n->val.fn_decl.throws = nodespan_alloc(p, MAX_FN_THROWS); n->val.fn_decl.attribs = attrs; n->val.fn_decl.body = NULL; /* Check if it's an extern function */ bool is_extern = (attrs && attrs->val.attrib & ATTRIB_EXTERN); if (!expect(p, T_LPAREN, "expected `(` after function name")) return NULL; /* Parse parameters with types */ if (!check(p, T_RPAREN)) { do { if (n->val.fn_decl.params.len >= MAX_FN_PARAMS) { error( p, "maximum number of function parameters (%d) exceeded", MAX_FN_PARAMS ); return NULL; } if (!(param = parse_fn_param(p))) { return NULL; } node_fn_add_param(p, n, param); } while (consume(p, T_COMMA)); } if (!expect(p, T_RPAREN, "expected matching `)` after parameters list")) return NULL; if (consume(p, T_ARROW)) { if (!(n->val.fn_decl.return_type = parse_type(p))) { return NULL; } } else { n->val.fn_decl.return_type = NULL; } if (consume(p, T_THROWS)) { if (!expect(p, T_LPAREN, "expected `(` after `throws`")) return NULL; if (!check(p, T_RPAREN)) { do { if (n->val.fn_decl.throws.len >= MAX_FN_THROWS) { error(p, "maximum number of thrown types exceeded"); return NULL; } node_t *thrown = parse_type(p); if (!thrown) return NULL; nodespan_push(p, &n->val.fn_decl.throws, thrown); } while (consume(p, T_COMMA)); } if (!expect(p, T_RPAREN, "expected `)` after throws clause")) return NULL; } /* For extern functions, expect semicolon instead of body */ if (is_extern) { if (!expect( p, T_SEMICOLON, "expected `;` after extern function declaration" )) return NULL; } else { if (!(n->val.fn_decl.body = parse_block(p))) return NULL; } return n; } /* Try to parse an annotation like `@default`. * Returns true if a known annotation was found and consumed. * Returns false if not an annotation (e.g. @sizeOf) - tokens not consumed. 
*/ static bool try_parse_annotation(parser_t *p, attrib_t *attrs) { if (!check(p, T_AT_IDENT)) return false; /* Token is @identifier, skip the '@' to get the name. */ const char *name = p->current.start + 1; usize length = p->current.length - 1; if (length == 7 && !strncmp(name, "default", 7)) { advance(p); /* Consume `@default`. */ *attrs |= ATTRIB_DEFAULT; return true; } if (length == 4 && !strncmp(name, "test", 4)) { advance(p); /* Consume `@test`. */ *attrs |= ATTRIB_TEST; return true; } if (length == 9 && !strncmp(name, "intrinsic", 9)) { advance(p); /* Consume `@intrinsic`. */ *attrs |= ATTRIB_INTRINSIC; return true; } /* Not a known annotation - leave for parse_builtin to handle. */ return false; } /* Parse statement attributes. */ static node_t *parse_attribs(parser_t *p) { node_t *n = NULL; attrib_t attrs = ATTRIB_NONE; for (;;) { if (consume(p, T_PUB)) { if (attrs & ATTRIB_PUB) { error(p, "duplicate `pub` attribute"); return NULL; } attrs |= ATTRIB_PUB; } else if (try_parse_annotation(p, &attrs)) { /* Annotation was consumed, continue. */ } else if (consume(p, T_EXTERN)) { if (attrs & ATTRIB_EXTERN) { error(p, "duplicate `extern` attribute"); return NULL; } attrs |= ATTRIB_EXTERN; } else { break; } } if (attrs != ATTRIB_NONE) { n = node(p, NODE_ATTRIBUTE); n->val.attrib = attrs; } return n; } /* Parse a statement. */ static node_t *parse_stmt(parser_t *p) { /* Parse any attributes that come before the statement. 
*/ node_t *attrs = parse_attribs(p); if (attrs) { switch (p->current.cls) { case T_FN: case T_UNION: case T_RECORD: case T_MOD: case T_CONST: case T_USE: break; default: error(p, "attributes are not allowed in this context"); return NULL; } /* Verify extern is only used with functions */ if ((attrs->val.attrib & ATTRIB_EXTERN) && p->current.cls != T_FN) { error( p, "extern attribute is only allowed on function declarations" ); return NULL; } } switch (p->current.cls) { case T_LBRACE: return parse_block(p); case T_LET: advance(p); if (consume(p, T_CASE)) { return parse_let_case(p); } if (consume(p, T_MUT)) { return parse_var(p, true); } return parse_var(p, false); case T_STATIC: advance(p); return parse_static(p); case T_CONST: advance(p); return parse_const(p); case T_USE: advance(p); return parse_use(p, attrs); case T_MOD: advance(p); return parse_mod(p, attrs); case T_RETURN: advance(p); return parse_return(p); case T_THROW: advance(p); return parse_throw(p); case T_BREAK: advance(p); return parse_break(p); case T_WHILE: advance(p); return parse_while(p); case T_FOR: advance(p); return parse_for(p); case T_LOOP: advance(p); return parse_loop(p); case T_IF: advance(p); return parse_if(p); case T_MATCH: advance(p); return parse_match(p); case T_FN: advance(p); return parse_fn(p, attrs); case T_UNION: advance(p); return parse_union(p, attrs); case T_RECORD: advance(p); return parse_record(p, attrs); case T_PANIC: advance(p); return parse_panic(p); default: break; } /* Parse an expression as a statement or an assignment statement. */ node_t *expr; if ((expr = parse_expr(p)) == NULL) return NULL; /* If we see an equals sign, this is an assignment statement */ if (consume(p, T_EQ)) { return parse_assignment(p, expr); } /* Create an expression statement node. */ node_t *stmt = node(p, NODE_EXPR_STMT); stmt->val.expr_stmt = expr; return stmt; } /* Parse a function argument, which may have an optional label. 
*/ static node_t *parse_fn_call_arg(parser_t *p) { usize start = p->current.position; node_t *arg = node(p, NODE_CALL_ARG); /* Parse the expression first */ node_t *expr = parse_expr(p); if (!expr) return NULL; /* Check if this was an identifier followed by a colon * (making it a label), or the complete expression. */ if (expr->cls == NODE_IDENT && consume(p, T_COLON)) { /* It's a label, parse the actual value expression */ arg->val.call_arg.label = expr; if (!(arg->val.call_arg.expr = parse_expr(p))) { return NULL; } } else { arg->val.call_arg.label = NULL; arg->val.call_arg.expr = expr; } arg->offset = start; arg->length = p->previous.position + p->previous.length - start; return arg; } /* Parse an identifier. */ static node_t *parse_ident(parser_t *p, const char *error) { if (!expect(p, T_IDENT, error)) return NULL; node_t *ident = node(p, NODE_IDENT); ident->val.ident.name = p->previous.start; ident->val.ident.length = p->previous.length; return ident; } /* Parse either an identifier or a placeholder ('_'). */ static node_t *parse_ident_or_placeholder(parser_t *p, const char *error) { if (consume(p, T_UNDERSCORE)) { return node(p, NODE_PLACEHOLDER); } return parse_ident(p, error); } /* Parse a label. * Returns an identifier node. Expects IDENT followed by COLON. 
*/ static node_t *parse_label(parser_t *p, const char *error) { if (!expect(p, T_IDENT, error)) return NULL; node_t *ident = node(p, NODE_IDENT); ident->val.ident.name = p->previous.start; ident->val.ident.length = p->previous.length; if (!expect(p, T_COLON, "expected ':' after identifier")) return NULL; return ident; } static node_t *parse_scope_segment(parser_t *p, const char *error) { if (check(p, T_SUPER)) { node_t *super_node = node(p, NODE_SUPER); advance(p); return super_node; } return parse_ident(p, error); } static node_t *parse_as_cast(parser_t *p, node_t *expr) { if (!consume(p, T_AS)) return NULL; node_t *as = node(p, NODE_AS); as->val.as_expr.expr = expr; /* Parse the target type */ node_t *typ = parse_type(p); if (!typ) return NULL; as->val.as_expr.type = typ; as->offset = expr->offset; as->length = p->current.position - as->offset; return as; } /* Parse postfix expressions (field access and array indexing). * * This function handles both field access (expr.field) and array indexing * (expr[index]) in a unified way, enabling arbitrarily complex nested * expressions like `x.y.z[1].w[2][3].q`. */ static node_t *parse_postfix(parser_t *p, node_t *expr) { node_t *result = expr; for (;;) { if (consume(p, T_DOT)) { /* Field access. */ node_t *n = node(p, NODE_ACCESS); n->val.access.lval = result; node_t *field = parse_ident(p, "expected field name after `.`"); if (!field) return NULL; field->val.ident.name = p->previous.start; field->val.ident.length = p->previous.length; n->val.access.rval = field; result = n; } else if (consume(p, T_DOT_DOT)) { node_t *range = node(p, NODE_RANGE); range->val.range.start = result; range->val.range.end = NULL; /* Check if there's a right-hand side for the range. 
*/ if (!check(p, T_RBRACKET) && !check(p, T_SEMICOLON) && !check(p, T_COMMA) && !check(p, T_RPAREN) && !check(p, T_LBRACE)) { if (!(range->val.range.end = parse_expr(p))) { return NULL; } } result = range; } else if (consume(p, T_COLON_COLON)) { /* Scope access */ node_t *ident = parse_scope_segment(p, "expected identifier name after `::`"); if (!ident) return NULL; node_t *n = node(p, NODE_SCOPE); n->val.access.lval = result; n->val.access.rval = ident; result = n; } else if (consume(p, T_LBRACKET)) { /* Array indexing or slicing. */ node_t *expr = NULL; if (consume(p, T_DOT_DOT)) { /* Either `..` or `..n` */ /* Create range node with NULL start and end. */ expr = node(p, NODE_RANGE); expr->val.range.start = NULL; expr->val.range.end = NULL; if (!check(p, T_RBRACKET)) { if (!(expr->val.range.end = parse_expr(p))) { return NULL; } } } else { /* Either `n`, `n..` or `n..m` */ node_t *index = parse_expr(p); if (!index) return NULL; expr = index; } /* Create array index node with the index expression */ node_t *n = node(p, NODE_ARRAY_INDEX); n->val.access.lval = result; n->val.access.rval = expr; n->offset = result->offset; n->length = result->length; /* Expect closing bracket */ if (!expect(p, T_RBRACKET, "expected `]` after array index")) return NULL; result = n; } else if (consume(p, T_LPAREN)) { /* Parse function call. */ node_t *call = node(p, NODE_CALL); call->val.call.callee = result; call->val.call.args = nodespan_alloc(p, MAX_FN_PARAMS); node_t *arg = NULL; if (!check(p, T_RPAREN)) { do { if (!(arg = parse_fn_call_arg(p))) { return NULL; } nodespan_push(p, &call->val.call.args, arg); } while (consume(p, T_COMMA)); } if (!expect(p, T_RPAREN, "expected `)` after function arguments")) return NULL; result = call; } else if (p->context == PARSE_CTX_NORMAL && result->cls == NODE_SCOPE && check(p, T_LBRACE)) { /* Record literal after scope access: `Union::Variant { ... }`. 
*/ advance(p); /* consume `{` */ node_t *literal = parse_record_lit(p, result); if (!literal) return NULL; result = literal; } else { /* No postfix operators to try. */ break; } } return result; } /* Parse a complete program, return the root of the AST, or `NULL` * if parsing failed. */ node_t *parser_parse(parser_t *p) { p->current = scanner_next(&p->scanner); /* Create a top-level module. */ node_t *root = parse_module_body(p); if (!root) return NULL; if (!expect(p, T_EOF, "expected end-of-file")) return NULL; root->length = (usize)(p->scanner.cursor - p->scanner.source); return (p->root = root); }