Initial commit

7834d3a9d44fb48ae3d3c06da992922f3e46b580b3d92df36372081b2fe475c3
Radiance bootstrapping compiler written in C99.
Licensed under the MIT license.
Alexis Sellier committed
.clang-format added +19 -0
1 +
IndentWidth: 4
2 +
ColumnLimit: 80
3 +
UseTab: Never
4 +
AllowShortBlocksOnASingleLine: Always  # or use 'Empty' for only empty blocks
5 +
AllowShortFunctionsOnASingleLine: Empty
6 +
AlignConsecutiveMacros: AcrossComments
7 +
AlignAfterOpenBracket: BlockIndent
8 +
AlignConsecutiveBitFields: AcrossEmptyLinesAndComments
9 +
AlignConsecutiveDeclarations:
10 +
  Enabled: true
11 +
AlignConsecutiveAssignments:
12 +
  Enabled: true
13 +
  AlignCompound: true
14 +
  PadOperators: true
15 +
BinPackParameters: false
16 +
BinPackArguments: false
17 +
BreakAfterReturnType: Automatic
18 +
PenaltyReturnTypeOnItsOwnLine: 999
19 +
Cpp11BracedListStyle: false
.gitignore added +4 -0
1 +
/bin
2 +
/radiance
3 +
/bootstrap
4 +
*.o
.gitsigners added +1 -0
1 +
alexis ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICpDRmIwBm4ajzW+METm9tBdK4CG2/v0qmO4bPfi+s+c alexis@radiant.computer
LICENSE added +19 -0
1 +
Copyright (c) 2025-2026 Radiant Computer (https://radiant.computer)
2 +
3 +
Permission is hereby granted, free of charge, to any person obtaining a copy of
4 +
this software and associated documentation files (the "Software"), to deal in
5 +
the Software without restriction, including without limitation the rights to
6 +
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7 +
of the Software, and to permit persons to whom the Software is furnished to do
8 +
so, subject to the following conditions:
9 +
10 +
The above copyright notice and this permission notice shall be included in all
11 +
copies or substantial portions of the Software.
12 +
13 +
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 +
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 +
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 +
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 +
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 +
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 +
SOFTWARE.
Makefile added +39 -0
1 +
# Builds the C bootstrap compiler.

CC      := clang
CFLAGS  := -fvisibility=hidden -std=c99 -Os \
           -Wall -Wextra -Wpedantic \
           -Wformat=2 -Wformat-security \
           -Wnull-dereference \
           -Wno-format-nonliteral \
           -Wcast-align \
           -Wunused -Wuninitialized \
           -Wmissing-field-initializers \
           -fno-common -fstack-protector-all \
           -mcmodel=medium
LDFLAGS := -fuse-ld=lld -Wl,-z,stack-size=33554432

SRC := $(wildcard *.c) $(wildcard gen/*.c)
HDR := $(wildcard *.h) $(wildcard gen/*.h)
OBJ := $(SRC:.c=.o)
BIN := bin/radiance.s0

default: $(BIN)

# Link all objects into the stage-0 binary.
$(BIN): $(OBJ)
	@echo "ld   $^ => $@"
	@mkdir -p bin
	@$(CC) $(LDFLAGS) $(OBJ) -o $@
	@echo "ok   $@"

# Every object depends on all headers (simple, conservative rebuild rule).
%.o: %.c $(HDR)
	@echo "cc   $< => $@"
	@$(CC) $(CFLAGS) -c $< -o $@

clean:
	@rm -f $(OBJ) $(BIN)

fmt:
	git ls-files "*.c" "*.h" | xargs clang-format -i

.PHONY: default clean fmt
README added +66 -0
1 +
2 +
RADIANCE BOOTSTRAPPING COMPILER
3 +
4 +
Bootstrap compiler for the Radiance programming language, written in C99.
5 +
6 +
This is the Stage 0 compiler used to bootstrap Radiance from scratch. It
7 +
compiles Radiance source code to RISC-V machine code. Once built, it can be
8 +
used to compile the self-hosted Radiance compiler, which then recompiles
9 +
itself until a fixed point is reached.
10 +
11 +
BUILDING
12 +
13 +
  Requirements:
14 +
15 +
    * clang (or another C99 compiler)
16 +
    * lld (the LLVM linker)
17 +
18 +
  To build the compiler:
19 +
20 +
    make
21 +
22 +
  This produces `bin/radiance.s0`, the Stage 0 compiler binary.
23 +
  You can specify a different C compiler with:
24 +
25 +
    make CC=gcc
26 +
27 +
USAGE
28 +
29 +
    bin/radiance.s0 [options] <input.rad>
30 +
31 +
  Options:
32 +
33 +
    -o <path>       Output file path (required)
34 +
    -mod <path>     Register an additional module
35 +
36 +
COMPILER PIPELINE
37 +
38 +
  The compiler is structured as a series of passes:
39 +
40 +
    Scanner     (scanner.c)    Tokenizes source text
41 +
    Parser      (parser.c)     Builds an AST from tokens
42 +
    Desugar     (desugar.c)    Syntactic transformations on the AST
43 +
    Resolver    (resolver.c)   Name resolution and type checking
44 +
    Gen         (gen.c)        Code generation targeting RISC-V 64-bit
45 +
    RISC-V      (riscv.c)      Instruction encoding
46 +
47 +
  Supporting modules:
48 +
49 +
    ast.c         AST node definitions and utilities
50 +
    module.c      Module loading and management
51 +
    symtab.c      Symbol table
52 +
    strings.c     Interned string table
53 +
    ralloc.c      Region-based memory allocator
54 +
    io.c          File I/O helpers
55 +
    options.c     Command line option parsing
56 +
    gen/emit.c    Binary emission
57 +
    gen/data.c    Read-only and read-write data sections
58 +
59 +
FORMATTING
60 +
61 +
  To format source files before committing, run `make fmt`.
62 +
63 +
LICENSE
64 +
65 +
  Licensed under the MIT License,
66 +
  Copyright (c) 2025-2026 Radiant Computer (https://radiant.computer)
ast.c added +159 -0
1 +
#include <stdio.h>
2 +
#include <stdlib.h>
3 +
#include <string.h>
4 +
5 +
#include "ast.h"
6 +
#include "module.h"
7 +
#include "parser.h"
8 +
#include "resolver.h"
9 +
#include "symtab.h"
10 +
11 +
/* String representations of value types. */
12 +
const char *type_names[] = {
13 +
    [TYPE_VOID] = "void",     [TYPE_I8] = "i8",        [TYPE_I16] = "i16",
14 +
    [TYPE_I32] = "i32",       [TYPE_U8] = "u8",        [TYPE_U16] = "u16",
15 +
    [TYPE_U32] = "u32",       [TYPE_F32] = "f32",      [TYPE_BOOL] = "bool",
16 +
    [TYPE_FN] = "fn",         [TYPE_UNION] = "union",  [TYPE_RESULT] = "result",
17 +
    [TYPE_RECORD] = "record", [TYPE_ARRAY] = "array",  [TYPE_SLICE] = "slice",
18 +
    [TYPE_PTR] = "pointer",   [TYPE_OPT] = "optional", [TYPE_NEVER] = "never",
19 +
    [TYPE_OPAQUE] = "opaque"
20 +
};
21 +
22 +
/* String representations of node classes. */
23 +
const char *node_names[] = {
24 +
    [NODE_TYPE]     = "TYPE",
25 +
    [NODE_NUMBER]   = "NUMBER",
26 +
    [NODE_BOOL]     = "BOOL",
27 +
    [NODE_CHAR]     = "CHAR",
28 +
    [NODE_STRING]   = "STRING",
29 +
    [NODE_UNDEF]    = "UNDEFINED",
30 +
    [NODE_NIL]      = "NIL",
31 +
    [NODE_IDENT]    = "IDENT",
32 +
    [NODE_SUPER]    = "SUPER",
33 +
    [NODE_BINOP]    = "BINOP",
34 +
    [NODE_UNOP]     = "UNOP",
35 +
    [NODE_BLOCK]    = "BLOCK",
36 +
    [NODE_CALL]     = "CALL",
37 +
    [NODE_BUILTIN]  = "BUILTIN",
38 +
    [NODE_CALL_ARG] = "ARG",
39 +
    [NODE_ASSIGN]   = "ASSIGN",
40 +
    [NODE_PTR]      = "PTR",
41 +
    [NODE_MOD]      = "MOD",
42 +
    [NODE_MOD_BODY] = "MODULE",
43 +
44 +
    [NODE_PANIC]            = "PANIC",
45 +
    [NODE_RETURN]           = "RETURN",
46 +
    [NODE_THROW]            = "THROW",
47 +
    [NODE_WHILE]            = "WHILE",
48 +
    [NODE_WHILE_LET]        = "WHILE_LET",
49 +
    [NODE_FOR]              = "FOR",
50 +
    [NODE_LOOP]             = "LOOP",
51 +
    [NODE_TRY]              = "TRY",
52 +
    [NODE_IF]               = "IF",
53 +
    [NODE_IF_LET]           = "IF_LET",
54 +
    [NODE_IF_CASE]          = "IF_CASE",
55 +
    [NODE_GUARD_CASE]       = "GUARD_CASE",
56 +
    [NODE_GUARD_LET]        = "GUARD_LET",
57 +
    [NODE_MATCH]            = "SWITCH",
58 +
    [NODE_MATCH_CASE]       = "SWITCH_CASE",
59 +
    [NODE_CATCH]            = "CATCH",
60 +
    [NODE_FN]               = "FUNCTION",
61 +
    [NODE_VAR]              = "VAR",
62 +
    [NODE_CONST]            = "CONST",
63 +
    [NODE_STATIC]           = "STATIC",
64 +
    [NODE_REF]              = "REF",
65 +
    [NODE_PARAM]            = "PARAM",
66 +
    [NODE_ATTRIBUTE]        = "ATTRIBUTE",
67 +
    [NODE_BREAK]            = "BREAK",
68 +
    [NODE_EXPR_STMT]        = "EXPR_STMT",
69 +
    [NODE_RECORD]           = "RECORD",
70 +
    [NODE_RECORD_FIELD]     = "RECORD_FIELD",
71 +
    [NODE_RECORD_TYPE]      = "RECORD_TYPE",
72 +
    [NODE_UNION]            = "UNION",
73 +
    [NODE_UNION_VARIANT]    = "UNION_VARIANT",
74 +
    [NODE_RECORD_LIT]       = "RECORD_LIT",
75 +
    [NODE_RECORD_LIT_FIELD] = "RECORD_LIT_FIELD",
76 +
    [NODE_ARRAY_LIT]        = "ARRAY_LIT",
77 +
    [NODE_ARRAY_INDEX]      = "ARRAY_INDEX",
78 +
    [NODE_ACCESS]           = "ACCESS",
79 +
    [NODE_SCOPE]            = "SCOPE",
80 +
    [NODE_AS]               = "AS",
81 +
    [NODE_RANGE]            = "RANGE",
82 +
    [NODE_USE]              = "USE",
83 +
    [NODE_PLACEHOLDER]      = "PLACEHOLDER",
84 +
};
85 +
86 +
/* Check if the node is a binary comparison operation. */
87 +
bool node_is_comp(node_t *n) {
88 +
    if (n->cls != NODE_BINOP)
89 +
        return false;
90 +
91 +
    binop_t op = n->val.binop.op;
92 +
93 +
    return op == OP_EQ || op == OP_NE || op == OP_LT || op == OP_GT ||
94 +
           op == OP_LE || op == OP_GE || op == OP_AND || op == OP_OR;
95 +
}
96 +
97 +
/* Check if the node is a literal value. */
98 +
bool node_is_literal(node_t *n) {
99 +
    switch (n->cls) {
100 +
    case NODE_ARRAY_LIT:
101 +
    case NODE_ARRAY_REPEAT_LIT:
102 +
    case NODE_RECORD_LIT:
103 +
    case NODE_UNION_VARIANT:
104 +
    case NODE_NUMBER:
105 +
    case NODE_BOOL:
106 +
    case NODE_STRING:
107 +
    case NODE_CHAR:
108 +
    case NODE_NIL:
109 +
    case NODE_UNDEF:
110 +
        return true;
111 +
    default:
112 +
        return false;
113 +
    }
114 +
}
115 +
116 +
/* Check if the binary operator is a logical one. */
117 +
bool op_is_logical(binop_t op) {
118 +
    return op == OP_AND || op == OP_OR;
119 +
}
120 +
121 +
/* Access node pointers from a span. */
122 +
node_t **nodespan_ptrs(parser_t *p, nodespan_t span) {
123 +
    return &p->ptrs[span.idx];
124 +
}
125 +
126 +
/* Allocate a new span from the parser's pointer pool. */
127 +
nodespan_t nodespan_alloc(parser_t *p, u16 cap) {
128 +
    nodespan_t span  = { .idx = (u32)p->nptrs, .len = 0, .cap = cap };
129 +
    p->nptrs        += cap;
130 +
    return span;
131 +
}
132 +
133 +
/* Append a node to a span. Returns false on overflow. */
134 +
bool nodespan_push(parser_t *p, nodespan_t *span, node_t *node) {
135 +
    if (span->len >= span->cap) {
136 +
        u16 newcap = span->cap == 0 ? 8 : span->cap * 2;
137 +
        if (p->nptrs + newcap > MAX_NODEPTR_POOL) {
138 +
            return false;
139 +
        }
140 +
        u32 newidx = (u32)p->nptrs;
141 +
        for (u16 i = 0; i < span->len; i++) {
142 +
            p->ptrs[newidx + i] = p->ptrs[span->idx + i];
143 +
        }
144 +
        span->idx  = newidx;
145 +
        span->cap  = newcap;
146 +
        p->nptrs  += newcap;
147 +
    }
148 +
    p->ptrs[span->idx + span->len++] = node;
149 +
    return true;
150 +
}
151 +
152 +
/* Add a statement to a block node using module's parser. */
153 +
void node_block_add_stmt(module_t *mod, node_t *block, node_t *stmt) {
154 +
    nodespan_push(&mod->parser, &block->val.block.stmts, stmt);
155 +
}
156 +
157 +
/* Add a parameter to a function declaration node.
 * NOTE(review): nodespan_push's overflow result is ignored — a param
 * could be silently dropped on pool exhaustion; confirm intended. */
void node_fn_add_param(parser_t *p, node_t *fn, node_t *param) {
    nodespan_push(p, &fn->val.fn_decl.params, param);
}
ast.h added +512 -0
1 +
#ifndef AST_H
#define AST_H

#include <stdio.h>

#include "limits.h"
#include "symtab.h"
#include "types.h"

/* String representations of value types. */
extern const char *type_names[];
/* String representations of node classes. */
extern const char *node_names[];

/* Span into parser's pointer pool. Used instead of embedded arrays
 * to keep node_t small. Access via nodespan_ptrs(). */
typedef struct {
    u32 idx; /* Starting index into parser_t.ptrs */
    u16 len; /* Number of elements */
    u16 cap; /* Capacity (for growable spans) */
} nodespan_t;

/* Variable declaration */
typedef struct {
    struct node_t *ident;
    struct node_t *type;
    struct node_t *value;
    struct node_t *align;
    bool mutable;
} var_decl_t;

/* Constant declaration */
typedef struct {
    struct node_t *ident;
    struct node_t *type;
    struct node_t *value;
} const_decl_t;

/* Static variable declaration */
typedef struct {
    struct node_t *ident;
    struct node_t *type;
    struct node_t *value;
} static_decl_t;

/* Record type declaration. */
typedef struct {
    struct node_t *name;    /* Name of the record */
    nodespan_t     fields;  /* Fields of the record */
    struct node_t *attribs; /* Attributes (e.g. pub) */
    bool           tuple;   /* Unlabeled fields */
} record_decl_t;

/* Anonymous record type. */
typedef struct {
    nodespan_t fields; /* Fields of the record */
} record_type_t;

/* Field initializer in a record literal. */
typedef struct {
    struct node_t *name;  /* Field name */
    struct node_t *value; /* Field value expression */
} record_lit_field_t;

/* Union variant definition. */
typedef struct {
    struct node_t *name;       /* Name of the variant */
    struct node_t *type;       /* Optional payload type */
    i32            value;      /* Value or tag for the variant */
    struct node_t *value_expr; /* Literal expression when explicitly assigned */
} union_variant_t;

/* Union type declaration. */
typedef struct {
    struct node_t *name;     /* Name of the union */
    nodespan_t     variants; /* Variants of the union */
    struct node_t *attribs;  /* Attributes (e.g. pub) */
} union_decl_t;

/* Function definition. */
typedef struct {
    struct node_t *ident;
    nodespan_t     params;
    struct node_t *return_type;
    nodespan_t     throws;
    struct node_t *body; /* Will be NULL for `extern` functions */
    struct node_t *attribs;
} fn_decl_t;

/* Node class. */
typedef enum {
    /* Literals. */
    NODE_NUMBER,
    NODE_CHAR,
    NODE_STRING,
    NODE_BOOL,
    NODE_NIL,              /* Nil literal */
    NODE_UNDEF,            /* Undefined literal */
    NODE_RECORD_LIT,       /* Record literal (e.g. Foo { x: 1, y: 2 }) */
    NODE_RECORD_LIT_FIELD, /* Record literal field */
    NODE_ARRAY_LIT,        /* Array literal (e.g. [1, 2, 3]) */
    NODE_ARRAY_REPEAT_LIT, /* Array repeat literal (e.g. [0; 24]) */
    NODE_ARRAY_INDEX,      /* Array indexing (e.g. arr[0]) */
    NODE_RANGE,            /* Range expression (e.g. 0..5 or ..) */

    /* Expressions. */
    NODE_IDENT,
    NODE_SUPER, /* `super` path segment */
    NODE_BINOP,
    NODE_UNOP, /* Unary operation (e.g., not x) */
    NODE_BLOCK,
    NODE_CALL,
    NODE_BUILTIN,
    NODE_CALL_ARG, /* Function call argument, optionally labeled (e.g. x: 1) */
    NODE_ACCESS,   /* (eg. foo.bar) */
    NODE_SCOPE,    /* (eg. foo::bar) */
    NODE_REF,      /* (eg., &foo) */
    NODE_AS,       /* (eg. x as i32) */

    /* Statements. */
    NODE_MOD,      /* Module declaration: mod foo; */
    NODE_MOD_BODY, /* Module body content */
    NODE_RETURN,
    NODE_THROW,
    NODE_WHILE,
    NODE_WHILE_LET, /* While let statement */
    NODE_LOOP,
    NODE_TRY,
    NODE_IF,
    NODE_IF_LET,     /* If let statement (eg. if let x in (opt) { ... }) */
    NODE_IF_CASE,    /* If case statement (eg. if case Foo::Bar(x) = expr) */
    NODE_GUARD_CASE, /* let case else statement */
    NODE_GUARD_LET,  /* let else statement */
    NODE_MATCH,      /* Switch statement (eg. switch (x) { ... }) */
    NODE_MATCH_CASE, /* Switch case (eg. case 1 => { ... }) */
    NODE_CATCH,
    NODE_FN,
    NODE_VAR, /* Variable declaration */
    NODE_CONST,
    NODE_STATIC,
    NODE_PARAM, /* Function parameter */
    NODE_BREAK,
    NODE_FOR,
    NODE_ASSIGN,
    NODE_EXPR_STMT,
    NODE_USE,   /* Module use declaration (e.g. use std.net) */
    NODE_PANIC, /* Panic statement */

    /* Type declarations. */
    NODE_TYPE,
    NODE_PTR, /* Pointer type */
    NODE_ALIGN,
    NODE_ATTRIBUTE,
    NODE_RECORD,       /* Record type declaration. */
    NODE_RECORD_FIELD, /* Field in record declaration */
    NODE_RECORD_TYPE,  /* Anonymous record type */
    NODE_UNION,
    NODE_UNION_VARIANT,
    NODE_PLACEHOLDER, /* Placeholder `_` for unused bindings */
} nodeclass_t;

/* Compiler built-ins */
typedef enum {
    BUILTIN_SIZE_OF,
    BUILTIN_ALIGN_OF,
    BUILTIN_SLICE_OF
} builtin_kind_t;

/* Binary operators. */
typedef enum {
    OP_ADD,
    OP_SUB,
    OP_MUL,
    OP_DIV,
    OP_MOD,
    OP_EQ,
    OP_NE,
    OP_LT,
    OP_GT,
    OP_LE,
    OP_GE,
    OP_AND,
    OP_OR,
    OP_BAND, /* Bitwise AND (&) */
    OP_BOR,  /* Bitwise OR (|) */
    OP_XOR,  /* Bitwise XOR (^) */
    OP_SHL,  /* Shift left (<<) */
    OP_SHR,  /* Shift right (>>) */
} binop_t;

/* Unary operators. */
typedef enum {
    OP_NOT,
    OP_NEG, /* Numeric negation */
    OP_DEREF,
    OP_BNOT, /* Bitwise NOT (~) */
} unop_t;

/* A node in the abstract syntax tree. */
typedef struct node_t {
    /* Fields set by parser. */
    nodeclass_t cls;    /* Node class. */
    u32         offset; /* Byte offset into source code for this node. */
    u32         length; /* Length of source code for this node. */
    const char *file;   /* Source file this node was parsed from. */

    /* Fields set by resolver. */
    symbol_t      *sym;  /* Symbol table entry, if any. */
    struct type_t *type; /* Type context, if any. */

    /* Node value, set during parsing. Interpreted according to `cls`. */
    union {
        /* Boolean literal. */
        bool bool_lit;
        /* Character literal. */
        char char_lit;

        struct {
            const char *data;
            u16         length;
        } string_lit;

        /* Expression statement. */
        struct node_t *expr_stmt;

        /* Attribute node. */
        attrib_t attrib;

        /* Type node. */
        struct {
            /* The type represented by this node. If a complex type,
             * additional information is found in the `info` union. */
            typeclass_t    tclass;
            struct node_t *elem_type;

            union {
                struct {
                    struct node_t *length;
                } array;

                struct {
                    bool mut;
                } ptr;

                struct {
                    bool mut;
                } slice;

                struct {
                    nodespan_t     params; /* Parameter types */
                    struct node_t *ret;    /* Return type */
                    nodespan_t     throws;
                } fn;
            } info;
        } type;

        /* Reference node. */
        struct {
            struct node_t *target; /* Target of the reference. */
            bool           mut;    /* If this is a mutable reference. */
        } ref;

        /* Align expression */
        struct node_t *align;

        /* Expression nodes. */
        struct {
            const char *text;     /* Original text of the number literal */
            u16         text_len; /* Length of the original text */
            imm_t       value;    /* Parsed value based on type context */
        } number;

        struct {
            const char *name;
            u16         length;
        } ident;

        struct {
            binop_t        op;
            struct node_t *left;
            struct node_t *right;
        } binop;

        struct {
            unop_t         op;
            struct node_t *expr;
        } unop;

        struct {
            struct node_t *expr; /* Expression to cast */
            struct node_t *type; /* Target type */
        } as_expr;

        struct {
            nodespan_t      stmts;
            struct scope_t *scope; /* Scope for this block */
        } block;

        struct {
            struct node_t *callee;
            nodespan_t     args;
        } call;

        struct {
            builtin_kind_t kind;
            nodespan_t     args; /* Arguments to builtin */
        } builtin;

        struct {
            struct node_t *label; /* Optional label, or `NULL`. */
            struct node_t *expr;  /* Argument expression. */
        } call_arg;

        struct {
            struct node_t *lval;
            struct node_t *rval;
        } assign;

        struct {
            struct node_t *value;
        } return_stmt;

        struct {
            struct node_t *expr;
        } throw_stmt;

        struct {
            struct node_t *message; /* Optional message expression */
        } panic_stmt;

        struct {
            struct node_t *cond;
            struct node_t *body;
            struct node_t *rbranch; /* Optional else clause */
        } while_stmt;

        struct {
            struct node_t  *var;     /* Variable to bind */
            struct node_t  *expr;    /* Optional expression to unwrap */
            struct node_t  *guard;   /* Optional guard condition */
            struct node_t  *body;    /* Loop body */
            struct node_t  *rbranch; /* Optional else clause */
            struct scope_t *scope;   /* Holds the bound variable */
        } while_let_stmt;

        struct {
            struct node_t  *var;
            struct node_t  *idx; /* Optional index variable */
            struct node_t  *iter;
            struct node_t  *body;
            struct node_t  *rbranch; /* Optional else clause */
            struct scope_t *scope;   /* Holds the temporary variable. */
        } for_stmt;

        struct {
            struct node_t *body;
        } loop_stmt;

        struct {
            struct node_t *expr;       /* Expression guarded by try */
            struct node_t *catch_expr; /* Fallback expression for catch */
            nodespan_t     handlers;
            bool           panic; /* Emit ebreak on error when true */
            bool optional;        /* Return optional instead of propagating */
        } try_expr;

        struct {
            struct node_t *cond;
            struct node_t *lbranch;
            struct node_t *rbranch;
        } if_stmt;

        struct {
            struct node_t  *var;     /* Variable to bind */
            struct node_t  *expr;    /* Optional expression to unwrap */
            struct node_t  *guard;   /* Optional guard */
            struct node_t  *lbranch; /* Then branch */
            struct node_t  *rbranch; /* Else branch (optional) */
            struct scope_t *scope;   /* Holds the bound variable */
        } if_let_stmt;

        struct {
            struct node_t *pattern; /* Pattern to match */
            struct node_t *expr;    /* Expression being tested */
            struct node_t *guard;   /* Optional guard */
            struct node_t *lbranch; /* Then branch */
            struct node_t *rbranch; /* Else branch (optional) */
        } if_case_stmt;

        struct {
            struct node_t *pattern; /* Pattern to match */
            struct node_t *expr;    /* Expression being tested */
            struct node_t *guard;   /* Optional guard */
            struct node_t *rbranch; /* Else branch */
        } guard_case_stmt;

        struct {
            struct node_t *var;     /* Bound variable */
            struct node_t *expr;    /* Optional expression to unwrap */
            struct node_t *rbranch; /* Else branch */
        } guard_let_stmt;

        struct {
            struct node_t *expr;
            nodespan_t     cases; /* Switch cases */
        } match_stmt;

        struct {
            nodespan_t     patterns;
            struct node_t *body;     /* Case body */
            struct node_t *guard;    /* Optional guard condition */
            struct node_t *variable; /* Bound variable */
        } match_case;

        struct {
            struct node_t  *binding; /* Optional bound identifier */
            struct node_t  *body;    /* Catch handler body */
            struct scope_t *scope;   /* Scope for the bound variable */
        } catch_clause;

        var_decl_t    var;
        const_decl_t  constant;
        static_decl_t static_decl;

        struct {
            struct node_t *ident;
            struct node_t *type;
        } param;

        union_decl_t    union_decl;
        union_variant_t union_variant;

        struct {
            struct node_t *type;   /* Type identifier */
            nodespan_t     fields; /* Field initializers */
            bool           etc;    /* Pattern discards remaining fields */
        } record_lit;

        /* Field initialization in a record literal. */
        record_lit_field_t record_lit_field;

        /* Record declarations. */
        record_decl_t record_decl;
        /* Anonymous record */
        record_type_t record_type;

        /* Array indexing and record field access, eg. `x.y` or `x[y]`. */
        struct {
            struct node_t *lval;
            struct node_t *rval;
        } access;

        /* Range expression, e.g. `0..5` or `..` */
        struct {
            struct node_t *start; /* Start expression (NULL if omitted) */
            struct node_t *end;   /* End expression (NULL if omitted) */
        } range;

        /* Array literal, e.g. `[1, 2, 3]` */
        struct {
            nodespan_t elems;
        } array_lit;

        /* Array repeat literal, e.g. `[0; 24]` */
        struct {
            struct node_t *value; /* Value to repeat */
            struct node_t *count; /* Number of repetitions */
        } array_repeat_lit;

        /* Use declaration, e.g. `use std::net::tcp;` */
        struct {
            struct node_t *path;     /* Path to the module being used */
            struct node_t *attribs;  /* Attributes (e.g. pub) */
            bool           wildcard; /* Whether this is a wildcard import */
        } use_decl;

        /* Module declaration, e.g. `mod util;` */
        struct {
            struct node_t *ident;   /* Name of the module */
            struct node_t *attribs; /* Attributes (e.g. pub) */
        } mod_decl;

        fn_decl_t fn_decl;
    } val;
} node_t;

/* Forward declarations so prototypes below need not pull in
 * parser.h / module.h. */
struct parser_t;
struct module_t;

/* Check if the node is a binary comparison operation. */
bool node_is_comp(node_t *n);
/* Check if the node is a literal */
bool node_is_literal(node_t *n);
/* Check if the binary operator is a logical one. */
bool op_is_logical(binop_t);
/* Add a parameter to a function. */
void node_fn_add_param(struct parser_t *p, node_t *fn, node_t *param);

/* Access node pointers from a span. Requires parser context. */
node_t **nodespan_ptrs(struct parser_t *p, nodespan_t span);

/* Allocate a new span from the parser's pointer pool. */
nodespan_t nodespan_alloc(struct parser_t *p, u16 cap);

/* Append a node to a span, growing if needed. Returns false on overflow. */
bool nodespan_push(struct parser_t *p, nodespan_t *span, node_t *node);

/* Add a statement to a block node using module's parser. */
void node_block_add_stmt(struct module_t *mod, node_t *block, node_t *stmt);

#endif
desugar.c added +744 -0
1 +
#include <stdio.h>
2 +
#include <stdlib.h>
3 +
#include <string.h>
4 +
5 +
#include "ast.h"
6 +
#include "desugar.h"
7 +
#include "io.h"
8 +
#include "module.h"
9 +
#include "parser.h"
10 +
#include "resolver.h"
11 +
#include "symtab.h"
12 +
13 +
/*
14 +
 * AST desugaring pass
15 +
 *
16 +
 * This pass runs before resolving and transforms the AST.
17 +
 */
18 +
19 +
/* Forward declarations */
20 +
static node_t *desugar_node(desugar_t *d, module_t *mod, node_t *n);
21 +
static node_t *desugar_and_operator(desugar_t *d, module_t *mod, node_t *binop);
22 +
static node_t *desugar_or_operator(desugar_t *d, module_t *mod, node_t *binop);
23 +
static node_t *desugar_guard_stmt(
24 +
    desugar_t *d, module_t *mod, node_t *block, usize ix, node_t *guard
25 +
);
26 +
static node_t *desugar_block(desugar_t *d, module_t *mod, node_t *block);
27 +
28 +
/* Allocate a new AST node using the module's parser */
29 +
static node_t *node(module_t *mod, nodeclass_t cls, node_t *original) {
30 +
    parser_t *p = &mod->parser;
31 +
    if (p->nnodes >= MAX_NODES) {
32 +
        bail("maximum number of AST nodes reached");
33 +
    }
34 +
    node_t *n = &p->nodes[p->nnodes++];
35 +
    n->cls    = cls;
36 +
    n->type   = NULL;
37 +
    n->sym    = NULL;
38 +
    n->offset = original ? original->offset : 0;
39 +
    n->length = original ? original->length : 0;
40 +
41 +
    return n;
42 +
}
43 +
44 +
/* Create a boolean literal node at the location of `loc`. */
static node_t *node_bool(module_t *mod, bool b, node_t *loc) {
    node_t *lit       = node(mod, NODE_BOOL, loc);
    lit->val.bool_lit = b;

    return lit;
}
50 +
51 +
/* Create an empty block node. */
52 +
static node_t *node_block(module_t *mod, node_t *original) {
53 +
    node_t *block          = node(mod, NODE_BLOCK, original);
54 +
    block->val.block.stmts = (nodespan_t){ 0 };
55 +
    block->val.block.scope = NULL;
56 +
57 +
    return block;
58 +
}
59 +
60 +
/* Transform a while-let loop into:
 *
 *   loop {
 *     if let var = expr; guard {
 *       body;
 *     } else {
 *       rbranch;
 *       break;
 *     }
 *   }
 */
static node_t *desugar_while_let(
    desugar_t *d, module_t *mod, node_t *while_let_node
) {
    /* Create the loop node */
    node_t *loop_node = node(mod, NODE_LOOP, while_let_node);
    /* Create the if-let node */
    node_t *if_let_node = node(mod, NODE_IF_LET, while_let_node);
    /* Create the break statement */
    node_t *break_node = node(mod, NODE_BREAK, while_let_node);

    /* Handle the else clause: the user's rbranch (if any) runs first, then
     * the break terminates the loop once the binding fails. */
    node_t *else_clause = node_block(mod, while_let_node);
    if (while_let_node->val.while_let_stmt.rbranch) {
        node_block_add_stmt(
            mod,
            else_clause,
            desugar_node(d, mod, while_let_node->val.while_let_stmt.rbranch)
        );
    }
    node_block_add_stmt(mod, else_clause, break_node);

    node_t *loop_body = node_block(mod, while_let_node);
    node_block_add_stmt(mod, loop_body, if_let_node);

    /* Set up the if-let statement. Sub-expressions are desugared here so the
     * generated tree never needs a second pass. */
    if_let_node->val.if_let_stmt.var = while_let_node->val.while_let_stmt.var;
    if_let_node->val.if_let_stmt.expr =
        desugar_node(d, mod, while_let_node->val.while_let_stmt.expr);
    if_let_node->val.if_let_stmt.guard =
        while_let_node->val.while_let_stmt.guard
            ? desugar_node(d, mod, while_let_node->val.while_let_stmt.guard)
            : NULL;
    if_let_node->val.if_let_stmt.lbranch =
        desugar_node(d, mod, while_let_node->val.while_let_stmt.body);
    if_let_node->val.if_let_stmt.rbranch = else_clause;
    if_let_node->val.if_let_stmt.scope =
        NULL; /* Will be set during resolving */

    /* Set up the loop statement */
    loop_node->val.loop_stmt.body = loop_body;

    return loop_node;
}
114 +
115 +
/* Transform a while loop into:
 *
 *   loop {
 *     if (condition) {
 *       body;
 *     } else {
 *       else_clause;
 *       break
 *     }
 *   }
 */
static node_t *desugar_while(desugar_t *d, module_t *mod, node_t *while_node) {
    /* Create the loop node */
    node_t *loop_node = node(mod, NODE_LOOP, while_node);
    /* Create the condition check */
    node_t *if_node = node(mod, NODE_IF, while_node);
    /* Create the break statement */
    node_t *break_node = node(mod, NODE_BREAK, while_node);

    /* Handle the else clause: user rbranch first, then the loop-ending
     * break. */
    node_t *else_clause = node_block(mod, while_node);
    if (while_node->val.while_stmt.rbranch) {
        node_block_add_stmt(
            mod,
            else_clause,
            desugar_node(d, mod, while_node->val.while_stmt.rbranch)
        );
    }
    node_block_add_stmt(mod, else_clause, break_node);

    /* NOTE(review): the source span here comes from the while *body*, unlike
     * desugar_while_let which uses the whole statement — confirm intended. */
    node_t *loop_body = node_block(mod, while_node->val.while_stmt.body);
    node_block_add_stmt(mod, loop_body, if_node);

    /* Set up the if statement. */
    if_node->val.if_stmt.cond =
        desugar_node(d, mod, while_node->val.while_stmt.cond);
    if_node->val.if_stmt.lbranch =
        desugar_node(d, mod, while_node->val.while_stmt.body);
    if_node->val.if_stmt.rbranch = else_clause;

    /* Set up the loop statement */
    loop_node->val.loop_stmt.body = loop_body;

    return loop_node;
}
160 +
161 +
/* Build an identifier node for `name`. The string is referenced, not
 * copied, so callers must pass storage that outlives the AST. */
static node_t *node_ident(module_t *mod, const char *name, node_t *loc) {
    node_t *id = node(mod, NODE_IDENT, loc);

    id->val.ident.name   = name;
    id->val.ident.length = strlen(name);
    return id;
}
168 +
169 +
static node_t *node_var(
170 +
    module_t *mod,
171 +
    node_t   *ident,
172 +
    node_t   *typ,
173 +
    node_t   *val,
174 +
    bool      mut,
175 +
    node_t   *loc
176 +
) {
177 +
    node_t *var = node(mod, NODE_VAR, loc);
178 +
179 +
    var->val.var.ident   = ident;
180 +
    var->val.var.type    = typ;
181 +
    var->val.var.value   = val;
182 +
    var->val.var.mutable = mut;
183 +
184 +
    return var;
185 +
}
186 +
187 +
/* Build a number literal node from its textual form (e.g. "0", "1"). */
static node_t *node_number(module_t *mod, const char *text, node_t *loc) {
    node_t *num = node(mod, NODE_NUMBER, loc);

    num->val.number.text     = text;
    num->val.number.text_len = strlen(text);
    return num;
}
194 +
195 +
/* Build a type node of the given type class (e.g. TYPE_U32). */
static node_t *node_type(module_t *mod, typeclass_t tc, node_t *loc) {
    node_t *t = node(mod, NODE_TYPE, loc);

    t->val.type.tclass = tc;
    return t;
}
201 +
202 +
static node_t *node_access(
203 +
    module_t *mod, node_t *lval, node_t *rval, node_t *loc
204 +
) {
205 +
    node_t *access          = node(mod, NODE_ACCESS, loc);
206 +
    access->val.access.lval = lval;
207 +
    access->val.access.rval = rval;
208 +
209 +
    return access;
210 +
}
211 +
212 +
static node_t *node_access_str(
213 +
    module_t *mod, node_t *lval, const char *field, node_t *loc
214 +
) {
215 +
    return node_access(mod, lval, node_ident(mod, field, loc), loc);
216 +
}
217 +
218 +
static node_t *node_binop(
219 +
    module_t *mod, binop_t op, node_t *left, node_t *right, node_t *loc
220 +
) {
221 +
    node_t *binop          = node(mod, NODE_BINOP, loc);
222 +
    binop->val.binop.op    = op;
223 +
    binop->val.binop.left  = left;
224 +
    binop->val.binop.right = right;
225 +
226 +
    return binop;
227 +
}
228 +
229 +
static node_t *node_increment(
230 +
    module_t *mod, node_t *lval_ident, node_t *expr_ident, node_t *loc
231 +
) {
232 +
    node_t *assign          = node(mod, NODE_ASSIGN, loc);
233 +
    assign->val.assign.lval = lval_ident;
234 +
    assign->val.assign.rval =
235 +
        node_binop(mod, OP_ADD, expr_ident, node_number(mod, "1", loc), loc);
236 +
237 +
    return assign;
238 +
}
239 +
240 +
/* Build a match statement over `expr` with an initially empty case list. */
static node_t *node_match(module_t *mod, node_t *expr, node_t *loc) {
    node_t *m = node(mod, NODE_MATCH, loc);

    m->val.match_stmt.expr  = expr;
    m->val.match_stmt.cases = (nodespan_t){ 0 };
    return m;
}
247 +
248 +
static node_t *node_match_case(
249 +
    module_t *mod, node_t *pattern, node_t *guard, node_t *body, node_t *loc
250 +
) {
251 +
    node_t *swtch_case                  = node(mod, NODE_MATCH_CASE, loc);
252 +
    swtch_case->val.match_case.patterns = (nodespan_t){ 0 };
253 +
    if (pattern != NULL) {
254 +
        nodespan_push(
255 +
            &mod->parser, &swtch_case->val.match_case.patterns, pattern
256 +
        );
257 +
    }
258 +
    swtch_case->val.match_case.body     = body;
259 +
    swtch_case->val.match_case.guard    = guard;
260 +
    swtch_case->val.match_case.variable = NULL;
261 +
262 +
    return swtch_case;
263 +
}
264 +
265 +
/*
 * Transform guard statements into their desugared control flow.
 *
 * For example, `let value = opt else { handle(); }; rest;` becomes:
 *
 *     if let value = opt {
 *         rest;
 *     } else {
 *         handle();
 *     }
 *
 * Likewise, `let case Pattern(x) = expr else { ... };` becomes an
 * equivalent `if case` construct with the suffix statements placed in
 * the success branch so they retain access to bound names.
 */
static node_t *desugar_guard_stmt(
    desugar_t *d, module_t *mod, node_t *block, usize index, node_t *guard
) {
    node_t *success = node_block(mod, guard);
    node_t *if_stmt = NULL;

    /* Add the rest of the surrounding block into the success branch. */
    node_t **stmts = nodespan_ptrs(&mod->parser, block->val.block.stmts);
    for (usize j = index + 1; j < block->val.block.stmts.len; j++) {
        node_block_add_stmt(mod, success, stmts[j]);
    }
    if (guard->cls == NODE_GUARD_LET) {
        if_stmt                          = node(mod, NODE_IF_LET, guard);
        if_stmt->val.if_let_stmt.var     = guard->val.guard_let_stmt.var;
        if_stmt->val.if_let_stmt.expr    = guard->val.guard_let_stmt.expr;
        if_stmt->val.if_let_stmt.guard   = NULL;
        if_stmt->val.if_let_stmt.lbranch = success;
        if_stmt->val.if_let_stmt.rbranch = guard->val.guard_let_stmt.rbranch;
        if_stmt->val.if_let_stmt.scope   = NULL;
    } else {
        if_stmt                           = node(mod, NODE_IF_CASE, guard);
        if_stmt->val.if_case_stmt.pattern = guard->val.guard_case_stmt.pattern;
        if_stmt->val.if_case_stmt.expr    = guard->val.guard_case_stmt.expr;
        if_stmt->val.if_case_stmt.guard   = guard->val.guard_case_stmt.guard;
        if_stmt->val.if_case_stmt.lbranch = success;
        if_stmt->val.if_case_stmt.rbranch = guard->val.guard_case_stmt.rbranch;
    }
    /* Truncate the enclosing block: everything after the guard now lives in
     * the success branch; the caller overwrites the guard's own slot with
     * the returned `if` statement. */
    block->val.block.stmts.len = index + 1;

    /* Desugar the freshly built `if` so nested constructs (and the suffix
     * statements moved into `success`) are processed too. */
    return desugar_node(d, mod, if_stmt);
}
311 +
312 +
/* Desugar every statement of a block in place. Returns the same block. */
static node_t *desugar_block(desugar_t *d, module_t *mod, node_t *block) {
    node_t **stmts = nodespan_ptrs(&mod->parser, block->val.block.stmts);
    for (usize i = 0; i < block->val.block.stmts.len; i++) {
        node_t *stmt = stmts[i];
        /* Guard statements fold the rest of the block under the success
         * branch of the generated `if` statement, therefore we continue
         * processing the block inside the guard statement desugar. */
        if (stmt->cls == NODE_GUARD_LET || stmt->cls == NODE_GUARD_CASE) {
            /* desugar_guard_stmt truncates stmts.len to i + 1, so no
             * statements remain beyond this slot — return immediately. */
            stmts[i] = desugar_guard_stmt(d, mod, block, i, stmt);
            return block;
        }
        stmts[i] = desugar_node(d, mod, stmt);
    }
    return block;
}
327 +
328 +
static void node_match_add_case(
329 +
    module_t *mod, node_t *swtch, node_t *swtch_case
330 +
) {
331 +
    nodespan_push(&mod->parser, &swtch->val.match_stmt.cases, swtch_case);
332 +
}
333 +
334 +
/* Transform `for var in start..end { body } else { rbranch }` into:
 *
 *   {
 *     let $i: u32 = start;     // or the user-provided index variable;
 *     let $end: u32 = end;     // start defaults to 0 when omitted
 *     while ($i < $end) {
 *       let var = $i;
 *       body;
 *       $i = $i + 1;
 *     } else {
 *       rbranch;
 *     }
 *   }
 */
static node_t *desugar_for_range(
    desugar_t *d, module_t *mod, node_t *for_node
) {
    node_t *range = for_node->val.for_stmt.iter;

    node_t *index_name = for_node->val.for_stmt.idx
                             ? for_node->val.for_stmt.idx
                             : node_ident(mod, "$i", for_node);
    node_t *end_name   = node_ident(mod, "$end", for_node);
    node_t *start_expr = range->val.range.start
                             ? desugar_node(d, mod, range->val.range.start)
                             : node_number(mod, "0", range);
    node_t *index_typ  = node_type(mod, TYPE_U32, for_node);
    node_t *index_var =
        node_var(mod, index_name, index_typ, start_expr, true, for_node);

    /* Evaluate the range end once, up front, into `$end`. */
    node_t *end_expr = desugar_node(d, mod, range->val.range.end);
    node_t *end_typ  = node_type(mod, TYPE_U32, for_node);
    node_t *end_var  = node_var(mod, end_name, end_typ, end_expr, false, range);

    node_t *cond = node_binop(
        mod, OP_LT, index_var->val.var.ident, end_var->val.var.ident, for_node
    );
    node_t *loop_body = node_block(mod, for_node);
    /* Bind the user's loop variable to the current index value. */
    node_t *loop_var  = node_var(
        mod,
        for_node->val.for_stmt.var,
        NULL,
        index_var->val.var.ident,
        false,
        for_node->val.for_stmt.var
    );
    node_block_add_stmt(mod, loop_body, loop_var);
    node_block_add_stmt(
        mod, loop_body, desugar_node(d, mod, for_node->val.for_stmt.body)
    );

    node_t *increment = node_increment(
        mod, index_var->val.var.ident, index_var->val.var.ident, for_node
    );
    node_block_add_stmt(mod, loop_body, increment);

    node_t *while_node              = node(mod, NODE_WHILE, for_node);
    while_node->val.while_stmt.cond = cond;
    while_node->val.while_stmt.body = loop_body;
    while_node->val.while_stmt.rbranch =
        for_node->val.for_stmt.rbranch
            ? desugar_node(d, mod, for_node->val.for_stmt.rbranch)
            : NULL;

    /* Wrapper block scopes the index/end variables to the loop. */
    node_t *wrapper = node_block(mod, for_node);
    node_block_add_stmt(mod, wrapper, index_var);
    node_block_add_stmt(mod, wrapper, end_var);
    node_block_add_stmt(mod, wrapper, desugar_while(d, mod, while_node));

    return wrapper;
}
391 +
392 +
/* Transform a for loop into a while loop:
 *
 *   for var in (iter) {
 *     body;
 *   } else {
 *     rbranch;
 *   }
 *
 * becomes:
 *
 *   {
 *     let $i: u32 = 0;
 *     let $len: u32 = iter.len;
 *     while ($i < $len) {
 *       let var = iter[$i];
 *       body;
 *       $i = $i + 1;
 *     } else {
 *       rbranch;
 *     }
 *   }
 */
static node_t *desugar_for(desugar_t *d, module_t *mod, node_t *for_node) {
    if (for_node->val.for_stmt.iter->cls == NODE_RANGE) {
        return desugar_for_range(d, mod, for_node);
    }
    /* Use simple temporary variable names or user-provided index variable */
    node_t *index_name  = for_node->val.for_stmt.idx
                              ? for_node->val.for_stmt.idx
                              : node_ident(mod, "$i", for_node);
    node_t *length_name = node_ident(mod, "$len", for_node);

    /* Create index variable: let $i: u32 = 0; */
    node_t *index_val = node_number(mod, "0", for_node);
    node_t *index_typ = node_type(mod, TYPE_U32, for_node);
    node_t *index_var =
        node_var(mod, index_name, index_typ, index_val, true, for_node);

    /* Create length variable: let $len: u32 = iter.len;
     * NOTE(review): `iter` is desugared twice (here and for iter[$i] below),
     * so it appears twice in the generated tree and is evaluated twice at
     * runtime — presumably iter is a simple lvalue; confirm side-effecting
     * iterators are not allowed. */
    node_t *len_field = node_access_str(
        mod,
        desugar_node(d, mod, for_node->val.for_stmt.iter),
        "len",
        for_node->val.for_stmt.iter
    );
    node_t *length_typ = node_type(mod, TYPE_U32, for_node);
    node_t *length_var =
        node_var(mod, length_name, length_typ, len_field, false, for_node);

    /* Create while condition: $i < $len */
    node_t *cond = node_binop(
        mod,
        OP_LT,
        index_var->val.var.ident,
        length_var->val.var.ident,
        for_node
    );

    /* Create array index access: iter[$i] */
    node_t *array_idx = node(mod, NODE_ARRAY_INDEX, for_node);
    array_idx->val.access.lval =
        desugar_node(d, mod, for_node->val.for_stmt.iter);
    array_idx->val.access.rval = index_var->val.var.ident;

    /* Create loop variable assignment: let var = iter[$i]; */
    node_t *var_name = for_node->val.for_stmt.var;
    node_t *loop_var = node_var(
        mod, var_name, NULL, array_idx, false, for_node->val.for_stmt.var
    );

    /* Create increment statement: $i = $i + 1; */
    node_t *increment = node_increment(
        mod, index_var->val.var.ident, index_var->val.var.ident, for_node
    );

    /* Create while body */
    node_t *body = node_block(mod, for_node);
    node_block_add_stmt(mod, body, loop_var);
    node_block_add_stmt(
        mod, body, desugar_node(d, mod, for_node->val.for_stmt.body)
    );
    node_block_add_stmt(mod, body, increment);

    /* Create while node */
    node_t *while_node              = node(mod, NODE_WHILE, for_node);
    while_node->val.while_stmt.cond = cond;
    while_node->val.while_stmt.body = body;
    while_node->val.while_stmt.rbranch =
        for_node->val.for_stmt.rbranch
            ? desugar_node(d, mod, for_node->val.for_stmt.rbranch)
            : NULL;

    /* Create wrapper block containing the initialization and while loop */
    node_t *wrapper = node_block(mod, for_node);
    node_block_add_stmt(mod, wrapper, index_var);
    node_block_add_stmt(mod, wrapper, length_var);
    node_block_add_stmt(mod, wrapper, desugar_while(d, mod, while_node));

    return wrapper;
}
492 +
493 +
/* Transform `x and y` into:
494 +
 *
495 +
 *   if (x) {
496 +
 *     y
497 +
 *   } else {
498 +
 *     false
499 +
 *   }
500 +
 */
501 +
static node_t *desugar_and_operator(
502 +
    desugar_t *d, module_t *mod, node_t *binop
503 +
) {
504 +
    node_t *if_node   = node(mod, NODE_IF, binop);
505 +
    node_t *false_lit = node_bool(mod, false, binop);
506 +
507 +
    if_node->val.if_stmt.cond    = desugar_node(d, mod, binop->val.binop.left);
508 +
    if_node->val.if_stmt.lbranch = desugar_node(d, mod, binop->val.binop.right);
509 +
    if_node->val.if_stmt.rbranch = false_lit;
510 +
511 +
    return if_node;
512 +
}
513 +
514 +
/* Transform `x or y` into:
515 +
 *
516 +
 *   if (x) {
517 +
 *     true
518 +
 *   } else {
519 +
 *     y
520 +
 *   }
521 +
 */
522 +
static node_t *desugar_or_operator(desugar_t *d, module_t *mod, node_t *binop) {
523 +
    node_t *if_node  = node(mod, NODE_IF, binop);
524 +
    node_t *true_lit = node_bool(mod, true, binop);
525 +
526 +
    if_node->val.if_stmt.cond    = desugar_node(d, mod, binop->val.binop.left);
527 +
    if_node->val.if_stmt.lbranch = true_lit;
528 +
    if_node->val.if_stmt.rbranch = desugar_node(d, mod, binop->val.binop.right);
529 +
530 +
    return if_node;
531 +
}
532 +
533 +
/* Recursively desugar a node and its children.
 *
 * Statement-level sugar (while, while-let, for, guard, if-case, and/or) is
 * rewritten into new subtrees; all other node classes are desugared in
 * place by recursing into their children. Returns the (possibly new) node,
 * or NULL for a NULL input. Unknown node classes are a fatal error. */
static node_t *desugar_node(desugar_t *d, module_t *mod, node_t *n) {
    if (!n)
        return NULL;

    switch (n->cls) {
    case NODE_WHILE:
        return desugar_while(d, mod, n);

    case NODE_WHILE_LET:
        return desugar_while_let(d, mod, n);

    case NODE_MOD_BODY:
    case NODE_BLOCK:
        return desugar_block(d, mod, n);

    case NODE_IF:
        n->val.if_stmt.cond    = desugar_node(d, mod, n->val.if_stmt.cond);
        n->val.if_stmt.lbranch = desugar_node(d, mod, n->val.if_stmt.lbranch);
        if (n->val.if_stmt.rbranch) {
            n->val.if_stmt.rbranch =
                desugar_node(d, mod, n->val.if_stmt.rbranch);
        }
        return n;

    case NODE_IF_LET:
        n->val.if_let_stmt.expr = desugar_node(d, mod, n->val.if_let_stmt.expr);
        if (n->val.if_let_stmt.guard) {
            n->val.if_let_stmt.guard =
                desugar_node(d, mod, n->val.if_let_stmt.guard);
        }
        n->val.if_let_stmt.lbranch =
            desugar_node(d, mod, n->val.if_let_stmt.lbranch);
        if (n->val.if_let_stmt.rbranch) {
            n->val.if_let_stmt.rbranch =
                desugar_node(d, mod, n->val.if_let_stmt.rbranch);
        }
        return n;

    /* `if case` is rewritten into a match statement with one case for the
     * pattern and, when an else branch exists, a default case. */
    case NODE_IF_CASE: {
        node_t *pattern = desugar_node(d, mod, n->val.if_case_stmt.pattern);
        node_t *expr    = desugar_node(d, mod, n->val.if_case_stmt.expr);
        node_t *guard   = NULL;

        if (n->val.if_case_stmt.guard) {
            guard = desugar_node(d, mod, n->val.if_case_stmt.guard);
        }
        node_t *then_block = desugar_node(d, mod, n->val.if_case_stmt.lbranch);
        node_t *swtch      = node_match(mod, expr, n);

        node_t *case_node = node_match_case(mod, pattern, guard, then_block, n);

        node_match_add_case(mod, swtch, case_node);

        if (n->val.if_case_stmt.rbranch) {
            node_t *else_body =
                desugar_node(d, mod, n->val.if_case_stmt.rbranch);
            node_t *default_case =
                node_match_case(mod, NULL, NULL, else_body, n);

            node_match_add_case(mod, swtch, default_case);
        }
        return swtch;
    }

    case NODE_LOOP:
        n->val.loop_stmt.body = desugar_node(d, mod, n->val.loop_stmt.body);
        return n;

    case NODE_FN:
        n->val.fn_decl.body = desugar_node(d, mod, n->val.fn_decl.body);
        return n;

    case NODE_BINOP:
        /* Handle logical operators with short-circuit evaluation */
        if (n->val.binop.op == OP_AND) {
            return desugar_and_operator(d, mod, n);
        }
        if (n->val.binop.op == OP_OR) {
            return desugar_or_operator(d, mod, n);
        }
        /* For other binary operators, recursively desugar operands */
        n->val.binop.left  = desugar_node(d, mod, n->val.binop.left);
        n->val.binop.right = desugar_node(d, mod, n->val.binop.right);
        return n;

    case NODE_UNOP:
        n->val.unop.expr = desugar_node(d, mod, n->val.unop.expr);
        return n;

    case NODE_CALL: {
        node_t **args = nodespan_ptrs(&mod->parser, n->val.call.args);
        for (usize i = 0; i < n->val.call.args.len; i++) {
            args[i] = desugar_node(d, mod, args[i]);
        }
        return n;
    }

    case NODE_BUILTIN: {
        node_t **args = nodespan_ptrs(&mod->parser, n->val.builtin.args);
        for (usize i = 0; i < n->val.builtin.args.len; i++) {
            args[i] = desugar_node(d, mod, args[i]);
        }
        return n;
    }

    case NODE_RETURN:
        if (n->val.return_stmt.value) {
            n->val.return_stmt.value =
                desugar_node(d, mod, n->val.return_stmt.value);
        }
        return n;

    case NODE_VAR:
        if (n->val.var.value) {
            n->val.var.value = desugar_node(d, mod, n->val.var.value);
        }
        return n;

    case NODE_ASSIGN:
        n->val.assign.lval = desugar_node(d, mod, n->val.assign.lval);
        n->val.assign.rval = desugar_node(d, mod, n->val.assign.rval);
        return n;

    case NODE_EXPR_STMT:
        n->val.expr_stmt = desugar_node(d, mod, n->val.expr_stmt);
        return n;

    case NODE_FOR:
        return desugar_for(d, mod, n);

    case NODE_MATCH: {
        n->val.match_stmt.expr = desugar_node(d, mod, n->val.match_stmt.expr);
        node_t **cases = nodespan_ptrs(&mod->parser, n->val.match_stmt.cases);
        for (usize i = 0; i < n->val.match_stmt.cases.len; i++) {
            cases[i] = desugar_node(d, mod, cases[i]);
        }
        return n;
    }

    case NODE_MATCH_CASE: {
        node_t **patterns =
            nodespan_ptrs(&mod->parser, n->val.match_case.patterns);
        for (usize i = 0; i < n->val.match_case.patterns.len; i++) {
            patterns[i] = desugar_node(d, mod, patterns[i]);
        }
        if (n->val.match_case.guard) {
            n->val.match_case.guard =
                desugar_node(d, mod, n->val.match_case.guard);
        }
        n->val.match_case.body = desugar_node(d, mod, n->val.match_case.body);
        return n;
    }

    /* Leaf and declaration nodes: nothing to desugar, returned as-is.
     * NOTE(review): NODE_ACCESS and literal aggregates (array/record
     * literals, call args) are treated as terminals here — confirm their
     * children can never contain `and`/`or` or other sugar. */
    case NODE_ARRAY_INDEX:
    case NODE_ARRAY_LIT:
    case NODE_ARRAY_REPEAT_LIT:
    case NODE_RECORD_LIT:
    case NODE_CALL_ARG:
    case NODE_REF:
    case NODE_ACCESS:
    case NODE_NUMBER:
    case NODE_CHAR:
    case NODE_STRING:
    case NODE_BOOL:
    case NODE_NIL:
    case NODE_UNDEF:
    case NODE_SCOPE:
    case NODE_IDENT:
    case NODE_PLACEHOLDER:
    case NODE_BREAK:
    case NODE_USE:
    case NODE_AS:
    case NODE_CONST:
    case NODE_STATIC:
    case NODE_MOD:
    case NODE_UNION:
    case NODE_RECORD:
    case NODE_PANIC:
    case NODE_TYPE:
    case NODE_RECORD_TYPE:
        return n;

    case NODE_THROW:
        n->val.throw_stmt.expr = desugar_node(d, mod, n->val.throw_stmt.expr);
        return n;

    case NODE_TRY: {
        n->val.try_expr.expr = desugar_node(d, mod, n->val.try_expr.expr);
        node_t **handlers =
            nodespan_ptrs(&mod->parser, n->val.try_expr.handlers);
        for (usize i = 0; i < n->val.try_expr.handlers.len; i++) {
            handlers[i] = desugar_node(d, mod, handlers[i]);
        }
        n->val.try_expr.catch_expr =
            desugar_node(d, mod, n->val.try_expr.catch_expr);
        return n;
    }
    case NODE_CATCH:
        n->val.catch_clause.body =
            desugar_node(d, mod, n->val.catch_clause.body);
        return n;

    default:
        bail("unsupported node type %s", node_names[n->cls]);
        return NULL;
    }
}
741 +
742 +
/* Entry point of the desugaring pass: desugar the whole AST rooted at
 * `ast` and return the (possibly replaced) root node. */
node_t *desugar_run(desugar_t *d, module_t *mod, node_t *ast) {
    return desugar_node(d, mod, ast);
}
desugar.h added +15 -0
1 +
#ifndef DESUGAR_H
#define DESUGAR_H

#include "ast.h"
#include "module.h"

/* Desugar pass context */
typedef struct {
    u32 flags; /* Pass flags; not read by the visible pass — TODO confirm use */
} desugar_t;

/* AST desugaring pass; runs after parsing to transform the AST.
 * Returns the (possibly replaced) root node of the desugared tree. */
node_t *desugar_run(desugar_t *d, module_t *mod, node_t *ast);

#endif
gen.c added +3233 -0
1 +
#include <assert.h>
2 +
#include <stdlib.h>
3 +
#include <string.h>
4 +
5 +
#include "ast.h"
6 +
#include "gen.h"
7 +
#include "gen/data.h"
8 +
#include "gen/emit.h"
9 +
#include "io.h"
10 +
#include "limits.h"
11 +
#include "module.h"
12 +
#include "options.h"
13 +
#include "ralloc.h"
14 +
#include "riscv.h"
15 +
16 +
#include "resolver.h"
17 +
#include "symtab.h"
18 +
#include "types.h"
19 +
20 +
/* Shorthand: get node pointer array from span in current module's parser.
 * `g` is a gen_t* and `s` a nodespan_t; each argument is expanded once. */
#define SPAN(g, s) nodespan_ptrs(&(g)->mod->parser, (s))
22 +
23 +
static void    gen_assign(gen_t *g, node_t *n);
24 +
static void    gen_return(gen_t *g, node_t *n);
25 +
static void    gen_fn(gen_t *g, node_t *n);
26 +
static value_t gen_array_index(gen_t *g, node_t *n, bool ref);
27 +
static value_t gen_array_slice(gen_t *g, value_t array_val, node_t *range);
28 +
static value_t gen_array_literal(gen_t *g, node_t *n);
29 +
static value_t gen_array_repeat(gen_t *g, node_t *n);
30 +
static value_t gen_expr(gen_t *g, node_t *n, bool lvalue);
31 +
static void    gen_expr_stmt(gen_t *g, node_t *n);
32 +
static void    gen_match(gen_t *g, node_t *n);
33 +
static void    gen_block(gen_t *g, node_t *n);
34 +
static void    gen_if(gen_t *g, node_t *n);
35 +
static void    gen_if_let(gen_t *g, node_t *n);
36 +
static value_t gen_if_expr(gen_t *g, node_t *n);
37 +
static void    gen_loop(gen_t *g, node_t *n);
38 +
static void    gen_break(gen_t *g, node_t *n);
39 +
static void    gen_var(gen_t *g, node_t *n);
40 +
static void    gen_const(gen_t *g, node_t *n);
41 +
static void    gen_static(gen_t *g, node_t *n);
42 +
static void    gen_nop(gen_t *g, node_t *n);
43 +
static value_t gen_deref(gen_t *g, node_t *n, value_t ref_val, bool lval);
44 +
static void    gen_ecall(gen_t *g, node_t *n);
45 +
static void    gen_ebreak(gen_t *g, node_t *n);
46 +
static void    gen_panic(gen_t *g, node_t *n);
47 +
static void    gen_mod(gen_t *g, node_t *n);
48 +
static void    gen_use(gen_t *g, node_t *n);
49 +
static value_t gen_as_cast(gen_t *g, node_t *n);
50 +
static value_t gen_union_constructor(gen_t *g, node_t *n);
51 +
static value_t gen_record_lit(gen_t *g, node_t *n);
52 +
static void    gen_throw(gen_t *g, node_t *n);
53 +
static value_t gen_try(gen_t *g, node_t *n);
54 +
static value_t gen_union_store(
55 +
    gen_t *g, type_t *union_type, symbol_t *variant_sym, value_t payload
56 +
);
57 +
static void    useval(gen_t *g, value_t val);
58 +
static void    freeval(gen_t *g, value_t val);
59 +
static value_t value_none(void);
60 +
i32            tval_payload_offset(type_t *container);
61 +
62 +
/* Convert a value into a tagged value by calculating its offsets. */
63 +
tval_t tval_from_val(gen_t *g, value_t val) {
64 +
    /* For unions with payloads, we don't know the value type in advance. */
65 +
    type_t *val_typ = NULL;
66 +
67 +
    if (val.type->cls == TYPE_OPT) {
68 +
        val_typ = val.type->info.opt.elem;
69 +
    } else if (val.type->cls == TYPE_RESULT) {
70 +
        val_typ = val.type->info.res.payload;
71 +
    }
72 +
    i32 val_off = tval_payload_offset(val.type);
73 +
74 +
    tval_t tval = { 0 };
75 +
    tval.tag =
76 +
        (value_t){ .type = g->types->type_u8, .loc = val.loc, .as = val.as };
77 +
    tval.typ = val.type;
78 +
    tval.val = (value_t){
79 +
        .type = val_typ,
80 +
        .loc  = val.loc,
81 +
        .as   = val.as,
82 +
    };
83 +
84 +
    if (val.loc == LOC_STACK) {
85 +
        tval.val.as.off.offset = val.as.off.offset + val_off;
86 +
    } else if (val.loc == LOC_ADDR) {
87 +
        tval.val.as.adr.offset = val.as.adr.offset + val_off;
88 +
    } else if (val.loc == LOC_REG) {
89 +
        /* Register contains the address of the optional in memory */
90 +
        tval.tag.loc           = LOC_STACK;
91 +
        tval.tag.as.off.base   = val.as.reg;
92 +
        tval.tag.as.off.offset = 0;
93 +
94 +
        tval.val.loc           = LOC_STACK;
95 +
        tval.val.as.off.base   = val.as.reg;
96 +
        tval.val.as.off.offset = val_off;
97 +
    } else {
98 +
        bail("cannot load tagged value from location %d", val.loc);
99 +
    }
100 +
    return tval;
101 +
}
102 +
103 +
/* Return the byte offset of the payload within a tagged value. */
104 +
i32 tval_payload_offset(type_t *container) {
105 +
    return container->size > TAG_SIZE ? align(TAG_SIZE, container->align)
106 +
                                      : TAG_SIZE;
107 +
}
108 +
109 +
/* Return the number of payload bytes to zero before writing a new value. */
110 +
i32 tval_payload_zero_size(type_t *container) {
111 +
    switch (container->cls) {
112 +
    case TYPE_OPT:
113 +
        return container->size - tval_payload_offset(container);
114 +
    case TYPE_UNION:
115 +
        return container->size - tval_payload_offset(container);
116 +
    default:
117 +
        return 0;
118 +
    }
119 +
}
120 +
121 +
/* Write a tagged value into `dest`.
 *
 * Layout written: [tag byte][padding][payload]. `dest` must be an
 * optional/union value located on the stack, at an absolute address, or
 * behind an address held in a register. `value` is the payload (skipped
 * when nil or void); `tag` is the tag byte stored at offset 0. */
void tval_store(gen_t *g, value_t dest, value_t value, i32 tag) {
    /* Optional values treat tag 0 as nil; everything else always stores a
     * payload area. */
    bool nil = (dest.type->cls == TYPE_OPT && tag == 0);

    /* Compute base/offset for tag and payload. For addresses, materialize a
     * temporary base register so regstore/memzero can operate safely. */
    reg_t base      = ZERO;
    i32   tag_off   = 0;
    bool  base_temp = false;

    switch (dest.loc) {
    case LOC_STACK:
        base    = dest.as.off.base;
        tag_off = dest.as.off.offset;
        break;
    case LOC_ADDR:
        base      = nextreg(g);
        base_temp = true;
        emit_li(g, base, dest.as.adr.base);
        tag_off = dest.as.adr.offset;
        break;
    case LOC_REG: {
        /* Register holds the address; copy into a reserved temp. */
        base      = nextreg(g);
        base_temp = true;
        emit_mv(g, base, dest.as.reg);
        tag_off = 0;
        break;
    }
    default:
        bail("cannot store tagged value at location %d", dest.loc);
    }
    i32 payload_off = tag_off + tval_payload_offset(dest.type);

    /* Store tag (1 byte) */
    reg_t rd = nextreg(g);
    emit_li(g, rd, tag);
    emit_regstore(g, rd, base, tag_off, g->types->type_u8);
    freereg(g, rd);

    /* Zero padding between tag byte and payload start so that byte-level
     * equality comparisons of tagged values work correctly. */
    i32 pad_off  = tag_off + TAG_SIZE;
    i32 pad_size = payload_off - pad_off;
    if (pad_size > 0) {
        emit_memzero(g, OFFSET(base, pad_off), pad_size);
    }

    /* Clear payload region before writing a new value (or when nil). */
    i32 payload_size = tval_payload_zero_size(dest.type);
    emit_memzero(g, OFFSET(base, payload_off), payload_size);

    /* Nil optionals and void payloads carry no bytes to write. */
    if (!nil && value.type && value.type->cls != TYPE_VOID) {
        emit_store(g, value, base, payload_off);
    }
    /* Release the temporary base register materialized above, if any. */
    if (base_temp)
        freereg(g, base);
}
180 +
181 +
/* Helper function to create an optional value from a primitive immediate. */
182 +
static value_t optval_from_prim(gen_t *g, type_t *opt_type, value_t prim_val) {
183 +
    i32     offset  = reserve(g, opt_type);
184 +
    value_t opt_val = value_stack(OFFSET(FP, offset), opt_type);
185 +
    tval_store(g, opt_val, prim_val, 1);
186 +
187 +
    return opt_val;
188 +
}
189 +
190 +
/* Wrap `value` into an optional of `opt_type`, unless it already is one. */
static value_t optval_from_value(gen_t *g, type_t *opt_type, value_t value) {
    /* Already the exact optional type: pass it through untouched. */
    if (value.type == opt_type)
        return value;

    /* Reserve a stack slot for the optional and describe it as a value. */
    i32     slot = reserve(g, opt_type);
    value_t opt  = value_stack(OFFSET(FP, slot), opt_type);

    /* Tag 1 marks the optional as present. */
    tval_store(g, opt, value, 1);
    return opt;
}
201 +
202 +
/* Load the tag of tagged value into a register. */
203 +
static reg_t tval_load_tag(gen_t *g, value_t opt_val) {
204 +
    tval_t opt = tval_from_val(g, opt_val);
205 +
206 +
    return emit_load(g, opt.tag);
207 +
}
208 +
209 +
/* Helper to bind a union variant payload to a variable.
 *
 * Allocates stack storage for the pattern variable on first use, then
 * copies the union's payload (the bytes after the tag) into it. */
static void bind_union_value(gen_t *g, value_t union_src, node_t *bound_var) {
    symbol_t *var_sym = bound_var->sym;

    /* Allocate storage for the bound variable if not already allocated */
    if (var_sym->e.var.val.loc == LOC_NONE) {
        i32 off = reserve_aligned(g, var_sym->e.var.typ, var_sym->e.var.align);
        var_sym->e.var.val = value_stack(OFFSET(FP, off), var_sym->e.var.typ);
    }
    /* Create a value pointing to the value part of the union (after the tag) */
    type_t *union_type = union_src.type;
    /* When matching through a reference, look through the pointer to get
     * the union's actual layout. */
    if (union_type->cls == TYPE_PTR)
        union_type = union_type->info.ptr.target;
    i32     val_off        = tval_payload_offset(union_type);
    value_t union_val_part = union_src;
    union_val_part.type    = bound_var->type;

    if (union_src.loc == LOC_STACK) {
        union_val_part.as.off.offset += val_off;
    } else if (union_src.loc == LOC_ADDR) {
        union_val_part.as.adr.offset += val_off;
    } else {
        /* NOTE(review): LOC_REG (address held in a register) is not handled
         * here, unlike tval_from_val / gen_match_case_union_payload — confirm
         * callers never reach this with a register-held union. */
        bail("cannot bind union value from this location");
    }
    /* Copy the union payload to the bound variable */
    emit_replace(g, var_sym->e.var.val, union_val_part);
}
236 +
237 +
/* Copy the value part of an optional to a destination */
238 +
static void optval_copy_value(gen_t *g, value_t opt_src, value_t value_dest) {
239 +
    tval_t opt = tval_from_val(g, opt_src);
240 +
    emit_replace(g, value_dest, opt.val);
241 +
}
242 +
243 +
/* Generate a union constructor call like Expr::number(42) */
244 +
static value_t gen_union_constructor(gen_t *g, node_t *call) {
245 +
    type_t *variant_type = call->sym->node->type;
246 +
    value_t payload      = value_none();
247 +
248 +
    if (variant_type->cls != TYPE_VOID) {
249 +
        node_t *arg_node = SPAN(g, call->val.call.args)[0];
250 +
        node_t *arg_expr = arg_node->val.call_arg.expr;
251 +
        payload          = gen_expr(g, arg_expr, false);
252 +
    }
253 +
    return gen_union_store(g, call->type, call->sym, payload);
254 +
}
255 +
256 +
static value_t gen_union_store(
257 +
    gen_t *g, type_t *union_type, symbol_t *variant_sym, value_t payload
258 +
) {
259 +
    i32 tag = variant_sym->node->val.union_variant.value;
260 +
261 +
    /* Allocate space for the union on the stack */
262 +
    i32     offset    = reserve(g, union_type);
263 +
    value_t union_val = value_stack(OFFSET(FP, offset), union_type);
264 +
265 +
    /* Store the union value */
266 +
    useval(g, payload);
267 +
    tval_store(g, union_val, payload, tag);
268 +
    freeval(g, payload);
269 +
270 +
    return union_val;
271 +
}
272 +
273 +
/* Node type to generator function mapping.
 * NULL entries are node kinds that cannot be generated as statements here:
 * gen_node bails on them (expressions are produced through gen_expr). */
static void (*GENERATORS[])(gen_t *, node_t *) = {
    [NODE_TYPE]       = NULL,
    [NODE_NUMBER]     = NULL,
    [NODE_BOOL]       = NULL,
    [NODE_STRING]     = NULL,
    [NODE_CHAR]       = NULL,
    [NODE_IDENT]      = NULL,
    [NODE_BINOP]      = NULL,
    [NODE_BLOCK]      = gen_block,
    [NODE_CALL]       = NULL,
    [NODE_CALL_ARG]   = NULL,
    [NODE_VAR]        = gen_var,
    [NODE_CONST]      = gen_const,
    [NODE_STATIC]     = gen_static,
    [NODE_ASSIGN]     = gen_assign,
    [NODE_RETURN]     = gen_return,
    [NODE_THROW]      = gen_throw,
    [NODE_PANIC]      = gen_panic,
    [NODE_WHILE]      = NULL,
    [NODE_WHILE_LET]  = NULL,
    [NODE_FOR]        = NULL,
    [NODE_LOOP]       = gen_loop,
    [NODE_IF]         = gen_if,
    [NODE_IF_LET]     = gen_if_let,
    [NODE_IF_CASE]    = NULL,
    [NODE_GUARD_CASE] = NULL,
    [NODE_GUARD_LET]  = NULL,
    [NODE_MATCH]      = gen_match,
    [NODE_MATCH_CASE] = gen_nop, /* Cases are handled by gen_match */
    [NODE_FN]         = gen_fn,
    [NODE_BREAK]      = gen_break,
    [NODE_RECORD]     = gen_nop,
    [NODE_UNION]      = gen_nop,
    [NODE_EXPR_STMT]  = gen_expr_stmt,
    [NODE_MOD]        = gen_mod,
    [NODE_USE]        = gen_use,
};
311 +
312 +
/* Built-in functions, looked up by fully-qualified name. */
static const struct {
    const char *name;   /* fully-qualified builtin path */
    usize       length; /* precomputed strlen(name) */
    void (*gen)(gen_t *, node_t *);
} BUILTINS[] = {
    { "std::intrinsics::ecall", 22, gen_ecall },
    { "std::intrinsics::ebreak", 23, gen_ebreak },
    { NULL, 0, NULL }, /* sentinel terminator */
};
322 +
323 +
/******************************************************************************/
324 +
325 +
/* Build a value located at absolute address `addr` plus byte offset `off`. */
value_t value_addr(usize addr, i32 off, type_t *ty) {
    value_t v = { 0 };

    v.type          = ty;
    v.loc           = LOC_ADDR;
    v.as.adr.base   = addr;
    v.as.adr.offset = off;
    return v;
}
333 +
334 +
/* Build a value located on the stack at base register + offset. */
value_t value_stack(offset_t off, type_t *ty) {
    value_t v = { 0 };

    v.type          = ty;
    v.loc           = LOC_STACK;
    v.as.off.base   = off.base;
    v.as.off.offset = off.offset;
    return v;
}
342 +
343 +
/* Build a value living in register `r`; marked temporary so freeval
 * releases the register once the value is consumed. */
value_t value_reg(reg_t r, type_t *ty) {
    value_t v = { 0 };

    v.temp   = true;
    v.type   = ty;
    v.loc    = LOC_REG;
    v.as.reg = r;
    return v;
}
351 +
352 +
/* Build an immediate (compile-time constant) value. */
value_t value_imm(imm_t imm, type_t *ty) {
    value_t v = { 0 };

    v.type   = ty;
    v.loc    = LOC_IMM;
    v.as.imm = imm;
    return v;
}
359 +
360 +
static value_t value_none(void) {
361 +
    return (value_t){
362 +
        .type = NULL,
363 +
        .loc  = LOC_NONE,
364 +
    };
365 +
}
366 +
367 +
/* Align a (possibly negative) stack address down to `alignment` bytes.
 *
 * Fix: the original comment promised a power-of-two verification that was
 * never implemented; the mask trick below is only valid for non-zero
 * powers of two, so guard against invalid alignments by returning the
 * address unchanged (callers always pass type alignments, which are
 * powers of two). */
i32 align_stack(i32 addr, i32 alignment) {
    /* Verify alignment is a non-zero power of 2; the mask is meaningless
     * otherwise. */
    if (alignment <= 0 || (alignment & (alignment - 1)) != 0)
        return addr;

    /* For negative addresses (stack growth downward),
     * we round down to the next multiple of alignment. */
    return addr & ~(alignment - 1);
}
374 +
375 +
/* Byte displacement between two instruction indices. */
i32 jump_offset(usize from, usize to) {
    i32 delta = (i32)to - (i32)from;
    return delta * INSTR_SIZE;
}
378 +
379 +
/* Provide a sentinel patch so callers can keep a uniform interface. */
380 +
static branch_patch_t branch_patch_invalid(void) {
381 +
    return (branch_patch_t){
382 +
        .pc       = (usize)-1,
383 +
        .tramp_pc = (usize)-1,
384 +
        .op       = I_BEQ,
385 +
        .rs1      = ZERO,
386 +
        .rs2      = ZERO,
387 +
        .valid    = false,
388 +
    };
389 +
}
390 +
391 +
/* Reserve space for the branch and a fallback trampoline in one call. */
392 +
static branch_patch_t branch_patch_make(
393 +
    gen_t *g, iname_t op, reg_t rs1, reg_t rs2
394 +
) {
395 +
    branch_patch_t patch = {
396 +
        .pc       = emit(g, NOP),
397 +
        .tramp_pc = emit(g, NOP),
398 +
        .op       = op,
399 +
        .rs1      = rs1,
400 +
        .rs2      = rs2,
401 +
        .valid    = true,
402 +
    };
403 +
    return patch;
404 +
}
405 +
406 +
/* Flip a branch opcode so the trampoline executes on the opposite outcome. */
407 +
static iname_t branch_op_inverse(iname_t op) {
408 +
    switch (op) {
409 +
    case I_BEQ:
410 +
        return I_BNE;
411 +
    case I_BNE:
412 +
        return I_BEQ;
413 +
    case I_BLT:
414 +
        return I_BGE;
415 +
    case I_BGE:
416 +
        return I_BLT;
417 +
    case I_BLTU:
418 +
        return I_BGEU;
419 +
    case I_BGEU:
420 +
        return I_BLTU;
421 +
    default:
422 +
        return 0;
423 +
    }
424 +
}
425 +
426 +
/* Finalize the branch, rewriting to a long-range form when necessary. */
static void branch_patch_apply(gen_t *g, branch_patch_t patch, usize target) {
    /* Sentinels from branch_patch_invalid() are silently ignored. */
    if (!patch.valid)
        return;

    i32 imm = jump_offset(patch.pc, target);
    if (is_branch_imm(imm)) {
        /* Short form: target fits in the branch immediate, so emit the
         * branch directly and neutralize the trampoline slot. */
        g->instrs[patch.pc] = instr(patch.op, ZERO, patch.rs1, patch.rs2, imm);
        g->instrs[patch.tramp_pc] = NOP;
        return;
    }

    /* Long form: invert the branch so that on the *opposite* outcome we
     * fall past the trampoline, and let the trampoline carry an
     * unconditional long-range jump to the real target. */
    usize fallthrough = patch.tramp_pc + 1;
    i32   skip_imm    = jump_offset(patch.pc, fallthrough);

    iname_t inv         = branch_op_inverse(patch.op);
    g->instrs[patch.pc] = instr(inv, ZERO, patch.rs1, patch.rs2, skip_imm);

    i32 jmp_imm               = jump_offset(patch.tramp_pc, target);
    g->instrs[patch.tramp_pc] = JMP(jmp_imm);
}
447 +
448 +
/* Reserve stack space for `ty` at its natural alignment. */
i32 reserve(gen_t *g, type_t *ty) {
    i32 natural_align = ty->align;
    return reserve_aligned(g, ty, natural_align);
}
451 +
452 +
/* Mark the register backing `val` (directly, or as a stack base) as in use. */
static void useval(gen_t *g, value_t val) {
    switch (val.loc) {
    case LOC_REG:
        usereg(g, val.as.reg);
        break;
    case LOC_STACK:
        usereg(g, val.as.off.base);
        break;
    default:
        /* Immediates and addresses occupy no register. */
        break;
    }
}
459 +
460 +
/* Release `val`'s register, but only if it owns a temporary one. */
static void freeval(gen_t *g, value_t val) {
    bool owns_temp_reg = (val.loc == LOC_REG) && val.temp;

    if (owns_temp_reg)
        freereg(g, val.as.reg);
}
465 +
466 +
/******************************************************************************/
467 +
468 +
/* Patch all break statements for a loop. */
469 +
static void patch_break_stmts(gen_t *g) {
470 +
    for (usize i = 0; i < g->fn.nbrkpatches; i++) {
471 +
        ctpatch_t *p = &g->fn.brkpatches[i];
472 +
        if (!p->applied && p->loop == g->loop.current) {
473 +
            /* Calculate jump offset to the loop end, and apply patch. */
474 +
            i32 offset       = jump_offset(p->pc, g->loop.end);
475 +
            g->instrs[p->pc] = JAL(ZERO, offset);
476 +
            p->applied       = true;
477 +
        }
478 +
    }
479 +
}
480 +
481 +
/******************************************************************************/
482 +
483 +
/* Generate code for a node.
 *
 * Dispatches through GENERATORS; a NULL table entry means the node kind
 * is not a valid statement here and aborts compilation. */
static void gen_node(gen_t *g, node_t *n) {
    if (!n)
        return;

    if (!GENERATORS[n->cls])
        bail("unsupported node type '%s'", node_names[n->cls]);

    /* Restore register allocation state between statements to avoid leaking */
    bool regs[RALLOC_NREGS] = { false };

    ralloc_save(&g->regs, regs);
    GENERATORS[n->cls](g, n);
    ralloc_restore(&g->regs, regs);
}
498 +
499 +
/* System call (ecall): Takes four arguments (A0, A1, A2, A3).
 *
 * Builtin call shape: args[0] is the syscall number (goes to A7),
 * args[1..4] are the four syscall arguments (A0..A3). */
static void gen_ecall(gen_t *g, node_t *n) {
    node_t **cargs = SPAN(g, n->val.call.args);
    node_t  *num   = cargs[0];
    node_t  *arg0  = cargs[1];
    node_t  *arg1  = cargs[2];
    node_t  *arg2  = cargs[3];
    node_t  *arg3  = cargs[4];

    /* Evaluate all argument expressions before touching the fixed
     * argument registers. */
    value_t numval  = gen_expr(g, num->val.call_arg.expr, false);
    value_t arg0val = gen_expr(g, arg0->val.call_arg.expr, false);
    value_t arg1val = gen_expr(g, arg1->val.call_arg.expr, false);
    value_t arg2val = gen_expr(g, arg2->val.call_arg.expr, false);
    value_t arg3val = gen_expr(g, arg3->val.call_arg.expr, false);

    /* Move the arguments to the appropriate registers. Load higher-numbered
     * argument registers first so we don't overwrite values that are still
     * needed for lower-numbered arguments (e.g. when the source value lives in
     * A0). */
    usereg(g, A7);
    emit_load_into(g, A7, numval); /* Syscall number is stored in A7 */

    usereg(g, A3);
    emit_load_into(g, A3, arg3val);

    usereg(g, A2);
    emit_load_into(g, A2, arg2val);

    usereg(g, A1);
    emit_load_into(g, A1, arg1val);

    usereg(g, A0);
    emit_load_into(g, A0, arg0val);

    emit(g, ECALL);

    /* NOTE(review): A0 is deliberately not freed here — presumably it
     * carries the syscall's result back to the caller; confirm against
     * the builtin's call sites. */
    freereg(g, A3);
    freereg(g, A2);
    freereg(g, A1);
    freereg(g, A7);
}
540 +
541 +
/* Emit an EBREAK instruction */
542 +
static void gen_ebreak(gen_t *g, node_t *n) {
543 +
    (void)n;
544 +
    emit(g, EBREAK);
545 +
}
546 +
547 +
/* Generate panic statement */
548 +
static void gen_panic(gen_t *g, node_t *n) {
549 +
    (void)n;
550 +
    emit(g, EBREAK);
551 +
}
552 +
553 +
/* Evaluate an expression for its side effects; the result is discarded. */
static void gen_expr_stmt(gen_t *g, node_t *n) {
    value_t discarded = gen_expr(g, n->val.expr_stmt, false);

    /* A result left in a register would leak the register; release it. */
    if (discarded.loc == LOC_REG)
        freereg(g, discarded.as.reg);
}
561 +
562 +
/* Generate conditional branch code.
 *
 * Emits `cond`'s comparison as a single inverted branch (jump past
 * `lbranch` when the condition is false), then the true-branch block. */
static void gen_branch(gen_t *g, node_t *cond, node_t *lbranch) {
    binop_t op    = cond->val.binop.op;
    value_t lval  = gen_expr(g, cond->val.binop.left, false);
    value_t rval  = gen_expr(g, cond->val.binop.right, false);
    reg_t   left  = emit_load(g, lval);
    reg_t   right = emit_load(g, rval);

    iname_t branch_op   = I_BEQ;
    reg_t   rs1         = left;
    reg_t   rs2         = right;
    bool    is_unsigned = false;

    /* Signedness of the comparison follows the left operand's type. */
    if (cond->val.binop.left->type) {
        is_unsigned = type_is_unsigned(cond->val.binop.left->type->cls);
    }

    /* Select the appropriate branch instruction based on the comparison
     * operator. Nb. we're branching if the condition is *false*, so we use the
     * opposite branch instruction. */
    switch (op) {
    case OP_EQ:
        branch_op = I_BNE;
        break;
    case OP_LT:
        /* !(a < b) == a >= b */
        branch_op = is_unsigned ? I_BGEU : I_BGE;
        break;
    case OP_GT:
        /* !(a > b) == b >= a, hence the operand swap. */
        branch_op = is_unsigned ? I_BGEU : I_BGE;
        rs1       = right;
        rs2       = left;
        break;
    case OP_LE:
        /* !(a <= b) == b < a, hence the operand swap. */
        branch_op = is_unsigned ? I_BLTU : I_BLT;
        rs1       = right;
        rs2       = left;
        break;
    case OP_GE:
        /* !(a >= b) == a < b */
        branch_op = is_unsigned ? I_BLTU : I_BLT;
        break;
    case OP_NE:
        /* For not equals, branch if they are equal. */
        branch_op = I_BEQ;
        break;
    case OP_AND:
    case OP_OR:
    case OP_ADD:
    case OP_SUB:
    case OP_DIV:
    case OP_MUL:
    case OP_MOD:
    case OP_BAND:
    case OP_BOR:
    case OP_XOR:
    case OP_SHL:
    case OP_SHR:
        /* Non-comparison operators are never valid branch conditions. */
        abort();
    }

    branch_patch_t patch = branch_patch_make(g, branch_op, rs1, rs2);

    freereg(g, left);
    freereg(g, right);

    /* Generate code for the left (true) branch. */
    gen_block(g, lbranch);

    /* Patch the branch to jump past the left branch when false. */
    branch_patch_apply(g, patch, g->ninstrs);
}
632 +
633 +
/* Generate code for an if/else condition with arbitrary condition and branches.
 * This function is used both for regular if statements and for match cases. */
static void gen_if_else(
    gen_t  *g,
    value_t condition_val, /* Condition value to test */
    node_t *lbranch,       /* Code to execute if condition is true */
    node_t *rbranch        /* Code to execute if condition is false */
) {
    /* Load the condition value into a register */
    reg_t condreg = emit_load(g, condition_val);

    /* Emit a conditional branch: if condition is zero (false),
     * jump past the left branch. */
    branch_patch_t lb_branch = branch_patch_make(g, I_BEQ, condreg, ZERO);
    /* Nb. we free this register here even though the register _name_ is used
     * lower, because it's only used for patching the instruction above. */
    freereg(g, condreg);

    /* Generate code for the true branch. */
    gen_block(g, lbranch);

    if (rbranch) {
        /* If we have an else branch, emit jump to skip over it. */
        const usize lb_end   = emit(g, NOP);
        const usize rb_start = g->ninstrs;

        /* Patch the branch instruction to jump to else. */
        branch_patch_apply(g, lb_branch, rb_start);

        /* Generate code for the false branch. */
        gen_block(g, rbranch);

        /* Patch the jump past else. */
        const usize rb_end = g->ninstrs;
        g->instrs[lb_end]  = JMP(jump_offset(lb_end, rb_end));
    } else {
        /* No false branch, just patch the conditional branch to jump to the
         * end. */
        const usize end = g->ninstrs;
        branch_patch_apply(g, lb_branch, end);
    }
}
675 +
676 +
/* Generate guard check for a match case. Updates ctrl->guard_branch if guard
677 +
 * present. */
678 +
static void gen_case_guard(gen_t *g, node_t *n, match_case_ctrl_t *ctrl) {
679 +
    if (n->val.match_case.guard) {
680 +
        value_t guard_val  = gen_expr(g, n->val.match_case.guard, false);
681 +
        reg_t   guard_reg  = emit_load(g, guard_val);
682 +
        ctrl->guard_branch = branch_patch_make(g, I_BEQ, guard_reg, ZERO);
683 +
        freereg(g, guard_reg);
684 +
    }
685 +
}
686 +
687 +
/* Bind a pattern variable to a record field value. Allocates stack space for
 * the variable if needed and copies the field value into it.
 * For ref matches (variable type is pointer to field type), stores the address
 * of the field instead of copying its value. */
static void bind_var_to_field(
    gen_t *g, value_t record_val, symbol_t *field_sym, symbol_t *var_sym
) {
    /* Lazily allocate the variable's stack slot on first binding. */
    if (var_sym->e.var.val.loc == LOC_NONE) {
        i32 off = reserve_aligned(g, var_sym->e.var.typ, var_sym->e.var.align);
        var_sym->e.var.val = value_stack(OFFSET(FP, off), var_sym->e.var.typ);
    }
    /* Derive a value describing the field within the record by adding the
     * field's byte offset to the record's location. */
    value_t field_val = record_val;
    field_val.type    = field_sym->e.field.typ;
    if (field_val.loc == LOC_STACK)
        field_val.as.off.offset += field_sym->e.field.offset;
    else if (field_val.loc == LOC_ADDR)
        field_val.as.adr.offset += field_sym->e.field.offset;
    else if (field_val.loc == LOC_REG) {
        /* Register holds the address of the record. Convert to LOC_STACK
         * so that the field offset is applied when loading. */
        reg_t base_reg          = field_val.as.reg;
        field_val.loc           = LOC_STACK;
        field_val.as.off.base   = base_reg;
        field_val.as.off.offset = field_sym->e.field.offset;
    }

    /* Check if this is a ref match (variable is pointer to field type) */
    type_t *var_typ = var_sym->e.var.typ;
    if (var_typ->cls == TYPE_PTR &&
        var_typ->info.ptr.target == field_sym->e.field.typ) {
        /* Store address of field instead of copying value */
        reg_t addr_reg = nextreg(g);
        if (field_val.loc == LOC_STACK) {
            emit_addr_offset(
                g, addr_reg, field_val.as.off.base, field_val.as.off.offset
            );
        } else if (field_val.loc == LOC_ADDR) {
            /* Absolute address is a compile-time constant: load it. */
            emit_li(
                g, addr_reg, field_val.as.adr.base + field_val.as.adr.offset
            );
        } else if (field_val.loc == LOC_REG) {
            emit(
                g, ADDI(addr_reg, field_val.as.reg, field_sym->e.field.offset)
            );
        } else {
            bail("cannot take address of field for ref match");
        }
        /* Store the address register into the variable's stack location */
        emit_regstore(
            g,
            addr_reg,
            var_sym->e.var.val.as.off.base,
            var_sym->e.var.val.as.off.offset,
            var_sym->e.var.typ
        );
        freereg(g, addr_reg);
    } else {
        /* Value match: copy the field's bytes into the variable. */
        emit_replace(g, var_sym->e.var.val, field_val);
    }
}
747 +
748 +
/* Bind fields from a record value to pattern variables. Handles both
 * tuple-style patterns like `S(x, y)` and labeled patterns like `T { x, y }`.
 */
static void gen_bind_record_fields(
    gen_t *g, value_t record_val, node_t *pattern, type_t *record_type
) {
    if (pattern->cls == NODE_CALL) {
        /* Tuple-style: positional arguments bind to fields in order. */
        for (usize i = 0; i < pattern->val.call.args.len; i++) {
            node_t *arg_node = SPAN(g, pattern->val.call.args)[i];
            node_t *arg      = (arg_node->cls == NODE_CALL_ARG)
                                   ? arg_node->val.call_arg.expr
                                   : arg_node;
            /* Only identifier patterns with a resolved symbol bind;
             * anything else (e.g. `_`) is skipped. */
            if (arg->cls == NODE_IDENT && arg->sym) {
                bind_var_to_field(
                    g, record_val, record_type->info.srt.fields[i], arg->sym
                );
            }
        }
    } else if (pattern->cls == NODE_RECORD_LIT) {
        /* Labeled-style: each literal field names its record field. */
        node_t **fields =
            nodespan_ptrs(&g->mod->parser, pattern->val.record_lit.fields);
        for (usize i = 0; i < pattern->val.record_lit.fields.len; i++) {
            node_t *binding = fields[i]->val.record_lit_field.value;
            if (binding->cls == NODE_IDENT && binding->sym) {
                bind_var_to_field(g, record_val, fields[i]->sym, binding->sym);
            }
        }
    }
}
777 +
778 +
/* Generate one match case over a union with payload: emit a tag comparison
 * per pattern, bind the payload (variable or record fields) once matched,
 * and evaluate an optional guard. Returns control info for the caller:
 * skip_body is the placeholder to patch past the body, guard_branch the
 * guard's failure branch. */
static match_case_ctrl_t gen_match_case_union_payload(
    gen_t *g, value_t match_val, node_t *n
) {
    match_case_ctrl_t ctrl = {
        .skip_body    = 0,
        .guard_branch = branch_patch_invalid(),
    };
    /* Array to store jumps to body when a pattern matches */
    branch_patch_t jumps[MAX_CASE_PATTERNS];
    usize          njumps = 0;

    /* union pattern matching - generate tag comparisons */
    node_t **patterns =
        nodespan_ptrs(&g->mod->parser, n->val.match_case.patterns);
    for (usize p = 0; p < n->val.match_case.patterns.len; p++) {
        node_t *patt_node = patterns[p];
        node_t *callee    = NULL;

        /* Pattern shapes: Variant(x) call, Variant { .. } record literal,
         * or a bare Variant path. */
        if (patt_node->cls == NODE_CALL) {
            callee = patt_node->val.call.callee;
        } else if (patt_node->cls == NODE_RECORD_LIT) {
            callee = patt_node->val.record_lit.type;
        } else {
            callee = patt_node;
        }

        /* Use the stored variant index */
        node_t *variant_ident = callee->val.access.rval;
        usize   variant_tag = variant_ident->sym->node->val.union_variant.value;

        /* Generate tag comparison.
         * For ref matching (pointer-to-union), the register holds an address
         * we need to load from. */
        reg_t tag_reg;
        if (match_val.loc == LOC_REG && match_val.type->cls == TYPE_PTR) {
            /* Load tag byte from address in register */
            tag_reg = nextreg(g);
            emit(g, LBU(tag_reg, match_val.as.reg, 0));
        } else {
            /* Reinterpret the union value as its leading u8 tag byte. */
            value_t tag_val = match_val;
            tag_val.type    = g->types->type_u8;
            tag_reg         = emit_load(g, tag_val);
        }
        reg_t variant_idx_reg = nextreg(g);
        emit(g, ADDI(variant_idx_reg, ZERO, variant_tag));
        jumps[njumps++] = branch_patch_make(g, I_BEQ, tag_reg, variant_idx_reg);

        freereg(g, variant_idx_reg);
        freereg(g, tag_reg);
    }

    /* If none of the patterns match, jump past the body */
    ctrl.skip_body   = emit(g, NOP); /* Will be patched later */
    usize body_start = g->ninstrs;   /* Body starts here */

    /* Patch all the pattern match jumps to point to the body start */
    for (usize p = 0; p < njumps; p++) {
        branch_patch_apply(g, jumps[p], body_start);
    }
    /* Set up bound variable for payload binding */
    if (n->val.match_case.variable) {
        /* If variable doesn't have a symbol, it's likely a placeholder,
         * eg. `_`, so we don't bind anything. */
        if (n->val.match_case.variable->sym) {
            bind_union_value(g, match_val, n->val.match_case.variable);
        }
    }
    /* Handle record literal pattern field bindings */
    if (n->val.match_case.patterns.len == 1) {
        node_t *patt = patterns[0];
        if (patt->cls == NODE_RECORD_LIT) {
            node_t *callee       = patt->val.record_lit.type;
            node_t *variant_node = callee->val.access.rval;
            type_t *payload_type = variant_node->sym->node->type;

            /* Create a value pointing to the payload (after tag).
             * When matching on a reference, match_val.type is a pointer;
             * dereference to get the underlying union type. */
            type_t *union_type = match_val.type;
            if (union_type->cls == TYPE_PTR)
                union_type = union_type->info.ptr.target;
            i32     val_off = tval_payload_offset(union_type);
            value_t payload = match_val;
            if (payload.loc == LOC_STACK) {
                payload.as.off.offset += val_off;
            } else if (payload.loc == LOC_ADDR) {
                payload.as.adr.offset += val_off;
            } else if (payload.loc == LOC_REG) {
                /* Register contains union address; add offset to get payload */
                reg_t payload_reg = nextreg(g);
                emit(g, ADDI(payload_reg, payload.as.reg, val_off));
                payload = value_reg(payload_reg, payload_type);
            }
            payload.type = payload_type;

            gen_bind_record_fields(g, payload, patt, payload_type);
        }
    }
    gen_case_guard(g, n, &ctrl);
    return ctrl;
}
879 +
880 +
/* Generate code for a match case with a standalone record pattern.
881 +
 * Record patterns always match (no tag comparison), so we just bind fields. */
882 +
static match_case_ctrl_t gen_match_case_record(
883 +
    gen_t *g, value_t match_val, node_t *n
884 +
) {
885 +
    match_case_ctrl_t ctrl = { 0, branch_patch_invalid() };
886 +
    node_t          **patterns =
887 +
        nodespan_ptrs(&g->mod->parser, n->val.match_case.patterns);
888 +
889 +
    if (n->val.match_case.patterns.len >= 1)
890 +
        gen_bind_record_fields(g, match_val, patterns[0], match_val.type);
891 +
892 +
    gen_case_guard(g, n, &ctrl);
893 +
    return ctrl;
894 +
}
895 +
896 +
/* Generate one match case over a plain (non-payload) value: compare the
 * scrutinee register against each pattern value, jumping to the body on
 * equality. Returns control info: skip_body is the placeholder to patch
 * past the body, guard_branch the guard's failure branch. */
static match_case_ctrl_t gen_match_case(gen_t *g, reg_t match_reg, node_t *n) {
    match_case_ctrl_t ctrl = {
        .skip_body    = 0,
        .guard_branch = branch_patch_invalid(),
    };
    /* Array to store jumps to body when a pattern matches */
    branch_patch_t jumps[MAX_CASE_PATTERNS];
    usize          njumps = 0;

    /* Regular pattern matching (non-payload types) */
    node_t **patterns =
        nodespan_ptrs(&g->mod->parser, n->val.match_case.patterns);
    for (usize p = 0; p < n->val.match_case.patterns.len; p++) {
        node_t *patt_node = patterns[p];
        value_t patt_val  = gen_expr(g, patt_node, false);
        reg_t   patt_reg  = emit_load(g, patt_val);

        /* If this pattern matches, jump to the body
         * (Will be patched later) */
        jumps[njumps++] = branch_patch_make(g, I_BEQ, match_reg, patt_reg);
        freereg(g, patt_reg);
    }
    /* If none of the patterns match, jump past the body */
    ctrl.skip_body   = emit(g, NOP); /* Will be patched later */
    usize body_start = g->ninstrs;   /* Body starts here */

    /* Patch all the pattern match jumps to point to the body start */
    for (usize p = 0; p < njumps; p++) {
        branch_patch_apply(g, jumps[p], body_start);
    }
    gen_case_guard(g, n, &ctrl);
    return ctrl;
}
929 +
930 +
/* Generate code for a match statement by converting it to a series of
931 +
 * equality comparisons */
932 +
static void gen_match(gen_t *g, node_t *n) {
933 +
    /* If there are no cases, nothing to do */
934 +
    if (n->val.match_stmt.cases.len == 0)
935 +
        return;
936 +
937 +
    /* Generate code for the match operand and load it into a register */
938 +
    value_t match_val = gen_expr(g, n->val.match_stmt.expr, false);
939 +
    reg_t   match_reg = emit_load(g, match_val);
940 +
941 +
    /* Track jump locations to the end of the match */
942 +
    usize end_jumps[MAX_SWITCH_CASES];
943 +
    usize nend_jumps = 0;
944 +
945 +
    /* Process each case from first to last */
946 +
    node_t **cases = nodespan_ptrs(&g->mod->parser, n->val.match_stmt.cases);
947 +
    for (usize i = 0; i < n->val.match_stmt.cases.len; i++) {
948 +
        node_t *cn = cases[i];
949 +
950 +
        if (!cn->val.match_case.patterns.len) {
951 +
            /* Default/else case: generate block body */
952 +
            gen_node(g, cn->val.match_case.body);
953 +
            break;
954 +
        }
955 +
        /* For cases with patterns, we need to:
956 +
         * 1. Generate pattern tests with jumps to the body if matching
957 +
         * 2. Jump to the next case if no patterns match
958 +
         * 3. Generate the body
959 +
         * 4. Jump to the end of the match after the body */
960 +
        type_t           *match_type = n->val.match_stmt.expr->type;
961 +
        match_case_ctrl_t ctrl;
962 +
963 +
        /* Check if matching on a pointer to a union */
964 +
        type_t *union_type = match_type;
965 +
        if (match_type->cls == TYPE_PTR &&
966 +
            type_is_union_with_payload(match_type->info.ptr.target)) {
967 +
            union_type = match_type->info.ptr.target;
968 +
        }
969 +
970 +
        if (type_is_union_with_payload(union_type)) {
971 +
            ctrl = gen_match_case_union_payload(g, match_val, cn);
972 +
        } else if (union_type->cls == TYPE_RECORD) {
973 +
            ctrl = gen_match_case_record(g, match_val, cn);
974 +
        } else {
975 +
            ctrl = gen_match_case(g, match_reg, cn);
976 +
        }
977 +
        /* Generate the case body */
978 +
        gen_node(g, cn->val.match_case.body);
979 +
        /* Jump to end of the match after the body (patched later) */
980 +
        end_jumps[nend_jumps++] = emit(g, NOP);
981 +
        /* Patch the jump over the body (skip_body=0 means no patching needed)
982 +
         */
983 +
        if (ctrl.guard_branch.valid) {
984 +
            branch_patch_apply(g, ctrl.guard_branch, g->ninstrs);
985 +
        }
986 +
        if (ctrl.skip_body) {
987 +
            g->instrs[ctrl.skip_body] =
988 +
                JMP(jump_offset(ctrl.skip_body, g->ninstrs));
989 +
        }
990 +
    }
991 +
992 +
    /* Patch all jumps to the end of the match */
993 +
    usize end = g->ninstrs;
994 +
    for (usize i = 0; i < nend_jumps; i++) {
995 +
        g->instrs[end_jumps[i]] = JMP(jump_offset(end_jumps[i], end));
996 +
    }
997 +
    freeval(g, match_val);
998 +
}
999 +
1000 +
/* Generate code for an `if` statement. */
1001 +
static void gen_if(gen_t *g, node_t *n) {
1002 +
    node_t *cond    = n->val.if_stmt.cond;
1003 +
    node_t *lbranch = n->val.if_stmt.lbranch;
1004 +
    node_t *rbranch = n->val.if_stmt.rbranch;
1005 +
1006 +
    /* Special case for comparison operations. */
1007 +
    if (node_is_comp(cond)) {
1008 +
        /* If there's no else branch, use the simple branch generation,
1009 +
         * but only for primitive types that are compatible with BEQ or BNE. */
1010 +
        if (!rbranch && type_is_primitive(cond->val.binop.left->type)) {
1011 +
            gen_branch(g, cond, lbranch);
1012 +
            return;
1013 +
        }
1014 +
    }
1015 +
    gen_if_else(g, gen_expr(g, cond, false), lbranch, rbranch);
1016 +
}
1017 +
1018 +
/* Generate code for an if expression */
1019 +
static value_t gen_if_expr(gen_t *g, node_t *n) {
1020 +
    /* Allocate space for the result value */
1021 +
    i32     result_off = reserve(g, n->type);
1022 +
    value_t result_val = value_stack(OFFSET(FP, result_off), n->type);
1023 +
1024 +
    /* Generate condition */
1025 +
    value_t cond_val = gen_expr(g, n->val.if_stmt.cond, false);
1026 +
    reg_t   cond_reg = emit_load(g, cond_val);
1027 +
1028 +
    /* Branch to else if condition is false */
1029 +
    branch_patch_t else_branch = branch_patch_make(g, I_BEQ, cond_reg, ZERO);
1030 +
    freereg(g, cond_reg);
1031 +
1032 +
    /* Generate then branch and store result */
1033 +
    value_t then_val = gen_expr(g, n->val.if_stmt.lbranch, false);
1034 +
    emit_store(g, then_val, result_val.as.off.base, result_val.as.off.offset);
1035 +
1036 +
    /* Jump over else branch */
1037 +
    usize end_jump = emit(g, NOP); /* Placeholder for unconditional jump */
1038 +
1039 +
    /* Patch else branch jump */
1040 +
    usize else_start = g->ninstrs;
1041 +
    branch_patch_apply(g, else_branch, else_start);
1042 +
1043 +
    /* Generate else branch and store result */
1044 +
    value_t else_val = gen_expr(g, n->val.if_stmt.rbranch, false);
1045 +
    emit_store(g, else_val, result_val.as.off.base, result_val.as.off.offset);
1046 +
1047 +
    /* Patch end jump */
1048 +
    usize end           = g->ninstrs;
1049 +
    g->instrs[end_jump] = JMP(jump_offset(end_jump, end));
1050 +
1051 +
    return result_val;
1052 +
}
1053 +
1054 +
/* Generate code for an `if let` statement.
1055 +
 * This checks if an optional value has content and binds it to a variable if
1056 +
 * so. */
1057 +
static void gen_if_let(gen_t *g, node_t *n) {
1058 +
    /* Generate the optional expression */
1059 +
    value_t opt_val = gen_expr(g, n->val.if_let_stmt.expr, false);
1060 +
    /* Load the tag to check if optional has a value */
1061 +
    reg_t tag_reg = tval_load_tag(g, opt_val);
1062 +
1063 +
    /* Set up conditional branch: if `exists` is 0, skip the left branch */
1064 +
    branch_patch_t lb_branch = branch_patch_make(g, I_BEQ, tag_reg, ZERO);
1065 +
1066 +
    /* Create and allocate the bound variable (unless it's a placeholder) */
1067 +
    if (n->val.if_let_stmt.var->cls != NODE_PLACEHOLDER) {
1068 +
        symbol_t *val_sym = n->val.if_let_stmt.var->sym;
1069 +
        i32       val_off =
1070 +
            reserve_aligned(g, val_sym->e.var.typ, val_sym->e.var.align);
1071 +
        val_sym->e.var.val =
1072 +
            value_stack(OFFSET(FP, val_off), val_sym->e.var.typ);
1073 +
1074 +
        /* Copy the value part from the optional to the local variable */
1075 +
        optval_copy_value(g, opt_val, val_sym->e.var.val);
1076 +
    }
1077 +
1078 +
    /* If there's a guard condition, evaluate it */
1079 +
    branch_patch_t guard_branch = branch_patch_invalid();
1080 +
1081 +
    if (n->val.if_let_stmt.guard) {
1082 +
        value_t guard_val = gen_expr(g, n->val.if_let_stmt.guard, false);
1083 +
        reg_t   guard_reg = emit_load(g, guard_val);
1084 +
1085 +
        /* If guard is false, jump to else branch */
1086 +
        guard_branch =
1087 +
            branch_patch_make(g, I_BEQ, guard_reg, ZERO); /* Will patch later */
1088 +
        freereg(g, guard_reg);
1089 +
    }
1090 +
1091 +
    /* Generate code for the left branch */
1092 +
    gen_block(g, n->val.if_let_stmt.lbranch);
1093 +
1094 +
    if (n->val.if_let_stmt.rbranch) {
1095 +
        /* If we have an else branch, emit jump to skip over it */
1096 +
        const usize lb_end   = emit(g, NOP);
1097 +
        const usize rb_start = g->ninstrs;
1098 +
1099 +
        /* Patch the branch instruction to jump to else in *none* case */
1100 +
        branch_patch_apply(g, lb_branch, rb_start);
1101 +
1102 +
        /* Patch guard condition branch if it exists */
1103 +
        if (guard_branch.valid) {
1104 +
            branch_patch_apply(g, guard_branch, rb_start);
1105 +
        }
1106 +
        freereg(g, tag_reg);
1107 +
1108 +
        /* Generate code for the else branch */
1109 +
        gen_block(g, n->val.if_let_stmt.rbranch);
1110 +
1111 +
        /* Patch the jump instruction to skip over the else branch */
1112 +
        usize rb_end      = g->ninstrs;
1113 +
        g->instrs[lb_end] = JMP(jump_offset(lb_end, rb_end));
1114 +
    } else {
1115 +
        /* No else branch, just patch the branch to skip the then branch */
1116 +
        usize lb_end = g->ninstrs;
1117 +
        branch_patch_apply(g, lb_branch, lb_end);
1118 +
1119 +
        /* Patch guard condition branch if it exists */
1120 +
        if (guard_branch.valid) {
1121 +
            branch_patch_apply(g, guard_branch, lb_end);
1122 +
        }
1123 +
        freereg(g, tag_reg);
1124 +
    }
1125 +
}
1126 +
1127 +
/* Generate code for a forever loop. */
1128 +
static void gen_loop(gen_t *g, node_t *n) {
1129 +
    /* Save the outer loop context and setup new context with a new loop id. */
1130 +
    loop_t outer    = g->loop;
1131 +
    g->loop.current = n;
1132 +
    g->loop.start   = g->ninstrs;
1133 +
1134 +
    /* Generate code for the loop body. */
1135 +
    gen_block(g, n->val.loop_stmt.body);
1136 +
    /* Jump back to the beginning of the loop. */
1137 +
    emit_jump(g, g->loop.start);
1138 +
1139 +
    /* Mark this position as the loop end for break statements */
1140 +
    g->loop.end = g->ninstrs;
1141 +
    patch_break_stmts(g);
1142 +
    g->loop = outer;
1143 +
}
1144 +
1145 +
/* Generate code for a break statement. */
1146 +
static void gen_break(gen_t *g, node_t *n) {
1147 +
    (void)n;
1148 +
1149 +
    if (g->loop.current->cls != NODE_LOOP) {
1150 +
        bail("`break` statement outside of loop");
1151 +
    }
1152 +
    /* Instead of calculating the jump offset now, emit a placeholder
1153 +
     * instruction that will be patched when we know where the loop ends. */
1154 +
    usize offset = emit(g, NOP);
1155 +
1156 +
    /* Record this location for patching. */
1157 +
    g->fn.brkpatches[g->fn.nbrkpatches++] = (ctpatch_t){
1158 +
        .pc      = offset,
1159 +
        .loop    = g->loop.current,
1160 +
        .applied = false,
1161 +
    };
1162 +
}
1163 +
1164 +
/* Generate code for an assignment statement, dispatching on the shape of
 * the assignment target (variable, field, array element, or deref). */
static void gen_assign(gen_t *g, node_t *n) {
    node_t *target = n->val.assign.lval;
    node_t *source = n->val.assign.rval;

    switch (target->cls) {
    case NODE_IDENT: { /* Plain variable assignment. */
        symbol_t *sym = target->sym;

        value_t dst = sym->e.var.val;
        value_t src = gen_expr(g, source, false);

        /* Nb. frees the source value if it's in a register. */
        emit_replace(g, dst, src);
        break;
    }
    case NODE_ACCESS: { /* Record field assignment (e.g., x.y = 1). */
        value_t dst = gen_expr(g, target, true);
        value_t src = gen_expr(g, source, false);

        /* Overwrite the field with the right-hand side. */
        emit_replace(g, dst, src);
        break;
    }
    case NODE_ARRAY_INDEX: { /* Array element assignment (e.g. `arr[0] = 1`). */
        value_t dst = gen_array_index(g, target, true);
        value_t src = gen_expr(g, source, false);

        /* Overwrite the element with the right-hand side. */
        emit_replace(g, dst, src);

        /* Release the address register produced by the index computation. */
        if (dst.loc == LOC_STACK) {
            freereg(g, dst.as.off.base);
        }
        break;
    }
    case NODE_UNOP: { /* Pointer dereference assignment (`*ptr = ...`). */
        if (target->val.unop.op != OP_DEREF) {
            bail("unsupported unary operator in assignment target");
        }
        value_t ptr = gen_expr(g, target->val.unop.expr, true);
        value_t src = gen_expr(g, source, false);

        /* `gen_deref` expects an lvalue when the pointer itself is the
         * storage we want to mutate (e.g., `*ptr = ...`). */
        value_t dst = gen_deref(g, target, ptr, true);

        emit_replace(g, dst, src);
        break;
    }
    default:
        bail("unsupported assignment target %s", node_names[target->cls]);
    }
}
1215 +
1216 +
/* Generate code for a `return` statement.
 *
 * Evaluates the return value (if present) and places it where the caller
 * expects it: directly in A0 for scalar types, or through the hidden sret
 * pointer held in A0 for by-reference (aggregate) types. Result and
 * optional return types get their tag/payload laid out here as well.
 * Control then flows to the shared function epilogue via a queued patch,
 * so the epilogue is emitted only once per function. */
static void gen_return(gen_t *g, node_t *n) {
    type_t *ret_typ = g->fn.current->node->type->info.fun.ret;
    node_t *value   = n->val.return_stmt.value;
    /* If there's a return value, evaluate the expression.
     * Then, store the expression, in the return register A0,
     * according to the RISC-V calling conventions. */
    if (value) {
        value_t val = gen_expr(g, value, false);

        if (ret_typ->cls == TYPE_RESULT) {
            /* Result-typed return: the destination is either the A0
             * register itself, or the memory A0 points at when the result
             * is passed by reference. */
            value_t dest;
            if (type_is_passed_by_ref(ret_typ)) {
                usereg(g, A0);
                dest = value_stack(OFFSET(A0, 0), ret_typ);
            } else {
                dest = value_reg(A0, ret_typ);
            }
            /* Returns are always for the "success" case. */
            emit_result_store_success(g, dest, val);
        } else if (ret_typ->cls == TYPE_OPT &&
                   type_coercible(val.type, ret_typ->info.opt.elem)) {
            /* Wrap value in an optional */
            usereg(g, A0);
            tval_store(g, value_stack(OFFSET(A0, 0), ret_typ), val, 1);
        } else if (ret_typ->cls == TYPE_OPT && val.type->cls == TYPE_OPT) {
            /* Value is already optional, copy it */
            usereg(g, A0);
            emit_replace(g, value_stack(OFFSET(A0, 0), ret_typ), val);
        } else if (type_is_passed_by_ref(val.type)) {
            /* Aggregate returns go through the hidden sret pointer. */
            usereg(g, A0);
            emit_replace(g, value_stack(OFFSET(A0, 0), val.type), val);
        } else {
            /* Plain scalar: load it straight into A0. */
            emit_load_into(g, A0, val);
        }
        freeval(g, val);
    } else {
        if (ret_typ->cls == TYPE_RESULT) {
            /* Bare `return` in a Result-returning function still needs a
             * success tag written into the result slot. */
            value_t dest;
            if (type_is_passed_by_ref(ret_typ)) {
                usereg(g, A0);
                dest = value_stack(OFFSET(A0, 0), ret_typ);
            } else {
                dest = value_reg(A0, ret_typ);
            }
            emit_result_store_success(g, dest, value_none());
        } else {
            /* If there's no return value, we just store zero in A0. */
            emit_load_into(
                g, A0, value_imm((imm_t){ .i = 0 }, g->types->type_i32)
            );
        }
    }

    /* Instead of returning directly, emit a placeholder jump to the function
     * epilogue that will be patched later. This avoids duplicating epilogue
     * code for each return point. */
    usize pc = emit(g, NOP);

    if (g->fn.nretpatches >= MAX_RET_PATCHES)
        bail("too many return statements in function");

    /* Record this location for patching */
    g->fn.retpatches[g->fn.nretpatches++] = (ctpatch_t){
        .pc      = pc,
        .applied = false,
    };
}
1284 +
1285 +
/* Emit the control flow for `throw`
1286 +
 *
1287 +
 * 1. Evaluate the error expression
1288 +
 * 2. Lay it out in the caller-visible result slot (A0 or *A0)
1289 +
 * 3. Queue a jump to the epilogue so every throw shares the same return path */
1290 +
static void gen_throw(gen_t *g, node_t *n) {
1291 +
    type_t *fn_ret = g->fn.current->node->type->info.fun.ret;
1292 +
1293 +
    value_t err_val = gen_expr(g, n->val.throw_stmt.expr, false);
1294 +
    value_t dest;
1295 +
1296 +
    if (type_is_passed_by_ref(fn_ret)) {
1297 +
        usereg(g, A0);
1298 +
        dest = value_stack(OFFSET(A0, 0), fn_ret);
1299 +
    } else {
1300 +
        dest = value_reg(A0, fn_ret);
1301 +
    }
1302 +
    emit_result_store_error(g, dest, err_val);
1303 +
    freeval(g, err_val);
1304 +
1305 +
    /* Jump to function end (patch) */
1306 +
    usize pc = emit(g, NOP);
1307 +
1308 +
    if (g->fn.nretpatches >= MAX_RET_PATCHES)
1309 +
        bail("too many return statements in function");
1310 +
1311 +
    /* Patch to jump to function epilogue */
1312 +
    g->fn.retpatches[g->fn.nretpatches++] = (ctpatch_t){
1313 +
        .pc      = pc,
1314 +
        .applied = false,
1315 +
    };
1316 +
}
1317 +
1318 +
/* Emit `try`
 *
 * 1. Evaluate the expression result
 * 2. Load its tag and branch past the error path when the tag is zero
 * 3. On error, normalize the tag/value into the function result slot and
 *    enqueue a jump to the epilogue (mirroring an early return)
 * 4. On success, expose the payload location for the caller
 *
 * With catch block:
 * 1. Evaluate the expression result
 * 2. Load its tag and branch based on success/error
 * 3. On error: execute catch block (must diverge or return void)
 * 4. On success: use payload value
 *
 * Three variants are handled below, in order: `try?` (error becomes nil),
 * `try ... catch` (error runs the catch block), and plain `try` (error
 * propagates to the caller through the function's result slot).
 */
static value_t gen_try(gen_t *g, node_t *n) {
    /* 1. */
    value_t res_val = gen_expr(g, n->val.try_expr.expr, false);
    tval_t  res     = tval_from_val(g, res_val);

    /* Inspect the tag to determine whether the result is success or error. */
    reg_t tag = nextreg(g);
    emit_regload(
        g, tag, res.tag.as.off.base, res.tag.as.off.offset, g->types->type_u8
    );
    type_t *payload     = res_val.type->info.res.payload;
    type_t *result_type = n->type ? n->type : payload;

    /* Handle `try?` expressions */
    if (n->val.try_expr.optional) {
        /* Allocate stack space for the optional result. */
        i32     result_offset = reserve(g, result_type);
        value_t result = value_stack(OFFSET(FP, result_offset), result_type);

        /* Branch over the error path when the tag is zero (success). */
        branch_patch_t success_branch = branch_patch_make(g, I_BEQ, tag, ZERO);
        freereg(g, tag);

        /* Error path: store nil (tag = 0). */
        tval_store(g, result, (value_t){ 0 }, 0);

        /* Jump over success path. */
        usize end_patch = emit(g, JMP(0));

        /* Success path: store Some(payload) (tag = 1). */
        branch_patch_apply(g, success_branch, g->ninstrs);

        /* The payload occupies the same slot as the result value; only the
         * view type changes. */
        value_t payload_val = res.val;
        payload_val.type    = payload;
        tval_store(g, result, payload_val, 1);

        /* End: both paths converge here. */
        g->instrs[end_patch] = JMP(jump_offset(end_patch, g->ninstrs));

        return result;
    }

    /* Handle catch block */
    if (n->val.try_expr.catch_expr) {
        node_t *catch_node = n->val.try_expr.catch_expr;

        node_t *catch_binding = catch_node->val.catch_clause.binding;
        node_t *catch_body    = catch_node->val.catch_clause.body;

        /* Branch over the error path when the tag is zero (success). */
        branch_patch_t success_branch = branch_patch_make(g, I_BEQ, tag, ZERO);
        freereg(g, tag);

        /* If there's a binding, store the error value to the variable. */
        if (catch_binding && catch_binding->sym) {
            symbol_t *err_sym  = catch_binding->sym;
            type_t   *err_type = res_val.type->info.res.err;
            i32 err_off = reserve_aligned(g, err_type, err_sym->e.var.align);
            err_sym->e.var.val = value_stack(OFFSET(FP, err_off), err_type);

            /* Create a value pointing to the error slot (same as payload). */
            value_t err_slot = res.val;
            err_slot.type    = err_type;

            /* Copy the error to the bound variable. */
            emit_replace(g, err_sym->e.var.val, err_slot);
        }
        /* Error path: run the catch body, then fall through to the success
         * continuation point. */
        gen_block(g, catch_body);
        branch_patch_apply(g, success_branch, g->ninstrs);

        /* If the catch body diverges (never type), the expression's value
         * is the unwrapped payload; otherwise the try yields no value. */
        if (catch_body->type && catch_body->type->cls == TYPE_NEVER) {
            value_t payload_val = res.val;
            payload_val.type    = payload;
            return payload_val;
        }
        return value_none();
    }

    /* Branch over the error path when the tag is zero (success). */
    branch_patch_t success_branch = branch_patch_make(g, I_BEQ, tag, ZERO);
    if (n->val.try_expr.panic) {
        /* `try!`-style: an error traps immediately (EBREAK) instead of
         * propagating. */
        emit(g, EBREAK);
        branch_patch_apply(g, success_branch, g->ninstrs);
        freereg(g, tag);

        if (n->val.try_expr.handlers.len > 0)
            bail("catch clauses not supported in code generation");

        /* Zero-sized payloads carry no value. */
        if (!payload->size) {
            return value_none();
        }
        value_t result = res.val;
        result.type    = payload;

        return result;
    }

    type_t *fn_ret = g->fn.current->node->type->info.fun.ret;

    /* Prepare the function result slot so we can store an error in-place. */
    value_t dest;
    if (type_is_passed_by_ref(fn_ret)) {
        usereg(g, A0);
        dest = value_stack(OFFSET(A0, 0), fn_ret);
    } else {
        dest = value_reg(A0, fn_ret);
    }
    /* Copy the error payload into the function result slot. */
    value_t err_slot = res.val;
    err_slot.type    = res_val.type->info.res.err;
    emit_result_store_error(g, dest, err_slot);

    /* Propagation mirrors an early return: placeholder jump to the
     * epilogue, queued for patching. */
    usize ret_pc = emit(g, NOP);

    if (g->fn.nretpatches >= MAX_RET_PATCHES)
        bail("too many return statements in function");

    g->fn.retpatches[g->fn.nretpatches++] = (ctpatch_t){
        .pc      = ret_pc,
        .applied = false,
    };
    /* Success path continues here. */
    branch_patch_apply(g, success_branch, g->ninstrs);

    freereg(g, tag);

    if (n->val.try_expr.handlers.len > 0)
        bail("catch clauses not supported in code generation");

    /* Zero-sized payloads carry no value. */
    if (!payload->size) {
        return value_none();
    }
    value_t result = res.val;
    result.type    = payload; /* Unwrap payload */

    return result;
}
1468 +
1469 +
/* Generate code for a binary operation.
 *
 * Evaluates the left operand first, pins its register so the right operand
 * cannot clobber it, then emits the operator-specific instruction sequence.
 * The result normally reuses the left register; for memory comparisons a
 * fresh register is allocated. If the node's type is optional, the scalar
 * result is additionally wrapped into a stack-allocated optional. */
static value_t gen_binop(gen_t *g, node_t *n) {
    value_t lval = gen_expr(g, n->val.binop.left, false);
    reg_t   left = emit_load(g, lval);

    /* Ensure generation for the rval does not overwrite the lval. */
    usereg(g, left);

    value_t rval   = gen_expr(g, n->val.binop.right, false);
    reg_t   right  = emit_load(g, rval);
    reg_t   result = left;

    /* Nb. no `default:` case below — an unhandled operator falls through
     * with `result` left as the unmodified left operand. */
    switch (n->val.binop.op) {
    case OP_ADD:
        /* Word (32-bit) forms for integer types, full-width otherwise. */
        if (type_is_int(lval.type->cls)) {
            emit(g, ADDW(left, left, right));
        } else {
            emit(g, ADD(left, left, right));
        }
        break;
    case OP_SUB:
        if (type_is_int(lval.type->cls)) {
            emit(g, SUBW(left, left, right));
        } else {
            emit(g, SUB(left, left, right));
        }
        break;
    case OP_MUL:
        if (type_is_int(lval.type->cls)) {
            emit(g, MULW(left, left, right));
        } else {
            emit(g, MUL(left, left, right));
        }
        break;
    case OP_DIV:
        /* Check for division by zero (node is already set by gen_node) */
        emit(g, BNE(right, ZERO, INSTR_SIZE * 2));
        emit(g, EBREAK);
        /* NOTE(review): division always uses the 32-bit word forms
         * (DIVUW/DIVW), unlike ADD/SUB/MUL which fall back to full-width
         * ops for non-int types — confirm this is intended. */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, DIVUW(left, left, right));
        } else {
            emit(g, DIVW(left, left, right));
        }
        break;
    case OP_MOD:
        if (type_is_int(lval.type->cls)) {
            /* Check for division by zero (node is already set by gen_node) */
            emit(g, BNE(right, ZERO, INSTR_SIZE * 2));
            emit(g, EBREAK);
            if (type_is_unsigned(lval.type->cls)) {
                emit(g, REMUW(left, left, right));
            } else {
                emit(g, REMW(left, left, right));
            }
        } else {
            bail("modulo operator is only supported for integers");
        }
        break;
    case OP_EQ:
    case OP_NE: {
        bool invert    = (n->val.binop.op == OP_NE);
        bool opt_left  = (lval.type->cls == TYPE_OPT);
        bool opt_right = (rval.type->cls == TYPE_OPT);
        bool left_nil  = (n->val.binop.left->cls == NODE_NIL);
        bool right_nil = (n->val.binop.right->cls == NODE_NIL);

        /* Fast-path for comparisons with `nil`. */
        if (opt_left && opt_right && (left_nil || right_nil)) {
            if (left_nil && right_nil) {
                /* nil == nil is a constant: 1 for EQ, 0 for NE. */
                freereg(g, left);
                freereg(g, right);

                reg_t result_reg = nextreg(g);
                emit_li(g, result_reg, invert ? 0 : 1);

                return value_reg(result_reg, n->type);
            }
            /* One side is nil: test the other side's presence tag. */
            reg_t opt_reg = left_nil ? right : left;
            reg_t nil_reg = left_nil ? left : right;

            freereg(g, nil_reg);

            /* Load the tag byte and test it for zero (SLTIU x, x, 1
             * yields 1 iff x == 0). */
            reg_t tag_reg = nextreg(g);
            emit(g, LBU(tag_reg, opt_reg, 0));
            emit(g, SLTIU(tag_reg, tag_reg, 1));

            if (invert)
                emit(g, XORI(tag_reg, tag_reg, 1));

            freereg(g, opt_reg);

            return value_reg(tag_reg, n->type);
        }

        if (opt_left != opt_right) {
            /* Mixed optional/non-optional: wrap the plain value into an
             * optional, then compare memory representations. */
            type_t *opt_type   = opt_left ? lval.type : rval.type;
            value_t value_expr = opt_left ? rval : lval;
            value_t wrapped    = optval_from_value(g, opt_type, value_expr);
            reg_t   target_reg = opt_left ? right : left;

            emit_load_into(g, target_reg, wrapped);
            result = nextreg(g);
            emit_memequal(g, left, right, opt_type, result);

            if (invert)
                emit(g, XORI(result, result, 1));
        } else if (type_is_primitive(lval.type)) {
            if (invert) {
                /* XOR will be non-zero if values differ. */
                emit(g, XOR(left, left, right));
                /* Set to 1 if result is non-zero (different). */
                emit(g, SLTU(left, ZERO, left));
            } else {
                /* Emits `result = left - right` */
                if (type_is_int(lval.type->cls)) {
                    emit(g, SUBW(left, left, right));
                } else {
                    emit(g, SUB(left, left, right));
                }
                /* Emits `result = (result < 1) ? 1 : 0` */
                emit(g, SLTIU(left, left, 1));
            }
        } else {
            /* Non-primitive: byte-wise memory comparison. */
            result = nextreg(g);
            emit_memequal(g, left, right, lval.type, result);
            if (invert)
                emit(g, XORI(result, result, 1));
        }
        break;
    }
    case OP_LT:
        /* Emits `result = (left < right) ? 1 : 0` */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, left, right));
        } else {
            emit(g, SLT(left, left, right));
        }
        break;
    case OP_GT:
        /* Emits `result = (right < left) ? 1 : 0` */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, right, left));
        } else {
            emit(g, SLT(left, right, left));
        }
        break;
    case OP_LE:
        /* For `x <= y`, we can compute `!(x > y)`, which is `!(y < x)`, */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, right, left));
        } else {
            emit(g, SLT(left, right, left));
        }
        emit(g, XORI(left, left, 1));
        break;
    case OP_GE:
        /* For `x >= y`, we can compute `!(x < y)`. */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, left, right));
        } else {
            emit(g, SLT(left, left, right));
        }
        emit(g, XORI(left, left, 1));
        break;
    case OP_AND:
        /* Logical AND; both values must be 1 for the result to be 1.
         * Assumes boolean operands are normalized to 0/1 — TODO confirm. */
        emit(g, AND(left, left, right));
        break;
    case OP_OR:
        /* Logical OR; if either value is 1, the result is 1.
         * Assumes boolean operands are normalized to 0/1 — TODO confirm. */
        emit(g, OR(left, left, right));
        break;
    case OP_BAND:
        /* Bitwise AND */
        emit(g, AND(left, left, right));
        break;
    case OP_BOR:
        /* Bitwise OR */
        emit(g, OR(left, left, right));
        break;
    case OP_XOR:
        /* Bitwise XOR */
        emit(g, XOR(left, left, right));
        break;
    case OP_SHL:
        /* Left shift */
        if (type_is_int(lval.type->cls)) {
            emit(g, SLLW(left, left, right));
        } else {
            emit(g, SLL(left, left, right));
        }
        break;
    case OP_SHR:
        /* Right shift.
         * NOTE(review): SRLW/SRL are logical shifts; a signed `>>` would
         * conventionally use SRAW/SRA. Confirm the language defines right
         * shift as logical for signed integer types. */
        if (type_is_int(lval.type->cls)) {
            emit(g, SRLW(left, left, right));
        } else {
            emit(g, SRL(left, left, right));
        }
        break;
    }
    /* Check if result needs to be coerced to optional type */
    if (n->type->cls == TYPE_OPT) {
        /* Wrap the scalar result into a stack-allocated optional (tag = 1). */
        i32     offset     = reserve(g, n->type);
        value_t opt_val    = value_stack(OFFSET(FP, offset), n->type);
        value_t result_val = value_reg(result, n->type->info.opt.elem);

        tval_store(g, opt_val, result_val, 1);
        lval = opt_val;

        /* Can free all registers since result is stored on stack */
        freereg(g, left);
        freereg(g, right);
        freereg(g, result);
    } else {
        lval = value_reg(result, n->type);

        /* Free whichever operand registers the result did not reuse. */
        if (left != result)
            freereg(g, left);
        if (right != result)
            freereg(g, right);
    }
    return lval;
}
1692 +
1693 +
/* Generate code for record construction. Handles both labeled syntax like
1694 +
 * `Point { x: 1, y: 2 }` (NODE_RECORD_LIT) and tuple syntax like `Pair(1, 2)`
1695 +
 * (NODE_CALL with tuple record type). */
1696 +
static value_t gen_record_lit(gen_t *g, node_t *n) {
1697 +
    type_t *stype     = n->type;
1698 +
    int     strct_off = reserve(g, stype);
1699 +
1700 +
    usize nfields = (n->cls == NODE_RECORD_LIT) ? n->val.record_lit.fields.len
1701 +
                                                : n->val.call.args.len;
1702 +
    node_t **fields =
1703 +
        (n->cls == NODE_RECORD_LIT)
1704 +
            ? nodespan_ptrs(&g->mod->parser, n->val.record_lit.fields)
1705 +
            : NULL;
1706 +
1707 +
    for (usize i = 0; i < nfields; i++) {
1708 +
        symbol_t *field;
1709 +
        node_t   *expr;
1710 +
1711 +
        if (n->cls == NODE_RECORD_LIT) {
1712 +
            node_t *arg = fields[i];
1713 +
            field       = arg->sym ? arg->sym : stype->info.srt.fields[i];
1714 +
            expr        = arg->val.call_arg.expr;
1715 +
        } else {
1716 +
            node_t *arg = SPAN(g, n->val.call.args)[i];
1717 +
            field       = stype->info.srt.fields[i];
1718 +
            expr = (arg->cls == NODE_CALL_ARG) ? arg->val.call_arg.expr : arg;
1719 +
        }
1720 +
1721 +
        value_t argval = gen_expr(g, expr, false);
1722 +
        emit_record_field_set(g, argval, FP, strct_off, field);
1723 +
        freeval(g, argval);
1724 +
    }
1725 +
    return value_stack(OFFSET(FP, strct_off), stype);
1726 +
}
1727 +
1728 +
/* Dispatch an intrinsic call to its specialized generator and wrap the
 * conventional A0 result (or produce a void value when the intrinsic has no
 * declared return type). */
static value_t gen_call_intrinsic(
    gen_t *g, node_t *n, void (*gen_intrinsic)(gen_t *, node_t *)
) {
    node_t *decl     = n->sym->node;
    node_t *ret_node = decl->val.fn_decl.return_type;
    type_t *ret_type = ret_node ? ret_node->type : NULL;

    /* The specialized generator owns argument processing entirely. */
    gen_intrinsic(g, n);

    if (ret_type == NULL) {
        /* No return type declared: yield a void value. */
        return (value_t){ .type = NULL, .loc = LOC_NONE };
    }
    /* Intrinsics follow the normal convention: result arrives in A0. */
    return value_reg(A0, ret_type);
}
1744 +
1745 +
/* Generate code for a function call expression.
 *
 * Responsibilities, in order:
 *   1. Spill live caller-saved registers to the frame, rebinding any
 *      parameter symbols that lived in them to their spill slots.
 *   2. Evaluate arguments into A0..A7 (shifted to start at A1 when a hidden
 *      struct-return pointer occupies A0).
 *   3. Emit the call itself: function-pointer JALR, extern (no code),
 *      direct call, or a NOP pair recorded for later patching.
 *   4. Restore spilled registers and undo the parameter rebindings.
 *   5. Package the return value: void, a caller-allocated sret slot, or a
 *      register (possibly moved to the stack to survive the restores). */
static value_t gen_call(gen_t *g, node_t *n) {
    symbol_t   *sym  = n->sym;
    const char *name = sym->qualified; /* Used only for diagnostics below. */

    /* Get the return type. Fall back to the call node type when the symbol
     * does not carry a resolved function signature (eg. indirect calls). */
    type_t *return_type = sym->node->type->info.fun.ret;
    if (!return_type && n->type) {
        return_type = n->type;
    }

    /* Keep track of registers we saved before the call.
     * Indexed by register number: spill offset, the parameter value that
     * occupied the register, and the parameter symbol that was rebound. */
    i32       saved_regs[REGISTERS] = { 0 };
    value_t   saved_vals[REGISTERS] = { 0 };
    symbol_t *saved_syms[REGISTERS] = { 0 };

    /* Save live registers to the stack, in case they get clobbered by
     * the callee. */
    for (usize i = 0; i < RALLOC_NREGS; i++) {
        reg_t r = ralloc_regs[i];

        /* Don't save registers that aren't caller-saved. */
        if (!caller_saved_registers[r])
            continue;

        /* Don't save registers that aren't in use. */
        if (ralloc_is_free(&g->regs, r))
            continue;

        /* Use a pointer-sized type for saving registers to the stack. */
        static type_t dword = { .cls   = TYPE_PTR,
                                .size  = WORD_SIZE,
                                .align = WORD_SIZE };
        saved_regs[r]       = emit_regpush(g, r, &dword);
        /* We can free the register since it's on the stack. */
        freereg(g, r);

        /* Parameters arrive in caller-saved registers; if we let the allocator
         * reuse that register (e.g. in emit_memzero), the parameter value gets
         * clobbered. When we spill the register here, rewrite the symbol to
         * point at the spill slot so later loads grab the preserved copy. */
        node_t *fn_node = g->fn.current->node;

        for (usize p = 0; p < fn_node->val.fn_decl.params.len; p++) {
            node_t   *param     = SPAN(g, fn_node->val.fn_decl.params)[p];
            symbol_t *param_sym = param->sym;
            value_t  *param_val = &param_sym->e.var.val;

            if (param_val->loc == LOC_REG && param_val->as.reg == r) {
                /* Remember the original binding so it can be restored after
                 * the call (see the restore loop below). */
                saved_syms[r] = param_sym;
                saved_vals[r] = *param_val;

                param_sym->e.var.val =
                    value_stack(OFFSET(FP, saved_regs[r]), param_val->type);
                param_sym->e.var.val.temp = false;

                break;
            }
        }
    }

    /* `sret`: the return value is written through a hidden pointer; the
     * pointer goes in A0, so real arguments start at A1. */
    bool  sret           = type_is_passed_by_ref(return_type);
    reg_t arg0           = sret ? A1 : A0;
    usize avail_arg_regs = (usize)((A7 - arg0) + 1);

    if (n->val.call.args.len > avail_arg_regs) {
        bail(
            "function call '%s' requires %zu argument registers but only %zu "
            "are available",
            name,
            n->val.call.args.len,
            avail_arg_regs
        );
    }

    /* Setup arguments in argument registers (A0..A7), shifting when a hidden
     * return pointer occupies A0. */
    for (usize i = 0; i < n->val.call.args.len; i++) {
        /* Generate code for the expression part of the argument. */
        node_t *arg    = SPAN(g, n->val.call.args)[i];
        value_t argval = gen_expr(g, arg->val.call_arg.expr, false);

        /* Implicitly wrap a plain value when the parameter is optional. */
        type_t *param_type = sym->node->type->info.fun.params[i];
        if (param_type->cls == TYPE_OPT && argval.type->cls != TYPE_OPT) {
            argval = optval_from_value(g, param_type, argval);
        }
        /* Mark this register as in use for the duration of the call. */
        reg_t arg_reg = arg0 + (reg_t)i;
        emit_load_into(g, usereg(g, arg_reg), argval);
    }
    /* Return value is in A0, by convention, whether or not an address was
     * passed into A0 by the caller. */
    reg_t return_reg = A0;
    /* Return stack offset if we store it on the stack. */
    i32  return_off         = 0;
    i32  return_stack_off   = 0;
    bool return_is_on_stack = false;

    /* For types that are passed by reference, allocate space in this
     * stack frame, and pass the address via A0, as a hidden first parameter.
     * Nb. The return record address is setup *after* the call arguments
     * are generated, to not clobber A0 in case one of the arguments is a
     * call, eg. `f(g())` where `f` is the current function call. */
    if (return_type->cls == TYPE_VOID) {
        /* For void functions, no need to allocate space for return value */
    } else if (sret) {
        return_off = reserve(g, return_type);
        /* Result-returning callees can legitimately skip rewriting the tag on
         * a fast-path success, so ensure the caller-visible slot starts zeroed.
         * Other pass-by-ref aggregates are always fully overwritten by the
         * callee, making a pre-emptive memset unnecessary work. */
        if (return_type->cls == TYPE_RESULT) {
            emit_memzero(g, OFFSET(FP, return_off), return_type->size);
        }
        /* Store return address in return address register. */
        usereg(g, return_reg);
        emit_addr_offset(g, return_reg, FP, return_off);
    }

    /* Call the function. */
    if (sym->kind == SYM_VARIABLE) {
        /* Function pointer call: load address into S2 and call via JALR */
        value_t fn_ptr_val = sym->e.var.val;

        /* If the pointer lived in a register we just spilled, read it back
         * from the spill slot rather than the (now freed) register. */
        if (fn_ptr_val.loc == LOC_REG && saved_regs[fn_ptr_val.as.reg]) {
            value_t spill = value_stack(
                OFFSET(FP, saved_regs[fn_ptr_val.as.reg]), fn_ptr_val.type
            );
            emit_load_into(g, S2, spill);
        } else if (fn_ptr_val.loc == LOC_REG) {
            emit_mv(g, S2, fn_ptr_val.as.reg);
        } else {
            emit_load_into(g, S2, fn_ptr_val);
        }
        emit(g, JALR(RA, S2, 0));
    } else if (sym->e.fn.attribs & ATTRIB_EXTERN) {
        /* External function. */
    } else if (sym->e.fn.addr) {
        /* Direct call, address is already known. */
        emit_call(g, sym->e.fn.addr);
    } else {
        if (g->nfnpatches >= MAX_FN_PATCHES)
            bail("too many function call patches");

        /* Indirect call with patch later, address is not yet known. */

        reg_t scratch = nextreg(g);
        usize pc      = emit(g, NOP);
        usize tramp   = emit(g, NOP);

        g->fnpatches[g->nfnpatches++] = (fnpatch_t){
            .fn_name     = sym->qualified,
            .pc          = pc,
            .tramp_pc    = tramp,
            .patch_type  = PATCH_CALL,
            .target_reg  = 0,
            .scratch_reg = scratch,
        };
        freereg(g, scratch);
    }
    /* If the return register (A0) was in use before the function call, move the
     * return value to a fresh register so restored caller values do not wipe it
     * out. */
    bool is_reg_return =
        (return_type->cls != TYPE_VOID) && !type_is_passed_by_ref(return_type);
    bool is_return_reg_saved = saved_regs[return_reg] != 0;

    if (is_reg_return && is_return_reg_saved) {
        return_stack_off   = emit_regpush(g, return_reg, return_type);
        return_is_on_stack = true;
    }

    /* Restore all saved registers. */
    for (usize i = 0; i < RALLOC_NREGS; i++) {
        reg_t dst    = ralloc_regs[i];
        i32   offset = saved_regs[dst];

        /* Offset 0 means "never spilled" (reserve never hands out 0 here
         * for a spill slot — presumably; TODO confirm). */
        if (!offset)
            continue;

        static type_t dword = { .cls   = TYPE_PTR,
                                .size  = WORD_SIZE,
                                .align = WORD_SIZE };
        emit_regload(g, dst, FP, offset, &dword);
        usereg(g, dst);

        /* Undo the temporary rebinding so the parameter once again refers to
         * its original register value now that the spill has been reloaded. */
        if (saved_syms[dst]) {
            saved_syms[dst]->e.var.val      = saved_vals[dst];
            saved_syms[dst]->e.var.val.temp = false;
        }
    }

    /* Restore argument registers that weren't in use before the call. */
    for (usize i = 0; i < n->val.call.args.len; i++) {
        reg_t arg = arg0 + (reg_t)i;
        if (!saved_regs[arg])
            freereg(g, arg);
    }

    /* For records, the return value is stored on the stack, and the return
     * register holds the address. For everything else, it's in a register. */
    if (return_type->cls == TYPE_VOID) {
        /* For void functions, we don't return a value */
        if (!is_return_reg_saved)
            freereg(g, return_reg);
        return (value_t){ .type = return_type, .loc = LOC_NONE };
    } else if (type_is_passed_by_ref(return_type)) {
        return value_stack(OFFSET(FP, return_off), return_type);
    } else {
        if (return_is_on_stack) {
            if (!is_return_reg_saved)
                freereg(g, return_reg);
            return value_stack(OFFSET(FP, return_stack_off), return_type);
        }
        /* The return value is marked as temp, so the caller is responsible
         * for freeing the register when done with the value. Mark the register
         * as in use to prevent reallocation before the value is consumed. */
        usereg(g, return_reg);
        return value_reg(return_reg, return_type);
    }
}
1968 +
1969 +
/* Generate code to access a slice field (len or ptr) given the slice value. */
1970 +
static value_t gen_slice_field(
1971 +
    gen_t *g, value_t slice_val, node_t *field, type_t *result_type
1972 +
) {
1973 +
    if (memcmp(field->val.ident.name, LEN_FIELD, LEN_FIELD_LEN) == 0) {
1974 +
        reg_t len = emit_load_offset(g, slice_val, SLICE_FIELD_LEN_OFFSET);
1975 +
        /* Slice lengths are stored as full dwords but typed as u32.
1976 +
         * Zero-extend to clear any upper 32 bits so that 64-bit
1977 +
         * comparisons (SLTU etc.) produce correct results on RV64. */
1978 +
        if (WORD_SIZE == 8) {
1979 +
            emit(g, SLLI(len, len, 32));
1980 +
            emit(g, SRLI(len, len, 32));
1981 +
        }
1982 +
        return value_reg(len, result_type);
1983 +
    }
1984 +
    if (memcmp(field->val.ident.name, PTR_FIELD, PTR_FIELD_LEN) == 0) {
1985 +
        reg_t ptr = emit_load_offset(g, slice_val, SLICE_FIELD_PTR_OFFSET);
1986 +
        return value_reg(ptr, result_type);
1987 +
    }
1988 +
    bail("unknown slice field");
1989 +
}
1990 +
1991 +
/* Generate an access whose base expression is a reference (pointer): the
 * pointer is dereferenced first, then the field/index access is applied to
 * the pointed-to record, slice, or array. */
static value_t gen_access_ref(gen_t *g, node_t *n) {
    node_t *expr     = n->val.access.lval;
    type_t *expr_typ = expr->type;

    /* Type of the object the pointer refers to. */
    type_t *target_type = deref_type(expr_typ);

    switch (target_type->cls) {
    case TYPE_RECORD: {
        value_t   ptr_val = gen_expr(g, expr, true);
        symbol_t *field   = n->sym;
        useval(g, ptr_val);

        /* For pointer access like ptr.field, we need to dereference first */
        /* Create a temporary node for dereferencing */
        node_t deref_node = {
            .cls           = NODE_UNOP,
            .type          = target_type,
            .val.unop.op   = OP_DEREF,
            .val.unop.expr = expr,
        };
        /* For pointer-to-record field access, keep the pointed-to record as an
         * lvalue so the field setter sees the original storage address. */
        value_t record_val = gen_deref(g, &deref_node, ptr_val, true);
        freeval(g, ptr_val);

        return emit_record_field_get(record_val, field);
    }
    case TYPE_SLICE: {
        node_t *field = n->val.access.rval;

        /* Dereference to get the slice value, then access the field */
        value_t ptr_val = gen_expr(g, expr, true);
        useval(g, ptr_val);

        /* Synthesized deref node, same pattern as the record case above. */
        node_t deref_node = {
            .cls           = NODE_UNOP,
            .type          = target_type,
            .val.unop.op   = OP_DEREF,
            .val.unop.expr = expr,
        };
        value_t slice_val = gen_deref(g, &deref_node, ptr_val, true);
        freeval(g, ptr_val);

        return gen_slice_field(g, slice_val, field, n->type);
    }
    case TYPE_ARRAY: {
        /* For pointer access like ptr[index], create a temporary array index
         * node */
        /* and let gen_array_index handle the pointer dereferencing */
        node_t array_index_node = { .cls             = NODE_ARRAY_INDEX,
                                    .type            = n->type,
                                    .val.access.lval = expr,
                                    .val.access.rval = n->val.access.rval };

        return gen_array_index(g, &array_index_node, true);
    }
    default:
        bail(
            "cannot access field of reference to %s",
            type_names[target_type->cls]
        );
    }
}
2054 +
2055 +
/* Generate a field access `expr.field`. Pointer bases are delegated to
 * gen_access_ref; record and slice bases are handled directly. */
static value_t gen_access(gen_t *g, node_t *n, bool lval) {
    node_t *base       = n->val.access.lval;
    node_t *field_node = n->val.access.rval;

    switch (base->type->cls) {
    case TYPE_PTR:
        /* Access through a reference: dereference first. */
        return gen_access_ref(g, n);
    case TYPE_RECORD: {
        /* Resolve the record value, then read the field via its symbol. */
        value_t rec = gen_expr(g, base, lval);

        return emit_record_field_get(rec, n->sym);
    }
    case TYPE_SLICE: {
        /* Built-in slice fields (`len` / `ptr`). */
        value_t sv = gen_expr(g, base, lval);

        return gen_slice_field(g, sv, field_node, n->type);
    }
    default:
        /* Any other base type should have been rejected before codegen. */
        abort();
    }
}
2080 +
2081 +
/* Generate code to obtain a function pointer for the given symbol */
2082 +
static value_t gen_fn_ptr(gen_t *g, symbol_t *sym, type_t *type) {
2083 +
    reg_t reg = nextreg(g);
2084 +
2085 +
    if (sym->e.fn.addr) {
2086 +
        /* Direct function address is known - use AUIPC+ADDI for PC-relative
2087 +
         * addressing since the program may be loaded at a non-zero base. */
2088 +
        emit_pc_rel_addr(g, reg, sym->e.fn.addr);
2089 +
        return value_reg(reg, type);
2090 +
    }
2091 +
2092 +
    /* Function address will be patched later - generate AUIPC+ADDI sequence */
2093 +
    usize pc = emit(g, NOP); /* Placeholder - will be patched with AUIPC */
2094 +
    emit(g, NOP); /* Second placeholder - will be patched with ADDI */
2095 +
2096 +
    if (g->nfnpatches >= MAX_FN_PATCHES)
2097 +
        bail("too many function address patches");
2098 +
2099 +
    g->fnpatches[g->nfnpatches++] = (fnpatch_t){
2100 +
        .fn_name     = sym->qualified,
2101 +
        .pc          = pc,
2102 +
        .tramp_pc    = pc + 1,
2103 +
        .patch_type  = PATCH_ADDRESS,
2104 +
        .target_reg  = reg,
2105 +
        .scratch_reg = ZERO,
2106 +
    };
2107 +
    return value_reg(reg, type);
2108 +
}
2109 +
2110 +
/* Resolve a scoped name reference to a value, dispatching on the kind of
 * symbol the name was bound to. */
static value_t gen_scope(gen_t *g, node_t *n) {
    symbol_t *sym = n->sym;

    switch (sym->kind) {
    case SYM_CONSTANT:
        /* Constants are generated lazily, on first use. */
        if (sym->e.var.val.loc == LOC_NONE)
            gen_const(g, sym->node);
        return sym->e.var.val;

    case SYM_VARIANT:
        if (n->type->cls != TYPE_UNION)
            bail("variant of type %s is invalid", type_names[n->type->cls]);

        if (type_is_union_with_payload(n->type)) {
            /* Payload-carrying union: materialize storage with an empty
             * payload. */
            return gen_union_store(g, n->type, sym, value_none());
        }
        /* Plain enum-like variant: just its tag value as an immediate. */
        return value_imm(
            (imm_t){ .i = sym->node->val.union_variant.value }, n->type
        );

    case SYM_FUNCTION:
        /* Using a function name as a value yields its address. */
        return gen_fn_ptr(g, sym, n->type);

    case SYM_VARIABLE:
    default:
        break;
    }
    bail(
        "unhandled scope case for symbol kind %d, node kind %s",
        sym->kind,
        node_names[n->cls]
    );
}
2145 +
2146 +
/* Take a reference (`&expr`): produce the address of the target's storage
 * in a register. */
static value_t gen_ref(gen_t *g, node_t *n) {
    node_t *target = n->val.ref.target;

    /* `&[a, b, c]` builds the array then wraps it in a full-range slice. */
    if (target->cls == NODE_ARRAY_LIT) {
        value_t arr = gen_array_literal(g, target);
        return gen_array_slice(g, arr, NULL);
    }

    /* Evaluate as an lvalue so we see the actual storage location. */
    value_t tv = gen_expr(g, target, true);

    /* A register has no address; spill it to the stack first. */
    if (tv.loc == LOC_REG)
        tv = emit_push(g, tv);

    switch (tv.loc) {
    case LOC_STACK: {
        /* Compute base+offset into a fresh register. */
        reg_t r = nextreg(g);

        emit_addr_offset(g, r, tv.as.off.base, tv.as.off.offset);
        return value_reg(r, n->type);
    }
    case LOC_ADDR: {
        /* Absolute address: materialize it as an immediate. */
        reg_t r = nextreg(g);

        emit_li(g, r, tv.as.adr.base + tv.as.adr.offset);
        return value_reg(r, n->type);
    }
    default:
        /* Immediates and other locations are not addressable. */
        bail("cannot take a reference to the target expression");
    }
}
2178 +
2179 +
/* Dereference a pointer value. For lvalue requests and by-reference types
 * the memory location itself is returned; otherwise the pointee is loaded
 * into a register. */
static value_t gen_deref(gen_t *g, node_t *n, value_t ref_val, bool lval) {
    reg_t base   = ZERO;
    bool  loaded = false;

    /* Get the pointer itself into a register. */
    switch (ref_val.loc) {
    case LOC_REG:
        base = ref_val.as.reg;
        break;
    case LOC_STACK:
    case LOC_ADDR:
        base   = emit_load(g, ref_val);
        loaded = true;
        break;
    default:
        bail("cannot dereference expression at this location");
    }

    value_t slot = value_stack(OFFSET(base, 0), n->type);

    /* Callers wanting an lvalue, and aggregates passed by reference, get the
     * location directly; the base register stays live inside it. */
    if (lval || type_is_passed_by_ref(n->type))
        return slot;

    reg_t out = emit_load(g, slot);

    if (loaded)
        freereg(g, base);

    return value_reg(out, n->type);
}
2204 +
2205 +
/* Generate an array literal.
2206 +
 *
2207 +
 * This function handles array literals like `[1, 2, 3]`. It allocates
2208 +
 * space for the array on the stack, evaluates each element, and initializes
2209 +
 * the array elements in memory. */
2210 +
static value_t gen_array_literal(gen_t *g, node_t *n) {
2211 +
    type_t *array_type = n->type;
2212 +
    type_t *elem_type  = array_type->info.ary.elem;
2213 +
    usize   length     = array_type->info.ary.length;
2214 +
2215 +
    /* Reserve stack space for the array in the current frame. */
2216 +
    int array_off = reserve(g, array_type);
2217 +
2218 +
    /* Evaluate and store each element of the array. */
2219 +
    node_t **elems = nodespan_ptrs(&g->mod->parser, n->val.array_lit.elems);
2220 +
    for (usize i = 0; i < length; i++) {
2221 +
        node_t  *elem     = elems[i];
2222 +
        frame_t *frame    = &g->fn.current->e.fn.frame;
2223 +
        i32      saved_sp = frame->sp;
2224 +
        value_t  elem_val = gen_expr(g, elem, false);
2225 +
2226 +
        /* Calculate the offset for this element in the array. */
2227 +
        i32 elem_off = array_off + (i32)(i * elem_type->size);
2228 +
2229 +
        /* Store the element value at the calculated offset. */
2230 +
        emit_store(g, elem_val, FP, elem_off);
2231 +
        freeval(g, elem_val);
2232 +
2233 +
        /* Only reclaim stack space if the element type doesn't contain
2234 +
         * pointers. Slices and pointers may reference stack-allocated
2235 +
         * temporaries that must remain live. */
2236 +
        if (!type_is_address(elem_type->cls)) {
2237 +
            frame->sp = saved_sp;
2238 +
        }
2239 +
    }
2240 +
    /* The initialized array is on the stack at the computed offset. */
2241 +
    return value_stack(OFFSET(FP, array_off), array_type);
2242 +
}
2243 +
2244 +
/* Generate code for an array repeat literal (e.g. [0; 24]). */
2245 +
static value_t gen_array_repeat(gen_t *g, node_t *n) {
2246 +
    type_t *array_type = n->type;
2247 +
    type_t *elem_type  = array_type->info.ary.elem;
2248 +
    usize   length     = array_type->info.ary.length;
2249 +
    usize   array_off  = reserve(g, array_type);
2250 +
    value_t elem_val   = gen_expr(g, n->val.array_repeat_lit.value, false);
2251 +
2252 +
    /* Store the same value at each array position */
2253 +
    for (usize i = 0; i < length; i++) {
2254 +
        i32 elem_off = array_off + (i32)(i * elem_type->size);
2255 +
        emit_store(g, elem_val, FP, elem_off);
2256 +
    }
2257 +
    if (elem_val.loc == LOC_REG)
2258 +
        freereg(g, elem_val.as.reg);
2259 +
2260 +
    return value_stack(OFFSET(FP, array_off), array_type);
2261 +
}
2262 +
2263 +
/* Generate code for a slice with a range expression. */
2264 +
static value_t gen_array_slice(gen_t *g, value_t array_val, node_t *range) {
2265 +
    static type_t dword_type = { .cls = TYPE_PTR };
2266 +
2267 +
    type_t *slice_type, *elem_type;
2268 +
    if (array_val.type->cls == TYPE_ARRAY) {
2269 +
        slice_type = array_val.type->slice;
2270 +
        elem_type  = slice_type->info.slc.elem;
2271 +
    } else { /* TYPE_SLICE */
2272 +
        slice_type = array_val.type;
2273 +
        elem_type  = array_val.type->info.slc.elem;
2274 +
    }
2275 +
2276 +
    /* Reserve stack space for the slice (pointer + length) */
2277 +
    i32     slice_off   = reserve(g, slice_type);
2278 +
    value_t slice_val   = value_stack(OFFSET(FP, slice_off), slice_type);
2279 +
    reg_t   slice_start = ZERO; /* Start index */
2280 +
2281 +
    /* 1. Store array pointer at slice offset `0`.
2282 +
     * 2. Update slice offset `0` with slice start range.
2283 +
     * 3. Compute slice length, based on range.
2284 +
     * 4. Store slice length at slice offset `4`. */
2285 +
2286 +
    /* Emit slice address information */
2287 +
    if (range && range->val.range.start) {
2288 +
        /* Generate start expression and bounds check */
2289 +
        reg_t   r         = nextreg(g);
2290 +
        value_t start_val = gen_expr(g, range->val.range.start, false);
2291 +
        reg_t   start_reg = emit_load(g, start_val);
2292 +
        reg_t   slice_adr = ZERO;
2293 +
2294 +
        if (array_val.type->cls == TYPE_ARRAY) {
2295 +
            slice_adr = emit_load(g, array_val);
2296 +
        } else {
2297 +
            /* Load data pointer from slice (first word) */
2298 +
            slice_adr = emit_load_dword(g, array_val);
2299 +
        }
2300 +
        offset_t slice_off = slice_val.as.off;
2301 +
2302 +
        emit_li(g, r, elem_type->size);
2303 +
        emit(g, MUL(r, r, start_reg)); /* Offset from array address */
2304 +
        emit(g, ADD(r, r, slice_adr)); /* Full address */
2305 +
        emit_regstore(
2306 +
            g, r, slice_off.base, slice_off.offset, &dword_type
2307 +
        ); /* Save */
2308 +
2309 +
        slice_start = start_reg;
2310 +
2311 +
        /* Don't free start_reg yet - still needed as slice_start */
2312 +
        if (array_val.type->cls == TYPE_SLICE) {
2313 +
            freereg(g, slice_adr);
2314 +
        }
2315 +
        freereg(g, r);
2316 +
    } else {
2317 +
        if (array_val.type->cls == TYPE_ARRAY) {
2318 +
            /* For arrays, copy the array address */
2319 +
            emit_copy_by_ref(g, array_val, slice_val);
2320 +
        } else { /* TYPE_SLICE */
2321 +
            /* For slices, copy the slice fat pointer */
2322 +
            emit_memcopy(g, array_val.as.off, slice_val.as.off, array_val.type);
2323 +
        }
2324 +
    }
2325 +
2326 +
    /* Emit slice length information */
2327 +
    if (range && range->val.range.end) {
2328 +
        /* Generate end value */
2329 +
        value_t end_val = gen_expr(g, range->val.range.end, false);
2330 +
        reg_t   end_reg = emit_load(g, end_val);
2331 +
2332 +
        offset_t slice_off = slice_val.as.off;
2333 +
        if (slice_start != ZERO) {
2334 +
            /* Use SUBW on RV64 so the result is properly sign-extended
2335 +
             * to 64 bits, keeping the upper 32 bits clean. */
2336 +
            emit(g, SUBW(end_reg, end_reg, slice_start));
2337 +
        }
2338 +
        emit_regstore(
2339 +
            g,
2340 +
            end_reg,
2341 +
            slice_off.base,
2342 +
            slice_off.offset + WORD_SIZE,
2343 +
            &dword_type
2344 +
        );
2345 +
2346 +
        freereg(g, end_reg);
2347 +
    } else {
2348 +
        reg_t r = nextreg(g);
2349 +
        if (array_val.type->cls == TYPE_ARRAY) {
2350 +
            emit_li(g, r, array_val.type->info.ary.length);
2351 +
        } else { /* Slice */
2352 +
            /* Load length from slice (second word) */
2353 +
            r = emit_load_offset(g, array_val, SLICE_FIELD_LEN_OFFSET);
2354 +
        }
2355 +
        /* Slice length = array length - slice start */
2356 +
        offset_t slice_off = slice_val.as.off;
2357 +
        if (slice_start != ZERO) {
2358 +
            /* Use SUBW on RV64 so the result is properly sign-extended
2359 +
             * to 64 bits, keeping the upper 32 bits clean. */
2360 +
            emit(g, SUBW(r, r, slice_start));
2361 +
        }
2362 +
        emit_regstore(
2363 +
            g, r, slice_off.base, slice_off.offset + WORD_SIZE, &dword_type
2364 +
        );
2365 +
2366 +
        freereg(g, r);
2367 +
    }
2368 +
    freereg(g, slice_start);
2369 +
2370 +
    return slice_val;
2371 +
}
2372 +
2373 +
/* Generate array indexing.
2374 +
 *
2375 +
 * This function handles array indexing operations like `arr[i]` or `slice[i]`,
2376 +
 * as well as slicing operations using ranges like `arr[..]` or `arr[0..5]`. */
2377 +
static value_t gen_array_index(gen_t *g, node_t *n, bool lval) {
2378 +
    /* Generate code for the array/slice expression. */
2379 +
    value_t array_val  = gen_expr(g, n->val.access.lval, lval);
2380 +
    type_t *array_type = array_val.type;
2381 +
2382 +
    if (array_type->cls == TYPE_PTR) {
2383 +
        array_type = deref_type(array_type);
2384 +
    }
2385 +
2386 +
    /* Check if this is a range expression (for slicing) */
2387 +
    node_t *idx_node = n->val.access.rval;
2388 +
    if (idx_node->cls == NODE_RANGE) {
2389 +
        return gen_array_slice(g, array_val, idx_node);
2390 +
    } else {
2391 +
        return emit_array_index(
2392 +
            g, array_val, gen_expr(g, idx_node, false), lval
2393 +
        );
2394 +
    }
2395 +
}
2396 +
2397 +
/* Generate code for a unary operation. */
static value_t gen_unop(gen_t *g, node_t *n, bool lval) {
    value_t operand = gen_expr(g, n->val.unop.expr, lval);

    switch (n->val.unop.op) {
    case OP_NOT: {
        /* Logical NOT; invert the boolean value. */
        reg_t r = emit_load(g, operand);

        emit(g, NOT(r, r));
        return value_reg(r, operand.type);
    }
    case OP_NEG: {
        /* Numerical negation. */
        reg_t r = emit_load(g, operand);

        emit(g, NEG(r, r));
        return value_reg(r, operand.type);
    }
    case OP_BNOT: {
        /* Bitwise NOT: XOR with all-ones flips every bit. */
        reg_t r = emit_load(g, operand);

        emit(g, XORI(r, r, -1));
        return value_reg(r, operand.type);
    }
    case OP_DEREF:
        /* `*p` — delegate to the shared dereference path. */
        return gen_deref(g, n, operand, lval);
    default:
        abort();
    }
}
2425 +
2426 +
/* Generate code for a string literal. The bytes are placed in the data
 * section and a slice referring to them is materialized on the stack. */
static value_t gen_string(gen_t *g, node_t *n) {
    usize len = n->val.string_lit.length;

    /* Intern the literal's bytes; `data_off` is their data-section offset. */
    usize data_off = data_string(&g->data, n->val.string_lit.data, len);

    /* Reserve stack space for the resulting slice value. */
    i32 stack_off = reserve(g, n->type);

    return emit_slice_lit(g, stack_off, data_off, len, n->type);
}
2436 +
2437 +
/* Generate code for an expression node and return the resulting value.
 *
 * `lvalue` requests a location suitable for assignment; it is propagated
 * to the sub-generators that support it. Literal cases fill in `val` in
 * place and fall through to the common `return val` at the bottom; most
 * other cases dispatch to a dedicated gen_* helper and return directly. */
static value_t gen_expr(gen_t *g, node_t *n, bool lvalue) {
    assert(n->type);

    value_t val = (value_t){ .type = n->type };

    switch (n->cls) {
    case NODE_UNOP:
        return gen_unop(g, n, lvalue);
    case NODE_BINOP:
        return gen_binop(g, n);
    case NODE_BOOL:
        /* Boolean literal; may be coerced to an optional wrapper. */
        if (n->type->cls == TYPE_OPT) {
            value_t inner_val = (value_t){
                .type     = n->type->info.opt.elem,
                .loc      = LOC_IMM,
                .as.imm.b = n->val.bool_lit,
            };
            return optval_from_prim(g, n->type, inner_val);
        } else {
            val.loc      = LOC_IMM;
            val.as.imm.b = n->val.bool_lit;
        }
        break;
    case NODE_STRING:
        return gen_string(g, n);
    case NODE_CHAR:
        /* Char literal; like bool, may be coerced to an optional. */
        if (n->type->cls == TYPE_OPT) {
            value_t inner_val = (value_t){
                .type     = n->type->info.opt.elem,
                .loc      = LOC_IMM,
                .as.imm.u = (u8)n->val.char_lit,
            };
            return optval_from_prim(g, n->type, inner_val);
        } else {
            val.loc      = LOC_IMM;
            val.as.imm.u = (u8)n->val.char_lit;
        }
        break;
    case NODE_NUMBER:
        /* Numeric literal: pick the signed or unsigned immediate slot
         * according to the checked type. */
        val.loc = LOC_IMM;

        switch (n->type->cls) {
        case TYPE_I8:
        case TYPE_I16:
        case TYPE_I32:
            val.as.imm.i = n->val.number.value.i;
            break;
        case TYPE_U8:
        case TYPE_U16:
        case TYPE_U32:
            val.as.imm.u = n->val.number.value.u;
            break;
        case TYPE_OPT: {
            /* Number coerced to optional - create some(number) on stack */
            type_t *elem_type = n->type->info.opt.elem;
            value_t inner_val = (value_t){ .type = elem_type, .loc = LOC_IMM };

            switch (elem_type->cls) {
            case TYPE_I8:
            case TYPE_I16:
            case TYPE_I32:
                inner_val.as.imm.i = n->val.number.value.i;
                break;
            case TYPE_U8:
            case TYPE_U16:
            case TYPE_U32:
                inner_val.as.imm.u = n->val.number.value.u;
                break;
            default:
                break;
            }
            return optval_from_prim(g, n->type, inner_val);
        }
        default:
            break;
        }
        break;
    case NODE_ACCESS:
        return gen_access(g, n, lvalue);
    case NODE_SCOPE:
        return gen_scope(g, n);
    case NODE_TRY:
        return gen_try(g, n);
    case NODE_IDENT:

        if (n->sym->kind == SYM_FUNCTION) {
            /* Function identifier used as a value (function pointer) */
            return gen_fn_ptr(g, n->sym, n->type);
        }

        /* For types that are passed by reference and held in registers
         * (function parameters), dereference the pointer to get the data */
        if ((type_is_passed_by_ref(n->type)) &&
            n->sym->e.var.val.loc == LOC_REG) {
            return value_stack(OFFSET(n->sym->e.var.val.as.reg, 0), n->type);
        }
        return n->sym->e.var.val;
    case NODE_CALL: {
        /* Check if this is a tuple record constructor call */
        if (!n->sym && n->type && n->type->cls == TYPE_RECORD &&
            n->type->info.srt.tuple) {
            return gen_record_lit(g, n);
        }
        assert(n->sym);
        /* Check if this is a union constructor call */
        if (n->sym->kind == SYM_VARIANT &&
            type_is_union_with_payload(n->type)) {
            return gen_union_constructor(g, n);
        }
        /* Function pointer call */
        if (n->sym->kind == SYM_VARIABLE) {
            return gen_call(g, n);
        }
        /* Regular function call */

        if (n->sym->e.fn.attribs & ATTRIB_EXTERN) {
            /* Check if it's a built-in function. */
            for (usize i = 0; BUILTINS[i].name; i++) {
                if (strcmp(n->sym->qualified, BUILTINS[i].name) == 0) {
                    return gen_call_intrinsic(g, n, BUILTINS[i].gen);
                }
            }
        }
        return gen_call(g, n);
    }
    case NODE_CALL_ARG:
        /* Unreachable. This is handled inside `NODE_CALL`. */
        /* fallthrough */
    case NODE_RECORD_LIT:
        if (type_is_union_with_payload(n->type)) {
            /* Union variant with a record payload: generate the payload as
             * a plain record literal, then wrap it in the union. */
            type_t *payload_type = n->sym->node->type;

            node_t payload_lit = *n;
            payload_lit.type   = payload_type;
            payload_lit.sym    = NULL;

            value_t payload = gen_record_lit(g, &payload_lit);

            return gen_union_store(g, n->type, n->sym, payload);
        }
        return gen_record_lit(g, n);
    case NODE_ARRAY_LIT:
        return gen_array_literal(g, n);
    case NODE_ARRAY_REPEAT_LIT:
        return gen_array_repeat(g, n);
    case NODE_ARRAY_INDEX:
        return gen_array_index(g, n, lvalue);
    case NODE_REF:
        return gen_ref(g, n);
    case NODE_NIL: {
        /* Allocate space for the optional value and initialize as nil */
        i32 off = reserve(g, n->type);
        val     = value_stack(OFFSET(FP, off), n->type);
        tval_store(g, val, (value_t){ 0 }, 0);

        return val;
    }
    case NODE_UNDEF: {
        /* Reserve stack space, but leave its contents uninitialized. */
        i32 off = reserve(g, n->type);
        val     = value_stack(OFFSET(FP, off), n->type);

        return val;
    }
    case NODE_AS:
        return gen_as_cast(g, n);
    case NODE_IF:
        /* `if` as an expression yields a value; `if` as a statement
         * (void type) generates control flow only. */
        if (n->type->cls != TYPE_VOID) {
            return gen_if_expr(g, n);
        } else {
            gen_if(g, n);
            return value_none();
        }
    case NODE_BUILTIN: {
        builtin_kind_t kind = n->val.builtin.kind;
        node_t **args = nodespan_ptrs(&g->mod->parser, n->val.builtin.args);

        switch (kind) {
        case BUILTIN_SLICE_OF: {
            /* @sliceOf(ptr, len) - construct a slice from a pointer and length.
             * Slices are fat pointers: 4 bytes for ptr, 4 bytes for len. */
            node_t *ptr_expr = args[0];
            node_t *len_expr = args[1];

            /* Generate code for pointer and length expressions */
            value_t ptr_val = gen_expr(g, ptr_expr, false);
            value_t len_val = gen_expr(g, len_expr, false);

            /* Reserve stack space for the slice */
            i32 off = reserve(g, n->type);
            val     = value_stack(OFFSET(FP, off), n->type);

            /* Store pointer at offset+0, length at offset+WORD_SIZE */
            emit_store(g, ptr_val, FP, off + SLICE_FIELD_PTR_OFFSET);
            /* Force len to be stored as a dword (WORD_SIZE bytes) */
            static type_t dword = { .cls = TYPE_PTR };
            len_val.type        = &dword;
            emit_store(g, len_val, FP, off + SLICE_FIELD_LEN_OFFSET);

            return val;
        }
        case BUILTIN_SIZE_OF:
        case BUILTIN_ALIGN_OF:
            /* These are compile-time constants and should have been
             * folded during type checking. */
            bail("@sizeOf/@alignOf should be folded at compile time");
        }
        break;
    }
    default:
        bail("unsupported expression node type %s", node_names[n->cls]);
    }
    return val;
}
2649 +
2650 +
/* Bind function parameter `ix` of the current function to its incoming
 * argument register.
 *
 * Arguments arrive in A0..; when the return value is itself passed by
 * reference (sret), A0 holds the hidden return pointer and parameters
 * start at A1 instead. */
static void gen_fn_param(gen_t *g, node_t *param, usize ix) {
    node_t *fn = g->fn.current->node;

    type_t *ret  = fn->type->info.fun.ret;
    bool    sret = type_is_passed_by_ref(ret);
    reg_t   base = sret ? A1 : A0;
    reg_t   a    = base + (reg_t)ix;

    /* We're going to simply track the register in which our parameter is
     * held, and mark it as in use. */
    param->sym->e.var.val      = value_reg(a, param->type);
    param->sym->e.var.val.temp = false;
    usereg(g, a);

    /* If the type was passed by reference, we need to copy it to avoid
     * modifying the original copy. Once the copy lives on the stack, the
     * argument register can be released. */
    if (type_is_passed_by_ref(param->type)) {
        param->sym->e.var.val = emit_push(g, param->sym->e.var.val);
        freereg(g, a);
    }
    /* Nb. If code takes the address of a parameter (`&param`), that parameter
     * typically must be spilled to memory since registers don't have
     * addresses. */
}
2674 +
2675 +
/* Detect literal initializers that reside in a dedicated temporary and
2676 +
 * therefore can be bound directly without creating a defensive copy. */
2677 +
static bool is_unaliased(node_t *init) {
2678 +
    switch (init->cls) {
2679 +
    case NODE_ARRAY_LIT:
2680 +
    case NODE_ARRAY_REPEAT_LIT:
2681 +
    case NODE_RECORD_LIT:
2682 +
    case NODE_STRING:
2683 +
    case NODE_NIL:
2684 +
    case NODE_CALL:
2685 +
        return true;
2686 +
    default:
2687 +
        /* Nb. all immediates return `false`, because they do not occupy a
2688 +
         * stack location and therefore are not considered aliasable. */
2689 +
        return false;
2690 +
    }
2691 +
}
2692 +
2693 +
/* Generate code for a variable declaration.
 *
 * Binds the variable's symbol to a value location: either the temporary
 * produced by the initializer (when that temporary can be reused safely)
 * or a fresh, suitably aligned stack slot that the initializer value is
 * copied into. */
static void gen_var(gen_t *g, node_t *n) {
    node_t *lval = n->val.var.ident;
    node_t *rval = n->val.var.value;

    /* For placeholders, just evaluate the rvalue for side effects */
    if (lval->cls == NODE_PLACEHOLDER) {
        if (rval->cls != NODE_UNDEF) {
            gen_expr(g, rval, false);
        }
        return;
    }

    i32 align = n->sym->e.var.align;

    /* `undef` initializer: reserve space but don't write anything. */
    if (rval->cls == NODE_UNDEF) {
        i32 offset        = reserve_aligned(g, n->type, align);
        n->sym->e.var.val = value_stack(OFFSET(FP, offset), n->type);
        return;
    }

    /* Reuse the initializer's stack temporary directly when it is not
     * aliased elsewhere and already satisfies the required alignment;
     * this avoids a defensive copy. */
    value_t val = gen_expr(g, rval, false);
    bool    reuse =
        align <= n->type->align && val.loc == LOC_STACK && is_unaliased(rval);

    if (reuse) {
        n->sym->e.var.val = val;
        return;
    }
    /* Otherwise allocate a dedicated slot and copy the value into it. */
    i32     offset    = reserve_aligned(g, n->type, align);
    value_t dest      = value_stack(OFFSET(FP, offset), n->type);
    n->sym->e.var.val = dest;

    emit_replace(g, dest, val);
}
2727 +
2728 +
/* Generate a constant declaration: place its value in the data section
 * and record the resulting address (and alignment) in the symbol table. */
static void gen_const(gen_t *g, node_t *n) {
    /* Already materialized; nothing to do. */
    if (n->sym->e.var.val.loc != LOC_NONE)
        return;

    const char *name  = n->sym->qualified;
    node_t     *value = n->val.constant.value;
    usize       addr  =
        data_node(&g->data, &g->mod->parser, value, name, strlen(name));

    /* Store the constant address in the symbol table */
    n->sym->e.var.val   = value_addr(addr, 0, n->type);
    n->sym->e.var.align = n->type->align;
}
2742 +
2743 +
/* Generate a static declaration: emit its initializer into the data
 * section and store the resulting address in the symbol table.
 * Mirrors `gen_const`, but reads the value from the static node. */
static void gen_static(gen_t *g, node_t *n) {
    /* Don't re-generate if it already has a location. */
    if (n->sym->e.var.val.loc != LOC_NONE)
        return;

    node_t     *value    = n->val.static_decl.value;
    const char *name     = n->sym->qualified;
    usize       name_len = strlen(name);
    usize addr = data_node(&g->data, &g->mod->parser, value, name, name_len);

    /* Store the static's address in the symbol table. */
    n->sym->e.var.val   = value_addr(addr, 0, n->type);
    n->sym->e.var.align = n->type->align;
}
2756 +
2757 +
/* Generate code for a block of code.
 *
 * Stack allocations made inside the block are scoped: the frame's stack
 * pointer is saved on entry and restored on exit, while `frame->size`
 * records the high-water mark of stack usage across all blocks (the
 * frame's sp grows downward, hence the negations). */
static void gen_block(gen_t *g, node_t *n) {
    frame_t *frame = &g->fn.current->e.fn.frame;

    /* Record the stack pointer before entering the block
     * to restore it when exiting. */
    i32 sp = frame->sp;

    /* Generate code for each statement in the block. */
    node_t **stmts = nodespan_ptrs(&g->mod->parser, n->val.block.stmts);
    for (usize i = 0; i < n->val.block.stmts.len; i++) {
        gen_node(g, stmts[i]);
    }
    if (-frame->sp > frame->size) {
        /* Keep track of the maximum stack space used. */
        frame->size = -frame->sp;
    }
    /* De-allocate stack space. */
    frame->sp = sp;
}
2777 +
2778 +
/* Generate code for a function.
 *
 * Emits a placeholder prologue (7 NOPs — the maximum prologue length),
 * binds parameters, generates the body, then back-patches the prologue
 * once the final frame size is known. Afterwards it patches all pending
 * return jumps to the epilogue, emits the epilogue, and resolves any
 * call/address patches that were waiting on this function's address. */
static void gen_fn(gen_t *g, node_t *n) {
    /* Skip unused functions (dead code elimination) */
    if (!n->sym->e.fn.used) {
        return;
    }

    /* Check if this is an extern function */
    if (n->sym->e.fn.attribs & ATTRIB_EXTERN) {
        /* For extern functions, we don't generate any code since they are
         * implemented externally or are built-ins. */
        return;
    }
    /* Check if it's a test function, and skip if not in test mode. */
    if (n->sym->e.fn.attribs & ATTRIB_TEST && !(g->flags & FLAG_TEST)) {
        return;
    }

    type_t *ret  = n->type->info.fun.ret;
    bool    sret = type_is_passed_by_ref(ret);

    /* For types that are returned by reference, keep hidden return pointer
     * alive */
    if (sret) {
        usereg(g, A0);
    }

    /* Set current function. */
    g->fn.current     = n->sym;
    g->fn.nretpatches = 0;

    symbol_t *sym = n->sym;

    /* Store the current instruction address as the function's address. */
    sym->e.fn.addr = g->ninstrs;
    node_t *body   = n->val.fn_decl.body;

    /* Functions should have non-zero address, unless it's the default */

    frame_t *f = &sym->e.fn.frame;

    /* Offsets for RA and previous FP. */
    const i32 fp_off = -WORD_SIZE - WORD_SIZE;

    f->sp   = fp_off;
    f->size = 0; /* Will be patched once we know the frame size. */

    /* Function prologue. Track prologue address for patching. */
    usize prologue = sym->e.fn.addr;

    /* Generate placeholder instructions that will be patched at the end. */
    /* This is the maximum prologue size, if we need to create a big
     * stack frame. */
    emit(g, NOP);
    emit(g, NOP);
    emit(g, NOP);
    emit(g, NOP);
    emit(g, NOP);
    emit(g, NOP);
    emit(g, NOP);

    /* Reserve all argument registers up-front so they are not used as
     * temporaries while we spill each parameter. */
    reg_t param_base = sret ? A1 : A0;

    for (usize i = 0; i < n->val.fn_decl.params.len; i++) {
        reg_t a = param_base + (reg_t)i;

        if (a > A7) {
            bail(
                "function '%s' parameter %zu exceeds available register "
                "arguments",
                g->fn.current->qualified,
                i + 1
            );
        }
        usereg(g, a);
    }

    /*
     * Save parameters on the stack.
     */

    for (usize i = 0; i < n->val.fn_decl.params.len; i++) {
        gen_fn_param(g, SPAN(g, n->val.fn_decl.params)[i], i);
    }

    /*
     * Generate body.
     */
    gen_block(g, body);

    /* Ensure fallible functions that reach the end
     * implicitly return success. */
    if (ret->cls == TYPE_RESULT) {
        if (!ret->info.res.payload->size) {
            value_t dest;

            if (type_is_passed_by_ref(ret)) {
                usereg(g, A0);
                dest = value_stack(OFFSET(A0, 0), ret);
            } else {
                dest = value_reg(A0, ret);
            }
            emit_result_store_success(g, dest, value_none());

            if (!type_is_passed_by_ref(ret)) {
                freereg(g, A0);
            }
        }
    }
    /* Align the frame size according to the RISCV ABI. */
    f->size = align(f->size, STACK_ALIGNMENT);

    /* Back-patch the prologue placeholder now that the frame size is
     * known. `locals` excludes the two words used for FP and RA. */
    instr_t  *ins    = &g->instrs[prologue];
    usize     slot   = 0;
    const i32 locals = f->size - WORD_SIZE * 2;

    ins[slot++] = ADDI(SP, SP, -WORD_SIZE * 2);
    ins[slot++] = SD(FP, SP, 0);
    ins[slot++] = SD(RA, SP, WORD_SIZE);
    ins[slot++] = ADDI(FP, SP, WORD_SIZE * 2);

    if (locals != 0) {
        if (is_small(-locals)) {
            ins[slot++] = ADDI(SP, SP, -locals);
        } else {
            /* Frame too large for a 12-bit immediate; build the amount
             * in T0 with LUI(+ADDI) and subtract. */
            i32 hi = 0, lo = 0;
            split_imm(locals, &hi, &lo);

            ins[slot++] = LUI(T0, hi);

            if (lo != 0)
                ins[slot++] = ADDI(T0, T0, lo);

            ins[slot++] = SUB(SP, SP, T0);
        }
    }
    /* Any unused prologue slots remain NOPs. */
    while (slot < 7)
        ins[slot++] = NOP;

    /* Mark the epilogue position and patch all return statements
     * to jump to this epilogue. */
    usize epilogue = g->ninstrs;

    for (usize i = 0; i < g->fn.nretpatches; i++) {
        ctpatch_t *p = &g->fn.retpatches[i];

        if (!p->applied) {
            /* Calculate jump offset to the epilogue. */
            i32 offset = jump_offset(p->pc, epilogue);

            /* A word-size offset basically means jumping to the next
             * instruction, which is redundant. We leave it as a NOP in
             * that case. */
            if (offset != INSTR_SIZE) {
                /* Update the jump instruction with the correct offset. */
                g->instrs[p->pc] = JMP(offset);
            }
            p->applied = true;
        }
    }
    /*
     * Function epilogue.
     */
    if (locals != 0) {
        if (is_small(locals)) {
            emit(g, ADDI(SP, SP, locals));
        } else {
            emit_li(g, T0, locals);
            emit(g, ADD(SP, SP, T0));
        }
    }
    emit(g, LD(FP, SP, 0));
    emit(g, LD(RA, SP, WORD_SIZE));
    emit(g, ADDI(SP, SP, WORD_SIZE * 2));
    emit(g, RET);

    /* Release parameter and temporary registers */
    for (reg_t r = A0; r <= A7; r++)
        freereg(g, r);

    for (usize i = 0; i < sizeof(temp_registers) / sizeof(reg_t); i++)
        freereg(g, temp_registers[i]);

    /* Patch function call locations. */
    for (usize i = 0; i < g->nfnpatches; i++) {
        fnpatch_t *p = &g->fnpatches[i];

        if (!p->applied && strcmp(p->fn_name, sym->qualified) == 0) {
            if (p->patch_type == PATCH_CALL) {
                i32 offset = jump_offset(p->pc, sym->e.fn.addr);

                if (is_jump_imm(offset)) {
                    /* Near call: a single JAL reaches the target; the
                     * reserved trampoline slot (if any) becomes a NOP. */
                    g->instrs[p->pc] = JAL(RA, offset);
                    if (p->tramp_pc != (usize)-1)
                        g->instrs[p->tramp_pc] = NOP;
                } else {
                    /* Far call: AUIPC + JALR pair through a scratch reg. */
                    i32 target_addr  = (i32)(sym->e.fn.addr * INSTR_SIZE);
                    i32 current_addr = (i32)(p->pc * INSTR_SIZE);
                    i32 rel          = target_addr - current_addr;

                    i32 hi, lo;
                    split_imm(rel, &hi, &lo);

                    reg_t scratch    = p->scratch_reg ? p->scratch_reg : T0;
                    g->instrs[p->pc] = AUIPC(scratch, hi);
                    g->instrs[p->tramp_pc] = JALR(RA, scratch, lo);
                }
            } else if (p->patch_type == PATCH_ADDRESS) {
                /* For function address patches, replace the NOPs with AUIPC +
                 * ADDI for PC-relative addressing. Calculate target -
                 * current_pc. */
                i32 target_addr  = sym->e.fn.addr * INSTR_SIZE;
                i32 current_addr = p->pc * INSTR_SIZE;
                i32 offset       = target_addr - current_addr;

                /* Split offset into upper 20 bits and lower 12 bits */
                i32 hi, lo;
                split_imm(offset, &hi, &lo);

                /* Emit AUIPC + ADDI sequence */
                g->instrs[p->pc]       = AUIPC(p->target_reg, hi);
                g->instrs[p->tramp_pc] = ADDI(p->target_reg, p->target_reg, lo);
            }
            /* Mark as applied so we don't patch it again. */
            p->applied = true;
        }
    }
}
3008 +
3009 +
/* Generate code for a module.
 *
 * Statements are processed in multiple passes: first constants and
 * statics (so their addresses exist before any code references them),
 * then the module's default function (which must sit at the module's
 * address zero), then sub-modules and use-imports, and finally all
 * remaining declarations. */
static void gen_module(gen_t *g, module_t *m) {
    node_t *n = m->ast;

    if (m->compiled)
        return;

    /* Set the current module for span access */
    module_t *prev_mod = g->mod;
    g->mod             = m;

    /* Don't compile test modules unless we are in test mode. */
    if (m->attribs & ATTRIB_TEST && !(g->flags & FLAG_TEST)) {
        g->mod = prev_mod;
        return;
    }
    /* Generate all constants to ensure they're available */
    /* NOTE(review): the statement span is re-resolved before every pass;
     * presumably this is either cheap or the backing array can move —
     * if neither holds, a single lookup would suffice. TODO confirm. */
    node_t **stmts_const = nodespan_ptrs(&m->parser, n->val.block.stmts);
    for (usize i = 0; i < n->val.block.stmts.len; i++) {
        node_t *stmt = stmts_const[i];
        if (stmt->cls == NODE_CONST) {
            gen_const(g, stmt);
        } else if (stmt->cls == NODE_STATIC) {
            gen_static(g, stmt);
        }
    }
    /* Generate code for module entry point. */
    /* Must be at address _zero_ of the module. */
    if (n->sym->e.mod->default_fn) {
        gen_fn(g, n->sym->e.mod->default_fn->node);
    }

    /* Generate all declared modules */
    node_t **stmts_sub = nodespan_ptrs(&m->parser, n->val.block.stmts);
    for (usize i = 0; i < n->val.block.stmts.len; i++) {
        node_t *stmt = stmts_sub[i];
        if (stmt->cls == NODE_MOD) {
            gen_mod(g, stmt);
        }
        if (stmt->cls == NODE_USE) {
            gen_use(g, stmt);
        }
    }
    /* Generate code for everything else. */
    node_t **stmts = nodespan_ptrs(&m->parser, n->val.block.stmts);
    for (usize i = 0; i < n->val.block.stmts.len; i++) {
        node_t *stmt = stmts[i];
        /* Constants were handled in the first pass; the default function
         * was emitted at module address zero above. */
        if (stmt->cls == NODE_CONST)
            continue;
        if (stmt->cls == NODE_FN && stmt->sym->e.fn.attribs & ATTRIB_DEFAULT)
            continue;
        gen_node(g, stmt);
    }
    m->compiled = true;
    g->mod      = prev_mod;
}
3065 +
3066 +
/* Generate code for a module declaration. */
3067 +
static void gen_mod(gen_t *g, node_t *n) {
3068 +
    if (!n->sym) { /* Skip modules that aren't loaded like test modules. */
3069 +
        return;
3070 +
    }
3071 +
    module_t *mod = n->sym->e.mod;
3072 +
3073 +
    gen_module(g, mod);
3074 +
}
3075 +
3076 +
/* Generate code for a use declaration. */
3077 +
/* For function/variable imports, this generates the parent module. */
3078 +
static void gen_use(gen_t *g, node_t *n) {
3079 +
    /* For wildcard re-exports, n->sym is NULL since we're not binding
3080 +
     * the module itself, just re-exporting its symbols. */
3081 +
    if (!n->sym)
3082 +
        return;
3083 +
3084 +
    module_t *mod = n->sym->scope->mod;
3085 +
3086 +
    gen_module(g, mod);
3087 +
}
3088 +
3089 +
/* Generating nothing. This is used eg. for type declaration nodes
 * which don't have any associated code. */
static void gen_nop(gen_t *g, node_t *n) {
    (void)g; /* unused */
    (void)n; /* unused */
}
3095 +
3096 +
/* Pre-size the initialized data region by summing the aligned sizes of all
 * initialized (non-BSS) constants and statics across every module.
 *
 * Modules without an AST, and test modules outside of test mode, are
 * skipped — matching the modules gen_module will actually compile.
 *
 * NOTE(review): entries are rounded up to WORD_SIZE here; if `data_node`
 * places entries with stricter alignment (types with align > WORD_SIZE),
 * this estimate could diverge from the real layout — confirm against the
 * data-section implementation. */
static void data_presize(gen_t *g) {
    usize total = 0;

    for (usize m = 0; m < g->mm->nmodules; m++) {
        module_t *mod = &g->mm->modules[m];

        if (!mod->ast)
            continue;
        if (mod->attribs & ATTRIB_TEST && !(g->flags & FLAG_TEST))
            continue;

        node_t  *n     = mod->ast;
        node_t **stmts = nodespan_ptrs(&mod->parser, n->val.block.stmts);

        for (usize i = 0; i < n->val.block.stmts.len; i++) {
            node_t *stmt  = stmts[i];
            node_t *value = NULL;

            if (stmt->cls == NODE_CONST)
                value = stmt->val.constant.value;
            else if (stmt->cls == NODE_STATIC)
                value = stmt->val.static_decl.value;
            else
                continue;

            /* `undef` initializers go to BSS, not the initialized region. */
            if (value->cls == NODE_UNDEF)
                continue;

            total  = align(total, WORD_SIZE);
            total += stmt->type->size;
        }
    }
    g->data.rw_init_total = align(total, WORD_SIZE);
}
3133 +
3134 +
/* Generate code for the whole program, rooted at `root`. Returns 0 on
 * success; unresolved patches abort via bail(). */
int gen_emit(gen_t *g, module_t *root) {
    /* Pre-size the initialized data region so that BSS items are placed
     * after all initialized data in the rw section. */
    data_presize(g);

    /* The root module is generated first: it must live at address zero,
     * since it contains the entry point. */
    gen_module(g, root);

    /* Generate the `std` module if it is available. */
    module_t *std = module_manager_lookup_by_qualified_name(g->mm, "std");
    if (std)
        gen_module(g, std);

    /* Verify that every function-call patch was applied. */
    for (usize i = 0; i < g->nfnpatches; i++) {
        fnpatch_t *p = &g->fnpatches[i];

        if (!p->applied)
            bail("jump for function '%s' was not patched", p->fn_name);
    }
    /* Verify that every return patch was applied. */
    for (usize i = 0; i < g->fn.nretpatches; i++) {
        if (!g->fn.retpatches[i].applied)
            bail("return statement was not properly patched");
    }
    /* Verify that every break patch was applied. */
    for (usize i = 0; i < g->fn.nbrkpatches; i++) {
        if (!g->fn.brkpatches[i].applied)
            bail("break statement was not properly patched");
    }
    /* Keep root module reference for data emission. */
    g->mod = root;

    return 0;
}
3171 +
3172 +
/* Generate code for an `as` cast expression.
 *
 * Same-type and same-size casts are free (only the value's type metadata
 * changes); size-changing casts of stack values are realized as a typed
 * load followed by a typed store, so the proper zero/sign extension is
 * applied. */
static value_t gen_as_cast(gen_t *g, node_t *n) {
    node_t *expr = n->val.as_expr.expr;
    value_t val  = gen_expr(g, expr, false);

    /* If casting to the same type, no conversion needed */
    if (val.type == n->type)
        return val;

    /* For casts between different primitive types, we need to handle
     * size changes properly (e.g., u8 -> i32 requires zero extension) */
    /* If the types are the same size, just change the type metadata */
    if (val.type->size == n->type->size) {
        val.type = n->type;
        return val;
    }
    /* For size changes, we need to properly load and re-store the value
     * to ensure correct zero/sign extension */
    if (val.loc == LOC_STACK) {
        /* Load the value using the source type (proper sized load) */
        reg_t rd = emit_load(g, val);
        /* Push to stack using the target type (proper sized store) */
        i32 offset = emit_regpush(g, rd, n->type);
        freereg(g, rd);

        return value_stack(OFFSET(FP, offset), n->type);
    }
    /* For non-stack values (registers, immediates), just change the
     * type */
    val.type = n->type;

    return val;
}
3204 +
3205 +
/* Write the generated program out as raw binary: instructions to `text`,
 * read-only data to `data_ro` and read-write data to `data_rw`.
 *
 * NOTE(review): fwrite/fflush return values are not checked here, so a
 * short or failed write is silently ignored — confirm whether callers
 * check the streams afterwards. */
void gen_dump_bin(gen_t *g, FILE *text, FILE *data_ro, FILE *data_rw) {
    /* Write instructions */
    fwrite(g->instrs, sizeof(u32), g->ninstrs, text);
    /* Write data */
    data_emit_rw(&g->data, data_rw);
    data_emit_ro(&g->data, data_ro);

    fflush(text);
    fflush(data_ro);
    fflush(data_rw);
}
3216 +
3217 +
/* Initialize a `gen` object. */
3218 +
void gen_init(gen_t *g, types_t *t, module_manager_t *mm, u32 flags) {
3219 +
    g->ninstrs        = 0;
3220 +
    g->nfnpatches     = 0;
3221 +
    g->fn.current     = NULL;
3222 +
    g->fn.nretpatches = 0;
3223 +
    g->fn.nbrkpatches = 0;
3224 +
    g->regs           = ralloc();
3225 +
    g->types          = t;
3226 +
    g->loop.current   = NULL;
3227 +
    g->loop.end       = 0;
3228 +
    g->mm             = mm;
3229 +
    g->flags          = flags;
3230 +
3231 +
    /* Initialize data section */
3232 +
    data_init(&g->data);
3233 +
}
gen.h added +153 -0
1 +
#ifndef GEN_H
2 +
#define GEN_H
3 +
4 +
#include "ast.h"
5 +
#include "limits.h"
6 +
#include "module.h"
7 +
#include "ralloc.h"
8 +
#include "resolver.h"
9 +
#include "riscv.h"
10 +
#include "scanner.h"
11 +
#include "symtab.h"
12 +
#include "types.h"
13 +
14 +
#include "gen/data.h"
15 +
16 +
/* Create an offset. */
17 +
#define OFFSET(base, off) ((offset_t){ base, off })
18 +
19 +
/* Function call instruction patch, used to patch a jump location after
 * function addresses are calculated. */
typedef enum {
    PATCH_CALL,    /* Function call (JAL instruction) */
    PATCH_ADDRESS, /* Function address (load instruction) */
} patch_type_t;

/* Metadata for deferred branch patching that may expand to a trampoline. */
typedef struct {
    usize   pc;       /* Branch instruction index */
    usize   tramp_pc; /* Trampoline instruction index */
    iname_t op;       /* Branch operation */
    reg_t   rs1;      /* First operand register */
    reg_t   rs2;      /* Second operand register */
    bool    valid;    /* Whether this patch slot is in use */
} branch_patch_t;

/* Control-flow bookkeeping for a single `match` case. */
typedef struct {
    usize          skip_body; /* Jump location when pattern (or guard) fails */
    branch_patch_t guard_branch; /* Guard failure branch placeholder */
} match_case_ctrl_t;

/* A pending call/address fix-up, recorded while emitting code and
 * resolved once the target function's final address is known. */
typedef struct {
    const char  *fn_name;
    usize        pc;          /* Instruction index. */
    usize        tramp_pc;    /* Optional secondary slot for long jumps. */
    bool         applied;     /* Whether the patch was applied. */
    patch_type_t patch_type;  /* Type of patch to apply. */
    reg_t        target_reg;  /* Target register for address patches. */
    reg_t        scratch_reg; /* Scratch register for far calls. */
} fnpatch_t;
50 +
51 +
/* Return patch, used to jump to a function's epilogue from return statements.
 * NOTE(review): `fn_t` below stores `ctpatch_t` for both return and break
 * patches; this type appears unused in this header — confirm before
 * removing. */
typedef struct {
    usize pc;      /* Instruction index of the jump placeholder */
    bool  applied; /* Whether the patch was applied */
} retpatch_t;

/* Control flow patch, used to jump to a function's epilogue from
 * return statements, or jump to a loop's end. */
typedef struct {
    usize   pc;      /* Instruction index of the jump placeholder */
    bool    applied; /* Whether the patch was applied */
    node_t *loop;    /* Parent loop of this patch, for breaks. */
} ctpatch_t;

/* Loop context for handling control flow statements. */
typedef struct {
    usize   start;   /* Start address of loop (for `continue`). */
    usize   end;     /* End address of loop (for `break`). */
    node_t *current; /* Current loop we're in. */
} loop_t;
71 +
72 +
/* Function context for handling return statements. */
typedef struct {
    /* Return and break patches for current function.
     * Both arrays are reset when a new function begins (see gen_init). */
    ctpatch_t retpatches[MAX_RET_PATCHES];
    usize     nretpatches;
    ctpatch_t brkpatches[MAX_BRK_PATCHES];
    usize     nbrkpatches;
    symbol_t *current; /* Current function. */
} fn_t;
81 +
82 +
/* Code generator context. */
typedef struct gen_t {
    instr_t   instrs[MAX_INSTRS]; /* Emitted instruction buffer. */
    usize     ninstrs;            /* Number of instructions emitted. */
    loop_t    loop; /* Current loop. */
    fn_t      fn;   /* Current function. */
    fnpatch_t fnpatches[MAX_FN_PATCHES]; /* Pending call/address patches. */
    usize     nfnpatches;

    types_t          *types; /* Type table. */
    ralloc_t          regs;  /* Register allocator state. */
    module_manager_t *mm;
    struct module_t  *mod;  /* Current module being compiled */
    data_section_t    data; /* Static data section */
    u32               flags;
} gen_t;
98 +
99 +
/* Represents a tagged value (eg. an optional or enum with payload).
 * Tag and payload are tracked as two independent storage locations. */
typedef struct {
    value_t tag; /* Location of the tag */
    value_t val; /* Location of the value */
    type_t *typ; /* The tagged type (eg. `?T` or `enum`) */
} tval_t;
105 +
106 +
value_t value_stack(offset_t off, type_t *ty);
107 +
value_t value_addr(usize addr, i32 off, type_t *ty);
108 +
value_t value_imm(imm_t imm, type_t *ty);
109 +
value_t value_reg(reg_t r, type_t *ty);
110 +
111 +
/* Hand out the next free register from the allocator. */
static inline reg_t nextreg(gen_t *g) {
    return ralloc_next(&g->regs);
}

/* Like `nextreg`, but never returns register `r`. */
static inline reg_t nextreg_except(gen_t *g, reg_t r) {
    return ralloc_next_except(&g->regs, r);
}
118 +
119 +
/* A register counts as reserved when it is the frame pointer or is
 * currently held by the allocator. */
static inline bool isreserved(gen_t *g, reg_t r) {
    return r == FP || !ralloc_is_free(&g->regs, r);
}
124 +
125 +
/* Return register `r` to the allocator's free pool. */
static inline void freereg(gen_t *g, reg_t r) {
    ralloc_free(&g->regs, r);
}

/* Mark register `r` as reserved and hand it back to the caller. */
static inline reg_t usereg(gen_t *g, reg_t r) {
    ralloc_reserve(&g->regs, r);
    return r;
}
133 +
134 +
/* Calculate a jump offset. */
135 +
i32 jump_offset(usize from, usize to);
136 +
/* Reserve stack space for the given type. */
137 +
i32 reserve(gen_t *g, type_t *ty);
138 +
/* Like `align`, but rounds down. */
139 +
i32 align_stack(i32 addr, i32 alignment);
140 +
141 +
/* Optional and enum value helper functions */
142 +
tval_t tval_from_val(gen_t *g, value_t val);
143 +
i32    tval_payload_zero_size(type_t *container);
144 +
void   tval_store(gen_t *g, value_t dest, value_t value, i32 tag);
145 +
146 +
/* Write instructions in binary format. */
147 +
void gen_dump_bin(gen_t *g, FILE *text, FILE *data_ro, FILE *data_rw);
148 +
/* Generate code for the given module. */
149 +
int gen_emit(gen_t *g, module_t *root);
150 +
/* Initialize a `gen` object. */
151 +
void gen_init(gen_t *g, types_t *t, module_manager_t *mm, u32 flags);
152 +
153 +
#endif
gen/data.c added +364 -0
1 +
#include <assert.h>
2 +
#include <stdio.h>
3 +
#include <string.h>
4 +
5 +
#include "../ast.h"
6 +
#include "../gen.h"
7 +
#include "../io.h"
8 +
#include "../limits.h"
9 +
#include "../parser.h"
10 +
#include "../riscv.h"
11 +
#include "../strings.h"
12 +
13 +
#include "data.h"
14 +
15 +
static void emit_node_data(
16 +
    node_t *n, parser_t *p, FILE *out, data_section_t *d
17 +
);
18 +
static void emit_array_data(
19 +
    node_t *n, parser_t *p, FILE *out, data_section_t *d
20 +
);
21 +
22 +
/* Write a little-endian word */
23 +
static void write_le32(FILE *out, u32 v) {
24 +
    u8 bytes[4];
25 +
26 +
    bytes[0] = (v & 0xFF);
27 +
    bytes[1] = ((v >> 8) & 0xFF);
28 +
    bytes[2] = ((v >> 16) & 0xFF);
29 +
    bytes[3] = ((v >> 24) & 0xFF);
30 +
31 +
    fwrite(bytes, sizeof(bytes), 1, out);
32 +
}
33 +
34 +
/* Write a 64-bit value to `out` in little-endian byte order,
 * independent of host endianness. */
static void write_le64(FILE *out, u64 v) {
    u8 buf[8];

    for (usize i = 0; i < sizeof(buf); i++) {
        buf[i] = (v >> (8 * i)) & 0xFF;
    }
    fwrite(buf, sizeof(buf), 1, out);
}
48 +
49 +
/* Add a string literal to the data section.
 * Returns the offset in the data section (section base + entry offset).
 * Identical strings are de-duplicated; the bytes are stored by
 * reference, not copied, so `str` must outlive the data section. */
usize data_string(data_section_t *d, const char *str, usize len) {
    /* Check if this string already exists in the data section */
    for (usize i = 0; i < d->nstrings; i++) {
        string_data_t *existing = &d->strings[i];

        if (existing->length == len && memcmp(existing->data, str, len) == 0) {
            return d->ro_offset + existing->offset;
        }
    }
    /* New entry starts on a word boundary. */
    d->ro_size = align(d->ro_size, WORD_SIZE);

    /* Store the string information.
     * NOTE(review): no bounds check against MAX_STRING_LITERALS here —
     * confirm callers cannot overflow `strings`. */
    string_data_t *s = &d->strings[d->nstrings++];
    s->data          = str;
    s->length        = len;
    s->offset        = d->ro_size;

    usize data    = len + 1;                /* Account for `NULL` terminator */
    usize padded  = align(data, WORD_SIZE); /* Align to word boundary */
    d->ro_size   += padded;

    return d->ro_offset + s->offset;
}
74 +
75 +
/* Add a node value to the data section.
 * Initialized items are placed in [0, rw_init_total), BSS items in
 * [rw_init_total, ...).  Returns the offset in the data section.
 *
 * `NODE_UNDEF` nodes have no initializer, so they are placed in BSS
 * (zero-filled by the runtime, never written to the output file). */
usize data_node(
    data_section_t *d,
    parser_t       *p,
    node_t         *node,
    const char     *name,
    usize           name_len
) {
    bool bss = node->cls == NODE_UNDEF;

    /* Store the constant information */
    data_item_t *item          = &d->items[d->nitems++];
    item->kind                 = DATA_CONST;
    item->node                 = node;
    item->parser               = p;
    item->as.constant.name     = name;
    item->as.constant.name_len = name_len;

    if (bss) {
        /* BSS offsets are biased past the initialized region.
         * NOTE(review): relies on `rw_init_total` being precomputed by a
         * prior pass — confirm it is final before the first BSS item. */
        d->rw_bss_size  = align(d->rw_bss_size, WORD_SIZE);
        item->offset    = d->rw_init_total + d->rw_bss_size;
        d->rw_bss_size += node->type->size;
    } else {
        /* Initialized items grow the init cursor directly. */
        d->rw_init_size  = align(d->rw_init_size, WORD_SIZE);
        item->offset     = d->rw_init_size;
        d->rw_init_size += node->type->size;
    }
    return d->rw_offset + item->offset;
}
106 +
107 +
/* Add array data for a slice to the data section.
 * Returns the offset in the data section.  Arrays are de-duplicated by
 * node identity (pointer equality), not by content. */
usize data_array(data_section_t *d, parser_t *p, node_t *n) {
    /* Check if this array already exists in the data section */
    for (usize i = 0; i < d->nitems; i++) {
        data_item_t *existing = &d->items[i];

        if (existing->kind == DATA_ARRAY && existing->node == n) {
            return d->rw_offset + existing->offset;
        }
    }
    /* New entry starts on a word boundary. */
    d->rw_init_size = align(d->rw_init_size, WORD_SIZE);

    /* Store the array information */
    data_item_t *item     = &d->items[d->nitems++];
    item->kind            = DATA_ARRAY;
    item->offset          = d->rw_init_size;
    item->node            = n;
    item->parser          = p;
    item->as.array.length = n->val.array_lit.elems.len;
    item->as.array.elem   = n->type->info.slc.elem;

    d->rw_init_size += n->type->size;

    return d->rw_offset + item->offset;
}
133 +
134 +
/* Initialize data section */
135 +
void data_init(data_section_t *d) {
136 +
    d->nstrings      = 0;
137 +
    d->nitems        = 0;
138 +
    d->ro_size       = 0;
139 +
    d->rw_init_total = 0;
140 +
    d->rw_init_size  = 0;
141 +
    d->rw_bss_size   = 0;
142 +
    d->rw_offset     = DATA_RW_OFFSET;
143 +
    d->ro_offset     = DATA_RO_OFFSET;
144 +
}
145 +
146 +
/* For slices, we need to emit:
 * 1. The data pointer; points to array data
 * 2. The length */
static void emit_slice_data(node_t *n, FILE *out, data_section_t *d) {
    if (n->cls == NODE_STRING) {
        /* For string literals, look up or register the string offset */
        usize addr = 0;

        for (usize i = 0; i < d->nstrings; i++) {
            string_data_t *s = &d->strings[i];
            if (s->length == n->val.string_lit.length &&
                memcmp(s->data, n->val.string_lit.data, s->length) == 0) {
                addr = s->offset + d->ro_offset;
                break;
            }
        }
        /* If string not found, register it now.
         * Using 0 as the "not found" sentinel is safe because every real
         * address includes the nonzero `ro_offset` base. */
        if (!addr) {
            addr = data_string(
                d, n->val.string_lit.data, n->val.string_lit.length
            );
        }
        /* Slice header: pointer word then length word. */
        write_le64(out, addr);
        write_le64(out, n->val.string_lit.length);
    } else {
        bail("unsupported slice node %s", node_names[n->cls]);
    }
}
174 +
175 +
/* Serialize a constant AST node into the output stream `out`.
 *
 * Emits the node's bytes little-endian according to its type, recursing
 * for aggregates, then appends padding up to the type's alignment. */
static void emit_node_data(
    node_t *n, parser_t *p, FILE *out, data_section_t *d
) {
    /* Handle undefined nodes by emitting zeros for their type size */
    if (n->cls == NODE_UNDEF) {
        for (i32 i = 0; i < n->type->size; i++) {
            fputc(0, out);
        }
        return;
    }
    /* Resolve identifiers that reference constants */
    if (n->cls == NODE_IDENT && n->sym && n->sym->kind == SYM_CONSTANT) {
        emit_node_data(n->sym->node->val.constant.value, p, out, d);
        return;
    }
    switch (n->type->cls) {
    case TYPE_ARRAY: {
        if (n->cls == NODE_ARRAY_LIT) {
            /* Each element emitted back-to-back in declaration order. */
            node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems);
            for (usize j = 0; j < n->val.array_lit.elems.len; j++) {
                emit_node_data(elems[j], p, out, d);
            }
        } else if (n->cls == NODE_ARRAY_REPEAT_LIT) {
            /* Array repeat literal: emit the same value N times */
            usize count = n->val.array_repeat_lit.count->val.number.value.u;
            for (usize j = 0; j < count; j++) {
                emit_node_data(n->val.array_repeat_lit.value, p, out, d);
            }
        } else {
            bail("unsupported array node %s", node_names[n->cls]);
        }
        break;
    }
    case TYPE_RECORD: {
        /* Emit record fields in order, with proper inter-field padding. */
        node_t **fields = nodespan_ptrs(p, n->val.record_lit.fields);
        i32      pos    = 0;

        for (usize i = 0; i < n->val.record_lit.fields.len; i++) {
            node_t   *field     = fields[i];
            node_t   *field_val = field->val.record_lit_field.value;
            symbol_t *field_sym = field->sym;
            i32       field_off = field_sym->e.field.offset;

            /* Emit padding bytes to reach the field's offset. */
            while (pos < field_off) {
                fputc(0, out);
                pos++;
            }
            emit_node_data(field_val, p, out, d);
            /* NOTE(review): advances by the value's type size; assumes it
             * matches the field's declared size — confirm for coerced
             * initializers. */
            pos += field_val->type->size;
        }
        /* Emit trailing padding to reach the full record size. */
        while (pos < n->type->size) {
            fputc(0, out);
            pos++;
        }
        break;
    }
    case TYPE_SLICE:
        emit_slice_data(n, out, d);
        break;
    case TYPE_UNION: {
        assert(n->sym);
        assert(n->sym->node);
        /* For union types, write the tag byte and pad to the type size. */
        fputc((u8)n->sym->node->val.union_variant.value, out);
        for (i32 i = 1; i < n->type->size; i++) {
            fputc(0, out);
        }
        break;
    }
    case TYPE_BOOL:
        fputc(n->val.bool_lit ? 1 : 0, out);
        break;
    case TYPE_U8: {
        u8 value = (u8)n->val.number.value.u;
        /* Character literals store their byte in `char_lit`, not in the
         * number union. */
        if (n->cls == NODE_CHAR)
            value = (u8)n->val.char_lit;
        fputc(value, out);
        break;
    }
    case TYPE_U16: {
        u16 value = (u16)n->val.number.value.u;
        fputc(value & 0xFF, out);
        fputc((value >> 8) & 0xFF, out);
        break;
    }
    case TYPE_U32:
        write_le32(out, n->val.number.value.u);
        break;
    case TYPE_I8:
        fputc((u8)n->val.number.value.i, out);
        break;
    case TYPE_I16: {
        i16 value = (i16)n->val.number.value.i;
        fputc(value & 0xFF, out);
        fputc((value >> 8) & 0xFF, out);
        break;
    }
    case TYPE_I32:
        write_le32(out, n->val.number.value.i);
        break;
    default:
        /* NOTE(review): any other type class (e.g. 64-bit integers) emits
         * no bytes here and silently falls through to padding — confirm
         * this is intended. */
        break;
    }

    /* Add padding to ensure alignment */
    usize size    = n->type->size;
    usize aligned = align(size, n->type->align);
    usize padding = aligned - size;

    for (usize i = 0; i < padding; i++) {
        fputc(0, out);
    }
}
291 +
292 +
/* Helper function to emit array data */
293 +
static void emit_array_data(
294 +
    node_t *n, parser_t *p, FILE *out, data_section_t *d
295 +
) {
296 +
    /* Emit each array element */
297 +
    node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems);
298 +
    for (usize i = 0; i < n->val.array_lit.elems.len; i++) {
299 +
        emit_node_data(elems[i], p, out, d);
300 +
    }
301 +
    /* Padding */
302 +
    usize aligned = align(n->type->size, n->type->align);
303 +
    usize padding = aligned - n->type->size;
304 +
305 +
    for (usize i = 0; i < padding; i++) {
306 +
        fputc(0, out);
307 +
    }
308 +
}
309 +
310 +
/* Emit the data section to the output file */
311 +
void data_emit_ro(data_section_t *d, FILE *out) {
312 +
    if (!out || d->ro_size == 0) {
313 +
        return; /* No data to emit */
314 +
    }
315 +
    for (usize i = 0; i < d->nstrings; i++) {
316 +
        string_data_t *s = &d->strings[i];
317 +
318 +
        /* Write string data */
319 +
        fwrite(s->data, 1, s->length, out);
320 +
        fputc(0, out); /* NULL terminator */
321 +
322 +
        /* Write padding */
323 +
        u32 padding = (WORD_SIZE - ((s->length + 1) % WORD_SIZE)) % WORD_SIZE;
324 +
325 +
        for (usize j = 0; j < padding; j++) {
326 +
            fputc(0, out);
327 +
        }
328 +
    }
329 +
}
330 +
331 +
/* Emit the initialized read-write data section to the output file. */
void data_emit_rw(data_section_t *d, FILE *out) {
    if (!out || d->rw_init_total == 0) {
        return; /* No initialized data to emit */
    }
    /* Emit only initialized items (offsets < rw_init_total).
     * BSS items (offsets >= rw_init_total) are zero-initialized by the
     * runtime and are not written to the file. */
    usize current = 0; /* Bytes written to `out` so far */

    for (usize i = 0; i < d->nitems; i++) {
        data_item_t *item = &d->items[i];

        /* Skip BSS items */
        if (item->offset >= d->rw_init_total)
            continue;

        /* Pad to reach the item's offset */
        while (current < item->offset) {
            fputc(0, out);
            current++;
        }
        if (item->kind == DATA_ARRAY) {
            emit_array_data(item->node, item->parser, out, d);
        } else {
            emit_node_data(item->node, item->parser, out, d);
        }
        /* NOTE(review): uses ftell to learn how many bytes the item
         * emitted; assumes `out` is seekable and started at position 0,
         * and ignores ftell's -1 error return — confirm both. */
        current = ftell(out);
    }
    /* Pad to the full initialized size */
    while (current < d->rw_init_total) {
        fputc(0, out);
        current++;
    }
}
gen/data.h added +79 -0
1 +
#ifndef DATA_H
2 +
#define DATA_H
3 +
4 +
#include <stdio.h>
5 +
6 +
#include "../limits.h"
7 +
#include "../resolver.h"
8 +
#include "../types.h"
9 +
10 +
/* Data section offsets in memory */
11 +
#define DATA_RO_OFFSET 0x10000
12 +
#define DATA_RW_OFFSET 0xFFFFF0
13 +
14 +
/* String literal data in the data section.
 * The bytes are stored by reference (see `data_string`), not copied. */
typedef struct {
    const char *data;   /* String content */
    usize       length; /* String length, excluding the NUL terminator */
    usize       offset; /* Offset in data section */
} string_data_t;
20 +
21 +
/* Kind of data item in the rw data section */
typedef enum {
    DATA_ARRAY, /* Array backing data for slices */
    DATA_CONST, /* Named constant */
} data_kind_t;
26 +
27 +
/* Unified data item for the rw data section.
 * The `as` union is discriminated by `kind`. */
typedef struct {
    data_kind_t      kind;
    usize            offset; /* Offset in data section */
    node_t          *node;   /* AST node holding the item's value */
    struct parser_t *parser; /* Parser that owns this node's spans */
    union {
        struct {
            usize   length; /* Number of elements */
            type_t *elem;   /* Element type */
        } array;             /* kind == DATA_ARRAY */
        struct {
            const char *name;     /* Constant name */
            usize       name_len; /* Length of name */
        } constant;          /* kind == DATA_CONST */
    } as;
} data_item_t;
44 +
45 +
/* Data section for static data (strings, constants, etc.)
 * Layout: strings live in the read-only section; constants and array
 * backing stores live in the read-write section, initialized items
 * first, BSS items after `rw_init_total`. */
typedef struct {
    string_data_t strings[MAX_STRING_LITERALS];
    usize         nstrings;
    data_item_t   items[MAX_CONSTANTS * 2]; /* Arrays and constants */
    usize         nitems;
    usize         ro_size;       /* Total size of read-only data section */
    usize         ro_offset;     /* Read-only data section base address */
    usize         rw_offset;     /* Read-write data section base address */
    usize         rw_init_total; /* Pre-computed total size */
    usize         rw_init_size;  /* Current init cursor */
    usize         rw_bss_size;   /* Current BSS cursor */
} data_section_t;
58 +
59 +
/* Initialize the data section management. */
60 +
void data_init(data_section_t *d);
61 +
/* Add a string literal to the data section.
62 +
 * Returns the data section offset where the string is stored. */
63 +
usize data_string(data_section_t *d, const char *str, usize len);
64 +
/* Add array data for a slice to the data section.
65 +
 * Returns the offset in the data section. */
66 +
usize data_array(data_section_t *d, struct parser_t *p, node_t *array_node);
67 +
/* Add a node to the data section. */
68 +
usize data_node(
69 +
    data_section_t  *d,
70 +
    struct parser_t *p,
71 +
    node_t          *node,
72 +
    const char      *name,
73 +
    usize            name_len
74 +
);
75 +
/* Emit the data section to the output. */
76 +
void data_emit_ro(data_section_t *d, FILE *out);
77 +
void data_emit_rw(data_section_t *d, FILE *out);
78 +
79 +
#endif
gen/emit.c added +1176 -0
1 +
#include <stdio.h>
2 +
#include <stdlib.h>
3 +
4 +
#include "emit.h"
5 +
6 +
/* Split an immediate into a 20-bit upper part (for LUI/AUIPC) and a
 * sign-extended 12-bit lower part (for ADDI and friends), such that
 * (hi << 12) + lo == imm.
 *
 * `hi` is computed from `imm + 0x800` so that the consumer's sign
 * extension of `lo` is compensated for.  The addition and shift are
 * done in unsigned arithmetic: the original signed form overflows
 * (undefined behavior) for imm > INT32_MAX - 0x800, and right-shifting
 * a negative signed value is implementation-defined.  The unsigned
 * form produces identical results on all inputs. */
void split_imm(i32 imm, i32 *hi, i32 *lo) {
    u32 u = (u32)imm;

    *hi = (i32)(((u + 0x800u) >> 12) & 0xFFFFFu);
    *lo = imm & 0xFFF;

    if (*lo & 0x800) {
        /* The highest bit of the lower 12 bits is set, so the consumer
         * will sign-extend it; represent `lo` as negative here. */
        *lo = *lo | ~0xFFF;
    }
}
16 +
17 +
/* Load immediate `imm` into `rd`.
 * Small immediates use a single ADDI from x0; otherwise a LUI/ADDIW
 * pair is emitted. */
void emit_li(gen_t *g, reg_t rd, i32 imm) {
    if (is_small(imm)) {
        emit(g, instr(I_ADDI, rd, 0, 0, imm));
        return;
    }
    i32 hi, lo;
    split_imm(imm, &hi, &lo);

    emit(g, instr(I_LUI, rd, 0, 0, hi));

    /* ADDIW (not ADDI) sign-extends the 32-bit result on RV64, so
     * LUI's upper-bit sign-extension leaves no garbage in bits 63:32.
     * When lo == 0 this is SEXT.W (ADDIW rd, rd, 0), which still
     * canonicalizes the upper bits — so one instruction covers both
     * cases. */
    emit(g, instr(I_ADDIW, rd, rd, 0, lo));
}
36 +
37 +
/* Emit a register-to-register move, eliding self-moves. */
void emit_mv(gen_t *g, reg_t dst, reg_t src) {
    if (dst == src)
        return;
    emit(g, instr(I_MV, dst, src, 0, 0));
}
42 +
43 +
/* Emit an unconditional jump to instruction index `offset`. */
usize emit_jump(gen_t *g, usize offset) {
    i32 rel = jump_offset(g->ninstrs, offset);

    return emit(g, JMP(rel));
}
46 +
47 +
/* Compute hi/lo split for PC-relative offset to target address. */
48 +
static void pc_rel_offset(gen_t *g, usize addr, i32 *hi, i32 *lo) {
49 +
    i32 target_addr  = (i32)(addr * INSTR_SIZE);
50 +
    i32 current_addr = (i32)(g->ninstrs * INSTR_SIZE);
51 +
    i32 offset       = target_addr - current_addr;
52 +
    split_imm(offset, hi, lo);
53 +
}
54 +
55 +
/* Materialize the address of instruction index `addr` into `rd` with an
 * AUIPC/ADDI pair.  The offset is computed before emitting, relative to
 * the AUIPC that is about to be placed at the current position. */
void emit_pc_rel_addr(gen_t *g, reg_t rd, usize addr) {
    i32 hi;
    i32 lo;

    pc_rel_offset(g, addr, &hi, &lo);
    emit(g, AUIPC(rd, hi));
    emit(g, ADDI(rd, rd, lo));
}
61 +
62 +
/* Far call via AUIPC + JALR, reaching targets beyond JAL's immediate
 * range.  Returns the index of the AUIPC so it can be patched later. */
static usize emit_call_far(gen_t *g, usize addr, reg_t scratch) {
    i32 hi, lo;

    pc_rel_offset(g, addr, &hi, &lo);

    usize pc = emit(g, AUIPC(scratch, hi));
    emit(g, JALR(RA, scratch, lo));

    return pc;
}
69 +
70 +
/* Emit a call to instruction index `addr`, choosing between a short JAL
 * and a far AUIPC/JALR sequence based on the displacement. */
usize emit_call(gen_t *g, usize addr) {
    i32 offset = jump_offset(g->ninstrs, addr);

    if (!is_jump_imm(offset)) {
        /* Too far for JAL: route through a scratch register. */
        reg_t scratch = nextreg(g);
        usize pc      = emit_call_far(g, addr, scratch);

        freereg(g, scratch);
        return pc;
    }
    return emit(g, JAL(RA, offset));
}
80 +
81 +
/* Copy a record field by field, so each member is moved with its own
 * type's load/store width. */
void emit_record_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
    for (usize i = 0; i < ty->info.srt.nfields; i++) {
        symbol_t *f   = ty->info.srt.fields[i];
        i32       off = f->e.field.offset;

        emit_memcopy(
            g,
            OFFSET(src.base, src.offset + off),
            OFFSET(dst.base, dst.offset + off),
            f->e.field.typ
        );
    }
}
92 +
93 +
/* Derive the location of a field at displacement `off` inside the
 * aggregate located by `sval`.  Only addressable locations (register
 * base, stack, absolute address) can hold aggregates; anything else is
 * a codegen bug. */
static value_t emit_field_get(value_t sval, i32 off, type_t *typ) {
    if (sval.loc == LOC_REG) {
        /* The register holds a base address. */
        return value_stack(OFFSET(sval.as.reg, off), typ);
    }
    if (sval.loc == LOC_STACK) {
        return value_stack(
            OFFSET(sval.as.off.base, sval.as.off.offset + off), typ
        );
    }
    if (sval.loc == LOC_ADDR) {
        return value_addr(sval.as.adr.base, sval.as.adr.offset + off, typ);
    }
    /* LOC_NONE / LOC_IMM cannot contain a field. */
    abort();
}
109 +
110 +
/* RISC-V load/store immediates are limited to signed 12 bits. This helper folds
 * large displacements into a temporary register so the generated instruction
 * still uses the small-immediate forms, keeping the addressing logic in callers
 * simple. */
static addr_adj_t adjust_addr_avoid(
    gen_t *g, reg_t base, i32 *offset, reg_t avoid
) {
    /* Small offsets need no adjustment; the caller keeps its base. */
    if (is_small(*offset))
        return (addr_adj_t){ base, false };

    /* NOTE(review): `avoid == 0` means "no register to avoid", which
     * collides with register x0 (ZERO); x0 can therefore never be the
     * avoided register — confirm this is intended. */
    reg_t tmp = avoid ? nextreg_except(g, avoid) : nextreg(g);

    /* tmp = base + offset; the caller's offset is consumed (set to 0). */
    emit_li(g, tmp, *offset);
    emit(g, ADD(tmp, base, tmp));
    *offset = 0;

    /* `temp` is true: the caller must release via `release_addr`. */
    return (addr_adj_t){ tmp, true };
}
128 +
129 +
/* As `adjust_addr_avoid`, with no register excluded from allocation. */
static addr_adj_t adjust_addr(gen_t *g, reg_t base, i32 *offset) {
    return adjust_addr_avoid(g, base, offset, 0);
}
132 +
133 +
/* Release any temporary register created by `adjust_addr`. */
134 +
static void release_addr(gen_t *g, addr_adj_t adj) {
135 +
    if (adj.temp)
136 +
        freereg(g, adj.base);
137 +
}
138 +
139 +
/* Compute dst = base + offset, folding offsets too large for ADDI's
 * 12-bit immediate through a temporary register. */
void emit_addr_offset(gen_t *g, reg_t dst, reg_t base, i32 offset) {
    if (!is_small(offset)) {
        reg_t tmp = nextreg(g);

        emit_li(g, tmp, offset);
        emit(g, ADD(dst, base, tmp));
        freereg(g, tmp);
        return;
    }
    emit(g, ADDI(dst, base, offset));
}
149 +
150 +
/* Build a slice header { pointer, length } on the stack at FP+offset.
 * `ptr` is the absolute address of the backing data, `len` the element
 * count.  Returns the stack location of the new header. */
value_t emit_slice_lit(
    gen_t *g, i32 offset, usize ptr, usize len, type_t *typ
) {
    /* Both words are stored with pointer width; a single static pointer
     * type suffices for the immediates. */
    static type_t ptr_type = { .cls = TYPE_PTR };
    imm_t         imm_ptr  = (imm_t){ .u = ptr }; /* Slice pointer */
    imm_t         imm_len  = (imm_t){ .u = len }; /* Slice length */

    emit_store(
        g, value_imm(imm_ptr, &ptr_type), FP, offset + SLICE_FIELD_PTR_OFFSET
    );
    emit_store(
        g, value_imm(imm_len, &ptr_type), FP, offset + SLICE_FIELD_LEN_OFFSET
    );
    return value_stack(OFFSET(FP, offset), typ);
}
165 +
166 +
/* Locate `field` within the record value `sval`, using the field's
 * layout offset and the type from its declaration node. */
value_t emit_record_field_get(value_t sval, symbol_t *field) {
    return emit_field_get(sval, field->e.field.offset, field->node->type);
}
172 +
173 +
void emit_record_field_set(
174 +
    gen_t *g, value_t val, reg_t base, i32 record_offset, symbol_t *field
175 +
) {
176 +
    i32 field_offset  = field->e.field.offset;
177 +
    i32 target_offset = record_offset + field_offset;
178 +
179 +
    value_t dest = value_stack(OFFSET(base, target_offset), field->e.field.typ);
180 +
181 +
    emit_replace(g, dest, val);
182 +
}
183 +
184 +
/* Emit code that zeroes `size` bytes starting at dst.base + dst.offset.
 *
 * Full words are cleared in a loop; the tail (< WORD_SIZE bytes, since
 * `align_stack` rounds down) is cleared with at most one SW, one SH and
 * one SB. */
void emit_memzero(gen_t *g, offset_t dst, i32 size) {
    if (size == 0) /* Nothing to do for zero-sized regions */
        return;

    /* cursor = address of the next byte to clear. */
    reg_t cursor = nextreg(g);
    emit_addr_offset(g, cursor, dst.base, dst.offset);

    /* Calculate word-aligned size and remainder */
    i32 aligned_size = align_stack(size, WORD_SIZE);
    i32 remainder    = size - aligned_size;

    reg_t end = ZERO;

    /* Only use the word-based loop if we have at least one complete word */
    if (aligned_size > 0) {
        end = nextreg(g);
        emit_addr_offset(g, end, cursor, aligned_size);

        /* Placeholder NOP is patched below into the loop's exit branch
         * once the loop end address is known. */
        usize loop_start = g->ninstrs;
        usize branch_end = emit(g, NOP);

        /* Store zero to current address and increment by word size */
        emit(g, SD(ZERO, cursor, 0));
        emit(g, ADDI(cursor, cursor, WORD_SIZE));
        emit(g, JMP(jump_offset(g->ninstrs, loop_start))); /* Loop back */

        /* Patch: exit the loop when cursor >= end. */
        g->instrs[branch_end] =
            BGE(cursor, end, jump_offset(branch_end, g->ninstrs));
    }

    /* At least four bytes left */
    if (remainder >= 4) { /* Store a word (4 bytes) */
        emit(g, SW(ZERO, cursor, 0));
        emit(g, ADDI(cursor, cursor, 4));
        remainder -= 4;
    }
    /* At least two bytes left */
    if (remainder >= 2) { /* Store a halfword (2 bytes) */
        emit(g, SH(ZERO, cursor, 0));
        emit(g, ADDI(cursor, cursor, 2));
        remainder -= 2;
    }
    /* One byte left */
    if (remainder == 1) {
        emit(g, SB(ZERO, cursor, 0));
    }
    freereg(g, cursor);
    /* `end` was only allocated when the word loop ran. */
    if (aligned_size > 0)
        freereg(g, end);
}
234 +
235 +
/* Store a same-typed replacement into a tagged (optional or result)
 * value that lives on the stack or in the data section. Factored out of
 * emit_replace, where this switch previously appeared verbatim for both
 * TYPE_OPT and TYPE_RESULT. */
static void emit_tagged_replace(gen_t *g, value_t old, value_t new) {
    switch (old.loc) {
    case LOC_STACK:
        /* NOTE(review): `new.as.off` is read unconditionally here, so
         * this assumes `new` is also stack-resident — confirm callers. */
        emit_memcopy(g, new.as.off, old.as.off, old.type);
        break;
    case LOC_ADDR: {
        /* Materialize the data-section base address, then store. */
        reg_t base = nextreg(g);
        emit_li(g, base, old.as.adr.base);
        emit_store(g, new, base, old.as.adr.offset);
        freereg(g, base);
        break;
    }
    default:
        bail(
            "can't replace tagged value with storage location %d",
            old.loc
        );
    }
}

/* Overwrite the storage of `old` with the value `new`. Handles tagged
 * types (optionals and results) including T -> ?T and payload/error ->
 * result coercions; plain types are stored according to `old`'s storage
 * location. Frees `new`'s register afterwards if it was a temporary. */
void emit_replace(gen_t *g, value_t old, value_t new) {
    if (old.type->cls == TYPE_OPT) {
        if (new.type->cls == TYPE_OPT) {
            /* ?T <- ?T: copy tag and payload wholesale. */
            emit_tagged_replace(g, old, new);
        } else if (new.type->cls == old.type->info.opt.elem->cls) {
            /* T -> ?T coercion: create some value */
            tval_store(g, old, new, 1);
        } else {
            bail(
                "cannot assign %s to %s; type mismatch",
                type_names[new.type->cls],
                type_names[old.type->cls]
            );
        }
    } else if (old.type->cls == TYPE_RESULT) {
        type_t *payload = old.type->info.res.payload;
        type_t *err     = old.type->info.res.err;

        if (new.type->cls == TYPE_RESULT) {
            /* result <- result: copy tag and payload wholesale. */
            emit_tagged_replace(g, old, new);
        } else if (new.type == payload) {
            emit_result_store_success(g, old, new);
        } else if (new.type == err) {
            emit_result_store_error(g, old, new);
        } else {
            bail(
                "cannot assign %s to %s; type mismatch",
                type_names[new.type->cls],
                type_names[old.type->cls]
            );
        }
    } else {
        /* Non-optional assignments (original logic) */
        switch (old.loc) {
        case LOC_REG:
            /* Load the new value directly into the register of
             * the old value. */
            emit_load_into(g, old.as.reg, new);
            break;
        case LOC_STACK:
            emit_store(g, new, old.as.off.base, old.as.off.offset);
            break;
        case LOC_ADDR: {
            reg_t base = usereg(g, nextreg(g));
            emit_li(g, base, old.as.adr.base);
            emit_store(g, new, base, old.as.adr.offset);
            freereg(g, base);
            break;
        }
        default:
            bail("can't replace variable with storage location %d", old.loc);
        }
    }

    /* Free the new location and update the value, since we don't
     * need two copies of the value. Only free temporaries so we don't
     * invalidate live values that are intentionally kept in registers
     * (eg. function parameters). */
    if (new.loc == LOC_REG && new.temp) {
        freereg(g, new.as.reg);
    }
}
331 +
332 +
/* Copy an array element by element, recursing through emit_memcopy so
 * that nested compound element types are handled correctly. */
void emit_array_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
    type_t *et = ty->info.ary.elem;
    usize   n  = ty->info.ary.length;

    for (usize idx = 0; idx < n; idx++) {
        i32 delta = (i32)(idx * et->size);
        emit_memcopy(
            g,
            OFFSET(src.base, src.offset + delta),
            OFFSET(dst.base, dst.offset + delta),
            et
        );
    }
}
344 +
345 +
/* Copy single value between offsets, via register */
346 +
static void emit_offset_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
347 +
    reg_t rs = emit_load(g, value_stack(src, ty));
348 +
    emit_regstore(g, rs, dst.base, dst.offset, ty);
349 +
    freereg(g, rs);
350 +
}
351 +
352 +
/* Copy a full machine word (WORD_SIZE bytes) using LD/SD. */
353 +
static void emit_dword_copy(gen_t *g, offset_t src, offset_t dst) {
354 +
    reg_t      tmp     = nextreg(g);
355 +
    i32        src_off = src.offset;
356 +
    i32        dst_off = dst.offset;
357 +
    addr_adj_t src_adj = adjust_addr(g, src.base, &src_off);
358 +
    emit(g, LD(tmp, src_adj.base, src_off));
359 +
    release_addr(g, src_adj);
360 +
    addr_adj_t dst_adj = adjust_addr(g, dst.base, &dst_off);
361 +
    emit(g, SD(tmp, dst_adj.base, dst_off));
362 +
    release_addr(g, dst_adj);
363 +
    freereg(g, tmp);
364 +
}
365 +
366 +
/* Copy tagged values (optional and payload unions).
 * Copies the 1-byte tag, zeroes inter-field padding, then copies the
 * payload: recursively via emit_memcopy when `value_type` is known
 * (optionals), otherwise as raw bytes in descending widths 8/4/2/1
 * (payload unions). */
static void emit_tval_copy(
    gen_t   *g,
    offset_t src,
    offset_t dst,
    usize    size,       /* payload size in bytes; 0 means tag only */
    i32      val_offset, /* payload offset from start of tagged value */
    type_t  *value_type  /* payload type, or NULL for raw union payloads */
) {
    /* Copy tag byte */
    emit_offset_copy(g, src, dst, g->types->type_u8);

    /* Zero padding between tag (1 byte) and payload start, so that
     * byte-level equality comparisons of tagged values work correctly
     * even when the destination was previously uninitialized. */
    if (val_offset > TAG_SIZE) {
        emit_memzero(
            g, OFFSET(dst.base, dst.offset + TAG_SIZE), val_offset - TAG_SIZE
        );
    }

    if (size == 0)
        return;

    offset_t val_src = OFFSET(src.base, src.offset + val_offset);
    offset_t val_dst = OFFSET(dst.base, dst.offset + val_offset);

    if (value_type) {
        /* Use recursive memcopy for typed data (optionals) */
        emit_memcopy(g, val_src, val_dst, value_type);
        return;
    }
    /* Copy raw bytes for untyped data (payload unions) */
    usize copied = 0;

    /* Copy whole dwords (8 bytes) */
    while (copied + WORD_SIZE <= size) {
        emit_dword_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied)
        );
        copied += WORD_SIZE;
    }
    /* Copy remaining word (4 bytes) if present */
    if (size - copied >= 4) {
        emit_offset_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied),
            g->types->type_i32
        );
        copied += 4;
    }
    /* Copy remaining halfword if present */
    if (size - copied >= 2) {
        emit_offset_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied),
            g->types->type_u16
        );
        copied += 2;
    }
    /* Copy remaining byte if present */
    if (size - copied == 1) {
        emit_offset_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied),
            g->types->type_u8
        );
    }
}
440 +
441 +
/* Copy a value of type `ty` from `src` to `dst`, dispatching on the
 * type class. Compound types copy recursively; primitives go through a
 * register. Copying a location onto itself is a no-op. */
void emit_memcopy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
    if (src.base == dst.base && src.offset == dst.offset)
        return; /* Same location: nothing to do. */

    if (ty->cls == TYPE_RECORD) {
        emit_record_copy(g, src, dst, ty);
    } else if (ty->cls == TYPE_ARRAY) {
        emit_array_copy(g, src, dst, ty);
    } else if (ty->cls == TYPE_OPT) {
        /* Optionals: copy the tag byte plus the typed payload. */
        type_t *elem = ty->info.opt.elem;
        i32     off  = align(TAG_SIZE, elem->align);
        emit_tval_copy(g, src, dst, elem->size, off, elem);
    } else if (ty->cls == TYPE_UNION && ty->info.uni.has_payload) {
        /* Copy the full payload area including alignment padding, so
         * that byte-level equality comparisons work correctly. */
        i32   off          = align(TAG_SIZE, ty->align);
        usize payload_size = ty->size - off;
        emit_tval_copy(g, src, dst, payload_size, off, NULL);
    } else if (ty->cls == TYPE_SLICE) {
        /* Slices are a fat pointer: data pointer + length, one dword each. */
        emit_dword_copy(g, src, dst);
        emit_dword_copy(
            g,
            OFFSET(src.base, src.offset + WORD_SIZE),
            OFFSET(dst.base, dst.offset + WORD_SIZE)
        );
    } else if (ty->cls == TYPE_RESULT) {
        bail("result types are never materialized");
    } else {
        /* Everything else (primitives, payload-less unions) copies
         * through a register. */
        emit_offset_copy(g, src, dst, ty);
    }
}
489 +
490 +
/* Store value `v` to memory at `base + offset`, and return a
 * stack-flavored value describing the stored location. */
value_t emit_store(gen_t *g, value_t v, reg_t base, int offset) {
    switch (v.loc) {
    case LOC_IMM: {
        /* Materialize the immediate, store it, free the scratch reg. */
        reg_t tmp = nextreg(g);
        emit_load_into(g, tmp, v);
        emit_regstore(g, tmp, base, offset, v.type);
        freereg(g, tmp);

        break;
    }
    case LOC_REG:
        if (type_is_passed_by_ref(v.type)) {
            /* The register holds an address: deep-copy the pointee. */
            emit_memcopy(g, OFFSET(v.as.reg, 0), OFFSET(base, offset), v.type);
        } else {
            emit_regstore(g, v.as.reg, base, offset, v.type);
        }
        break;
    case LOC_STACK:
        emit_memcopy(g, v.as.off, OFFSET(base, offset), v.type);
        break;
    case LOC_ADDR: {
        /* Copy out of the data section via a scratch address register. */
        reg_t adr = nextreg(g);
        emit_li(g, adr, v.as.adr.base);
        emit_memcopy(
            g, OFFSET(adr, v.as.adr.offset), OFFSET(base, offset), v.type
        );
        freereg(g, adr);

        break;
    }
    case LOC_NONE:
        break;
    }
    return value_stack(OFFSET(base, offset), v.type);
}
527 +
528 +
/* Get `v` into a register. A temporary already held in a register is
 * reused as-is; anything else is loaded into a fresh register. */
reg_t emit_load(gen_t *g, value_t v) {
    if (v.loc != LOC_REG || !v.temp)
        return emit_load_into(g, nextreg(g), v);

    return v.as.reg;
}
535 +
536 +
/* Load a full machine dword (WORD_SIZE = 8 bytes) from a value. */
reg_t emit_load_dword(gen_t *g, value_t v) {
    /* Use TYPE_PTR to trigger LD (8-byte load). */
    /* NOTE(review): the stack-local `ptr_type` must not be retained past
     * this call; emit_load only reads it while emitting. Also, `temp` is
     * not carried over from `v`, so a temporary LOC_REG value is copied
     * into a fresh register rather than reused — confirm intended. */
    type_t ptr_type = { .cls = TYPE_PTR };
    return emit_load(
        g, (value_t){ .loc = v.loc, .as = v.as, .type = &ptr_type }
    );
}
544 +
545 +
/* Load the dword `offset` bytes past the location described by `v`
 * into a fresh register, which is returned. Immediate and absent
 * values have no address and abort. */
reg_t emit_load_offset(gen_t *g, value_t v, i32 offset) {
    reg_t out = nextreg(g);

    switch (v.loc) {
    case LOC_REG:
        emit(g, LD(out, v.as.reg, offset));
        break;
    case LOC_STACK: {
        i32        total = v.as.off.offset + offset;
        addr_adj_t a     = adjust_addr(g, v.as.off.base, &total);
        emit(g, LD(out, a.base, total));
        release_addr(g, a);
        break;
    }
    case LOC_ADDR: {
        /* Materialize the data-section address first. */
        reg_t adr = nextreg(g);
        emit_li(g, adr, v.as.adr.base);
        i32        total = v.as.adr.offset + offset;
        addr_adj_t a     = adjust_addr(g, adr, &total);
        emit(g, LD(out, a.base, total));
        release_addr(g, a);
        freereg(g, adr);
        break;
    }
    case LOC_IMM:
    case LOC_NONE:
        abort();
    }
    return out;
}
574 +
575 +
/* Reserve a fresh frame slot for `v` and store it there; each pushed
 * value gets its own stack location. Returns the stored value. */
value_t emit_push(gen_t *g, value_t v) {
    int slot = reserve(g, v.type);

    return emit_store(g, v, FP, slot);
}
581 +
582 +
/* Compute the address of `array_val[index]`. If `ref` is true, return a
 * reference to the element in place; otherwise copy the element into a
 * fresh stack slot and return that. Slice indexing is bounds-checked
 * (EBREAK on out-of-range); plain array indexing is not. */
value_t emit_array_index(gen_t *g, value_t array_val, value_t index, bool ref) {
    reg_t   elem_siz    = nextreg(g);
    reg_t   data_adr    = ZERO;
    reg_t   base_reg    = ZERO;
    reg_t   base_alloc  = ZERO; /* Set iff we allocated base_reg ourselves. */
    i32     base_offset = 0;
    type_t *elem_type;
    type_t *arr_type = array_val.type;
    if (arr_type->cls == TYPE_PTR) {
        arr_type = arr_type->info.ptr.target;
    }

    /* Handle different storage locations */
    if (array_val.type->cls == TYPE_PTR) {
        /* Dereference pointers up front to get the actual base address. */
        base_reg    = emit_load_dword(g, array_val);
        base_offset = 0;
    } else if (array_val.loc == LOC_REG) {
        base_reg    = array_val.as.reg;
        base_offset = 0;
    } else if (array_val.loc == LOC_STACK) {
        base_reg    = array_val.as.off.base;
        base_offset = array_val.as.off.offset;
    } else if (array_val.loc == LOC_ADDR) {
        /* For constants in the data section, load the address but don't
         * dereference it. This way we get the actual array base address for
         * indexing. */
        base_reg = nextreg(g);
        emit_li(g, base_reg, array_val.as.adr.base);
        base_offset = array_val.as.adr.offset;
        base_alloc  = base_reg;
    } else {
        bail("cannot index array/slice at this location");
    }
    /* Load index into a register. Will hold final output */
    reg_t rd = emit_load(g, index);

    if (arr_type->cls == TYPE_SLICE) {
        /* Adjust base_offset for large offsets before loading slice fields */
        i32        ptr_offset = base_offset;
        addr_adj_t adj        = adjust_addr(g, base_reg, &ptr_offset);

        /* Load data pointer (first dword of slice) */
        /* and use it as our new base. */
        data_adr = nextreg(g);
        emit(g, LD(data_adr, adj.base, ptr_offset));

        /* Load slice length (second dword of slice) for bounds checking */
        reg_t len = nextreg(g);
        emit(g, LD(len, adj.base, ptr_offset + WORD_SIZE));

        release_addr(g, adj);

        /* Bounds check: if index >= length, emit EBREAK */
        /* Skip EBREAK if index < length (jump 2 instructions) */
        emit(g, BLTU(rd, len, INSTR_SIZE * 2));
        emit(g, EBREAK);

        freereg(g, len);

        base_reg    = data_adr;
        base_offset = 0;
        elem_type   = arr_type->info.slc.elem;
    } else {
        elem_type = arr_type->info.ary.elem;
    }

    /* Get element size */
    /* rd = base + index * sizeof(elem); rd becomes the element address. */
    emit_li(g, elem_siz, elem_type->size);
    emit(g, MUL(rd, rd, elem_siz)); /* Relative offset. */
    emit(g, ADD(rd, rd, base_reg));

    freereg(g, elem_siz);
    freereg(g, data_adr);
    if (base_alloc)
        freereg(g, base_alloc);

    /* Fold a base offset too large for a 12-bit immediate into rd now. */
    if (base_offset != 0 && !is_small(base_offset)) {
        emit_addr_offset(g, rd, rd, base_offset);
        base_offset = 0;
    }

    if (ref) {
        /* Caller receives (and owns) rd as the element's address. */
        return value_stack(OFFSET(rd, base_offset), elem_type);
    } else {
        /* Reserve space on stack for the element */
        i32 stack_offset = reserve(g, elem_type);

        /* Copy element from array to stack using memcopy */
        offset_t src = OFFSET(rd, base_offset);  /* Source: element in array */
        offset_t dst = OFFSET(FP, stack_offset); /* Destination: stack */
        emit_memcopy(g, src, dst, elem_type);

        freereg(g, rd);

        /* Return a stack-based value pointing to the array element. */
        return value_stack(dst, elem_type);
    }
}
681 +
682 +
/* Store register `src` to memory at `base + offset`, choosing the store
 * width from `ty`. Returns the index of the emitted store instruction.
 * Payload-less unions recurse on their base type; compound types store
 * the address held in `src` as a dword. */
usize emit_regstore(gen_t *g, reg_t src, reg_t base, i32 offset, type_t *ty) {
    reg_t      orig_base   = base;
    i32        orig_offset = offset;
    /* Rewrite (base, offset) if the offset doesn't fit a store immediate;
     * `src` must not be clobbered as the scratch register. */
    addr_adj_t adj         = adjust_addr_avoid(g, base, &offset, src);
    reg_t      addr        = adj.base;
    usize      idx         = 0;

    switch (ty->cls) {
    case TYPE_BOOL:
    case TYPE_I8:
    case TYPE_U8:
        idx = emit(g, SB(src, addr, offset));
        break;
    case TYPE_I16:
    case TYPE_U16:
        idx = emit(g, SH(src, addr, offset));
        break;
    case TYPE_I32:
    case TYPE_U32:
        idx = emit(g, SW(src, addr, offset));
        break;
    case TYPE_PTR: /* References are pointers, so store as a dword. */
    case TYPE_FN:  /* Function pointers are addresses, so store as a dword. */
        idx = emit(g, SD(src, addr, offset));
        break;
    case TYPE_UNION:
        if (ty->info.uni.has_payload) {
            /* Tag is 1 byte. */
            idx = emit(g, SB(src, addr, offset));
            break;
        }
        /* Payload-less unions store as their underlying base type.
         * Recurse with the ORIGINAL base/offset, since the adjustment
         * is released here. */
        release_addr(g, adj);
        return emit_regstore(g, src, orig_base, orig_offset, ty->info.uni.base);
    case TYPE_ARRAY:
    case TYPE_RECORD:
    case TYPE_OPT:
        /* Structs, arrays, optional types are stored by reference, so
         * just store the address (pointer). */
        idx = emit(g, SD(src, addr, offset));
        break;
    case TYPE_SLICE:
        release_addr(g, adj);
        bail("storing slices via register store is unsupported");
    default:
        /* NOTE(review): assumes `bail` does not return; otherwise `adj`
         * leaks here. */
        bail("storing unsupported type `%s`", type_names[ty->cls]);
    }
    release_addr(g, adj);

    return idx;
}
732 +
733 +
/* Store the one-byte tag held in `tag_reg` into the tag slot of the
 * tagged value `tv`. */
void emit_store_tag(gen_t *g, tval_t tv, reg_t tag_reg) {
    i32        tag_off = tv.tag.as.off.offset;
    addr_adj_t a       = adjust_addr(g, tv.tag.as.off.base, &tag_off);

    emit(g, SB(tag_reg, a.base, tag_off));
    release_addr(g, a);
}
740 +
741 +
/* Load a value of type `ty` from `base + offset` into register `dst`,
 * choosing the load width (and sign/zero extension) from `ty`. Compound
 * types load the ADDRESS of the data rather than the data itself.
 * Returns the index of the emitted instruction. */
usize emit_regload(gen_t *g, reg_t dst, reg_t base, i32 offset, type_t *ty) {
    reg_t      orig_base   = base;
    i32        orig_offset = offset;
    /* Rewrite (base, offset) if the offset doesn't fit a load immediate. */
    addr_adj_t adj         = adjust_addr(g, base, &offset);
    reg_t      addr        = adj.base;
    usize      idx         = 0;

    switch (ty->cls) {
    case TYPE_BOOL:
    case TYPE_U8:
        idx = emit(g, LBU(dst, addr, offset));
        break;
    case TYPE_I8:
        idx = emit(g, LB(dst, addr, offset));
        break;
    case TYPE_U16:
        idx = emit(g, LHU(dst, addr, offset));
        break;
    case TYPE_I16:
        idx = emit(g, LH(dst, addr, offset));
        break;
    case TYPE_I32:
        idx = emit(g, LW(dst, addr, offset));
        break;
    case TYPE_U32:
        idx = emit(g, LWU(dst, addr, offset));
        break;
    case TYPE_PTR: /* Raw pointer values occupy one 64-bit dword. */
    case TYPE_FN:  /* Function pointers are addresses, so load as a dword. */
        idx = emit(g, LD(dst, addr, offset));
        break;
    case TYPE_UNION:
        if (ty->info.uni.has_payload) {
            /* Payload unions are by-reference: load their address. */
            idx = emit(g, ADDI(dst, addr, offset));
            break;
        }
        /* Payload-less unions load as their underlying base type.
         * Recurse with the ORIGINAL base/offset, since the adjustment
         * is released here. */
        release_addr(g, adj);
        return emit_regload(g, dst, orig_base, orig_offset, ty->info.uni.base);
    case TYPE_ARRAY:
    case TYPE_RECORD:
    case TYPE_SLICE:
    case TYPE_OPT:
        /* For records, arrays, optional types, we load the address in the
         * register. */
        idx = emit(g, ADDI(dst, addr, offset));
        break;
    default:
        release_addr(g, adj);
        bail("loading unsupported type `%s`", type_names[ty->cls]);
    }
    release_addr(g, adj);

    return idx;
}
795 +
796 +
/* Spill register `src` into a freshly reserved frame slot of type `ty`
 * and return the slot's frame-pointer-relative offset. */
int emit_regpush(gen_t *g, reg_t src, type_t *ty) {
    int slot = reserve(g, ty);

    emit_regstore(g, src, FP, slot, ty);
    return slot;
}
803 +
804 +
/* Carve out frame space for a value of type `ty` at the given
 * alignment. Grows the frame-size high-water mark, zeroes the slot for
 * non-packed types, and returns the new (negative) stack-pointer
 * offset. */
i32 reserve_aligned(gen_t *g, type_t *ty, i32 align) {
    frame_t *frame = &g->fn.current->e.fn.frame;

    /* Zero-sized types (e.g. empty arrays) need no stack space. */
    if (ty->size == 0)
        return frame->sp;

    frame->sp = align_stack(frame->sp - ty->size, align);

    if (-frame->sp >= MAX_FRAME_SIZE)
        bail("stack frame overflow");
    if (-frame->sp < 0)
        bail("stack frame underflow");

    /* Track the deepest point the frame has reached. */
    if (frame->size < -frame->sp)
        frame->size = -frame->sp;

    /* Non-packed types may contain padding; zero the slot so freshly
     * reserved values start from clean memory. Packed types are densely
     * laid out and skip this. */
    if (!type_is_packed(ty))
        emit_memzero(g, OFFSET(FP, frame->sp), ty->size);

    return frame->sp;
}
829 +
830 +
/* Load the value `src` into register `dst` (returned). Immediates are
 * materialized; stack and data-section values are loaded by width, or
 * their ADDRESS is loaded for by-reference types; register values are
 * moved. LOC_NONE is a no-op. */
reg_t emit_load_into(gen_t *g, reg_t dst, value_t src) {
    switch (src.loc) {
    case LOC_IMM:
        /* Pick the immediate field matching the value's type class. */
        switch (src.type->cls) {
        case TYPE_UNION: /* Unions default to i32 base type. */
        case TYPE_I8:
        case TYPE_I16:
        case TYPE_I32:
            emit_li(g, dst, src.as.imm.i);
            break;
        case TYPE_U8:
        case TYPE_U16:
        case TYPE_U32:
        case TYPE_PTR:
        case TYPE_FN:
            emit_li(g, dst, src.as.imm.u);
            break;
        case TYPE_BOOL:
            emit_li(g, dst, src.as.imm.b);
            break;
        default:
            bail("unsupported type `%s`", type_names[src.type->cls]);
        }
        break;
    case LOC_STACK:
        /* For types passed by reference, load the address
         * instead of the value. */
        if (type_is_passed_by_ref(src.type)) {
            i32        off = src.as.off.offset;
            addr_adj_t adj = adjust_addr(g, src.as.off.base, &off);
            emit(g, ADDI(dst, adj.base, off));
            release_addr(g, adj);
        } else {
            emit_regload(g, dst, src.as.off.base, src.as.off.offset, src.type);
        }
        break;
    case LOC_REG: {
        reg_t rs = src.as.reg;
        if (rs == dst) {
            break;
        }
        /* Free the source register BEFORE the move if it's a temporary;
         * the value now lives in `dst`. */
        if (src.temp)
            freereg(g, rs);

        emit(g, MV(dst, rs));
        break;
    }
    case LOC_ADDR: {
        /* Start by loading the address into the register */
        emit_li(g, dst, src.as.adr.base);

        /* For non-compound types, we need to load the value from the address.
         * For compound types, we keep the address itself. */
        if (!type_is_passed_by_ref(src.type)) {
            emit_regload(g, dst, dst, src.as.adr.offset, src.type);
        } else {
            /* For compound types passed by reference, add the offset to get
             * the actual address. */
            if (src.as.adr.offset != 0) {
                emit(g, ADDI(dst, dst, src.as.adr.offset));
            }
        }
        break;
    }
    case LOC_NONE:
        break;
    }
    return dst;
}
899 +
900 +
/* Compare values at two memory addresses and accumulate result.
 * Loads values from memory, compares them, and ANDs the comparison result
 * with the accumulating result register. */
static void emit_cmp_step(
    gen_t  *g,
    reg_t   left_val,   /* Register to hold left value during comparison */
    reg_t   right_val,  /* Register to hold right value during comparison */
    reg_t   left_addr,  /* Base address register for left operand */
    reg_t   right_addr, /* Base address register for right operand */
    usize   offset,     /* Byte offset from base addresses to load from */
    reg_t   result,     /* Register that accumulates comparison results */
    type_t *val_typ     /* Type information for loading value */
) {
    /* Load values from both memory addresses at the given offset */
    /* NOTE(review): `offset` narrows from usize to the i32 parameter of
     * emit_regload — assumed to stay below 2^31. */
    emit_regload(g, left_val, left_addr, offset, val_typ);
    emit_regload(g, right_val, right_addr, offset, val_typ);

    /* XOR the two loaded values: left_val = left_val ^ right_val
     * If values are equal, result will be 0 (equal values XOR to 0)
     * If values differ, result will be non-zero */
    emit(g, XOR(left_val, left_val, right_val));
    /* Convert XOR result to 1 (equal) or 0 (not equal) */
    emit(g, SLTIU(left_val, left_val, 1));
    /* Accumulate the result with a previous result.
     * If any comparison fails, the final result becomes 0 */
    emit(g, AND(result, result, left_val));
}
927 +
928 +
/* Compare raw bytes at two memory addresses.
 * Sets result = 1 if all bytes match. Whole dwords are compared with
 * LD/XOR/SLTIU; the sub-dword tail is compared with 4/2/1-byte loads. */
void emit_bytes_equal(
    gen_t *g, reg_t left, reg_t right, usize size, reg_t result
) {
    /* Start assuming they're equal */
    emit_li(g, result, 1);

    if (size == 0)
        return; /* Zero bytes are always equal */

    reg_t left_val  = nextreg(g);
    reg_t right_val = nextreg(g);

    /* Compare dword by dword (8 bytes) */
    usize i, remaining = size;

    for (i = 0; i + WORD_SIZE <= size; i += WORD_SIZE) {
        /* Load 8-byte dwords directly with LD */
        i32        off_l = (i32)i, off_r = (i32)i;
        addr_adj_t adj_l = adjust_addr(g, left, &off_l);
        emit(g, LD(left_val, adj_l.base, off_l));
        release_addr(g, adj_l);
        addr_adj_t adj_r = adjust_addr(g, right, &off_r);
        emit(g, LD(right_val, adj_r.base, off_r));
        release_addr(g, adj_r);
        /* result &= (left_val == right_val) */
        emit(g, XOR(left_val, left_val, right_val));
        emit(g, SLTIU(left_val, left_val, 1));
        emit(g, AND(result, result, left_val));
    }
    /* `i` now counts the dword-compared prefix; at most 7 bytes remain. */
    remaining -= i;

    if (remaining >= 4) {
        emit_cmp_step(
            g, left_val, right_val, left, right, i, result, g->types->type_u32
        );
        i         += 4;
        remaining -= 4;
    }

    if (remaining >= 2) {
        emit_cmp_step(
            g, left_val, right_val, left, right, i, result, g->types->type_u16
        );
        i         += 2;
        remaining -= 2;
    }
    if (remaining == 1) {
        emit_cmp_step(
            g, left_val, right_val, left, right, i, result, g->types->type_u8
        );
    }
    freereg(g, left_val);
    freereg(g, right_val);
}
983 +
984 +
/* Emit code that sets `result` to 1 if the values held/addressed by
 * `left` and `right` are equal under type `ty`, else 0. Primitives are
 * compared in registers; optionals compare tag then payload (with
 * branch patching); compound types fall back to byte-wise comparison. */
void emit_memequal(
    gen_t *g, reg_t left, reg_t right, type_t *ty, reg_t result
) {
    switch (ty->cls) {
    case TYPE_OPT: { /* For optional types, compare tag and value */
        reg_t left_tag  = nextreg(g);
        reg_t right_tag = nextreg(g);

        /* Load tags (first byte) */
        emit(g, LBU(left_tag, left, 0));
        emit(g, LBU(right_tag, right, 0));

        /* Compare tags directly - if different, optionals are not equal */
        emit_li(g, result, 0); /* Assume not equal */
        /* Placeholder NOP, patched below to: BNE tags -> skip to end
         * with result still 0. */
        usize jump_to_end = emit(g, NOP);

        /* If both are nil (tag == 0), they're equal */
        emit_li(g, result, 1); /* Set equal */
        /* Placeholder NOP, patched below to: BEQ left_tag, ZERO -> skip
         * the payload comparison (both nil, result stays 1). */
        usize skip_value_check = emit(g, NOP);

        /* Compare values (past tag) */
        type_t *inner_type = ty->info.opt.elem;
        i32     val_off    = align(TAG_SIZE, inner_type->align);
        reg_t   left_val   = nextreg(g);
        reg_t   right_val  = nextreg(g);

        /* Calculate value addresses (skip tag) */
        emit(g, ADDI(left_val, left, val_off));
        emit(g, ADDI(right_val, right, val_off));

        /* Load values if primitive type */
        if (type_is_primitive(inner_type)) {
            emit_regload(g, left_val, left_val, 0, inner_type);
            emit_regload(g, right_val, right_val, 0, inner_type);
        }

        /* Compare the values recursively */
        emit_memequal(g, left_val, right_val, inner_type, result);

        /* Patch skip_value_check: jump here if both tags are 0 (nil) */
        g->instrs[skip_value_check] =
            BEQ(left_tag, ZERO, jump_offset(skip_value_check, g->ninstrs));

        /* Patch jump_to_end: jump here if tags are different */
        g->instrs[jump_to_end] =
            BNE(left_tag, right_tag, jump_offset(jump_to_end, g->ninstrs));

        freereg(g, left_tag);
        freereg(g, right_tag);
        freereg(g, left_val);
        freereg(g, right_val);

        break;
    }
    case TYPE_I8:
    case TYPE_I16:
    case TYPE_I32:
        /* For primitive types, compare directly */
        emit(g, SUB(result, left, right));
        emit(g, SLTIU(result, result, 1));
        break;
    case TYPE_U8:
    case TYPE_U16:
    case TYPE_U32:
    case TYPE_BOOL:
    case TYPE_PTR:
        /* For primitive types, compare directly */
        emit(g, XOR(result, left, right));
        emit(g, SLTIU(result, result, 1));
        break;
    case TYPE_UNION:
        if (!ty->info.uni.has_payload) {
            /* Payload-less unions compare as their base type (i32 when
             * no explicit base is set). */
            type_t *base =
                ty->info.uni.base ? ty->info.uni.base : g->types->type_i32;
            emit_memequal(g, left, right, base, result);
        } else {
            emit_bytes_equal(g, left, right, ty->size, result);
        }
        break;
    case TYPE_ARRAY:
    case TYPE_RECORD:
    case TYPE_SLICE:
        emit_bytes_equal(g, left, right, ty->size, result);
        break;
    default:
        bail("equality is not supported for type `%s`", ty->name);
    }
}
1072 +
1073 +
/* Copy a single machine word between two value slots. Used for
 * pass-by-reference types: for aggregates only the *address* is moved,
 * never the contents.
 *
 * Handled slot combinations: reg->reg, reg->stack, stack->reg,
 * stack->stack and addr->stack; any other pairing is a fatal error.
 * When the destination is a slice, only the data-pointer field of the
 * fat pointer is written (at SLICE_FIELD_PTR_OFFSET). */
void emit_copy_by_ref(gen_t *g, value_t src, value_t dst) {
    /* Shared pointer pseudo-type so slice loads/stores use word width. */
    static type_t ptr_type = { .cls = TYPE_PTR };

    if (src.loc == LOC_REG && dst.loc == LOC_REG) {
        emit_mv(g, dst.as.reg, src.as.reg);
    } else if (src.loc == LOC_REG && dst.loc == LOC_STACK) {
        i32     dst_off  = dst.as.off.offset;
        type_t *store_ty = dst.type;

        if (dst.type->cls == TYPE_SLICE) {
            /* Slice fat pointers live on the stack; only copy the address. */
            dst_off  += SLICE_FIELD_PTR_OFFSET;
            store_ty  = &ptr_type;
        }
        emit_regstore(g, src.as.reg, dst.as.off.base, dst_off, store_ty);
    } else if (src.loc == LOC_STACK && dst.loc == LOC_REG) {
        type_t *load_ty = dst.type;
        i32     src_off = src.as.off.offset;

        if (dst.type->cls == TYPE_SLICE) {
            /* Load just the slice's data pointer, at word width. */
            load_ty  = &ptr_type;
            src_off += SLICE_FIELD_PTR_OFFSET;
        }
        emit_regload(g, dst.as.reg, src.as.off.base, src_off, load_ty);
    } else if (src.loc == LOC_STACK && dst.loc == LOC_STACK) {
        /* Materialize the source *address* in a register, then store that
         * address into the destination slot (copy-by-reference). */
        i32        src_off = src.as.off.offset;
        addr_adj_t src_adj = adjust_addr(g, src.as.off.base, &src_off);
        reg_t      adr     = nextreg(g);

        emit(g, ADDI(adr, src_adj.base, src_off));

        i32        dst_off = dst.as.off.offset;
        addr_adj_t dst_adj = adjust_addr(g, dst.as.off.base, &dst_off);

        if (dst.type->cls == TYPE_SLICE)
            dst_off += SLICE_FIELD_PTR_OFFSET;

        emit(g, SD(adr, dst_adj.base, dst_off));

        /* Release temporaries in reverse order of acquisition. */
        release_addr(g, dst_adj);
        release_addr(g, src_adj);
        freereg(g, adr);
    } else if (src.loc == LOC_ADDR && dst.loc == LOC_STACK) {
        reg_t adr = nextreg(g);
        /* Load the absolute address into a register. */
        emit_li(g, adr, (i32)(src.as.adr.base + src.as.adr.offset));
        i32     dst_off  = dst.as.off.offset;
        type_t *store_ty = dst.type;

        if (dst.type->cls == TYPE_SLICE) {
            dst_off  += SLICE_FIELD_PTR_OFFSET;
            store_ty  = &ptr_type;
        }
        emit_regstore(g, adr, dst.as.off.base, dst_off, store_ty);
        freereg(g, adr);
    } else {
        bail("don't know how to copy between these slots");
    }
}
1132 +
1133 +
/* Write a successful result tag (0) and copy the payload if present. */
1134 +
void emit_result_store_success(gen_t *g, value_t dest, value_t value) {
1135 +
    tval_t tv  = tval_from_val(g, dest);
1136 +
    reg_t  tag = nextreg(g);
1137 +
1138 +
    emit_li(g, tag, 0);
1139 +
    emit_store_tag(g, tv, tag);
1140 +
    freereg(g, tag);
1141 +
1142 +
    type_t *payload = dest.type->info.res.payload;
1143 +
1144 +
    /* Nb. We don't memzero, since result types are always unwrapped to
1145 +
     * one of their payloads. */
1146 +
1147 +
    if (payload->size > 0) {
1148 +
        /* Check if we need to wrap the value in an optional. */
1149 +
        if (payload->cls == TYPE_OPT && value.type->cls != TYPE_OPT) {
1150 +
            /* Wrap non-optional value in an optional */
1151 +
            value_t payload_val = value_stack(
1152 +
                OFFSET(tv.val.as.off.base, tv.val.as.off.offset), payload
1153 +
            );
1154 +
            tval_store(g, payload_val, value, 1);
1155 +
        } else {
1156 +
            emit_store(g, value, tv.val.as.off.base, tv.val.as.off.offset);
1157 +
        }
1158 +
    }
1159 +
}
1160 +
1161 +
/* Write an error Result tag (1) and copy the error payload. */
1162 +
void emit_result_store_error(gen_t *g, value_t dest, value_t err) {
1163 +
    tval_t tv  = tval_from_val(g, dest);
1164 +
    reg_t  tag = nextreg(g);
1165 +
1166 +
    emit_li(g, tag, 1);
1167 +
    emit_store_tag(g, tv, tag);
1168 +
    freereg(g, tag);
1169 +
1170 +
    /* Nb. We don't memzero, since result types are always unwrapped to
1171 +
     * one of their payloads. */
1172 +
1173 +
    if (err.type->cls != TYPE_VOID) {
1174 +
        emit_store(g, err, tv.val.as.off.base, tv.val.as.off.offset);
1175 +
    }
1176 +
}
gen/emit.h added +121 -0
1 +
#ifndef EMIT_H
2 +
#define EMIT_H
3 +
4 +
#include <stdlib.h>
5 +
#include <string.h>
6 +
7 +
#include "../ast.h"
8 +
#include "../gen.h"
9 +
#include "../io.h"
10 +
#include "../scanner.h"
11 +
#include "../types.h"
12 +
13 +
/* Code emission. */
14 +
#define emit(g, ins) __emit(g, ins)
15 +
16 +
/* Slice field offsets: byte layout of a slice "fat pointer" — the data
 * pointer first, then the length one word after. */
enum {
    SLICE_FIELD_PTR_OFFSET = 0,
    SLICE_FIELD_LEN_OFFSET = WORD_SIZE,
};
21 +
22 +
/* Helper describing the possibly-adjusted base register for stack accesses.
 * NOTE(review): presumably returned by adjust_addr() when a stack offset
 * exceeds the immediate range of a load/store — confirm against gen.c. */
typedef struct {
    reg_t base; /* Register that should be used for the access. */
    bool  temp; /* Whether `base` was synthesized and must be freed. */
} addr_adj_t;
27 +
28 +
/* Emit the given instruction. */
29 +
static inline usize __emit(gen_t *g, instr_t ins) {
30 +
    if (g->ninstrs >= MAX_INSTRS) {
31 +
        abort();
32 +
    }
33 +
    g->instrs[g->ninstrs] = ins;
34 +
    g->ninstrs++;
35 +
36 +
    return g->ninstrs - 1;
37 +
}
38 +
39 +
/* Split a 32-bit immediate into upper 20 bits and lower 12 bits for RISC-V. */
40 +
void split_imm(i32 imm, i32 *hi, i32 *lo);
41 +
/* Emit a load immediate (LI) instruction sequence. */
42 +
void emit_li(gen_t *g, reg_t rd, i32 imm);
43 +
/* Helper function to copy register values if needed. */
44 +
void emit_mv(gen_t *g, reg_t dst, reg_t src);
45 +
/* Emit relative jump to offset. */
46 +
usize emit_jump(gen_t *g, usize offset);
47 +
/* Emit a function call. */
48 +
usize emit_call(gen_t *g, usize addr);
49 +
/* Load a PC-relative address into a register. */
50 +
void emit_pc_rel_addr(gen_t *g, reg_t rd, usize addr);
51 +
/* Emit code to index into an array */
52 +
value_t emit_array_index(gen_t *g, value_t array_val, value_t index, bool);
53 +
/* Load a value into a specific register. */
54 +
reg_t emit_load_into(gen_t *g, reg_t dst, value_t src);
55 +
/* Load a value from a value. */
56 +
reg_t emit_load(gen_t *g, value_t v);
57 +
/* Load a double word (dword) from a value. */
58 +
reg_t emit_load_dword(gen_t *g, value_t v);
59 +
/* Load a value with an offset. */
60 +
reg_t emit_load_offset(gen_t *g, value_t v, i32 offset);
61 +
/* Store a value on the stack. Returns a new value located on the stack. */
62 +
value_t emit_store(gen_t *g, value_t v, reg_t base, int offset);
63 +
/* Push a value to the stack. */
64 +
value_t emit_push(gen_t *g, value_t v);
65 +
/* Replace a value, eg. for assigning */
66 +
void emit_replace(gen_t *g, value_t old, value_t new);
67 +
/* Compute dst = base + offset while respecting SIMM12 limits. */
68 +
void emit_addr_offset(gen_t *g, reg_t dst, reg_t base, i32 offset);
69 +
/* Load a value at a stack offset into a register. */
70 +
usize emit_regload(gen_t *g, reg_t dst, reg_t base, i32 offset, type_t *ty);
71 +
/* Push a register to the stack. */
72 +
int emit_regpush(gen_t *g, reg_t src, type_t *ty);
73 +
/* Store a register value on the stack. */
74 +
usize emit_regstore(gen_t *g, reg_t src, reg_t base, i32 offset, type_t *ty);
75 +
/* Store a tvalue tag. */
76 +
void emit_store_tag(gen_t *g, tval_t tv, reg_t tag_reg);
77 +
/* Reserve stack space with explicit alignment. */
78 +
i32 reserve_aligned(gen_t *g, type_t *ty, i32 align);
79 +
80 +
/* Copy memory between two locations */
81 +
void emit_memcopy(gen_t *g, offset_t src, offset_t dst, type_t *ty);
82 +
/* Zero out a memory region */
83 +
void emit_memzero(gen_t *g, offset_t dst, i32 size);
84 +
/* Compare two registers based on their type:
85 +
 *
86 +
 * - If references, compares the addresses (pointers)
87 +
 * - If values, compares the content
88 +
 * - For records and arrays, compares each element
89 +
 * - For slices, compares the pointer addresses
90 +
 *
91 +
 * Puts the result (0 or 1) in the result register.
92 +
 */
93 +
void emit_memequal(gen_t *g, reg_t left, reg_t right, type_t *ty, reg_t result);
94 +
/* Compare raw bytes between two memory locations */
95 +
void emit_bytes_equal(
96 +
    gen_t *g, reg_t left, reg_t right, usize size, reg_t result
97 +
);
98 +
/* Copy a record from one memory offset to another. */
99 +
void emit_record_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty);
100 +
/* Store a value directly to a record field. */
101 +
void emit_record_field_set(
102 +
    gen_t *g, value_t val, reg_t base, i32 record_offset, symbol_t *field
103 +
);
104 +
/* Calculate record field value from a record value and field symbol.
105 +
 * Creates a stack-based value pointing to the field at the correct offset. */
106 +
value_t emit_record_field_get(value_t sval, symbol_t *field);
107 +
/* Copy an array from one memory location to another. */
108 +
void emit_array_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty);
109 +
/* Copy a word between two locations. Copies addresses of
110 +
 * pass-by-reference types. */
111 +
void emit_copy_by_ref(gen_t *g, value_t src, value_t dst);
112 +
/* Emit a slice literal */
113 +
value_t emit_slice_lit(gen_t *g, i32 offset, usize ptr, usize len, type_t *typ);
114 +
/* Store a successful result value by clearing the tag and
115 +
 * writing the payload. */
116 +
void emit_result_store_success(gen_t *g, value_t dest, value_t value);
117 +
/* Store an error result by writing the error tag (1) and copying the
 * error payload (nothing is copied when the error type is void). */
119 +
void emit_result_store_error(gen_t *g, value_t dest, value_t err);
120 +
121 +
#endif
io.c added +58 -0
1 +
#include <stdarg.h>
2 +
#include <stdio.h>
3 +
#include <stdlib.h>
4 +
#include <string.h>
5 +
6 +
#include "io.h"
7 +
#include "types.h"
8 +
9 +
/* Read the file at `path` in its entirety into a freshly-allocated,
 * NUL-terminated buffer stored in `*data`.
 *
 * Returns the file size in bytes on success, or -1 on failure, in which
 * case `*data` is NULL. The caller owns and must free `*data`. */
i32 readfile(const char *path, char **data) {
    FILE *fp   = NULL;
    i32   size = -1;

    *data = NULL;

    /* Open in binary mode so the size reported by ftell matches the
     * byte count fread returns (text mode translates line endings on
     * some platforms, making the two disagree). */
    if (!(fp = fopen(path, "rb"))) {
        goto cleanup;
    }
    if (fseek(fp, 0L, SEEK_END) != 0) {
        goto cleanup;
    }
    if ((size = ftell(fp)) < 0) {
        goto cleanup;
    }
    if (fseek(fp, 0L, SEEK_SET) != 0) {
        goto cleanup;
    }
    /* +1 for the NUL terminator. */
    if ((*data = malloc((size_t)size + 1)) == NULL) {
        /* Bug fix: without this, `size` kept the positive ftell value
         * and callers checking `size < 0` saw a bogus success. */
        size = -1;
        goto cleanup;
    }
    if (fread(*data, 1, (size_t)size, fp) != (size_t)size) {
        size = -1;
        goto cleanup;
    }
    (*data)[size] = '\0';

cleanup:
    if (fp) {
        fclose(fp);
    }
    if (size < 0 && *data) {
        free(*data);
        *data = NULL;
    }
    return size;
}
46 +
47 +
/* Print a fatal diagnostic prefixed with the source location, then
 * terminate the process with exit code 1. Invoked via the `bail` macro,
 * which supplies __FILE__ and __LINE__. */
void _bail(const char *file, i32 line, const char *restrict fmt, ...) {
    fflush(stdout);
    fprintf(stderr, "%s:%d: fatal: ", file, line);

    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fprintf(stderr, "\n");
    exit(1);
}
io.h added +19 -0
1 +
#ifndef IO_H
2 +
#define IO_H
3 +
4 +
#include "types.h"
5 +
6 +
/* Abort execution and exit with an error code. */
7 +
#define bail(...) _bail(__FILE__, __LINE__, __VA_ARGS__)
8 +
9 +
/* Debug output - disabled for bootstrap compiler. */
10 +
#define debug(...) ((void)0)
11 +
12 +
__attribute__((noreturn)) void _bail(
13 +
    const char *file, int line, const char *restrict fmt, ...
14 +
);
15 +
16 +
/* Read a file in its entirety into `data`. */
17 +
i32 readfile(const char *path, char **data);
18 +
19 +
#endif
limits.h added +44 -0
1 +
#ifndef LIMITS_H
2 +
#define LIMITS_H
3 +
4 +
#include "riscv.h"
5 +
6 +
#define MAX_NODES            32768
7 +
#define MAX_UNION_VARIANTS   128
8 +
#define MAX_RECORD_FIELDS    32
9 +
#define MAX_ARRAY_ELEMS      1024
10 +
#define MAX_BLOCK_STATEMENTS 512
11 +
#define MAX_SWITCH_CASES     128
12 +
#define MAX_CASE_PATTERNS    64
13 +
#define MAX_QUALIFIED_NAME   128
14 +
#define MAX_FN_PARAMS        ((A7 - A0) + 1)
15 +
#define MAX_FN_THROWS        8
16 +
#define MAX_FN_LOCALS        32
17 +
#define MAX_SYMBOLS          16384
18 +
#define MAX_SCOPES           8192
19 +
#define MAX_SCOPE_SYMBOLS    512
20 +
#define MAX_INSTRS           (1 << 20)
21 +
#define MAX_FN_PATCHES       4096
22 +
#define MAX_RET_PATCHES      256
23 +
#define MAX_BRK_PATCHES      512
24 +
#define MAX_TYPES            4096
25 +
#define MAX_FRAME_SIZE       (512 * 1024)
26 +
#define MAX_STRING_LITERALS  1024
27 +
#define MAX_CONSTANTS        256
28 +
#define MAX_TRY_CATCHES      8
29 +
30 +
/* Pool size for variable-length node pointer arrays.
31 +
 * Replaces per-node embedded arrays to shrink node_t. */
32 +
#define MAX_NODEPTR_POOL (MAX_NODES * 4)
33 +
34 +
/* Pool sizes for type_t sub-arrays (variants, fields, params, throws). */
35 +
#define MAX_SYMPTR_POOL  16384
36 +
#define MAX_TYPEPTR_POOL 16384
37 +
38 +
/* Maximum values for module system */
39 +
#define MAX_MODULES     48
40 +
#define MAX_MODULE_DEPS 24
41 +
#define MAX_PATH_LEN    1024
42 +
#define MAX_IMPORTS     32
43 +
44 +
#endif
module.c added +341 -0
1 +
#include <libgen.h>
2 +
#include <stdio.h>
3 +
#include <stdlib.h>
4 +
#include <string.h>
5 +
6 +
#include "ast.h"
7 +
#include "io.h"
8 +
#include "limits.h"
9 +
#include "module.h"
10 +
#include "parser.h"
11 +
#include "scanner.h"
12 +
#include "symtab.h"
13 +
#include "util.h"
14 +
15 +
/* Print an error string to `stderr`. */
16 +
#define error(...)                                                             \
17 +
    do {                                                                       \
18 +
        fprintf(stderr, "module: ");                                           \
19 +
        fprintf(stderr, __VA_ARGS__);                                          \
20 +
        fprintf(stderr, "\n");                                                 \
21 +
    } while (0)
22 +
23 +
/* Extract the directory part of `path` into `result` (at most `maxlen`
 * bytes).
 * NOTE(review): `strndup` here is called as (dest, src, n) — a
 * project-local bounded copy (see util.h), not POSIX strndup; confirm. */
static void getdirname(const char *path, char *result, size_t maxlen) {
    /* dirname() may modify its argument, hence the local copy. */
    char buf[MAX_PATH_LEN];
    strndup(buf, path, MAX_PATH_LEN);

    char *dir = dirname(buf);
    strndup(result, dir, maxlen);
}
31 +
32 +
/* Check if a file has .rad extension */
33 +
static bool is_source(const char *path) {
34 +
    const char *dot = strrchr(path, '.');
35 +
    if (!dot)
36 +
        return false;
37 +
    return strcmp(dot, SOURCE_EXT) == 0;
38 +
}
39 +
40 +
/* Helper function to convert path with / to qualified name with :: */
41 +
static void path_to_qualified(const char *path, char *qualified, usize len) {
42 +
    usize j = 0;
43 +
44 +
    for (usize i = 0; path[i] && j < len - 2; i++) {
45 +
        if (path[i] == '/') {
46 +
            qualified[j++] = ':';
47 +
            qualified[j++] = ':';
48 +
        } else {
49 +
            qualified[j++] = path[i];
50 +
        }
51 +
    }
52 +
    qualified[j] = '\0';
53 +
}
54 +
55 +
/* Extract the file name from `path`, without directory components and
 * without the source (".rad") extension, into `result` (at most
 * `maxlen` bytes).
 * NOTE(review): `strndup` is the project's bounded copy (dest, src, n)
 * from util.h, not POSIX strndup; confirm. */
static void getbasename(const char *path, char *result, size_t maxlen) {
    /* basename() may modify its argument, hence the local copy. */
    char buf[MAX_PATH_LEN];
    strndup(buf, path, MAX_PATH_LEN);

    /* Remove .rad extension if present */
    char *base = basename(buf);
    if (is_source(base)) {
        *strrchr(base, '.') = '\0';
    }
    strndup(result, base, maxlen);
}
67 +
68 +
/* Initialize the module manager: no modules registered yet, and the
 * root directory is derived from the entry file's directory. */
void module_manager_init(module_manager_t *mm, const char *entryfile) {
    mm->nmodules = 0;
    getdirname(entryfile, mm->rootdir, MAX_PATH_LEN);
}
73 +
74 +
/* Initialize a module */
75 +
static void module_init(module_t *module, const char *path) {
76 +
    memset(module, 0, sizeof(*module));
77 +
    strndup(module->path, path, MAX_PATH_LEN);
78 +
    getbasename(path, module->name, MAX_PATH_LEN);
79 +
    strndup(module->qualified, module->name, MAX_PATH_LEN);
80 +
81 +
    module->state      = MODULE_STATE_UNVISITED;
82 +
    module->declared   = false;
83 +
    module->checked    = false;
84 +
    module->compiled   = false;
85 +
    module->source     = NULL;
86 +
    module->ast        = NULL;
87 +
    module->scope      = NULL;
88 +
    module->default_fn = NULL;
89 +
    module->parent     = NULL;
90 +
    module->nchildren  = 0;
91 +
}
92 +
93 +
/* Helper function to create a module path from a string */
94 +
void module_path(char dest[MAX_QUALIFIED_NAME], const char *name) {
95 +
    strncpy(dest, name, MAX_QUALIFIED_NAME);
96 +
}
97 +
98 +
/* Helper function to append a path component to a module path */
99 +
void module_qualify_str(
100 +
    char dest[MAX_QUALIFIED_NAME], const char *child, u16 len
101 +
) {
102 +
    strlcat(dest, "::", MAX_QUALIFIED_NAME);
103 +
    strncat(dest, child, len);
104 +
}
105 +
106 +
/* Helper function to append a path component to a module path */
107 +
void module_qualify(char dest[MAX_QUALIFIED_NAME], node_t *ident) {
108 +
    module_qualify_str(dest, ident->val.ident.name, ident->val.ident.length);
109 +
}
110 +
111 +
/* Add a module to the manager under `path`, optionally placing it in the
 * module hierarchy described by `qualified` (a '/'-separated path that
 * may carry a ".rad" extension).
 *
 * Returns the existing module when `path` is already registered; returns
 * NULL on error (capacity exceeded, or a submodule whose parent has not
 * been registered yet — parents must be registered before children). */
module_t *module_manager_register_qualified(
    module_manager_t *mm, const char *path, const char *qualified
) {
    if (!mm || !path)
        return NULL;

    if (mm->nmodules >= MAX_MODULES) {
        error("maximum number of modules (%d) exceeded", MAX_MODULES);
        return NULL;
    }
    /* Registration is idempotent per path. */
    module_t *mod = NULL;
    if ((mod = module_manager_lookup(mm, path))) {
        return mod;
    }
    mod = &mm->modules[mm->nmodules++];
    module_init(mod, path);

    /* Build hierarchy if qualified name provided */
    if (qualified) {
        char path_copy[MAX_PATH_LEN];
        strncpy(path_copy, qualified, MAX_PATH_LEN - 1);
        path_copy[MAX_PATH_LEN - 1] = '\0';

        /* Remove .rad extension */
        char *ext = strrchr(path_copy, '.');
        if (ext)
            *ext = '\0';

        /* Check if this is a submodule (contains /) */
        char *slash = strrchr(path_copy, '/'); // Find LAST slash
        if (slash) {
            *slash            = '\0';
            char *parent_path = path_copy; // Everything before last slash
            char *child_name  = slash + 1; // Everything after last slash

            /* Convert parent path to qualified name for lookup */
            char parent_q[MAX_PATH_LEN];
            path_to_qualified(parent_path, parent_q, MAX_PATH_LEN);

            /* Look up parent using :: notation */
            module_t *parent =
                module_manager_lookup_by_qualified_name(mm, parent_q);

            if (!parent) {
                error(
                    "parent module '%s' not found for '%s'", parent_q, qualified
                );
                mm->nmodules--; // Rollback the module we just added
                return NULL;
            }
            /* Child capacity is bounded by MAX_MODULES, checked above. */
            mod->parent                           = parent;
            parent->children[parent->nchildren++] = mod;

            /* Write module qualified name */
            module_path(mod->qualified, parent_q);
            module_qualify_str(mod->qualified, child_name, strlen(child_name));
        } else {
            /* No parent (root-level module) */
            module_path(mod->qualified, path_copy);
        }
    }

    return mod;
}
176 +
177 +
/* Add a module to the manager without hierarchy information; equivalent
 * to registering with a NULL qualified name. */
module_t *module_manager_register(module_manager_t *mm, const char *path) {
    return module_manager_register_qualified(mm, path, NULL);
}
181 +
182 +
/* Find a module in the manager by path */
183 +
module_t *module_manager_lookup(module_manager_t *mm, const char *path) {
184 +
    for (usize i = 0; i < mm->nmodules; i++) {
185 +
        if (strcmp(mm->modules[i].path, path) == 0) {
186 +
            return &mm->modules[i];
187 +
        }
188 +
    }
189 +
    return NULL;
190 +
}
191 +
192 +
/* Find a module by name in the module manager */
193 +
module_t *module_manager_lookup_by_name(
194 +
    module_manager_t *mm, const char *name, u16 length
195 +
) {
196 +
    for (usize j = 0; j < mm->nmodules; j++) {
197 +
        if (strncmp(mm->modules[j].name, name, length) == 0 &&
198 +
            strlen(mm->modules[j].name) == length) {
199 +
            return &mm->modules[j];
200 +
        }
201 +
    }
202 +
    return NULL;
203 +
}
204 +
205 +
/* Find a module by qualified name in the module manager */
206 +
module_t *module_manager_lookup_by_qualified_name(
207 +
    module_manager_t *mm, const char *name
208 +
) {
209 +
    for (usize j = 0; j < mm->nmodules; j++) {
210 +
        if (strcmp(mm->modules[j].qualified, name) == 0) {
211 +
            return &mm->modules[j];
212 +
        }
213 +
    }
214 +
    return NULL;
215 +
}
216 +
217 +
/* Parse a single module: read its source file, run the parser, validate
 * the root node, and attach a SYM_MODULE symbol to the AST.
 *
 * Returns true on success. On failure, returns false and stores a
 * MODULE_* code in `*err`; `*err` is only written on failure. Parser
 * errors report their own diagnostics, so none is printed here for
 * that case. The source buffer stays owned by the module. */
bool module_parse(module_t *module, i32 *err) {
    i32 size = readfile(module->path, &module->source);
    if (size < 0) {
        *err = MODULE_NOT_FOUND;
        error("unable to read module file at '%s'", module->path);
        return false;
    }
    scanner_init(&module->parser.scanner, module->path, module->source);
    parser_init(&module->parser);

    node_t *ast = parser_parse(&module->parser);
    if (module->parser.errors) {
        *err = MODULE_PARSE_ERROR;
        return false;
    }
    if (!ast) {
        *err = MODULE_PARSE_ERROR;
        error("failed to parse module '%s'", module->path);
        return false;
    }
    /* A well-formed module always parses to a NODE_MOD_BODY root. */
    if (ast->cls != NODE_MOD_BODY) {
        *err = MODULE_PARSE_ERROR;
        error(
            "module '%s' parsed with unexpected root node (%d)",
            module->path,
            ast->cls
        );
        return false;
    }

    /* Bind the module itself as a symbol on its root AST node. */
    symbol_t sym = (symbol_t){
        .name   = module->name,
        .length = strlen(module->name),
        .node   = ast,
        .kind   = SYM_MODULE,
        .e.mod  = module,
    };
    module->ast      = ast;
    module->ast->sym = alloc_symbol(sym);

    return true;
}
260 +
261 +
/* Parse every registered module, stopping at the first failure.
 * `*err` is MODULE_OK on success, otherwise the failing module's code. */
bool module_manager_parse(module_manager_t *mm, i32 *err) {
    *err = MODULE_OK;

    for (usize i = 0; i < mm->nmodules; i++) {
        module_t *mod = &mm->modules[i];

        if (!module_parse(mod, err))
            return false;
    }
    return true;
}
271 +
272 +
/* Resolve a relative import path (e.g. "super::foo::bar") from the
 * module registered at `basepath`.
 *
 * Resolution order: leading "super" segments walk up the parent chain;
 * a single remaining segment is looked up among the importer's children
 * and then globally by name; otherwise the first segment is resolved
 * globally and the remaining segments walk down through children.
 * Returns NULL whenever any step fails.
 *
 * NOTE(review): strtok() mutates only the local copy here but keeps
 * global state, so this function is not reentrant — confirm the
 * compiler is single-threaded. */
module_t *module_manager_find_relative(
    module_manager_t *mm, const char *basepath, const char *import
) {
    /* Find the importing module */
    module_t *importer = module_manager_lookup(mm, basepath);
    if (!importer) {
        return NULL;
    }

    char import_copy[MAX_PATH_LEN];
    strndup(import_copy, import, MAX_PATH_LEN);

    /* Split on ':'; "::" yields no empty tokens since delimiter runs
     * are collapsed, the empty-token check is defensive. */
    char *segments[MAX_MODULES];
    usize nsegments = 0;

    for (char *token = strtok(import_copy, ":"); token;
         token       = strtok(NULL, ":")) {
        if (*token == '\0')
            continue;
        if (nsegments >= MAX_MODULES)
            break;
        segments[nsegments++] = token;
    }
    if (nsegments == 0)
        return NULL;

    module_t *current = importer;
    usize     index   = 0;

    /* Consume leading "super" segments by walking up the hierarchy. */
    while (index < nsegments && strcmp(segments[index], "super") == 0) {
        if (!current || !current->parent)
            return NULL;
        current = current->parent;
        index++;
    }

    /* Path was only "super" segments: the walk's endpoint is the answer. */
    if (index == nsegments)
        return current;

    /* Single plain segment: prefer the importer's children, then fall
     * back to a global lookup by name. */
    if (index == 0 && nsegments == 1) {
        const char *segment = segments[0];

        for (usize i = 0; i < importer->nchildren; i++) {
            if (strcmp(importer->children[i]->name, segment) == 0) {
                return importer->children[i];
            }
        }
        return module_manager_lookup_by_name(mm, segment, strlen(segment));
    }

    /* Multi-segment path without "super": anchor at a global module. */
    if (index == 0) {
        current = module_manager_lookup_by_name(
            mm, segments[index], strlen(segments[index])
        );
        index++;
    }

    /* Descend through children for the remaining segments. */
    for (; index < nsegments && current; index++) {
        module_t *child = NULL;
        for (usize j = 0; j < current->nchildren; j++) {
            if (strcmp(current->children[j]->name, segments[index]) == 0) {
                child = current->children[j];
                break;
            }
        }
        current = child;
    }
    return current;
}
module.h added +80 -0
1 +
#ifndef MODULE_H
2 +
#define MODULE_H
3 +
4 +
#include "ast.h"
5 +
#include "limits.h"
6 +
#include "parser.h"
7 +
#include "symtab.h"
8 +
9 +
/* Extension for source files */
10 +
#define SOURCE_EXT ".rad"
11 +
12 +
/* Error codes for module loading */
13 +
enum {
14 +
    MODULE_OK           = 0,
15 +
    MODULE_NOT_FOUND    = 1,
16 +
    MODULE_PARSE_ERROR  = 2,
17 +
    MODULE_TYPE_ERROR   = 3,
18 +
    MODULE_CIRCULAR_DEP = 4
19 +
};
20 +
21 +
/* State of a module in the dependency graph */
22 +
typedef enum {
23 +
    MODULE_STATE_UNVISITED = 0,
24 +
    MODULE_STATE_VISITING  = 1,
25 +
    MODULE_STATE_VISITED   = 2
26 +
} module_state_t;
27 +
28 +
typedef struct module_t module_t;
29 +
30 +
struct module_t {
31 +
    char           path[MAX_PATH_LEN];
32 +
    char           name[MAX_PATH_LEN];
33 +
    char           qualified[MAX_PATH_LEN];
34 +
    attrib_t       attribs;
35 +
    node_t        *ast;
36 +
    char          *source;
37 +
    parser_t       parser;
38 +
    module_state_t state;
39 +
    scope_t       *scope;
40 +
    symbol_t      *default_fn;
41 +
    bool           declared;
42 +
    bool           checked;
43 +
    bool           compiled;
44 +
    module_t      *parent;
45 +
    module_t      *children[MAX_MODULES];
46 +
    u8             nchildren;
47 +
};
48 +
49 +
typedef struct {
50 +
    module_t *root;
51 +
    module_t  modules[MAX_MODULES];
52 +
    u8        nmodules;
53 +
    char      rootdir[MAX_PATH_LEN];
54 +
} module_manager_t;
55 +
56 +
void      module_manager_init(module_manager_t *mm, const char *entryfile);
57 +
bool      module_manager_parse(module_manager_t *mm, int *err);
58 +
module_t *module_manager_register(module_manager_t *mm, const char *path);
59 +
module_t *module_manager_register_qualified(
60 +
    module_manager_t *mm, const char *path, const char *qualified
61 +
);
62 +
module_t *module_manager_lookup(module_manager_t *mm, const char *path);
63 +
module_t *module_manager_lookup_by_name(
64 +
    module_manager_t *mm, const char *name, u16 length
65 +
);
66 +
module_t *module_manager_lookup_by_qualified_name(
67 +
    module_manager_t *mm, const char *name
68 +
);
69 +
module_t *module_manager_find_relative(
70 +
    module_manager_t *mm, const char *base_path, const char *import_path
71 +
);
72 +
bool module_parse(module_t *module, int *err);
73 +
void module_qualify(char dest[MAX_QUALIFIED_NAME], node_t *ident);
74 +
void module_qualify_str(
75 +
    char dest[MAX_QUALIFIED_NAME], const char *child, u16 len
76 +
);
77 +
void module_path(char dest[MAX_QUALIFIED_NAME], const char *name);
78 +
void module_register_test(module_t *mod, node_t *test);
79 +
80 +
#endif
options.c added +49 -0
1 +
#include <stdio.h>
2 +
#include <stdlib.h>
3 +
#include <string.h>
4 +
5 +
#include "io.h"
6 +
#include "options.h"
7 +
#include "types.h"
8 +
9 +
/* Create a new options struct. */
10 +
struct options options(int argc, char *argv[]) {
11 +
    return (struct options){
12 +
        .inputs   = { 0 },
13 +
        .ninputs  = 0,
14 +
        .modules  = { 0 },
15 +
        .nmodules = 0,
16 +
        .argv     = argv,
17 +
        .argc     = argc,
18 +
        .output   = NULL,
19 +
    };
20 +
}
21 +
22 +
/* Parse the command line options. */
23 +
int options_parse(struct options *o) {
24 +
    for (int i = 1; i < o->argc; i++) {
25 +
        if (o->argv[i][0] != '-') {
26 +
            o->inputs[o->ninputs++] = o->argv[i];
27 +
            continue;
28 +
        }
29 +
        char *arg = &o->argv[i][1];
30 +
31 +
        if (!strcmp(arg, "o")) {
32 +
            if (++i >= o->argc)
33 +
                bail("`-o` requires an output path");
34 +
            o->output = o->argv[i];
35 +
        } else if (!strcmp(arg, "mod")) {
36 +
            if (++i >= o->argc)
37 +
                bail("`-mod` requires a module path");
38 +
            o->modules[o->nmodules++] = o->argv[i];
39 +
        } else if (!strcmp(arg, "pkg") || !strcmp(arg, "entry")) {
40 +
            /* Ignored; consumed by the self-hosted compiler only. */
41 +
            i++;
42 +
        } else if (!strcmp(arg, "test") || !strcmp(arg, "dump")) {
43 +
            /* Ignored; consumed by the self-hosted compiler only. */
44 +
        } else {
45 +
            bail("unknown option `-%s`", arg);
46 +
        }
47 +
    }
48 +
    return 0;
49 +
}
options.h added +26 -0
1 +
#ifndef OPTIONS_H
2 +
#define OPTIONS_H
3 +
4 +
/* Command-line flags. */
5 +
enum flags {
6 +
    FLAG_TEST = 1 << 9,
7 +
};
8 +
9 +
/* Command-line options structure. */
10 +
struct options {
11 +
    char  *output;
12 +
    char  *inputs[32];
13 +
    int    ninputs;
14 +
    char  *modules[64];
15 +
    int    nmodules;
16 +
    int    argc;
17 +
    char **argv;
18 +
};
19 +
20 +
/* Create a new options struct. */
21 +
struct options options(int argc, char *argv[]);
22 +
23 +
/* Parse the command line options. */
24 +
int options_parse(struct options *o);
25 +
26 +
#endif
parser.c added +2401 -0
1 +
#include <stdarg.h>
2 +
#include <stdio.h>
3 +
#include <stdlib.h>
4 +
#include <string.h>
5 +
6 +
#include "ast.h"
7 +
#include "io.h"
8 +
#include "limits.h"
9 +
#include "parser.h"
10 +
#include "scanner.h"
11 +
#include "strings.h"
12 +
13 +
/* `error(p, fmt, ...)` forwards to `__error` with a trailing NULL
 * sentinel appended, so that calls with no variadic arguments
 * (e.g. `error(p, "msg")`) still expand to a valid call. The sentinel
 * itself is never read by `__error`.
 * NOTE(review): identifiers beginning with a double underscore are
 * reserved by the C standard; consider renaming `__error`. */
#define error(...) __error(__VA_ARGS__, NULL)

/* Forward declarations for the mutually-recursive descent functions. */
static node_t *parse_expr(parser_t *p);
static node_t *parse_stmt_or_block(parser_t *p);
static node_t *parse_cond(parser_t *p);
static node_t *parse_if(parser_t *p);
static node_t *parse_if_let(parser_t *p);
static node_t *parse_if_case(parser_t *p);
static node_t *parse_block(parser_t *p);
static node_t *parse_stmt(parser_t *p);
static node_t *parse_type(parser_t *p);
static node_t *parse_union(parser_t *p, node_t *attrs);
static node_t *parse_record(parser_t *p, node_t *attrs);
static node_t *parse_record_type(parser_t *p);
static node_t *parse_record_lit(parser_t *p, node_t *type_name);
static node_t *parse_postfix(parser_t *p, node_t *expr);
static node_t *parse_as_cast(parser_t *p, node_t *expr);
static node_t *parse_name_type_value(parser_t *p, nodeclass_t cls);
static node_t *parse_static(parser_t *p);
static node_t *parse_ident(parser_t *p, const char *error);
static node_t *parse_ident_or_placeholder(parser_t *p, const char *error);
static node_t *parse_scope_segment(parser_t *p, const char *error);
static node_t *parse_label(parser_t *p, const char *error);
static node_t *parse_assignment(parser_t *p, node_t *lval);
static node_t *parse_fn_call_arg(parser_t *p);
static node_t *parse_match(parser_t *p);
static node_t *parse_match_case(parser_t *p);
static node_t *parse_builtin(parser_t *p);
static node_t *parse_throw(parser_t *p);
static node_t *parse_try(parser_t *p, bool panic, bool optional);
static node_t *parse_panic(parser_t *p);
static bool    token_is_stmt_terminator(tokenclass_t cls);
static bool    stmt_requires_semicolon(const node_t *stmt);
static bool    consume_statement_separator(
    parser_t *p, node_t *stmt, bool require
);
50 +
/* Initialize parser. */
51 +
void parser_init(parser_t *p) {
52 +
    p->root    = NULL;
53 +
    p->errors  = 0;
54 +
    p->nnodes  = 0;
55 +
    p->nptrs   = 0;
56 +
    p->context = PARSE_CTX_NORMAL;
57 +
}
58 +
59 +
/* Report a parse error at the current token, with printf-style formatting.
 *
 * Invoked via the `error(...)` macro, which appends a NULL sentinel to the
 * variadic arguments; the sentinel is never consumed here. Increments
 * `p->errors` so callers can detect that diagnostics were emitted. */
static void __error(parser_t *p, const char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);

    /* Translate the token's byte offset into file:line:col for the user. */
    location_t loc = scanner_get_location(&p->scanner, p->current.position);
    fprintf(stderr, "%s:%u:%u: error: ", loc.file, loc.line, loc.col);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);

    p->errors++;
}
72 +
73 +
/* Check that the current token is equal to the given type. */
74 +
static bool check(parser_t *p, tokenclass_t cls) {
75 +
    return p->current.cls == cls;
76 +
}
77 +
78 +
/* Advance the parser by one token: the lookahead becomes `previous`,
 * and the scanner supplies a fresh lookahead into `current`. */
static void advance(parser_t *p) {
    p->previous = p->current;
    p->current  = scanner_next(&p->scanner);
}
83 +
84 +
/* Like `check`, but also advances the parser if it matches. */
85 +
static bool consume(parser_t *p, tokenclass_t cls) {
86 +
    if (check(p, cls)) {
87 +
        advance(p);
88 +
        return true;
89 +
    }
90 +
    return false;
91 +
}
92 +
93 +
/* Like `consume`, but report an error if it doesn't match. */
94 +
__nodiscard static bool expect(
95 +
    parser_t *p, tokenclass_t cls, const char *message
96 +
) {
97 +
    if (consume(p, cls)) {
98 +
        return true;
99 +
    }
100 +
    error(p, message);
101 +
102 +
    return false;
103 +
}
104 +
105 +
/* Allocate a new AST node of class `cls` from the parser's fixed-size
 * node pool. The node's source span is seeded from the current token;
 * type and symbol links start out NULL and are filled in by later
 * passes. Aborts when the pool (MAX_NODES) is exhausted — there is no
 * recovery path in the bootstrap compiler. */
static node_t *node(parser_t *p, nodeclass_t cls) {
    if (p->nnodes >= MAX_NODES) {
        abort();
    }
    node_t *n = &p->nodes[p->nnodes++];
    n->cls    = cls;
    n->type   = NULL;
    n->sym    = NULL;
    n->offset = p->current.position;
    n->length = p->current.length;
    n->file   = p->scanner.file;

    return n;
}
120 +
121 +
/* Parse a type annotation.
 * Eg. `i32` or `[i32; 12]`.
 *
 * Grammar handled here: optionals `?T`, pointers `*[mut] T`, slices
 * `*[mut] [T]`, arrays `[T; N]`, scoped type identifiers `a::b::C`,
 * the primitive keywords, and function-pointer types
 * `fn(T...) [-> R] [throws (E...)]`. Returns NULL after reporting an
 * error. */
static node_t *parse_type(parser_t *p) {
    /* Parse optional types. */
    if (p->current.cls == T_QUESTION) {
        node_t *opt = node(p, NODE_TYPE);
        advance(p); /* Consume `?`. */

        node_t *elem_type = parse_type(p);
        if (!elem_type)
            return NULL;

        opt->val.type.tclass    = TYPE_OPT;
        opt->val.type.elem_type = elem_type;

        return opt;
    }

    /* Parse pointer types and slice types. */
    if (p->current.cls == T_STAR) {
        advance(p); /* Consume `*`. */

        /* Consume `mut` */
        bool mut = consume(p, T_MUT);

        /* Parse slice types like `*[i32]` or `*mut [i32]` */
        if (p->current.cls == T_LBRACKET) {
            node_t *slice = node(p, NODE_TYPE);
            advance(p); /* Consume `[`. */

            node_t *elem_type = parse_type(p);
            if (!elem_type)
                return NULL;

            if (!expect(p, T_RBRACKET, "expected `]` after slice element type"))
                return NULL;

            slice->val.type.tclass         = TYPE_SLICE;
            slice->val.type.elem_type      = elem_type;
            slice->val.type.info.slice.mut = mut;

            return slice;
        }

        /* Otherwise it's a pointer type like `*i32` or `*mut i32` */
        node_t *ptr       = node(p, NODE_TYPE);
        node_t *elem_type = parse_type(p);
        if (!elem_type)
            return NULL;

        ptr->val.type.tclass       = TYPE_PTR;
        ptr->val.type.elem_type    = elem_type;
        ptr->val.type.info.ptr.mut = mut;

        return ptr;
    }

    /* Parse array types. */
    if (p->current.cls == T_LBRACKET) {
        advance(p); /* Consume `[`. */

        /* Get the element type. */
        node_t *elem_type = parse_type(p);
        if (!elem_type)
            return NULL;

        /* Expect a semicolon separator. */
        if (!expect(p, T_SEMICOLON, "expected `;` in array type"))
            return NULL;

        /* Parse the array length. */
        node_t *length = parse_expr(p);
        if (!length) {
            error(p, "expected array size expression");
            return NULL;
        }
        /* Expect the closing bracket */
        if (!expect(p, T_RBRACKET, "expected `]` after array size"))
            return NULL;

        node_t *ary                     = node(p, NODE_TYPE);
        ary->val.type.tclass            = TYPE_ARRAY;
        ary->val.type.elem_type         = elem_type;
        ary->val.type.info.array.length = length;

        return ary;
    }

    /* Type identifiers are treated differently, as a concrete type cannot
     * yet be assigned. A `::`-separated path is folded into a left-leaning
     * chain of NODE_SCOPE nodes. */
    if (p->current.cls == T_IDENT || p->current.cls == T_SUPER) {
        node_t *path =
            parse_scope_segment(p, "expected type identifier or `super`");
        if (!path)
            return NULL;

        while (consume(p, T_COLON_COLON)) {
            node_t *next =
                parse_scope_segment(p, "expected identifier name after `::`");
            if (!next)
                return NULL;

            node_t *scope          = node(p, NODE_SCOPE);
            scope->val.access.lval = path;
            scope->val.access.rval = next;
            path                   = scope;
        }
        return path;
    }
    node_t *n = node(p, NODE_TYPE);

    /* Primitive keyword types and function-pointer types. */
    switch (p->current.cls) {
    case T_I8:
        advance(p);
        n->val.type.tclass = TYPE_I8;
        return n;
    case T_I16:
        advance(p);
        n->val.type.tclass = TYPE_I16;
        return n;
    case T_I32:
        advance(p);
        n->val.type.tclass = TYPE_I32;
        return n;
    case T_U8:
        advance(p);
        n->val.type.tclass = TYPE_U8;
        return n;
    case T_U16:
        advance(p);
        n->val.type.tclass = TYPE_U16;
        return n;
    case T_U32:
        advance(p);
        n->val.type.tclass = TYPE_U32;
        return n;
    case T_BOOL:
        advance(p);
        n->val.type.tclass = TYPE_BOOL;
        return n;
    case T_VOID:
        advance(p);
        n->val.type.tclass = TYPE_VOID;
        return n;
    case T_OPAQUE:
        advance(p);
        n->val.type.tclass = TYPE_OPAQUE;
        return n;
    case T_FN: {
        advance(p); /* consume `fn` */

        if (!expect(p, T_LPAREN, "expected `(` after `fn`"))
            return NULL;

        n->val.type.tclass         = TYPE_FN;
        n->val.type.info.fn.params = nodespan_alloc(p, MAX_FN_PARAMS);
        n->val.type.info.fn.ret    = NULL;
        n->val.type.info.fn.throws = nodespan_alloc(p, MAX_FN_THROWS);

        /* Parse parameter types */
        if (!check(p, T_RPAREN)) {
            node_t *param = NULL;

            do {
                if (n->val.type.info.fn.params.len >= MAX_FN_PARAMS) {
                    error(p, "too many function pointer parameters");
                    return NULL;
                }
                if (!(param = parse_type(p))) {
                    return NULL;
                }
                nodespan_push(p, &n->val.type.info.fn.params, param);
            } while (consume(p, T_COMMA));
        }
        if (!expect(
                p, T_RPAREN, "expected `)` after function pointer parameters"
            ))
            return NULL;

        /* Parse return type */
        if (consume(p, T_ARROW)) {
            if (!(n->val.type.info.fn.ret = parse_type(p))) {
                return NULL;
            }
        }

        /* Optional `throws (E, ...)` clause listing thrown error types. */
        if (consume(p, T_THROWS)) {
            if (!expect(p, T_LPAREN, "expected `(` after `throws`"))
                return NULL;

            if (!check(p, T_RPAREN)) {
                do {
                    if (n->val.type.info.fn.throws.len >= MAX_FN_THROWS) {
                        error(p, "maximum number of thrown types exceeded");
                        return NULL;
                    }

                    node_t *thrown = parse_type(p);
                    if (!thrown)
                        return NULL;

                    nodespan_push(p, &n->val.type.info.fn.throws, thrown);
                } while (consume(p, T_COMMA));
            }

            if (!expect(p, T_RPAREN, "expected `)` after throws clause"))
                return NULL;
        }
        return n;
    }
    default:
        error(p, "expected type annotation, eg. `i32`, `bool`, etc.");
        return NULL;
    }
}
336 +
337 +
/* Parse an array literal, after the opening `[` has been consumed.
 * Handles the empty literal `[]`, the repeat form `[value; count]`,
 * and the element-list form `[a, b, c]` (a trailing comma is accepted).
 * Consumes the closing `]`. */
static node_t *parse_array_literal(parser_t *p) {
    node_t *n = NULL;

    if (check(p, T_RBRACKET)) { /* Empty array `[]` */
        n                      = node(p, NODE_ARRAY_LIT);
        n->val.array_lit.elems = (nodespan_t){ 0 };
    } else {
        node_t *expr = parse_expr(p);
        if (!expr)
            return NULL;

        /* Check if this is a repeat array [value; count] */
        if (consume(p, T_SEMICOLON)) {
            n                             = node(p, NODE_ARRAY_REPEAT_LIT);
            n->val.array_repeat_lit.value = expr;
            n->val.array_repeat_lit.count = parse_expr(p);

            if (!n->val.array_repeat_lit.count)
                return NULL;
        } else {
            /* Regular array literal [a, b, ...] */
            n                      = node(p, NODE_ARRAY_LIT);
            n->val.array_lit.elems = (nodespan_t){ 0 };
            nodespan_push(p, &n->val.array_lit.elems, expr);

            /* Continue parsing remaining elements. The `!check(T_RBRACKET)`
             * part makes a trailing comma before `]` legal. */
            while (consume(p, T_COMMA) && !check(p, T_RBRACKET)) {
                node_t *elem = parse_expr(p);
                if (!elem)
                    return NULL;

                nodespan_push(p, &n->val.array_lit.elems, elem);
            }
        }
    }
    if (!expect(p, T_RBRACKET, "expected `]` after array elements"))
        return NULL;

    return n;
}
378 +
379 +
/* Parse a builtin invocation: `@sizeOf(T)`, `@alignOf(T)` or
 * `@sliceOf(ptr, len)`.
 *
 * Fix: builtin names are matched on both length and contents. The
 * previous bare `strncmp` was a prefix test, so e.g. `@sizeOfFoo` was
 * accepted as `@sizeOf`, and a token shorter than the builtin name was
 * compared against bytes beyond its own end. Also restores the saved
 * parse context on every error path of the `@sliceOf` branch. */
static node_t *parse_builtin(parser_t *p) {
    node_t *n = node(p, NODE_BUILTIN);

    /* Token is @identifier, skip the '@' to get the name. */
    const char *name   = p->current.start + 1;
    usize       length = p->current.length - 1;

    advance(p); /* consume `@identifier` */

    builtin_kind_t kind;

    if (length == 6 && !strncmp(name, "sizeOf", 6)) {
        kind = BUILTIN_SIZE_OF;
    } else if (length == 7 && !strncmp(name, "alignOf", 7)) {
        kind = BUILTIN_ALIGN_OF;
    } else if (length == 7 && !strncmp(name, "sliceOf", 7)) {
        kind = BUILTIN_SLICE_OF;
    } else {
        error(p, "unknown builtin `@%.*s`", (int)length, name);
        return NULL;
    }
    if (!expect(p, T_LPAREN, "expected `(` after builtin name"))
        return NULL;

    n->val.builtin.kind = kind;
    n->val.builtin.args = (nodespan_t){ 0 };

    /* @sliceOf takes two expression arguments: @sliceOf(ptr, len) */
    if (kind == BUILTIN_SLICE_OF) {
        parse_ctx_t prev = p->context;
        p->context       = PARSE_CTX_NORMAL;

        node_t *ptr_expr = parse_expr(p);
        if (!ptr_expr) {
            p->context = prev;
            return NULL;
        }
        nodespan_push(p, &n->val.builtin.args, ptr_expr);

        if (!expect(
                p, T_COMMA, "expected `,` after first argument to @sliceOf"
            )) {
            p->context = prev;
            return NULL;
        }

        node_t *len_expr = parse_expr(p);
        if (!len_expr) {
            p->context = prev;
            return NULL;
        }
        nodespan_push(p, &n->val.builtin.args, len_expr);

        p->context = prev;
    } else {
        /* @sizeOf and @alignOf take a single type argument. */
        node_t *type_arg = parse_type(p);
        if (!type_arg)
            return NULL;
        nodespan_push(p, &n->val.builtin.args, type_arg);
    }

    if (!expect(p, T_RPAREN, "expected `)` after builtin argument"))
        return NULL;

    return n;
}
440 +
441 +
/* Parse a primary expression: literals, unary operators, identifiers,
 * parenthesized expressions, record literals, builtins and `try`
 * expressions. Postfix forms (field access, indexing, calls, `::`
 * scoping, `..` ranges) are handed off to parse_postfix when the
 * lookahead warrants it.
 *
 * Fixes: (1) the default-case error now casts the token length to `int`
 * for the `%.*s` precision — passing a `usize` through varargs where an
 * `int` is required is undefined behavior (matches the cast convention
 * in parse_builtin); (2) the parenthesized-expression case no longer
 * passes a NULL subexpression into parse_postfix when the inner
 * parse_expr failed. */
static node_t *parse_primary(parser_t *p) {
    node_t *n;

    switch (p->current.cls) {
    case T_LBRACKET: /* Array literal [a, b, c] */
        advance(p);
        return parse_array_literal(p);

    case T_NOT: /* Unary not operator */
        n              = node(p, NODE_UNOP);
        n->val.unop.op = OP_NOT;
        advance(p);

        if (!(n->val.unop.expr = parse_primary(p)))
            return NULL;

        return n;

    case T_RECORD: {
        advance(p); /* consume `record` */

        node_t *rtype = parse_record_type(p);
        if (!rtype)
            return NULL;

        /* In normal context an anonymous record type must be followed
         * by a literal body; elsewhere the bare type stands alone. */
        if (p->context == PARSE_CTX_NORMAL && consume(p, T_LBRACE)) {
            return parse_record_lit(p, rtype);
        }
        if (p->context == PARSE_CTX_NORMAL) {
            error(p, "expected `{` after anonymous record type");
            return NULL;
        }
        return rtype;
    }

    case T_LBRACE:
        if (p->context == PARSE_CTX_CONDITION) {
            error(p, "unexpected `{` in this context");
            return NULL;
        }
        advance(p); /* consume `{` */

        return parse_record_lit(p, NULL);

    case T_MINUS: /* Unary negation operator */
        n              = node(p, NODE_UNOP);
        n->val.unop.op = OP_NEG;
        advance(p);

        if (!(n->val.unop.expr = parse_primary(p)))
            return NULL;

        return n;

    case T_TILDE: /* Bitwise NOT operator */
        n              = node(p, NODE_UNOP);
        n->val.unop.op = OP_BNOT;
        advance(p);

        if (!(n->val.unop.expr = parse_primary(p)))
            return NULL;

        return n;

    case T_AMP: /* Reference: `&expr` or `&mut expr` */
        n = node(p, NODE_REF);
        advance(p);

        n->val.ref.mut = consume(p, T_MUT);

        if (!(n->val.ref.target = parse_primary(p)))
            return NULL;

        return n;

    case T_STAR: /* Dereference: `*expr` */
        n = node(p, NODE_UNOP);
        advance(p);

        n->val.unop.op = OP_DEREF;
        if (!(n->val.unop.expr = parse_primary(p)))
            return NULL;

        return n;

    case T_NUMBER:
        n = node(p, NODE_NUMBER);
        advance(p);

        /* Keep a view into the source text; numeric conversion happens
         * in a later pass. */
        n->val.number.text     = p->previous.start;
        n->val.number.text_len = p->previous.length;

        if (check(p, T_DOT_DOT)) { /* Range literal, e.g. `0..n` */
            return parse_postfix(p, n);
        }
        return n;

    case T_CHAR:
        n = node(p, NODE_CHAR);
        advance(p);

        /* The token text includes the quotes: index 1 is the character
         * (or a backslash), index 2 the escape character. */
        if (p->previous.start[1] == '\\') {
            switch (p->previous.start[2]) {
            case 'n':
                n->val.char_lit = '\n';
                break;
            case 't':
                n->val.char_lit = '\t';
                break;
            case 'r':
                n->val.char_lit = '\r';
                break;
            case '\'':
                n->val.char_lit = '\'';
                break;
            case '\\':
                n->val.char_lit = '\\';
                break;
            default:
                /* Presumably the scanner rejects any other escape
                 * before we get here — TODO(review): confirm, otherwise
                 * this aborts the compiler on bad input. */
                abort();
            }
        } else {
            n->val.char_lit = p->previous.start[1];
        }
        if (check(p, T_DOT_DOT)) {
            return parse_postfix(p, n);
        }
        return n;

    case T_STRING: {
        n = node(p, NODE_STRING);
        advance(p);

        /* Account for quotes. */
        const char *data = p->previous.start + 1;
        usize       len  = p->previous.length - 2;

        /* Intern string. This escapes the string properly and
         * NULL-terminates it. */
        n->val.string_lit.data   = strings_alloc_len(data, len);
        n->val.string_lit.length = strlen(n->val.string_lit.data);

        return n;
    }

    case T_AT_IDENT:
        return parse_builtin(p);

    case T_SUPER:
        n = node(p, NODE_SUPER);
        advance(p);

        if (check(p, T_COLON_COLON)) {
            return parse_postfix(p, n);
        }
        return n;

    case T_IDENT:
        n                   = node(p, NODE_IDENT);
        n->val.ident.name   = p->current.start;
        n->val.ident.length = p->current.length;

        advance(p);

        /* Check for record initializer, eg. `{ x: 1, y: 2 }` */
        if (p->context == PARSE_CTX_NORMAL && consume(p, T_LBRACE)) {
            return parse_record_lit(p, n);
        }

        /* Check for field access or array indexing. */
        if (check(p, T_DOT) || check(p, T_LBRACKET) ||
            check(p, T_COLON_COLON) || check(p, T_LPAREN) ||
            check(p, T_DOT_DOT)) {
            return parse_postfix(p, n);
        }
        return n;

    case T_LPAREN:
        advance(p);

        /* Inside parentheses, we are in a normal parsing context */
        parse_ctx_t prev = p->context;
        p->context       = PARSE_CTX_NORMAL;
        n                = parse_expr(p);
        p->context       = prev;

        if (!expect(p, T_RPAREN, "expected closing `)` after expression"))
            return NULL;

        /* Fix: if the inner expression failed, bail out now instead of
         * possibly handing NULL to parse_postfix below. */
        if (!n)
            return NULL;

        /* Check for field access or array indexing. */
        if (check(p, T_DOT) || check(p, T_LBRACKET) || check(p, T_DOT_DOT)) {
            return parse_postfix(p, n);
        }
        return n;

    case T_TRUE:
        n               = node(p, NODE_BOOL);
        n->val.bool_lit = true;
        advance(p);

        return n;

    case T_FALSE:
        n               = node(p, NODE_BOOL);
        n->val.bool_lit = false;
        advance(p);

        return n;

    case T_NIL:
        n = node(p, NODE_NIL);
        advance(p);

        return n;

    case T_UNDEF:
        n = node(p, NODE_UNDEF);
        advance(p);

        return n;

    case T_UNDERSCORE:
        n = node(p, NODE_PLACEHOLDER);
        advance(p);

        return n;

    case T_TRY: {
        advance(p);

        /* `try!` and `try?` variants, forwarded to parse_try. */
        bool panic    = consume(p, T_BANG);
        bool optional = consume(p, T_QUESTION);

        node_t *expr = parse_try(p, panic, optional);
        if (!expr)
            return NULL;

        if (check(p, T_DOT) || check(p, T_LBRACKET) ||
            check(p, T_COLON_COLON) || check(p, T_LPAREN) ||
            check(p, T_DOT_DOT)) {
            return parse_postfix(p, expr);
        }
        return expr;
    }

    default:
        /* `%.*s` requires an `int` precision argument. */
        error(
            p,
            "expected expression, got `%.*s`",
            (int)p->current.length,
            p->current.start
        );
        return NULL;
    }
}
696 +
697 +
/* Parse binary expressions with precedence climbing. */
698 +
static node_t *parse_binary(parser_t *p, node_t *left, int precedence) {
699 +
    /* Operator precedence table. */
700 +
    static const struct {
701 +
        tokenclass_t tok;
702 +
        binop_t      op;
703 +
        int          prec;
704 +
    } ops[] = {
705 +
        /* Arithmetic operators (higher precedence). */
706 +
        { T_PLUS, OP_ADD, 6 },
707 +
        { T_MINUS, OP_SUB, 6 },
708 +
        { T_STAR, OP_MUL, 7 },
709 +
        { T_SLASH, OP_DIV, 7 },
710 +
        { T_PERCENT, OP_MOD, 7 },
711 +
        /* Shift operators. */
712 +
        { T_LSHIFT, OP_SHL, 5 },
713 +
        { T_RSHIFT, OP_SHR, 5 },
714 +
        /* Bitwise operators. */
715 +
        { T_AMP, OP_BAND, 4 },
716 +
        { T_CARET, OP_XOR, 3 },
717 +
        { T_PIPE, OP_BOR, 2 },
718 +
        /* Comparison operators. */
719 +
        { T_EQ_EQ, OP_EQ, 1 },
720 +
        { T_BANG_EQ, OP_NE, 1 },
721 +
        { T_LT, OP_LT, 1 },
722 +
        { T_GT, OP_GT, 1 },
723 +
        { T_LT_EQ, OP_LE, 1 },
724 +
        { T_GT_EQ, OP_GE, 1 },
725 +
        /* Logical operators (lowest precedence). */
726 +
        { T_AND, OP_AND, 0 },
727 +
        { T_OR, OP_OR, 0 },
728 +
    };
729 +
730 +
    for (;;) {
731 +
        int     next = -1;
732 +
        binop_t op;
733 +
734 +
        /* Find matching operator and its precedence. */
735 +
        for (usize i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
736 +
            if (check(p, ops[i].tok) && ops[i].prec > precedence) {
737 +
                if (next == -1 || ops[i].prec < next) {
738 +
                    next = ops[i].prec;
739 +
                    op   = ops[i].op;
740 +
                }
741 +
            }
742 +
        }
743 +
        if (next == -1)
744 +
            break;
745 +
746 +
        /* Consume the operator token. */
747 +
        advance(p);
748 +
749 +
        /* Parse the right operand. */
750 +
        node_t *right = parse_primary(p);
751 +
752 +
        if (!right)
753 +
            return NULL;
754 +
755 +
        /* Handle `as` casts on the right operand */
756 +
        while (check(p, T_AS)) {
757 +
            right = parse_as_cast(p, right);
758 +
            if (!right)
759 +
                return NULL;
760 +
        }
761 +
        /* Look for higher precedence operators. */
762 +
        for (usize i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
763 +
            if (check(p, ops[i].tok) && ops[i].prec > next) {
764 +
                right = parse_binary(p, right, next);
765 +
                break;
766 +
            }
767 +
        }
768 +
769 +
        /* Build binary expression node. */
770 +
        node_t *binop          = node(p, NODE_BINOP);
771 +
        binop->offset          = left->offset;
772 +
        binop->length          = right->offset + right->length - left->offset;
773 +
        binop->val.binop.op    = op;
774 +
        binop->val.binop.left  = left;
775 +
        binop->val.binop.right = right;
776 +
        left                   = binop;
777 +
    }
778 +
    return left;
779 +
}
780 +
781 +
/* Parse an `if let` statement.
782 +
 * Syntax: if let x in (expr) { ... } else { ... }
783 +
 */
784 +
static node_t *parse_if_let(parser_t *p) {
785 +
    /* Consume 'let' */
786 +
    if (!expect(p, T_LET, "expected 'let'"))
787 +
        return NULL;
788 +
789 +
    /* Check for `if let case` syntax. */
790 +
    if (check(p, T_CASE)) {
791 +
        return parse_if_case(p);
792 +
    }
793 +
    node_t *n = node(p, NODE_IF_LET);
794 +
795 +
    /* Parse identifier or placeholder */
796 +
    if (consume(p, T_UNDERSCORE)) {
797 +
        n->val.if_let_stmt.var = node(p, NODE_PLACEHOLDER);
798 +
    } else if (expect(p, T_IDENT, "expected identifier or '_' after 'let'")) {
799 +
        n->val.if_let_stmt.var                   = node(p, NODE_IDENT);
800 +
        n->val.if_let_stmt.var->val.ident.name   = p->previous.start;
801 +
        n->val.if_let_stmt.var->val.ident.length = p->previous.length;
802 +
    } else {
803 +
        return NULL;
804 +
    }
805 +
    n->val.if_let_stmt.guard = NULL;
806 +
807 +
    /* Expect '=' */
808 +
    if (!expect(p, T_EQ, "expected `=` after identifier"))
809 +
        return NULL;
810 +
811 +
    /* Parse expression yielding an optional. */
812 +
    n->val.if_let_stmt.expr = parse_cond(p);
813 +
    if (!n->val.if_let_stmt.expr)
814 +
        return NULL;
815 +
816 +
    /* Optional boolean guard. */
817 +
    if (consume(p, T_SEMICOLON)) {
818 +
        n->val.if_let_stmt.guard = parse_cond(p);
819 +
    }
820 +
    /* Parse the 'then' branch */
821 +
    n->val.if_let_stmt.lbranch = parse_block(p);
822 +
    if (!n->val.if_let_stmt.lbranch)
823 +
        return NULL;
824 +
825 +
    /* Parse optional 'else' branch */
826 +
    if (consume(p, T_ELSE)) {
827 +
        /* Check for `else if` construct. */
828 +
        if (check(p, T_IF)) {
829 +
            advance(p); /* Consume the 'if' token. */
830 +
831 +
            /* Create a block to hold the nested if statement. */
832 +
            node_t *block          = node(p, NODE_BLOCK);
833 +
            block->val.block.stmts = (nodespan_t){ 0 };
834 +
835 +
            node_t *nested_if = parse_if(p);
836 +
837 +
            if (!nested_if)
838 +
                return NULL;
839 +
840 +
            /* Add the nested if as a statement in the block. */
841 +
            nodespan_push(p, &block->val.block.stmts, nested_if);
842 +
            /* Set the block as the else branch. */
843 +
            n->val.if_let_stmt.rbranch = block;
844 +
        } else {
845 +
            /* Regular else clause. */
846 +
            n->val.if_let_stmt.rbranch = parse_block(p);
847 +
        }
848 +
    } else {
849 +
        n->val.if_let_stmt.rbranch = NULL;
850 +
    }
851 +
852 +
    return n;
853 +
}
854 +
855 +
/* Parse an `if let case` statement. Called after 'let' has been consumed.
 * Syntax: if let case PATTERN = EXPR [; GUARD] { ... } [else ...]
 * An `else if` clause is desugared into a block holding a nested if. */
static node_t *parse_if_case(parser_t *p) {
    node_t *n = node(p, NODE_IF_CASE);

    if (!expect(p, T_CASE, "expected 'case'"))
        return NULL;

    /* Patterns are parsed in normal context regardless of where the
     * enclosing `if` sits; the previous context is restored after. */
    parse_ctx_t pctx = p->context;
    p->context       = PARSE_CTX_NORMAL;
    node_t *pattern  = parse_primary(p);
    p->context       = pctx;

    if (!pattern)
        return NULL;

    n->val.if_case_stmt.pattern = pattern;

    if (!expect(p, T_EQ, "expected `=` after pattern"))
        return NULL;

    n->val.if_case_stmt.expr = parse_cond(p);
    if (!n->val.if_case_stmt.expr)
        return NULL;

    n->val.if_case_stmt.guard = NULL;

    /* Optional boolean guard after `;`. */
    if (consume(p, T_SEMICOLON)) {
        n->val.if_case_stmt.guard = parse_cond(p);
        if (!n->val.if_case_stmt.guard)
            return NULL;
    }
    n->val.if_case_stmt.lbranch = parse_block(p);
    if (!n->val.if_case_stmt.lbranch)
        return NULL;

    if (consume(p, T_ELSE)) {
        if (check(p, T_IF)) {
            advance(p);

            /* Desugar `else if` into a block with a nested if. */
            node_t *block          = node(p, NODE_BLOCK);
            block->val.block.stmts = (nodespan_t){ 0 };

            node_t *nested_if = parse_if(p);

            if (!nested_if)
                return NULL;

            nodespan_push(p, &block->val.block.stmts, nested_if);
            n->val.if_case_stmt.rbranch = block;
        } else {
            n->val.if_case_stmt.rbranch = parse_block(p);
            if (!n->val.if_case_stmt.rbranch)
                return NULL;
        }
    } else {
        n->val.if_case_stmt.rbranch = NULL;
    }
    return n;
}
914 +
915 +
/* Parse a `let case` statement:
 *   `let case PATTERN = EXPR [if GUARD] else { ... };`
 * (note: the guard is introduced by `if` here, unlike the `;`-guard of
 * `if let case`). `let` has already been consumed by the caller. */
static node_t *parse_let_case(parser_t *p) {
    node_t *n     = node(p, NODE_GUARD_CASE);
    /* `p->previous` is the `case` token; remember where the statement
     * began so the node span can be patched up at the end. */
    usize   start = p->previous.position;

    /* Patterns parse in normal context; restore the caller's context. */
    parse_ctx_t pctx = p->context;
    p->context       = PARSE_CTX_NORMAL;
    node_t *pattern  = parse_primary(p);
    p->context       = pctx;

    if (!pattern)
        return NULL;

    n->val.guard_case_stmt.pattern = pattern;

    if (!expect(p, T_EQ, "expected `=` after pattern"))
        return NULL;
    if (!(n->val.guard_case_stmt.expr = parse_cond(p)))
        return NULL;

    n->val.guard_case_stmt.guard = NULL;

    /* Optional `if GUARD` clause. */
    if (consume(p, T_IF)) {
        if (!(n->val.guard_case_stmt.guard = parse_cond(p)))
            return NULL;
    }
    if (!expect(p, T_ELSE, "expected `else` after pattern"))
        return NULL;

    /* The else branch runs when the pattern does not match. */
    if (!(n->val.guard_case_stmt.rbranch = parse_stmt_or_block(p)))
        return NULL;

    /* Widen the node's span to cover the whole statement. */
    n->offset = start;
    n->length = p->previous.position + p->previous.length - start;

    return n;
}
953 +
954 +
/* Parse an `if` expression, with optional `else` or `else if` clauses.
955 +
 * `else if` is desugared into a nested if inside a block. */
956 +
static node_t *parse_if(parser_t *p) {
957 +
    /* Check for `if let` or `if let case` syntax */
958 +
    if (check(p, T_LET)) {
959 +
        return parse_if_let(p);
960 +
    }
961 +
    /* Regular if statement */
962 +
    node_t *n = node(p, NODE_IF);
963 +
964 +
    n->val.if_stmt.cond = parse_cond(p);
965 +
    if (!n->val.if_stmt.cond)
966 +
        return NULL;
967 +
968 +
    n->val.if_stmt.lbranch = parse_block(p);
969 +
    if (!n->val.if_stmt.lbranch)
970 +
        return NULL;
971 +
972 +
    if (consume(p, T_ELSE)) {
973 +
        /* Check for `else if` construct. */
974 +
        if (check(p, T_IF)) {
975 +
            advance(p); /* Consume the 'if' token. */
976 +
977 +
            /* Create a block to hold the nested if statement. */
978 +
            node_t *block          = node(p, NODE_BLOCK);
979 +
            block->val.block.stmts = (nodespan_t){ 0 };
980 +
981 +
            node_t *nested_if = parse_if(p);
982 +
983 +
            if (!nested_if)
984 +
                return NULL;
985 +
986 +
            /* Add the nested if as a statement in the block. */
987 +
            nodespan_push(p, &block->val.block.stmts, nested_if);
988 +
            /* Set the block as the else branch. */
989 +
            n->val.if_stmt.rbranch = block;
990 +
        } else {
991 +
            /* Regular else clause. */
992 +
            n->val.if_stmt.rbranch = parse_block(p);
993 +
        }
994 +
    } else {
995 +
        n->val.if_stmt.rbranch = NULL;
996 +
    }
997 +
    return n;
998 +
}
999 +
1000 +
/* Parse a match statement. */
1001 +
static node_t *parse_match(parser_t *p) {
1002 +
    node_t *n               = node(p, NODE_MATCH);
1003 +
    n->val.match_stmt.cases = (nodespan_t){ 0 };
1004 +
1005 +
    /* Parse the expression to match on */
1006 +
    if (!(n->val.match_stmt.expr = parse_cond(p)))
1007 +
        return NULL;
1008 +
    if (!expect(p, T_LBRACE, "expected '{' before match cases"))
1009 +
        return NULL;
1010 +
1011 +
    /* Parse cases until we reach the end of the match block */
1012 +
    while (!check(p, T_RBRACE) && !check(p, T_EOF)) {
1013 +
        node_t *case_node = parse_match_case(p);
1014 +
        if (!case_node)
1015 +
            return NULL;
1016 +
1017 +
        if (!nodespan_push(p, &n->val.match_stmt.cases, case_node)) {
1018 +
            error(p, "too many cases in match statement");
1019 +
            return NULL;
1020 +
        }
1021 +
1022 +
        /* Consume the comma separating cases if present */
1023 +
        bool consumed = consume(p, T_COMMA);
1024 +
        (void)consumed;
1025 +
    }
1026 +
    if (!expect(p, T_RBRACE, "expected '}' after match cases"))
1027 +
        return NULL;
1028 +
1029 +
    return n;
1030 +
}
1031 +
1032 +
/* Parse a single match case.
 *
 * Grammar: `case pattern[, pattern ...] [if guard] => stmt`, or
 * `else => stmt`. The `else` case is encoded as zero patterns. */
static node_t *parse_match_case(parser_t *p) {
    node_t *n                  = node(p, NODE_MATCH_CASE);
    n->val.match_case.patterns = (nodespan_t){ 0 };
    n->val.match_case.guard    = NULL;

    if (check(p, T_ELSE)) {
        /* For the 'else' case, we use zero patterns
         * to indicate the else case */
        advance(p);
    } else {
        if (!expect(p, T_CASE, "expected 'case' at start of match case"))
            return NULL;

        /* Parse one or more comma-separated patterns */
        do {
            /* Patterns parse in normal context so the enclosing
             * context (e.g. a surrounding condition) doesn't leak in;
             * the previous context is restored right after. */
            parse_ctx_t pctx = p->context;
            p->context       = PARSE_CTX_NORMAL;
            node_t *pattern  = parse_primary(p);
            p->context       = pctx;

            if (!pattern) {
                return NULL;
            }
            /* Add pattern to the case */
            if (!nodespan_push(p, &n->val.match_case.patterns, pattern)) {
                error(p, "too many patterns in case statement");
                return NULL;
            }
        } while (consume(p, T_COMMA)); /* Continue if there's a comma */

        /* Optional `if` guard after the pattern list. */
        if (consume(p, T_IF)) {
            if (!(n->val.match_case.guard = parse_cond(p)))
                return NULL;
        }
    }
    if (!expect(p, T_FAT_ARROW, "expected `=>` after case pattern"))
        return NULL;

    /* The case body is a single statement. */
    n->val.match_case.body = parse_stmt(p);
    if (!n->val.match_case.body)
        return NULL;

    return n;
}
1077 +
1078 +
/* Parse a record declaration.
 *
 * Two forms: a tuple record `Name(T1, T2);` (unnamed positional
 * fields, terminated by `;`) and a record with named fields
 * `Name { a: T, b: T = v }`. `attrs` carries attributes parsed
 * before the declaration and may be NULL. */
static node_t *parse_record(parser_t *p, node_t *attrs) {
    node_t *n                  = node(p, NODE_RECORD);
    n->val.record_decl.attribs = attrs;
    n->val.record_decl.fields  = (nodespan_t){ 0 };
    n->val.record_decl.tuple   = false;
    n->val.record_decl.name    = parse_ident(p, "expected record name");

    if (!n->val.record_decl.name)
        return NULL;

    if (consume(p, T_LPAREN)) {
        /* Tuple record: positional, unnamed, type-only fields. */
        n->val.record_decl.tuple = true;

        if (!check(p, T_RPAREN)) {
            do {
                node_t *field        = node(p, NODE_RECORD_FIELD);
                field->val.var.ident = NULL; /* No field name for tuples */
                field->val.var.type  = parse_type(p);
                field->val.var.value = NULL;
                field->val.var.align = NULL;

                if (!field->val.var.type)
                    return NULL;

                if (!nodespan_push(p, &n->val.record_decl.fields, field)) {
                    error(p, "too many record fields");
                    return NULL;
                }
                /* Trailing comma before `)` is allowed. */
            } while (consume(p, T_COMMA) && !check(p, T_RPAREN));
        }
        if (!expect(p, T_RPAREN, "expected `)` after record fields"))
            return NULL;

        /* Unlabeled records must end with semicolon */
        if (!expect(p, T_SEMICOLON, "expected `;` after record declaration"))
            return NULL;
    } else {
        /* Record with named fields */
        if (!expect(p, T_LBRACE, "expected `{` before record body"))
            return NULL;

        node_t *field;
        do {
            /* Each field is `name: type [= value]`. */
            if (!(field = parse_name_type_value(p, NODE_RECORD_FIELD)))
                return NULL;
            if (!nodespan_push(p, &n->val.record_decl.fields, field)) {
                error(p, "too many record fields");
                return NULL;
            }
            /* Trailing comma before `}` is allowed. */
        } while (consume(p, T_COMMA) && !check(p, T_RBRACE));

        if (!expect(p, T_RBRACE, "expected `}`"))
            return NULL;
    }
    return n;
}
1136 +
1137 +
/* Parse an anonymous record type: `record { name: type, ... }`.
 * Field initializers are rejected here — a bare type has no values. */
static node_t *parse_record_type(parser_t *p) {
    node_t *n                 = node(p, NODE_RECORD_TYPE);
    n->val.record_type.fields = (nodespan_t){ 0 };

    if (!expect(p, T_LBRACE, "expected `{` after `record`"))
        return NULL;

    if (!check(p, T_RBRACE)) {
        do {
            node_t *f = parse_name_type_value(p, NODE_RECORD_FIELD);
            if (!f)
                return NULL;
            /* `= value` makes no sense on an anonymous type. */
            if (f->val.var.value) {
                error(p, "anonymous record fields cannot have initializers");
                return NULL;
            }
            if (!nodespan_push(p, &n->val.record_type.fields, f)) {
                error(p, "too many record fields");
                return NULL;
            }
            /* Allow a trailing comma. */
        } while (consume(p, T_COMMA) && !check(p, T_RBRACE));
    }

    if (!expect(p, T_RBRACE, "expected `}` after record fields"))
        return NULL;

    return n;
}
1165 +
1166 +
/* Parse a single record literal field (labeled or shorthand). */
1167 +
static node_t *parse_record_lit_field(parser_t *p) {
1168 +
    node_t *n     = node(p, NODE_RECORD_LIT_FIELD);
1169 +
    usize   start = p->current.position;
1170 +
1171 +
    record_lit_field_t *field = &n->val.record_lit_field;
1172 +
1173 +
    /* Field must start with an identifier. */
1174 +
    node_t *name = parse_ident(p, "expected field name");
1175 +
    if (!name)
1176 +
        return NULL;
1177 +
1178 +
    if (consume(p, T_COLON)) {
1179 +
        /* Labeled field: `name: value` */
1180 +
        field->name  = name;
1181 +
        field->value = parse_expr(p);
1182 +
        if (!field->value)
1183 +
            return NULL;
1184 +
    } else {
1185 +
        /* Shorthand syntax: `{ x }` is equivalent to `{ x: x }` */
1186 +
        field->name  = name;
1187 +
        field->value = name;
1188 +
    }
1189 +
    n->offset = start;
1190 +
    n->length = p->previous.position + p->previous.length - start;
1191 +
1192 +
    return n;
1193 +
}
1194 +
1195 +
/* Parse a record literal expression (e.g., Point { x: 1, y: 2 })
1196 +
 * Also handles pattern syntax: Variant { .. } to discard all fields */
1197 +
static node_t *parse_record_lit(parser_t *p, node_t *type_name) {
1198 +
    node_t *n                = node(p, NODE_RECORD_LIT);
1199 +
    n->val.record_lit.type   = type_name;
1200 +
    n->val.record_lit.fields = (nodespan_t){ 0 };
1201 +
    n->val.record_lit.etc    = false;
1202 +
1203 +
    do {
1204 +
        /* Check for `..` to discard remaining fields. */
1205 +
        if (consume(p, T_DOT_DOT)) {
1206 +
            n->val.record_lit.etc = true;
1207 +
            break;
1208 +
        }
1209 +
        node_t *field = parse_record_lit_field(p);
1210 +
        if (!field)
1211 +
            return NULL;
1212 +
1213 +
        if (!nodespan_push(p, &n->val.record_lit.fields, field)) {
1214 +
            error(p, "too many record fields");
1215 +
            return NULL;
1216 +
        }
1217 +
1218 +
    } while (consume(p, T_COMMA) && !check(p, T_RBRACE));
1219 +
1220 +
    if (!expect(p, T_RBRACE, "expected '}' to end record literal"))
1221 +
        return NULL;
1222 +
1223 +
    return n;
1224 +
}
1225 +
1226 +
/* Parse a union declaration.
 * Eg. `union Color { Red, Green, Blue = 5 }`
 *
 * Variants may be bare (`Red`), carry an explicit integer value
 * (`Blue = 5`), a tuple payload (`Foo(Type)`), or a record payload
 * (`Bar { x: i32, y: i32 }`). A leading `case` keyword per variant
 * and a trailing comma are both optional. */
static node_t *parse_union(parser_t *p, node_t *attrs) {
    node_t *n                  = node(p, NODE_UNION);
    n->val.union_decl.attribs  = attrs;
    n->val.union_decl.variants = (nodespan_t){ 0 };
    n->val.union_decl.name     = parse_ident(p, "expected union name");

    if (!n->val.union_decl.name)
        return NULL;

    /* Parse union body with { ... } */
    if (!expect(p, T_LBRACE, "expected `{` before union body"))
        return NULL;

    /* Parse union variants. */
    if (!check(p, T_RBRACE)) {
        do {
            /* Allow optional `case` keyword before variant name. */
            consume(p, T_CASE);

            /* Parse variant name. */
            node_t *variant_name = parse_ident(p, "expected variant name");
            if (!variant_name)
                return NULL;

            node_t          *v       = node(p, NODE_UNION_VARIANT);
            union_variant_t *variant = &v->val.union_variant;

            variant->name       = variant_name;
            variant->type       = NULL;
            variant->value_expr = NULL;

            if (consume(p, T_LPAREN)) {
                /* Tuple-like variant: Foo(Type) */
                node_t *payload = parse_type(p);
                if (!payload)
                    return NULL;
                variant->type = payload;
                if (!expect(p, T_RPAREN, "expected `)` after variant type"))
                    return NULL;
            } else if (check(p, T_LBRACE)) {
                /* Struct-like variant: Bar { x: i32, y: i32 } */
                node_t *payload = parse_record_type(p);
                if (!payload)
                    return NULL;
                variant->type = payload;
            } else {
                /* Check for explicit value assignment. */
                if (consume(p, T_EQ)) {
                    if (!expect(
                            p, T_NUMBER, "expected integer literal after `=`"
                        ))
                        return NULL;

                    /* Build a number node directly from the token; the
                     * numeric value is left zeroed here — presumably
                     * evaluated in a later pass (confirm). */
                    token_t literal_tok = p->previous;
                    node_t *literal     = node(p, NODE_NUMBER);

                    literal->offset              = literal_tok.position;
                    literal->length              = literal_tok.length;
                    literal->val.number.text     = literal_tok.start;
                    literal->val.number.text_len = literal_tok.length;
                    literal->val.number.value    = (imm_t){ 0 };

                    variant->value_expr = literal;
                } else {
                    /* Auto-assign value. */
                }
            }
            /* Add variant to declaration node. */
            if (!nodespan_push(p, &n->val.union_decl.variants, v)) {
                error(p, "too many union variants");
                return NULL;
            }
            /* Allow trailing comma. */
        } while (consume(p, T_COMMA) && !check(p, T_RBRACE));
    }
    if (!expect(p, T_RBRACE, "expected `}`"))
        return NULL;

    return n;
}
1308 +
1309 +
/* Parse a code block or an expression */
1310 +
static node_t *parse_stmt_or_block(parser_t *p) {
1311 +
    if (check(p, T_LBRACE)) {
1312 +
        return parse_block(p);
1313 +
    }
1314 +
    node_t *stmt         = parse_stmt(p);
1315 +
    node_t *blk          = node(p, NODE_BLOCK);
1316 +
    blk->val.block.stmts = (nodespan_t){ 0 };
1317 +
    nodespan_push(p, &blk->val.block.stmts, stmt);
1318 +
1319 +
    return blk;
1320 +
}
1321 +
1322 +
/* Parse a code block, enclosed by `{}`. */
1323 +
static node_t *parse_block(parser_t *p) {
1324 +
    if (!expect(p, T_LBRACE, "expected '{' before block")) {
1325 +
        return NULL;
1326 +
    }
1327 +
    node_t *n = node(p, NODE_BLOCK);
1328 +
    node_t *stmt;
1329 +
1330 +
    /* Parse statements. */
1331 +
    n->val.block.stmts = (nodespan_t){ 0 };
1332 +
    while (!check(p, T_RBRACE) && !check(p, T_EOF)) {
1333 +
        usize start = p->current.position;
1334 +
1335 +
        if (!(stmt = parse_stmt(p)))
1336 +
            return NULL;
1337 +
1338 +
        if (!consume_statement_separator(p, stmt, true))
1339 +
            return NULL;
1340 +
1341 +
        stmt->offset = start;
1342 +
        stmt->length = p->current.position - start;
1343 +
1344 +
        if (!nodespan_push(p, &n->val.block.stmts, stmt)) {
1345 +
            error(p, "too many statements in block");
1346 +
            return NULL;
1347 +
        }
1348 +
    }
1349 +
1350 +
    if (!expect(p, T_RBRACE, "expected matching '}' after block"))
1351 +
        return NULL;
1352 +
1353 +
    return n;
1354 +
}
1355 +
1356 +
/* Parse an expression. */
1357 +
static node_t *parse_expr(parser_t *p) {
1358 +
    node_t *lval;
1359 +
1360 +
    if ((lval = parse_primary(p)) == NULL)
1361 +
        return NULL;
1362 +
1363 +
    /* Handle `as` casts before binary operators (higher precedence than
1364 +
     * binary ops, lower than unary) */
1365 +
    while (check(p, T_AS)) {
1366 +
        lval = parse_as_cast(p, lval);
1367 +
        if (!lval)
1368 +
            return NULL;
1369 +
    }
1370 +
    lval = parse_binary(p, lval, -1);
1371 +
1372 +
    return lval;
1373 +
}
1374 +
1375 +
/* Parse an assignment statement. */
1376 +
static node_t *parse_assignment(parser_t *p, node_t *lval) {
1377 +
    /* We've already verified this is an assignment. */
1378 +
    if (lval->cls != NODE_IDENT && lval->cls != NODE_ACCESS &&
1379 +
        lval->cls != NODE_ARRAY_INDEX &&
1380 +
        !(lval->cls == NODE_UNOP && lval->val.unop.op == OP_DEREF)) {
1381 +
        error(
1382 +
            p,
1383 +
            "can't assign to `%.*s`",
1384 +
            lval->length,
1385 +
            &p->scanner.source[lval->offset]
1386 +
        );
1387 +
        return NULL;
1388 +
    }
1389 +
    node_t *rval;
1390 +
1391 +
    if (!(rval = parse_expr(p)))
1392 +
        return NULL;
1393 +
1394 +
    node_t *assign          = node(p, NODE_ASSIGN);
1395 +
    assign->val.assign.lval = lval;
1396 +
    assign->val.assign.rval = rval;
1397 +
1398 +
    return assign;
1399 +
}
1400 +
1401 +
/* Parse a condition. */
1402 +
static node_t *parse_cond(parser_t *p) {
1403 +
    parse_ctx_t prev = p->context;
1404 +
    p->context       = PARSE_CTX_CONDITION;
1405 +
1406 +
    node_t *cond = parse_expr(p);
1407 +
    if (!cond) {
1408 +
        p->context = prev;
1409 +
        return NULL;
1410 +
    }
1411 +
    p->context = prev;
1412 +
1413 +
    return cond;
1414 +
}
1415 +
1416 +
/* True if `cls` is a token that may legally follow a statement
 * without an intervening expression (used to detect bare `return`,
 * bare `panic`, etc.). */
static bool token_is_stmt_terminator(tokenclass_t cls) {
    return cls == T_SEMICOLON || cls == T_RBRACE || cls == T_COMMA ||
        cls == T_CASE || cls == T_ELSE || cls == T_EOF;
}
1429 +
1430 +
static bool stmt_requires_semicolon(const node_t *stmt) {
1431 +
    switch (stmt->cls) {
1432 +
    case NODE_IF:
1433 +
    case NODE_IF_LET:
1434 +
    case NODE_IF_CASE:
1435 +
    case NODE_WHILE:
1436 +
    case NODE_WHILE_LET:
1437 +
    case NODE_LOOP:
1438 +
    case NODE_FOR:
1439 +
    case NODE_MATCH:
1440 +
    case NODE_BLOCK:
1441 +
    case NODE_FN:
1442 +
    case NODE_RECORD:
1443 +
    case NODE_UNION:
1444 +
        return false;
1445 +
    default:
1446 +
        return true;
1447 +
    }
1448 +
}
1449 +
1450 +
static bool consume_statement_separator(
1451 +
    parser_t *p, node_t *stmt, bool require
1452 +
) {
1453 +
    if (stmt_requires_semicolon(stmt)) {
1454 +
        return expect(p, T_SEMICOLON, "expected `;` after statement");
1455 +
    }
1456 +
    if (require)
1457 +
        consume(p, T_SEMICOLON);
1458 +
    return true;
1459 +
}
1460 +
1461 +
/* Parse a `return` statement. */
1462 +
static node_t *parse_return(parser_t *p) {
1463 +
    node_t *n = node(p, NODE_RETURN);
1464 +
1465 +
    if (!token_is_stmt_terminator(p->current.cls)) {
1466 +
        n->val.return_stmt.value = parse_expr(p);
1467 +
        if (!n->val.return_stmt.value)
1468 +
            return NULL;
1469 +
    } else {
1470 +
        n->val.return_stmt.value = NULL; /* Return void. */
1471 +
    }
1472 +
1473 +
    return n;
1474 +
}
1475 +
1476 +
/* Parse a `throw` statement and its error expression. */
static node_t *parse_throw(parser_t *p) {
    node_t *n    = node(p, NODE_THROW);
    node_t *expr = parse_expr(p);

    if (!expr)
        return NULL;

    n->val.throw_stmt.expr = expr;
    return n;
}
1484 +
1485 +
/* Parse a `break` statement. */
1486 +
static node_t *parse_break(parser_t *p) {
1487 +
    node_t *n = node(p, NODE_BREAK);
1488 +
1489 +
    return n;
1490 +
}
1491 +
1492 +
/* Parse a `for` statement.
 *
 * Grammar: `for var[, idx] in iter { body } [else { rbranch }]`.
 * Both the loop variable and the optional index may be `_`. */
static node_t *parse_for(parser_t *p) {
    node_t *n               = node(p, NODE_FOR);
    n->val.for_stmt.rbranch = NULL;
    n->val.for_stmt.idx     = NULL;

    /* Parse the loop variable name or placeholder */
    if (!(n->val.for_stmt.var =
              parse_ident_or_placeholder(p, "expected identifier or '_'")))
        return NULL;

    /* Check for optional index variable: `for x, i in xs` */
    if (consume(p, T_COMMA)) {
        /* Parse the index variable name or placeholder */
        if (!(n->val.for_stmt.idx = parse_ident_or_placeholder(
                  p, "expected index identifier or '_' after comma"
              )))
            return NULL;
    }

    if (!expect(p, T_IN, "expected `in`"))
        return NULL;

    /* The iterated expression parses in condition context. */
    if (!(n->val.for_stmt.iter = parse_cond(p)))
        return NULL;

    if (!(n->val.for_stmt.body = parse_block(p)))
        return NULL;

    /* Parse optional `else` clause */
    if (consume(p, T_ELSE)) {
        if (!(n->val.for_stmt.rbranch = parse_block(p)))
            return NULL;
    }
    return n;
}
1528 +
1529 +
/* Parse a `while let` statement.
 *
 * Grammar: `while let var = expr[; guard] { body } [else { rbranch }]`.
 * The `while` token was already consumed by the caller; `var` may be
 * `_` to discard the bound value. */
static node_t *parse_while_let(parser_t *p) {
    if (!expect(p, T_LET, "expected `let`"))
        return NULL;

    node_t *n = node(p, NODE_WHILE_LET);

    /* Parse identifier or placeholder */
    if (consume(p, T_UNDERSCORE)) {
        n->val.while_let_stmt.var = node(p, NODE_PLACEHOLDER);
    } else if (expect(p, T_IDENT, "expected identifier or '_' after `let`")) {
        /* Build the identifier node from the token just consumed. */
        n->val.while_let_stmt.var                   = node(p, NODE_IDENT);
        n->val.while_let_stmt.var->val.ident.name   = p->previous.start;
        n->val.while_let_stmt.var->val.ident.length = p->previous.length;
    } else {
        return NULL;
    }
    n->val.while_let_stmt.guard   = NULL;
    n->val.while_let_stmt.rbranch = NULL;

    if (!expect(p, T_EQ, "expected `=` after identifier"))
        return NULL;

    /* Parse expression yielding an optional. */
    n->val.while_let_stmt.expr = parse_cond(p);
    if (!n->val.while_let_stmt.expr)
        return NULL;

    /* Optional guard condition after semicolon. */
    if (consume(p, T_SEMICOLON)) {
        if (!(n->val.while_let_stmt.guard = parse_cond(p)))
            return NULL;
    }

    /* Parse the loop body and optional 'else' branch */
    if (!(n->val.while_let_stmt.body = parse_block(p)))
        return NULL;
    if (consume(p, T_ELSE)) {
        if (!(n->val.while_let_stmt.rbranch = parse_block(p)))
            return NULL;
    }
    return n;
}
1572 +
1573 +
/* Parse a `while` statement. */
1574 +
static node_t *parse_while(parser_t *p) {
1575 +
    /* Check for `while let` syntax */
1576 +
    if (check(p, T_LET)) {
1577 +
        return parse_while_let(p);
1578 +
    }
1579 +
    node_t *n                 = node(p, NODE_WHILE);
1580 +
    n->val.while_stmt.rbranch = NULL;
1581 +
1582 +
    if (!(n->val.while_stmt.cond = parse_cond(p)))
1583 +
        return NULL;
1584 +
    if (!(n->val.while_stmt.body = parse_block(p)))
1585 +
        return NULL;
1586 +
1587 +
    /* Parse optional else clause */
1588 +
    if (consume(p, T_ELSE)) {
1589 +
        if (!(n->val.while_stmt.rbranch = parse_block(p)))
1590 +
            return NULL;
1591 +
    }
1592 +
    return n;
1593 +
}
1594 +
1595 +
/* Parse a `loop` statement. */
1596 +
static node_t *parse_loop(parser_t *p) {
1597 +
    node_t *n = node(p, NODE_LOOP);
1598 +
1599 +
    if (!(n->val.loop_stmt.body = parse_block(p)))
1600 +
        return NULL;
1601 +
1602 +
    return n;
1603 +
}
1604 +
1605 +
/* Parse a try expression: `try expr [catch [binding] { ... }]`.
 *
 * NOTE(review): `panic` and `optional` look like they mark the
 * panic-on-error and error-to-optional forms of `try` — confirm
 * against the callers. `handlers` is pre-allocated here but not
 * populated by this function. */
static node_t *parse_try(parser_t *p, bool panic, bool optional) {
    node_t *n = node(p, NODE_TRY);

    n->val.try_expr.expr       = NULL;
    n->val.try_expr.catch_expr = NULL;
    n->val.try_expr.handlers   = nodespan_alloc(p, MAX_TRY_CATCHES);
    n->val.try_expr.panic      = panic;
    n->val.try_expr.optional   = optional;

    if (!(n->val.try_expr.expr = parse_primary(p)))
        return NULL;

    /* Parse catch clause: `catch { ... }` or `catch e { ... }` */
    if (consume(p, T_CATCH)) {
        node_t *catch_node                   = node(p, NODE_CATCH);
        catch_node->val.catch_clause.binding = NULL;
        catch_node->val.catch_clause.body    = NULL;
        catch_node->val.catch_clause.scope   = NULL;

        /* Check for error binding: `catch e { ... }` */
        if (check(p, T_IDENT)) {
            /* Identifier node is built from the current token, which
             * is then consumed. */
            node_t *binding                      = node(p, NODE_IDENT);
            binding->val.ident.name              = p->current.start;
            binding->val.ident.length            = p->current.length;
            catch_node->val.catch_clause.binding = binding;
            advance(p);
        }

        if (!check(p, T_LBRACE)) {
            error(p, "expected `{` after `catch`");
            return NULL;
        }
        if (!(catch_node->val.catch_clause.body = parse_block(p)))
            return NULL;

        n->val.try_expr.catch_expr = catch_node;
    }
    return n;
}
1644 +
1645 +
/* Parse a `panic` statement. Accepted forms: bare `panic`,
 * `panic expr`, and `panic { expr }`. */
static node_t *parse_panic(parser_t *p) {
    node_t *n = node(p, NODE_PANIC);

    /* Braced form: `panic { "Something's wrong!" }` */
    if (consume(p, T_LBRACE)) {
        if (!(n->val.panic_stmt.message = parse_expr(p)))
            return NULL;
        if (!expect(p, T_RBRACE, "expected closing `}` after expression"))
            return NULL;
        return n;
    }

    /* Bare `panic` with no message. */
    if (token_is_stmt_terminator(p->current.cls)) {
        n->val.panic_stmt.message = NULL;
        return n;
    }

    /* `panic expr` form. */
    if (!(n->val.panic_stmt.message = parse_expr(p)))
        return NULL;

    return n;
}
1670 +
1671 +
/* Parse a name, type, and optional value.
1672 +
 *
1673 +
 * Used for record field declarations, variable declarations, and record field
1674 +
 * initializations. */
1675 +
static node_t *parse_name_type_value(parser_t *p, nodeclass_t cls) {
1676 +
    node_t *n        = node(p, cls);
1677 +
    usize   start    = p->current.position;
1678 +
    node_t *type     = NULL;
1679 +
    bool    is_typed = false;
1680 +
1681 +
    n->val.var.ident =
1682 +
        parse_ident_or_placeholder(p, "expected identifier or '_'");
1683 +
    if (!n->val.var.ident)
1684 +
        return NULL;
1685 +
1686 +
    if (cls == NODE_VAR) {
1687 +
        /* Type annotation is optional for variable declarations. */
1688 +
        if (consume(p, T_COLON))
1689 +
            is_typed = true;
1690 +
    } else {
1691 +
        if (!expect(p, T_COLON, "expected `:` after identifier"))
1692 +
            return NULL;
1693 +
        is_typed = true;
1694 +
    }
1695 +
1696 +
    if (is_typed) {
1697 +
        type = parse_type(p);
1698 +
        if (!type)
1699 +
            return NULL;
1700 +
1701 +
        if (cls == NODE_VAR) {
1702 +
            n->val.var.align = NULL;
1703 +
1704 +
            if (consume(p, T_ALIGN)) {
1705 +
                if (!expect(p, T_LPAREN, "expected `(` after `align`"))
1706 +
                    return NULL;
1707 +
1708 +
                n->val.var.align            = node(p, NODE_ALIGN);
1709 +
                n->val.var.align->val.align = parse_expr(p);
1710 +
1711 +
                if (!expect(p, T_RPAREN, "expected `)` after expression"))
1712 +
                    return NULL;
1713 +
            }
1714 +
        }
1715 +
    } else if (cls == NODE_VAR) {
1716 +
        n->val.var.align = NULL;
1717 +
    }
1718 +
    n->val.var.type  = type;
1719 +
    n->val.var.value = NULL;
1720 +
1721 +
    /* Parse the optional value. */
1722 +
    if (consume(p, T_EQ)) {
1723 +
        node_t *value = parse_expr(p);
1724 +
        if (!value)
1725 +
            return NULL;
1726 +
        n->val.var.value = value;
1727 +
    }
1728 +
    /* Set the node location. */
1729 +
    n->offset = start;
1730 +
    n->length = p->previous.position + p->previous.length - start;
1731 +
1732 +
    return n;
1733 +
}
1734 +
1735 +
/* Parse a variable declaration. */
1736 +
static node_t *parse_var(parser_t *p, bool mutable) {
1737 +
    node_t *var = parse_name_type_value(p, NODE_VAR);
1738 +
1739 +
    if (!var)
1740 +
        return NULL;
1741 +
1742 +
    var->val.var.mutable = mutable;
1743 +
1744 +
    /* Parse optional `else` clause. */
1745 +
    if (consume(p, T_ELSE)) {
1746 +
        if (mutable) {
1747 +
            error(p, "let-else bindings cannot be mutable");
1748 +
            return NULL;
1749 +
        }
1750 +
        if (!var->val.var.value) {
1751 +
            error(p, "let-else requires an initializer");
1752 +
            return NULL;
1753 +
        }
1754 +
        node_t *rbranch = parse_stmt_or_block(p);
1755 +
        if (!rbranch)
1756 +
            return NULL;
1757 +
1758 +
        var->cls                        = NODE_GUARD_LET;
1759 +
        var->val.guard_let_stmt.var     = var->val.var.ident;
1760 +
        var->val.guard_let_stmt.expr    = var->val.var.value;
1761 +
        var->val.guard_let_stmt.rbranch = rbranch;
1762 +
        var->length = p->previous.position + p->previous.length - var->offset;
1763 +
1764 +
        return var;
1765 +
    }
1766 +
    var->length = p->previous.position + p->previous.length - var->offset;
1767 +
1768 +
    return var;
1769 +
}
1770 +
1771 +
/* Parse a static variable declaration. */
1772 +
static node_t *parse_static(parser_t *p) {
1773 +
    node_t *n     = node(p, NODE_STATIC);
1774 +
    usize   start = p->previous.position;
1775 +
1776 +
    node_t *ident = parse_label(p, "expected identifier in static declaration");
1777 +
    if (!ident)
1778 +
        return NULL;
1779 +
1780 +
    node_t *type = parse_type(p);
1781 +
    if (!type)
1782 +
        return NULL;
1783 +
1784 +
    if (!expect(p, T_EQ, "expected `=` in static declaration"))
1785 +
        return NULL;
1786 +
1787 +
    node_t *value = parse_expr(p);
1788 +
    if (!value)
1789 +
        return NULL;
1790 +
1791 +
    n->val.static_decl.ident = ident;
1792 +
    n->val.static_decl.type  = type;
1793 +
    n->val.static_decl.value = value;
1794 +
    n->offset                = start;
1795 +
    n->length = p->previous.position + p->previous.length - start;
1796 +
1797 +
    return n;
1798 +
}
1799 +
1800 +
/* Parse a constant declaration. */
1801 +
static node_t *parse_const(parser_t *p) {
1802 +
    node_t *var = parse_name_type_value(p, NODE_CONST);
1803 +
1804 +
    if (!var)
1805 +
        return NULL;
1806 +
1807 +
    return var;
1808 +
}
1809 +
1810 +
/* Parse a module use declaration.
 *
 * Grammar: `use a::b::c` or `use a::b::*` (wildcard import). The
 * path is built as a left-nested chain of NODE_SCOPE nodes, so
 * `a::b::c` becomes `scope(scope(a, b), c)`. */
static node_t *parse_use(parser_t *p, node_t *attrs) {
    usize start = p->current.position;

    /* Parse the first identifier in the path. */
    node_t *path = parse_scope_segment(p, "expected module name after 'use'");
    if (!path)
        return NULL;

    /* Track if this is a wildcard import. */
    bool wildcard = false;

    /* Continue parsing the dotted path if present. */
    while (consume(p, T_COLON_COLON)) {
        /* Check for wildcard import (e.g., `use foo::*`) */
        if (consume(p, T_STAR)) {
            wildcard = true;
            break;
        }

        /* The path so far becomes the left side of a new scope node. */
        node_t *n          = node(p, NODE_SCOPE);
        n->val.access.lval = path;

        /* Parse the sub-module name. */
        node_t *mod =
            parse_scope_segment(p, "expected identifier or '*' after '::'");
        if (!mod)
            return NULL;

        n->val.access.rval = mod;
        path               = n;
    }

    /* Create a use node and wrap the path. */
    node_t *use_node                = node(p, NODE_USE);
    use_node->val.use_decl.path     = path;
    use_node->val.use_decl.attribs  = attrs;
    use_node->val.use_decl.wildcard = wildcard;

    /* Set position information. */
    use_node->offset = start;
    use_node->length = p->previous.position + p->previous.length - start;

    return use_node;
}
1855 +
1856 +
/* Parse a module declaration. */
1857 +
static node_t *parse_mod(parser_t *p, node_t *attrs) {
1858 +
    usize start = p->current.position;
1859 +
1860 +
    node_t *ident = parse_ident(p, "expected module name after 'mod'");
1861 +
    if (!ident)
1862 +
        return NULL;
1863 +
    node_t *mod_node               = node(p, NODE_MOD);
1864 +
    mod_node->val.mod_decl.ident   = ident;
1865 +
    mod_node->val.mod_decl.attribs = attrs;
1866 +
1867 +
    mod_node->offset = start;
1868 +
    mod_node->length = p->previous.position + p->previous.length - start;
1869 +
1870 +
    return mod_node;
1871 +
}
1872 +
1873 +
/* Parse a function parameter. */
1874 +
static node_t *parse_fn_param(parser_t *p) {
1875 +
    /* Create parameter node. */
1876 +
    node_t *param = node(p, NODE_PARAM);
1877 +
    node_t *name  = parse_label(p, "expected parameter name");
1878 +
    if (!name)
1879 +
        return NULL;
1880 +
1881 +
    param->val.param.ident = name;
1882 +
1883 +
    /* Parse and store parameter type. */
1884 +
    if (!(param->val.param.type = parse_type(p)))
1885 +
        return NULL;
1886 +
1887 +
    return param;
1888 +
}
1889 +
1890 +
/* Parse top-level statements up to end-of-file, collecting them
 * into a module-body node. */
static node_t *parse_module_body(parser_t *p) {
    node_t *body          = node(p, NODE_MOD_BODY);
    body->val.block.stmts = (nodespan_t){ 0 };

    while (!check(p, T_EOF)) {
        usize   begin = p->current.position;
        node_t *stmt  = parse_stmt(p);

        if (!stmt)
            return NULL;

        /* Each top-level statement must be properly terminated. */
        if (!consume_statement_separator(p, stmt, true))
            return NULL;

        stmt->offset = begin;
        stmt->length = p->current.position - begin;

        if (!nodespan_push(p, &body->val.block.stmts, stmt)) {
            error(p, "too many statements in module");
            return NULL;
        }
    }
    return body;
}
1914 +
1915 +
/* Parse a function definition.
 *
 * Grammar: `fn name(params) [-> type] [throws (types)] { body }`.
 * The `fn` keyword has already been consumed. `attrs` holds the
 * attributes parsed before the keyword (may be NULL); an extern
 * attribute switches the tail from a body to a terminating `;`. */
static node_t *parse_fn(parser_t *p, node_t *attrs) {
    node_t *n     = node(p, NODE_FN);
    node_t *param = NULL;

    /* Parse the function name. */
    node_t *name = parse_ident(p, "expected function name");
    if (!name)
        return NULL;

    n->val.fn_decl.ident   = name;
    n->val.fn_decl.params  = nodespan_alloc(p, MAX_FN_PARAMS);
    n->val.fn_decl.throws  = nodespan_alloc(p, MAX_FN_THROWS);
    n->val.fn_decl.attribs = attrs;
    n->val.fn_decl.body    = NULL;

    /* Check if it's an extern function */
    bool is_extern = (attrs && attrs->val.attrib & ATTRIB_EXTERN);

    if (!expect(p, T_LPAREN, "expected `(` after function name"))
        return NULL;

    /* Parse parameters with types */
    if (!check(p, T_RPAREN)) {
        do {
            /* Enforce the limit before parsing so the spans allocated
             * above can never overflow. */
            if (n->val.fn_decl.params.len >= MAX_FN_PARAMS) {
                error(
                    p,
                    "maximum number of function parameters (%d) exceeded",
                    MAX_FN_PARAMS
                );
                return NULL;
            }
            if (!(param = parse_fn_param(p))) {
                return NULL;
            }
            node_fn_add_param(p, n, param);

        } while (consume(p, T_COMMA));
    }
    if (!expect(p, T_RPAREN, "expected matching `)` after parameters list"))
        return NULL;

    /* Optional return type, introduced by `->`. */
    if (consume(p, T_ARROW)) {
        if (!(n->val.fn_decl.return_type = parse_type(p))) {
            return NULL;
        }
    } else {
        n->val.fn_decl.return_type = NULL;
    }
    /* Optional `throws (T1, T2, ...)` clause. */
    if (consume(p, T_THROWS)) {
        if (!expect(p, T_LPAREN, "expected `(` after `throws`"))
            return NULL;

        if (!check(p, T_RPAREN)) {
            do {
                if (n->val.fn_decl.throws.len >= MAX_FN_THROWS) {
                    error(p, "maximum number of thrown types exceeded");
                    return NULL;
                }
                node_t *thrown = parse_type(p);
                if (!thrown)
                    return NULL;

                /* Push cannot overflow here: `len` was checked above. */
                nodespan_push(p, &n->val.fn_decl.throws, thrown);
            } while (consume(p, T_COMMA));
        }
        if (!expect(p, T_RPAREN, "expected `)` after throws clause"))
            return NULL;
    }

    /* For extern functions, expect semicolon instead of body */
    if (is_extern) {
        if (!expect(
                p, T_SEMICOLON, "expected `;` after extern function declaration"
            ))
            return NULL;
    } else {
        if (!(n->val.fn_decl.body = parse_block(p)))
            return NULL;
    }
    return n;
}
1998 +
1999 +
/* Try to parse an annotation like `@default`.
2000 +
 * Returns true if a known annotation was found and consumed.
2001 +
 * Returns false if not an annotation (e.g. @sizeOf) - tokens not consumed. */
2002 +
static bool try_parse_annotation(parser_t *p, attrib_t *attrs) {
2003 +
    if (!check(p, T_AT_IDENT))
2004 +
        return false;
2005 +
2006 +
    /* Token is @identifier, skip the '@' to get the name. */
2007 +
    const char *name   = p->current.start + 1;
2008 +
    usize       length = p->current.length - 1;
2009 +
2010 +
    if (length == 7 && !strncmp(name, "default", 7)) {
2011 +
        advance(p); /* Consume `@default`. */
2012 +
        *attrs |= ATTRIB_DEFAULT;
2013 +
        return true;
2014 +
    }
2015 +
    if (length == 4 && !strncmp(name, "test", 4)) {
2016 +
        advance(p); /* Consume `@test`. */
2017 +
        *attrs |= ATTRIB_TEST;
2018 +
        return true;
2019 +
    }
2020 +
    if (length == 9 && !strncmp(name, "intrinsic", 9)) {
2021 +
        advance(p); /* Consume `@intrinsic`. */
2022 +
        *attrs |= ATTRIB_INTRINSIC;
2023 +
        return true;
2024 +
    }
2025 +
    /* Not a known annotation - leave for parse_builtin to handle. */
2026 +
    return false;
2027 +
}
2028 +
2029 +
/* Parse statement attributes. */
2030 +
static node_t *parse_attribs(parser_t *p) {
2031 +
    node_t  *n     = NULL;
2032 +
    attrib_t attrs = ATTRIB_NONE;
2033 +
2034 +
    for (;;) {
2035 +
        if (consume(p, T_PUB)) {
2036 +
            if (attrs & ATTRIB_PUB) {
2037 +
                error(p, "duplicate `pub` attribute");
2038 +
                return NULL;
2039 +
            }
2040 +
            attrs |= ATTRIB_PUB;
2041 +
        } else if (try_parse_annotation(p, &attrs)) {
2042 +
            /* Annotation was consumed, continue. */
2043 +
        } else if (consume(p, T_EXTERN)) {
2044 +
            if (attrs & ATTRIB_EXTERN) {
2045 +
                error(p, "duplicate `extern` attribute");
2046 +
                return NULL;
2047 +
            }
2048 +
            attrs |= ATTRIB_EXTERN;
2049 +
        } else {
2050 +
            break;
2051 +
        }
2052 +
    }
2053 +
2054 +
    if (attrs != ATTRIB_NONE) {
2055 +
        n             = node(p, NODE_ATTRIBUTE);
2056 +
        n->val.attrib = attrs;
2057 +
    }
2058 +
    return n;
2059 +
}
2060 +
2061 +
/* Parse a statement.
 *
 * Dispatches on the leading token: each keyword case consumes the
 * keyword, then hands off to its dedicated parse function. Anything
 * else is parsed as an expression statement or an assignment. */
static node_t *parse_stmt(parser_t *p) {
    /* Parse any attributes that come before the statement. */
    node_t *attrs = parse_attribs(p);

    if (attrs) {
        /* Attributes may only precede declaration statements. */
        switch (p->current.cls) {
        case T_FN:
        case T_UNION:
        case T_RECORD:
        case T_MOD:
        case T_CONST:
        case T_USE:
            break;
        default:
            error(p, "attributes are not allowed in this context");
            return NULL;
        }

        /* Verify extern is only used with functions */
        if ((attrs->val.attrib & ATTRIB_EXTERN) && p->current.cls != T_FN) {
            error(
                p, "extern attribute is only allowed on function declarations"
            );
            return NULL;
        }
    }

    switch (p->current.cls) {
    case T_LBRACE:
        return parse_block(p);
    case T_LET:
        advance(p);
        /* `let case` and `let mut` select specialized forms. */
        if (consume(p, T_CASE)) {
            return parse_let_case(p);
        }
        if (consume(p, T_MUT)) {
            return parse_var(p, true);
        }
        return parse_var(p, false);
    case T_STATIC:
        advance(p);
        return parse_static(p);
    case T_CONST:
        advance(p);
        return parse_const(p);
    case T_USE:
        advance(p);
        return parse_use(p, attrs);
    case T_MOD:
        advance(p);
        return parse_mod(p, attrs);
    case T_RETURN:
        advance(p);
        return parse_return(p);
    case T_THROW:
        advance(p);
        return parse_throw(p);
    case T_BREAK:
        advance(p);
        return parse_break(p);
    case T_WHILE:
        advance(p);
        return parse_while(p);
    case T_FOR:
        advance(p);
        return parse_for(p);
    case T_LOOP:
        advance(p);
        return parse_loop(p);
    case T_IF:
        advance(p);
        return parse_if(p);
    case T_MATCH:
        advance(p);
        return parse_match(p);
    case T_FN:
        advance(p);
        return parse_fn(p, attrs);
    case T_UNION:
        advance(p);
        return parse_union(p, attrs);
    case T_RECORD:
        advance(p);
        return parse_record(p, attrs);
    case T_PANIC:
        advance(p);
        return parse_panic(p);
    default:
        break;
    }
    /* Parse an expression as a statement or an assignment statement. */
    node_t *expr;

    if ((expr = parse_expr(p)) == NULL)
        return NULL;

    /* If we see an equals sign, this is an assignment statement */
    if (consume(p, T_EQ)) {
        return parse_assignment(p, expr);
    }

    /* Create an expression statement node. */
    node_t *stmt        = node(p, NODE_EXPR_STMT);
    stmt->val.expr_stmt = expr;

    return stmt;
}
2169 +
2170 +
/* Parse a function argument, which may have an optional label. */
2171 +
static node_t *parse_fn_call_arg(parser_t *p) {
2172 +
    usize   start = p->current.position;
2173 +
    node_t *arg   = node(p, NODE_CALL_ARG);
2174 +
2175 +
    /* Parse the expression first */
2176 +
    node_t *expr = parse_expr(p);
2177 +
    if (!expr)
2178 +
        return NULL;
2179 +
2180 +
    /* Check if this was an identifier followed by a colon
2181 +
     * (making it a label), or the complete expression. */
2182 +
    if (expr->cls == NODE_IDENT && consume(p, T_COLON)) {
2183 +
        /* It's a label, parse the actual value expression */
2184 +
        arg->val.call_arg.label = expr;
2185 +
2186 +
        if (!(arg->val.call_arg.expr = parse_expr(p))) {
2187 +
            return NULL;
2188 +
        }
2189 +
    } else {
2190 +
        arg->val.call_arg.label = NULL;
2191 +
        arg->val.call_arg.expr  = expr;
2192 +
    }
2193 +
    arg->offset = start;
2194 +
    arg->length = p->previous.position + p->previous.length - start;
2195 +
2196 +
    return arg;
2197 +
}
2198 +
2199 +
/* Parse an identifier. */
2200 +
static node_t *parse_ident(parser_t *p, const char *error) {
2201 +
    if (!expect(p, T_IDENT, error))
2202 +
        return NULL;
2203 +
2204 +
    node_t *ident = node(p, NODE_IDENT);
2205 +
2206 +
    ident->val.ident.name   = p->previous.start;
2207 +
    ident->val.ident.length = p->previous.length;
2208 +
2209 +
    return ident;
2210 +
}
2211 +
2212 +
/* Parse either an identifier or a placeholder ('_'). */
2213 +
static node_t *parse_ident_or_placeholder(parser_t *p, const char *error) {
2214 +
    if (consume(p, T_UNDERSCORE)) {
2215 +
        return node(p, NODE_PLACEHOLDER);
2216 +
    }
2217 +
    return parse_ident(p, error);
2218 +
}
2219 +
2220 +
/* Parse a label.
2221 +
 * Returns an identifier node. Expects IDENT followed by COLON. */
2222 +
static node_t *parse_label(parser_t *p, const char *error) {
2223 +
    if (!expect(p, T_IDENT, error))
2224 +
        return NULL;
2225 +
2226 +
    node_t *ident = node(p, NODE_IDENT);
2227 +
2228 +
    ident->val.ident.name   = p->previous.start;
2229 +
    ident->val.ident.length = p->previous.length;
2230 +
2231 +
    if (!expect(p, T_COLON, "expected ':' after identifier"))
2232 +
        return NULL;
2233 +
2234 +
    return ident;
2235 +
}
2236 +
2237 +
/* Parse one segment of a scope path: either `super` or an identifier. */
static node_t *parse_scope_segment(parser_t *p, const char *error) {
    if (!check(p, T_SUPER))
        return parse_ident(p, error);

    /* Create the node before consuming so it anchors at the keyword. */
    node_t *super_node = node(p, NODE_SUPER);
    advance(p);
    return super_node;
}
2245 +
2246 +
/* Parse an `expr as type` cast.
 * `expr` is the already-parsed left-hand side. Returns NULL without
 * consuming anything when no `as` keyword follows (callers use this
 * as "not a cast"), and NULL after an error otherwise. */
static node_t *parse_as_cast(parser_t *p, node_t *expr) {
    if (!consume(p, T_AS))
        return NULL;

    node_t *as           = node(p, NODE_AS);
    as->val.as_expr.expr = expr;

    /* Parse the target type */
    node_t *typ = parse_type(p);
    if (!typ)
        return NULL;

    as->val.as_expr.type = typ;
    as->offset           = expr->offset;
    /* End the span at the last consumed token, consistent with the
     * span computation used elsewhere in this file; measuring to the
     * *current* token would swallow trailing whitespace into the node. */
    as->length = p->previous.position + p->previous.length - as->offset;

    return as;
}
2264 +
2265 +
/* Parse postfix expressions (field access and array indexing).
 *
 * This function handles both field access (expr.field) and array indexing
 * (expr[index]) in a unified way, enabling arbitrarily complex nested
 * expressions like `x.y.z[1].w[2][3].q`.
 */
static node_t *parse_postfix(parser_t *p, node_t *expr) {
    node_t *result = expr;

    /* Keep folding postfix operators into `result` until none apply. */
    for (;;) {
        if (consume(p, T_DOT)) { /* Field access. */
            node_t *n          = node(p, NODE_ACCESS);
            n->val.access.lval = result;

            node_t *field = parse_ident(p, "expected field name after `.`");
            if (!field)
                return NULL;

            /* NOTE(review): parse_ident already fills name/length from
             * p->previous; this re-assignment looks redundant — confirm
             * it can be removed. */
            field->val.ident.name   = p->previous.start;
            field->val.ident.length = p->previous.length;
            n->val.access.rval      = field;

            result = n;
        } else if (consume(p, T_DOT_DOT)) { /* Range: `a..` or `a..b`. */
            node_t *range          = node(p, NODE_RANGE);
            range->val.range.start = result;
            range->val.range.end   = NULL;

            /* Check if there's a right-hand side for the range: a token
             * that closes the surrounding construct means there isn't. */
            if (!check(p, T_RBRACKET) && !check(p, T_SEMICOLON) &&
                !check(p, T_COMMA) && !check(p, T_RPAREN) &&
                !check(p, T_LBRACE)) {
                if (!(range->val.range.end = parse_expr(p))) {
                    return NULL;
                }
            }
            result = range;
        } else if (consume(p, T_COLON_COLON)) { /* Scope access */
            node_t *ident =
                parse_scope_segment(p, "expected identifier name after `::`");
            if (!ident)
                return NULL;

            node_t *n          = node(p, NODE_SCOPE);
            n->val.access.lval = result;
            n->val.access.rval = ident;

            result = n;
        } else if (consume(p, T_LBRACKET)) { /* Array indexing or slicing. */
            node_t *expr = NULL; /* Index or range inside the brackets. */

            if (consume(p, T_DOT_DOT)) {
                /* Either `..` or `..n` */
                /* Create range node with NULL start and end. */
                expr                  = node(p, NODE_RANGE);
                expr->val.range.start = NULL;
                expr->val.range.end   = NULL;

                if (!check(p, T_RBRACKET)) {
                    if (!(expr->val.range.end = parse_expr(p))) {
                        return NULL;
                    }
                }
            } else {
                /* Either `n`, `n..` or `n..m` */
                node_t *index = parse_expr(p);
                if (!index)
                    return NULL;

                expr = index;
            }
            /* Create array index node with the index expression */
            node_t *n          = node(p, NODE_ARRAY_INDEX);
            n->val.access.lval = result;
            n->val.access.rval = expr;

            n->offset = result->offset;
            n->length = result->length;

            /* Expect closing bracket */
            if (!expect(p, T_RBRACKET, "expected `]` after array index"))
                return NULL;

            result = n;
        } else if (consume(p, T_LPAREN)) { /* Parse function call. */
            node_t *call          = node(p, NODE_CALL);
            call->val.call.callee = result;
            call->val.call.args   = nodespan_alloc(p, MAX_FN_PARAMS);

            node_t *arg = NULL;
            if (!check(p, T_RPAREN)) {
                do {
                    if (!(arg = parse_fn_call_arg(p))) {
                        return NULL;
                    }
                    nodespan_push(p, &call->val.call.args, arg);
                } while (consume(p, T_COMMA));
            }
            if (!expect(p, T_RPAREN, "expected `)` after function arguments"))
                return NULL;

            result = call;
        } else if (p->context == PARSE_CTX_NORMAL &&
                   result->cls == NODE_SCOPE && check(p, T_LBRACE)) {
            /* Record literal after scope access: `Union::Variant { ... }`.
             * Only in normal context — inside a condition `{` opens the
             * body block instead (see parse_ctx_t). */
            advance(p); /* consume `{` */

            node_t *literal = parse_record_lit(p, result);
            if (!literal)
                return NULL;

            result = literal;
        } else {
            /* No postfix operators to try. */
            break;
        }
    }
    return result;
}
2384 +
2385 +
/* Parse a complete program, return the root of the AST, or `NULL`
2386 +
 * if parsing failed. */
2387 +
node_t *parser_parse(parser_t *p) {
2388 +
    p->current = scanner_next(&p->scanner);
2389 +
2390 +
    /* Create a top-level module. */
2391 +
    node_t *root = parse_module_body(p);
2392 +
    if (!root)
2393 +
        return NULL;
2394 +
2395 +
    if (!expect(p, T_EOF, "expected end-of-file"))
2396 +
        return NULL;
2397 +
2398 +
    root->length = (usize)(p->scanner.cursor - p->scanner.source);
2399 +
2400 +
    return (p->root = root);
2401 +
}
parser.h added +38 -0
1 +
#ifndef PARSER_H
#define PARSER_H

#include "ast.h"
#include "limits.h"
#include "scanner.h"

/* Parsing context to handle ambiguities */
typedef enum {
    PARSE_CTX_NORMAL,    /* Normal expression context */
    PARSE_CTX_CONDITION, /* Inside condition where { starts block */
} parse_ctx_t;

/* Parser state */
typedef struct parser_t {
    scanner_t   scanner;  /* Token source. */
    token_t     current;  /* Lookahead token being examined. */
    token_t     previous; /* Most recently consumed token. */
    node_t     *root;     /* Root of the AST, set by parser_parse(). */
    u32         errors;   /* Number of errors reported so far. */
    node_t      nodes[MAX_NODES]; /* Fixed-size AST node arena. */
    u32         nnodes;           /* Nodes allocated from `nodes`. */
    parse_ctx_t context;          /* Current ambiguity-resolution context. */

    /* Pool for variable-length node pointer arrays.
     * Nodes store an index + count into this pool instead of
     * embedding large arrays, keeping node_t small. */
    struct node_t *ptrs[MAX_NODEPTR_POOL];
    u32            nptrs; /* Pointers allocated from `ptrs`. */
} parser_t;

/* Initialize parser with scanner */
void parser_init(parser_t *p);

/* Parse a complete program */
node_t *parser_parse(parser_t *p);

#endif
radiance.c added +147 -0
1 +
#include <errno.h>
2 +
#include <stdio.h>
3 +
#include <stdlib.h>
4 +
#include <string.h>
5 +
#include <unistd.h>
6 +
7 +
#include "ast.h"
8 +
#include "desugar.h"
9 +
#include "gen.h"
10 +
#include "io.h"
11 +
#include "module.h"
12 +
#include "options.h"
13 +
#include "parser.h"
14 +
#include "resolver.h"
15 +
#include "scanner.h"
16 +
#include "strings.h"
17 +
#include "symtab.h"
18 +
#include "types.h"
19 +
#include "util.h"
20 +
21 +
static int compile(struct options *o) {
22 +
    if (o->ninputs > 1) {
23 +
        bail("too many inputs (%d)", o->ninputs);
24 +
    }
25 +
    if (o->ninputs < 1) {
26 +
        bail("no input files");
27 +
    }
28 +
    if (!o->output) {
29 +
        bail("an output file must be specified with `-o`");
30 +
    }
31 +
32 +
    FILE       *text     = NULL;
33 +
    FILE       *data_ro  = NULL;
34 +
    FILE       *data_rw  = NULL;
35 +
    const char *rootpath = o->inputs[0];
36 +
    int         err      = MODULE_OK;
37 +
38 +
    static module_manager_t mm;
39 +
    module_manager_init(&mm, rootpath);
40 +
41 +
    /* Register the root module */
42 +
    if (!(mm.root = module_manager_register(&mm, rootpath))) {
43 +
        bail("error registering root module '%s'", rootpath);
44 +
    }
45 +
46 +
    /* Register additional modules specified with -mod.
47 +
     *
48 +
     * Module paths are given as full relative paths (e.g. `lib/std/foo.rad`).
49 +
     * The qualified name is derived by stripping the leading directory
50 +
     * component (e.g. `std/foo.rad`). */
51 +
    for (int i = 0; i < o->nmodules; i++) {
52 +
        const char *path      = o->modules[i];
53 +
        const char *qualified = strchr(path, '/');
54 +
        qualified             = qualified ? qualified + 1 : path;
55 +
56 +
        if (!module_manager_register_qualified(&mm, path, qualified)) {
57 +
            bail("error registering module '%s'", path);
58 +
        }
59 +
    }
60 +
61 +
    /* Parse all modules */
62 +
    if (!module_manager_parse(&mm, &err)) {
63 +
        bail("error parsing modules");
64 +
    }
65 +
66 +
    /* Run desugaring pass on all modules */
67 +
    for (usize i = 0; i < mm.nmodules; i++) {
68 +
        module_t *mod = &mm.modules[i];
69 +
70 +
        if (mod->ast) {
71 +
            static desugar_t d;
72 +
73 +
            mod->ast = desugar_run(&d, mod, mod->ast);
74 +
            if (!mod->ast) {
75 +
                bail("desugaring failed for module %s", mod->name);
76 +
            }
77 +
        }
78 +
    }
79 +
80 +
    static resolve_t t;
81 +
    resolve_init(&t, &mm);
82 +
83 +
    if (!resolve_run(&t, mm.root)) {
84 +
        bail("type checking failed");
85 +
    }
86 +
87 +
    /* Initialize code generator */
88 +
    static gen_t g;
89 +
    gen_init(&g, &t.types, &mm, 0);
90 +
91 +
    if (!(text = fopen(o->output, "w"))) {
92 +
        bail("failed to open '%s' for writing: %s", o->output, strerror(errno));
93 +
    }
94 +
95 +
    /* Generate code */
96 +
    if (gen_emit(&g, mm.root) != 0) {
97 +
        bail("code generation failed");
98 +
    }
99 +
100 +
    /* Write binary output */
101 +
    if (g.data.ro_size > 0) {
102 +
        char datapath_ro[MAX_PATH_LEN] = { 0 };
103 +
        strncpy(datapath_ro, o->output, MAX_PATH_LEN);
104 +
        strlcat(datapath_ro, ".ro.data", MAX_PATH_LEN);
105 +
106 +
        if (!(data_ro = fopen(datapath_ro, "w"))) {
107 +
            bail(
108 +
                "failed to open '%s' for writing: %s",
109 +
                datapath_ro,
110 +
                strerror(errno)
111 +
            );
112 +
        }
113 +
    }
114 +
    if (g.data.rw_init_total > 0) {
115 +
        char datapath[MAX_PATH_LEN] = { 0 };
116 +
        strncpy(datapath, o->output, MAX_PATH_LEN);
117 +
        strlcat(datapath, ".rw.data", MAX_PATH_LEN);
118 +
119 +
        if (!(data_rw = fopen(datapath, "w"))) {
120 +
            bail(
121 +
                "failed to open '%s' for writing: %s", datapath, strerror(errno)
122 +
            );
123 +
        }
124 +
    }
125 +
    gen_dump_bin(&g, text, data_ro, data_rw);
126 +
127 +
    if (data_ro)
128 +
        fclose(data_ro);
129 +
    if (data_rw)
130 +
        fclose(data_rw);
131 +
    if (text)
132 +
        fclose(text);
133 +
134 +
    return 0;
135 +
}
136 +
137 +
int main(int argc, char *argv[]) {
138 +
    strings_init();
139 +
140 +
    struct options o = options(argc, argv);
141 +
    options_parse(&o);
142 +
143 +
    if (compile(&o) != 0) {
144 +
        bail("compilation failed");
145 +
    }
146 +
    return 0;
147 +
}
ralloc.c added +88 -0
1 +
/**
2 +
 * Register allocator.
3 +
 * Uses a simple stack-based algorithm.
4 +
 */
5 +
#include <stdio.h>
6 +
#include <stdlib.h>
7 +
8 +
#include "io.h"
9 +
#include "ralloc.h"
10 +
#include "riscv.h"
11 +
#include "types.h"
12 +
13 +
/* Order of temporary registers to allocate. */
14 +
const reg_t ralloc_regs[] = { A0, A1, A2, A3, A4, A5, A6, A7,
15 +
                              T0, T1, T2, T3, T4, T5, T6 };
16 +
17 +
ralloc_t ralloc(void) {
18 +
    return (ralloc_t){ .regs = { false } };
19 +
}
20 +
21 +
/* Allocate the first free register in allocation order.
 * Bails out of the process when no register is available. */
reg_t ralloc_next(ralloc_t *ra) {
    for (int slot = 0; slot < RALLOC_NREGS; slot++) {
        if (ra->regs[slot])
            continue;
        ra->regs[slot] = true;
        return ralloc_regs[slot];
    }
    bail("out of registers");
}
30 +
31 +
/* Allocate a free register other than `avoid` when possible; falls
 * back to plain allocation (which may still return `avoid`). */
reg_t ralloc_next_except(ralloc_t *ra, reg_t avoid) {
    for (int slot = 0; slot < RALLOC_NREGS; slot++) {
        bool usable = !ra->regs[slot] && ralloc_regs[slot] != avoid;
        if (usable) {
            ra->regs[slot] = true;
            return ralloc_regs[slot];
        }
    }
    return ralloc_next(ra);
}
40 +
41 +
/* Mark register `r` as free.
 * Registers outside the allocatable pool are silently ignored. */
void ralloc_free(ralloc_t *ra, reg_t r) {
    int slot = 0;
    while (slot < RALLOC_NREGS && ralloc_regs[slot] != r)
        slot++;
    if (slot < RALLOC_NREGS)
        ra->regs[slot] = false;
}
49 +
50 +
/* Mark register `r` as in use.
 * Registers outside the allocatable pool are silently ignored; the
 * function does not itself detect double-reservation. */
void ralloc_reserve(ralloc_t *ra, reg_t r) {
    int slot = 0;
    while (slot < RALLOC_NREGS && ralloc_regs[slot] != r)
        slot++;
    if (slot < RALLOC_NREGS)
        ra->regs[slot] = true;
}
58 +
59 +
/* Returns whether `r` is free to reserve.
 * Registers outside the allocatable pool report false. */
bool ralloc_is_free(ralloc_t *ra, reg_t r) {
    for (int slot = 0; slot < RALLOC_NREGS; slot++) {
        if (ralloc_regs[slot] != r)
            continue;
        return !ra->regs[slot];
    }
    return false;
}
67 +
68 +
/* Release every register at once. */
void ralloc_free_all(ralloc_t *ra) {
    for (int slot = RALLOC_NREGS - 1; slot >= 0; slot--)
        ra->regs[slot] = false;
}
73 +
74 +
/* Snapshot the allocation state into `reserved` (must hold at least
 * RALLOC_NREGS entries). The A0 slot is deliberately skipped and left
 * untouched — presumably because A0 carries a value across the
 * save/restore pair; confirm with callers. */
void ralloc_save(ralloc_t *ra, bool *reserved) {
    for (int slot = 0; slot < RALLOC_NREGS; slot++) {
        if (ralloc_regs[slot] == A0)
            continue;
        reserved[slot] = ra->regs[slot];
    }
}
81 +
82 +
/* Restore the allocation state captured by ralloc_save().
 * The A0 slot is skipped, mirroring ralloc_save(). */
void ralloc_restore(ralloc_t *ra, bool *reserved) {
    for (int slot = 0; slot < RALLOC_NREGS; slot++) {
        if (ralloc_regs[slot] == A0)
            continue;
        ra->regs[slot] = reserved[slot];
    }
}
ralloc.h added +39 -0
1 +
#ifndef ralloc_H
#define ralloc_H

#include <stdio.h>

#include "riscv.h"
#include "types.h"

/* Number of available registers. */
#define RALLOC_NREGS 15

/* Order of registers to allocate. */
extern const reg_t ralloc_regs[RALLOC_NREGS];

/* Register allocator context. */
typedef struct {
    bool regs[RALLOC_NREGS]; /* Registers status: true = in use. */
} ralloc_t;

/* Return a new register allocator. */
ralloc_t ralloc(void);
/* Allocate and return a new register. Bails when none is free. */
reg_t ralloc_next(ralloc_t *ra);
/* Allocate and return a new register, avoiding `r` when possible. */
reg_t ralloc_next_except(ralloc_t *ra, reg_t r);
/* Free the given register. No-op for registers outside the pool. */
void ralloc_free(ralloc_t *ra, reg_t r);
/* Free all allocated registers. */
void ralloc_free_all(ralloc_t *ra);
/* Returns whether this register is free to reserve. */
bool ralloc_is_free(ralloc_t *ra, reg_t r);
/* Reserve a register.
 * NOTE(review): the implementation does not fail when the register is
 * already reserved — callers should check ralloc_is_free() first. */
void ralloc_reserve(ralloc_t *ra, reg_t r);
/* Save the register allocation state into `reserved`
 * (RALLOC_NREGS entries); the A0 slot is left untouched. */
void ralloc_save(ralloc_t *ra, bool *reserved);
/* Restore the register allocation state saved by ralloc_save();
 * the A0 slot is left untouched. */
void ralloc_restore(ralloc_t *ra, bool *reserved);

#endif
resolver.c added +3600 -0
1 +
#include <assert.h>
2 +
#include <limits.h>
3 +
#include <stdint.h>
4 +
#include <stdio.h>
5 +
#include <stdlib.h>
6 +
#include <string.h>
7 +
8 +
#include "ast.h"
9 +
#include "io.h"
10 +
#include "limits.h"
11 +
#include "module.h"
12 +
#include "resolver.h"
13 +
#include "riscv.h"
14 +
#include "strings.h"
15 +
#include "symtab.h"
16 +
#include "util.h"
17 +
18 +
#define max(a, b) (a > b ? a : b)
19 +
20 +
#define DEFAULT_SIZE  4
21 +
#define DEFAULT_ALIGN 4
22 +
23 +
static type_t *alloc_type(
24 +
    resolve_t  *t,
25 +
    typeclass_t kind,
26 +
    const char *name,
27 +
    usize       namel,
28 +
    i32         size,
29 +
    i32         align
30 +
);
31 +
static type_t *alloc_array_type(resolve_t *t, type_t *elem, usize length);
32 +
static type_t *alloc_slice_type(
33 +
    resolve_t *t, type_t *elem, type_t *base, bool mut
34 +
);
35 +
static type_t *alloc_union_type(resolve_t *t, union_decl_t *uni);
36 +
static type_t *alloc_result_type(resolve_t *t, type_t *payload, type_t *err);
37 +
static type_t *alloc_record_type(resolve_t *t, record_decl_t *rec);
38 +
static type_t *alloc_anonymous_record_type(resolve_t *t);
39 +
static type_t *alloc_ptr_type(resolve_t *t, type_t *base, bool mut);
40 +
static type_t *alloc_opt_type(resolve_t *t, type_t *elem);
41 +
static type_t *resolve_node(resolve_t *t, node_t *n, type_t *expected_type);
42 +
static type_t *resolve_var(resolve_t *t, node_t *n);
43 +
static type_t *resolve_const(resolve_t *t, node_t *n);
44 +
static type_t *resolve_static(resolve_t *t, node_t *n);
45 +
static type_t *resolve_use(resolve_t *t, node_t *n);
46 +
static type_t *resolve_mod_decl(resolve_t *t, node_t *n);
47 +
static bool    resolve_mod_def(resolve_t *t, module_t *module);
48 +
static type_t *resolve_scope(resolve_t *t, node_t *n);
49 +
static type_t *resolve_block(resolve_t *t, node_t *n);
50 +
static type_t *resolve_fn_def(resolve_t *t, node_t *n);
51 +
static type_t *resolve_fn_decl(resolve_t *t, node_t *n);
52 +
static type_t *resolve_number(resolve_t *t, node_t *n, type_t *expected);
53 +
static type_t *resolve_builtin(resolve_t *t, node_t *n, type_t *expected);
54 +
static bool    resolve_decls(resolve_t *t, module_t *module);
55 +
static type_t *resolve_throw(resolve_t *t, node_t *n);
56 +
static type_t *resolve_try_expr(resolve_t *t, node_t *n, type_t *expected);
57 +
static bool    declare_record(resolve_t *t, node_t *n);
58 +
static bool    declare_enum(resolve_t *t, node_t *n);
59 +
static type_t *resolve_tuple_record_constructor(
60 +
    resolve_t *t, node_t *call, type_t *record_type
61 +
);
62 +
static type_t *type_unify(
63 +
    resolve_t *t, type_t *a, type_t *b, node_t *n, bool co, const char *ctx
64 +
);
65 +
static type_t   *resolve_type(resolve_t *t, node_t *n);
66 +
static symbol_t *resolve_name(resolve_t *t, node_t *n, symkind_t kind);
67 +
static bool      resolve_const_usize(resolve_t *t, node_t *expr, usize *value);
68 +
static bool      symbol_add(resolve_t *t, node_t *ident, node_t *n);
69 +
static void      finalize_type_layout(resolve_t *t);
70 +
static void      module_scope_path(node_t *node, char *path_str);
71 +
static bool      node_is_super(const node_t *n);
72 +
static module_t *module_super_ancestor(module_t *mod, usize depth);
73 +
static bool      node_diverges(node_t *n);
74 +
75 +
/* Initialize type checker.
 *
 * Sets up the global scope, zeroes the symbol/type pointer pools, and
 * interns the built-in primitive types. The positional arguments to
 * `alloc_type` appear to be (class, name, name-length, size, align) --
 * the literal after each name matches strlen(name); TODO(review):
 * confirm against alloc_type's definition. */
void resolve_init(resolve_t *t, module_manager_t *mm) {
    t->fn              = NULL;
    t->global          = symtab_scope(NULL, NULL);
    t->scope           = t->global;
    t->mm              = mm;
    t->module          = NULL;
    t->recordid        = 0;
    t->ctx             = TC_CTX_NORMAL;
    t->types.nsympool  = 0;
    t->types.ntypepool = 0;
    t->types.type_bool = alloc_type(t, TYPE_BOOL, "bool", 4, 1, 1);
    /* NOTE(review): `type_char` is built as a second, distinct instance of
     * the i8 type (same class, same "i8" name) rather than aliasing
     * `type_i8` -- pointer-equality checks elsewhere will treat char and
     * i8 as different types; confirm this is intentional. */
    t->types.type_char =
        alloc_type(t, TYPE_I8, "i8", 2, sizeof(i8), sizeof(i8));
    t->types.type_i8 = alloc_type(t, TYPE_I8, "i8", 2, sizeof(i8), sizeof(i8));
    t->types.type_i16 =
        alloc_type(t, TYPE_I16, "i16", 3, sizeof(i16), sizeof(i16));
    t->types.type_i32 =
        alloc_type(t, TYPE_I32, "i32", 3, sizeof(i32), sizeof(i32));
    t->types.type_u8 = alloc_type(t, TYPE_U8, "u8", 2, sizeof(u8), sizeof(u8));
    t->types.type_u16 =
        alloc_type(t, TYPE_U16, "u16", 3, sizeof(u16), sizeof(u16));
    t->types.type_u32 =
        alloc_type(t, TYPE_U32, "u32", 3, sizeof(u32), sizeof(u32));
    /* `str` is an immutable slice of u8. */
    t->types.type_str    = alloc_slice_type(t, t->types.type_u8, NULL, false);
    t->types.type_void   = alloc_type(t, TYPE_VOID, "void", 4, 0, 0);
    t->types.type_opaque = alloc_type(t, TYPE_OPAQUE, "opaque", 6, 0, 0);
    t->types.type_never  = alloc_type(t, TYPE_NEVER, "never", 5, 0, 0);

    /* Add root module to global scope
     * so it can be accessed with `::module` */
    if (mm->root && mm->root->ast && mm->root->ast->sym) {
        /* Root module declarations are checked later, so just add the symbol */
        symtab_add_symbol(t->global, mm->root->ast->sym);
    }
}
111 +
112 +
/* Reserve `n` consecutive slots from the symbol-pointer pool and return
 * a pointer to the first slot. Exhausting the pool is a programming
 * error, caught by the assertion in debug builds. */
symbol_t **types_alloc_sympool(types_t *t, u8 n) {
    assert(t->nsympool + n <= MAX_SYMPTR_POOL);

    symbol_t **slots = &t->sympool[t->nsympool];

    t->nsympool += n;
    return slots;
}
118 +
119 +
/* Reserve `n` consecutive slots from the type-pointer pool and return a
 * pointer to the first slot. Exhausting the pool is a programming
 * error, caught by the assertion in debug builds. */
type_t **types_alloc_typepool(types_t *t, u8 n) {
    assert(t->ntypepool + n <= MAX_TYPEPTR_POOL);

    type_t **slots = &t->typepool[t->ntypepool];

    t->ntypepool += n;
    return slots;
}
125 +
126 +
/* Return the pointee type of a pointer type. The caller must pass a
 * pointer-classed type; no class check is performed here. */
type_t *deref_type(type_t *ref) {
    return ref->info.ptr.target;
}
131 +
132 +
/* Compare an identifier node against a (pointer, length) string.
 * Length is checked first so memcmp only runs on equal-length names. */
bool ident_eq(node_t *ident, const char *str, usize len) {
    if (ident->val.ident.length != len)
        return false;

    return memcmp(ident->val.ident.name, str, len) == 0;
}
138 +
139 +
static bool node_is_super(const node_t *n) {
140 +
    return n && n->cls == NODE_SUPER;
141 +
}
142 +
143 +
/* Walk `depth` levels up the module parent chain.
 * Returns NULL if the chain ends (or `mod` is NULL) before `depth`
 * steps have been taken; with depth == 0 the module itself is
 * returned unchanged. */
static module_t *module_super_ancestor(module_t *mod, usize depth) {
    module_t *m = mod;

    while (depth > 0) {
        if (!m || !m->parent)
            return NULL;
        m = m->parent;
        depth--;
    }
    return m;
}
153 +
154 +
/* A type is "packed" when it contains no padding. Only records can
 * accumulate padding between fields (tracked as the difference between
 * `packedsize` and the aligned `size`); every other type class is
 * trivially packed. */
inline bool type_is_packed(type_t *t) {
    if (t->cls == TYPE_RECORD)
        return (i32)t->info.srt.packedsize == t->size;

    return true;
}
168 +
169 +
/* Numeric classes occupy the contiguous enum range [TYPE_I8, TYPE_U32]. */
inline bool type_is_numeric(typeclass_t t) {
    return !(t < TYPE_I8 || t > TYPE_U32);
}
172 +
173 +
/* True for type classes whose runtime value is an address: raw
 * pointers, slices (pointer + length) and function pointers. */
inline bool type_is_address(typeclass_t t) {
    switch (t) {
    case TYPE_PTR:
    case TYPE_SLICE:
    case TYPE_FN:
        return true;
    default:
        return false;
    }
}
176 +
177 +
/* True for multi-word / structured types: arrays, records, slices,
 * pointers, optionals, results, function types, and tagged unions
 * that carry a payload. */
inline bool type_is_compound(type_t *t) {
    switch (t->cls) {
    case TYPE_ARRAY:
    case TYPE_RECORD:
    case TYPE_SLICE:
    case TYPE_PTR:
    case TYPE_OPT:
    case TYPE_RESULT:
    case TYPE_FN:
        return true;
    default:
        return type_is_union_with_payload(t);
    }
}
184 +
185 +
/* True for types whose values are passed by reference in the calling
 * convention: aggregates and tagged values. Note that raw pointers
 * and function pointers are passed by value. */
inline bool type_is_passed_by_ref(type_t *t) {
    switch (t->cls) {
    case TYPE_ARRAY:
    case TYPE_RECORD:
    case TYPE_SLICE:
    case TYPE_OPT:
    case TYPE_RESULT:
        return true;
    default:
        return type_is_union_with_payload(t);
    }
}
192 +
193 +
/* True for union types that carry payload data alongside the tag. */
inline bool type_is_union_with_payload(type_t *ty) {
    if (ty->cls != TYPE_UNION)
        return false;

    return ty->info.uni.has_payload;
}
196 +
197 +
/* True for types represented as a tag plus a value slot: optionals,
 * results, and payload-carrying unions. */
inline bool type_is_tagged_value(type_t *ty) {
    switch (ty->cls) {
    case TYPE_OPT:
    case TYPE_RESULT:
        return true;
    default:
        return type_is_union_with_payload(ty);
    }
}
201 +
202 +
/* Primitive is defined as the complement of compound. */
inline bool type_is_primitive(type_t *t) {
    return type_is_compound(t) == false;
}
205 +
206 +
/* Integer classes occupy the contiguous enum range [TYPE_I8, TYPE_U32].
 * NOTE: currently identical to type_is_numeric(); kept separate so the
 * two predicates can diverge (e.g. when floats are added). */
inline bool type_is_int(typeclass_t t) {
    return !(t < TYPE_I8 || t > TYPE_U32);
}
209 +
210 +
/* True for the unsigned integer classes. */
inline bool type_is_unsigned(typeclass_t t) {
    switch (t) {
    case TYPE_U8:
    case TYPE_U16:
    case TYPE_U32:
        return true;
    default:
        return false;
    }
}
213 +
214 +
/* Test whether a value of type `a` may be implicitly coerced to type
 * `b`. Identical types always coerce; otherwise only the two
 * mutability-dropping conversions are permitted:
 *   *mut [T] -> *[T]   (slices, same element type)
 *   *mut T   -> *T     (pointers, same target type)
 * Immutable may never coerce to mutable. */
bool type_coercible(type_t *a, type_t *b) {
    if (a == b)
        return true;

    if (a->cls == TYPE_SLICE && b->cls == TYPE_SLICE) {
        /* Same element type, and mutability may only be dropped. */
        return a->info.slc.elem == b->info.slc.elem &&
               (a->info.slc.mut || !b->info.slc.mut);
    }
    if (a->cls == TYPE_PTR && b->cls == TYPE_PTR) {
        /* Same target type, and mutability may only be dropped. */
        return a->info.ptr.target == b->info.ptr.target &&
               (a->info.ptr.mut || !b->info.ptr.mut);
    }
    return false;
}
237 +
238 +
/* Unify two types, attempting to find the most general unifier.
 * Returns the unified type on success, or `NULL` if types cannot be unified.
 * If `n` and `context` are provided, reports an error on failure.
 *
 * Either input may be NULL (an as-yet-unknown type). When `coerce` is
 * set, safe widening conversions are considered: T -> ?T, *mut -> *,
 * * -> *opaque, numeric promotion, and array -> slice. Recursive calls
 * on element/target types pass NULL for `n`/`context` so that failures
 * deep in a structure are reported (if at all) at the outermost call. */
static type_t *type_unify(
    resolve_t  *t,
    type_t     *a,
    type_t     *b,
    node_t     *n,      /* Node to report error on, or NULL for silent */
    bool        coerce, /* Allow safe type coercion */
    const char *context /* Context string for error message */
) {
    /* If the pointers are equal, they're already unified */
    if (a == b)
        return a;
    /* If they are both `NULL`, there's nothing we can do */
    if (!a && !b)
        return NULL;

    /* Treat `never` as compatible with any type. */
    if (a && a->cls == TYPE_NEVER)
        return b ? b : a;
    if (b && b->cls == TYPE_NEVER)
        return a ? a : b;

    /* If one type is NULL, create optional of the other type.
     * NOTE(review): the non-NULL side is required to already be an
     * optional here, and it is wrapped in a *further* optional
     * (?(?T)) -- confirm alloc_opt_type deduplicates, or whether
     * returning the existing optional was intended. */
    if (!a && b && (b->cls == TYPE_OPT))
        return alloc_opt_type(t, b);
    if (!b && a && (a->cls == TYPE_OPT))
        return alloc_opt_type(t, a);

    /* If either type is `NULL` and the other is not an optional, bail,
     * because we have an error. */
    if (!a || !b) {
        return NULL;
    }
    /* Handle coercion of T to ?T */
    if (coerce) {
        if (b->cls == TYPE_OPT && a->cls != TYPE_OPT) {
            if (type_unify(t, a, b->info.opt.elem, n, coerce, context)) {
                return b; /* a unifies with ?T's element, result is ?T */
            }
            /* Try to unify a with the optional's element type.
             * NOTE(review): this retry calls type_unify with the exact
             * same types as the check above (only the error-reporting
             * arguments differ), so it can only succeed if the first
             * call did -- the alloc_opt_type path below appears to be
             * unreachable. */
            type_t *unified =
                type_unify(t, a, b->info.opt.elem, NULL, coerce, NULL);
            if (unified) {
                return alloc_opt_type(t, unified);
            }
        }
    }
    /* Handle pointer types */
    if (a->cls == TYPE_PTR && b->cls == TYPE_PTR) {
        /* Allow coercion from *T to *opaque and *mut T to *mut opaque */
        if (coerce && (a->info.ptr.target->cls == TYPE_OPAQUE ||
                       b->info.ptr.target->cls == TYPE_OPAQUE)) {
            return a->info.ptr.target->cls == TYPE_OPAQUE ? a : b;
        }

        type_t *unified = type_unify(
            t, a->info.ptr.target, b->info.ptr.target, NULL, coerce, NULL
        );
        if (unified) {
            /* When coercing *mut T to *T, prefer immutable target */
            if (coerce && a->info.ptr.mut && !b->info.ptr.mut) {
                return b;
            }
            /* Reuse an existing pointer type when possible; only
             * allocate a fresh one for a genuinely new target. */
            if (unified == a->info.ptr.target) {
                return a;
            } else if (unified == b->info.ptr.target) {
                return b;
            } else {
                return alloc_ptr_type(t, unified, a->info.ptr.mut);
            }
        }
        goto error;
    }
    /* Handle numeric type unification - promote to wider type */
    if (type_is_numeric(a->cls) && type_is_numeric(b->cls)) {
        /* Return the "wider" type based on size and signedness */
        if (a->size > b->size) {
            return a;
        } else if (b->size > a->size) {
            return b;
        } else {
            /* Same size - prefer unsigned over signed */
            if ((a->cls >= TYPE_U8 && a->cls <= TYPE_U32) &&
                (b->cls >= TYPE_I8 && b->cls <= TYPE_I32)) {
                return a; /* a is unsigned, b is signed */
            } else if ((b->cls >= TYPE_U8 && b->cls <= TYPE_U32) &&
                       (a->cls >= TYPE_I8 && a->cls <= TYPE_I32)) {
                return b; /* b is unsigned, a is signed */
            } else {
                return a; /* Default to first type if same category */
            }
        }
    }
    /* Handle array types */
    if (a->cls == TYPE_ARRAY && b->cls == TYPE_ARRAY) {
        /* Arrays must have same length to unify */
        if (a->info.ary.length != b->info.ary.length) {
            goto error;
        }
        /* Unify element types (without coercion: element layout must
         * match exactly for arrays to share a representation). */
        type_t *unified = type_unify(
            t, a->info.ary.elem, b->info.ary.elem, NULL, false, NULL
        );
        if (unified) {
            /* If element types are already the same, return existing array */
            if (unified == a->info.ary.elem) {
                return a;
            } else if (unified == b->info.ary.elem) {
                return b;
            } else {
                return alloc_array_type(t, unified, a->info.ary.length);
            }
        }
        goto error;
    }
    /* Handle slice types */
    if (a->cls == TYPE_SLICE && b->cls == TYPE_SLICE) {
        /* Allow coercion from *[T] to *[opaque] */
        if (coerce && (a->info.slc.elem->cls == TYPE_OPAQUE ||
                       b->info.slc.elem->cls == TYPE_OPAQUE)) {
            return a->info.slc.elem->cls == TYPE_OPAQUE ? a : b;
        }
        type_t *unified = type_unify(
            t, a->info.slc.elem, b->info.slc.elem, NULL, false, NULL
        );
        if (unified) {
            /* When coercing *mut [T] to *[T], prefer immutable target */
            if (coerce && a->info.slc.mut && !b->info.slc.mut) {
                return b;
            }
            if (unified == a->info.slc.elem) {
                return a;
            } else if (unified == b->info.slc.elem) {
                return b;
            } else {
                return alloc_slice_type(t, unified, NULL, a->info.slc.mut);
            }
        }
        goto error;
    }
    /* Handle optional types */
    if (a->cls == TYPE_OPT && b->cls == TYPE_OPT) {
        type_t *unified = type_unify(
            t, a->info.opt.elem, b->info.opt.elem, NULL, coerce, NULL
        );
        if (unified) {
            if (unified == a->info.opt.elem) {
                return a;
            } else if (unified == b->info.opt.elem) {
                return b;
            } else {
                return alloc_opt_type(t, unified);
            }
        }
        goto error;
    }
    /* Result types: error arms must be pointer-identical; payloads may
     * unify. NOTE(review): unlike the pointer/array/slice cases, no new
     * result type is allocated when the unified payload differs from
     * both inputs -- that combination falls through to error. Confirm
     * this is intentional. */
    if (a->cls == TYPE_RESULT && b->cls == TYPE_RESULT) {
        if (a->info.res.err != b->info.res.err)
            goto error;

        type_t *payload = type_unify(
            t, a->info.res.payload, b->info.res.payload, NULL, coerce, NULL
        );

        if (payload) {
            if (payload == a->info.res.payload) {
                return a;
            } else if (payload == b->info.res.payload) {
                return b;
            }
        }
        goto error;
    }
    /* Handle array to slice conversion (immutable slices only: an
     * array cannot silently become a mutable view). */
    if (a->cls == TYPE_ARRAY && b->cls == TYPE_SLICE) {
        if (b->info.slc.mut) {
            goto error;
        }
        type_t *unified = type_unify(
            t, a->info.ary.elem, b->info.slc.elem, NULL, coerce, NULL
        );
        if (unified && unified == a->info.ary.elem) {
            return a->slice; /* Convert array to its slice type */
        }
        goto error;
    }
    if (b->cls == TYPE_ARRAY && a->cls == TYPE_SLICE) {
        if (a->info.slc.mut) {
            goto error;
        }
        type_t *unified = type_unify(
            t, a->info.slc.elem, b->info.ary.elem, NULL, coerce, NULL
        );
        if (unified && unified == b->info.ary.elem) {
            return b->slice; /* Convert array to its slice type */
        }
        goto error;
    }
    /* Function types unify only when structurally identical: same
     * arity, pointer-identical parameter types, identical return. */
    if (a->cls == TYPE_FN && b->cls == TYPE_FN) {
        usize nparams = a->info.fun.nparams;
        if (b->info.fun.nparams != nparams) {
            goto error;
        }
        for (usize i = 0; i < nparams; i++) {
            type_t *pa = a->info.fun.params[i];
            type_t *pb = b->info.fun.params[i];

            if (pa != pb)
                goto error;
        }
        if (a->info.fun.ret != b->info.fun.ret)
            goto error;

        return a;
    }

error:
    return NULL;
}
459 +
460 +
/* Resolve a `throw` statement: the thrown expression is checked
 * against the error arm of the enclosing function's result return
 * type, and the statement itself takes on that result type.
 *
 * NOTE(review): assumes the checker is inside a function (`t->fn`
 * non-NULL) whose return type is a result type; otherwise
 * `fn_ret->info.res.err` reads an inactive union member. Confirm
 * callers only reach here after validating that. */
static type_t *resolve_throw(resolve_t *t, node_t *n) {
    type_t *fn_ret   = t->fn->node->type->info.fun.ret;
    type_t *err_type = fn_ret->info.res.err;

    if (!resolve_node(t, n->val.throw_stmt.expr, err_type))
        return NULL;

    return (n->type = fn_ret);
}
469 +
470 +
/* Resolve a `try` expression.
 *
 * The tried sub-expression is resolved with the context temporarily
 * switched to TC_CTX_TRY, and is assumed to produce a result type
 * (its `info.res` fields are read unconditionally -- presumably the
 * TRY context enforces that downstream; TODO confirm).
 *
 * Three forms are handled:
 *   - `try? e`          : errors become nil; the expression has type
 *                         ?payload (wrapping only if not already optional).
 *   - `try e catch ...` : the optional catch binding is bound to the
 *                         error type in a fresh scope; if the catch body
 *                         can complete normally the whole expression is
 *                         void, otherwise (divergent catch, e.g. return/
 *                         throw) the payload type is kept.
 *   - `try e`           : propagates; the expression has the payload
 *                         type, unified against `expected` when given. */
static type_t *resolve_try_expr(resolve_t *t, node_t *n, type_t *expected) {
    bool    optional   = n->val.try_expr.optional;
    node_t *expr       = n->val.try_expr.expr;
    node_t *catch_expr = n->val.try_expr.catch_expr;

    /* Resolve the tried expression under the TRY context, restoring
     * the previous context afterwards. */
    resolve_ctx_t pctx = t->ctx;
    t->ctx             = TC_CTX_TRY;
    type_t *expr_type  = resolve_node(t, expr, NULL);
    t->ctx             = pctx;

    if (!expr_type)
        return NULL;
    type_t *payload = expr_type->info.res.payload;

    /* `try?` converts errors to nil and returns an optional type. */
    if (optional) {
        if (payload->cls != TYPE_OPT) {
            payload = alloc_opt_type(t, payload);
        }
        return (n->type = payload);
    }

    if (catch_expr) {
        node_t *catch_binding = catch_expr->val.catch_clause.binding;
        node_t *catch_body    = catch_expr->val.catch_clause.body;
        type_t *err_type      = expr_type->info.res.err;

        /* If there's a binding, create a scope and add the error variable. */
        if (catch_binding) {
            catch_expr->val.catch_clause.scope = symtab_scope(t->scope, NULL);
            t->scope = catch_expr->val.catch_clause.scope;

            catch_binding->type = err_type;
            /* NOTE(review): this early return leaves t->scope pointing
             * at the catch scope (the pop below is skipped). Harmless
             * if resolution aborts on failure -- confirm. */
            if (!symbol_add(t, catch_binding, catch_binding))
                return NULL;

            catch_binding->sym->e.var.typ   = err_type;
            catch_binding->sym->e.var.align = err_type->align;
            catch_binding->sym->scope       = t->scope;
        }
        type_t *catch_type = resolve_node(t, catch_body, NULL);

        /* Pop the binding scope before inspecting the body's type. */
        if (catch_binding) {
            t->scope = t->scope->parent;
        }
        if (!catch_type)
            return NULL;
        if (catch_type->cls != TYPE_NEVER)
            return (n->type = t->types.type_void);

        /* Divergent catch block: fall through and keep payload type. */
    }

    /* Unify the payload against the expected type (unwrapping an
     * expected result down to its payload first). Unification failure
     * is not an error here; the unadjusted payload type is kept. */
    if (expected) {
        type_t *target = expected;

        if (expected->cls == TYPE_RESULT)
            target = expected->info.res.payload;

        type_t *unified = type_unify(t, payload, target, n, true, NULL);
        if (unified)
            payload = unified;
    }
    return (n->type = payload);
}
535 +
536 +
/* Process a submodule declaration */
537 +
static type_t *resolve_mod_decl(resolve_t *t, node_t *n) {
538 +
    node_t *name = n->val.mod_decl.ident;
539 +
540 +
    char rel[MAX_PATH_LEN] = { 0 };
541 +
    strncpy(rel, name->val.ident.name, name->val.ident.length);
542 +
543 +
    /* Convert to path relative to current module and find it */
544 +
    module_t *submod =
545 +
        module_manager_find_relative(t->mm, t->module->path, rel);
546 +
    if (!submod)
547 +
        return NULL;
548 +
    symbol_t *sym = symtab_scope_lookup(
549 +
        t->scope, name->val.ident.name, name->val.ident.length, SYM_MODULE
550 +
    );
551 +
    if (sym) {
552 +
        n->sym = sym;
553 +
    } else {
554 +
        if (!symbol_add(t, name, n)) { /* Add module to current scope */
555 +
            return NULL;
556 +
        }
557 +
    }
558 +
    if (!resolve_decls(t, submod)) {
559 +
        return NULL;
560 +
    }
561 +
    /* For mod declarations, also do full type checking */
562 +
    if (!resolve_mod_def(t, submod)) {
563 +
        return NULL;
564 +
    }
565 +
    n->sym->e.mod          = submod;
566 +
    n->sym->e.mod->attribs = n->val.mod_decl.attribs
567 +
                                 ? n->val.mod_decl.attribs->val.attrib
568 +
                                 : ATTRIB_NONE;
569 +
    module_path(submod->qualified, t->module->qualified);
570 +
    module_qualify(submod->qualified, name);
571 +
572 +
    return (n->type = t->types.type_void);
573 +
}
574 +
575 +
/* Helper function to look up a symbol in a module's scope */
576 +
static type_t *module_lookup(
577 +
    resolve_t *t, node_t *n, node_t *child, module_t *module
578 +
) {
579 +
    /* If the module hasn't been checked yet, check it on-demand.
580 +
     * This allows parent modules to reference submodule types. */
581 +
    if (!module->scope && !module->declared &&
582 +
        module->state != MODULE_STATE_VISITING) {
583 +
        if (!resolve_decls(t, module)) {
584 +
            return NULL;
585 +
        }
586 +
    }
587 +
    if (!module->scope)
588 +
        return NULL;
589 +
590 +
    symbol_t *sym = symtab_scope_lookup(
591 +
        module->scope, child->val.ident.name, child->val.ident.length, SYM_ANY
592 +
    );
593 +
    if (!sym)
594 +
        return NULL;
595 +
    n->sym  = sym;
596 +
    n->type = sym->node->type;
597 +
598 +
    return n->type;
599 +
}
600 +
601 +
/* Find a union variant by name; NULL if no variant matches. */
static symbol_t *union_variant_lookup(type_t *typ, node_t *n) {
    symbol_t **variants  = typ->info.uni.variants;
    usize      nvariants = typ->info.uni.nvariants;

    for (usize i = 0; i < nvariants; i++) {
        symbol_t *variant = variants[i];

        if (ident_eq(n, variant->name, variant->length))
            return variant;
    }
    return NULL;
}
610 +
611 +
/* Look up a record field by name. */
612 +
static symbol_t *record_field_lookup(type_t *typ, node_t *n) {
613 +
    for (usize i = 0; i < typ->info.srt.nfields; i++) {
614 +
        symbol_t *f = typ->info.srt.fields[i];
615 +
        if (ident_eq(n, f->name, f->length)) {
616 +
            return f;
617 +
        }
618 +
    }
619 +
    return NULL;
620 +
}
621 +
622 +
/* Add a field to a record type.
 *
 * Appends a new field symbol to `rec_typ`, computing its offset from
 * the record's running size aligned to the field's alignment, and
 * updates the record's size, alignment and packed size accordingly.
 * When `field_ident` is NULL (a tuple record), a synthetic name equal
 * to the field's index ("0", "1", ...) is interned instead.
 * Always returns true. */
static bool record_field_add(
    resolve_t *t,
    type_t    *rec_typ,
    node_t    *field,
    node_t    *field_ident,
    type_t    *field_typ
) {
    (void)t;
    const char *field_name;
    usize       field_len;
    char        tuple_name[16];

    if (field_ident) {
        field_name = field_ident->val.ident.name;
        field_len  = field_ident->val.ident.length;
    } else {
        /* Tuple field: generate synthetic name based on index */
        snprintf(
            tuple_name,
            sizeof(tuple_name),
            "%u",
            (unsigned)rec_typ->info.srt.nfields
        );
        /* Intern the name: tuple_name is stack-local and must outlive
         * this call. */
        field_name = strings_alloc(tuple_name);
        field_len  = strlen(field_name);
    }
    field->type = field_typ;

    /* Nb. Since we're modifying the record size as we add fields, we always
     * add new fields at the end of the record. */
    i32 field_align    = field_typ->align;
    i32 aligned_offset = align(rec_typ->size, field_align);

    /* Keep track of packed size */
    rec_typ->info.srt.packedsize += field_typ->size;

    field->sym = alloc_symbol((symbol_t){
        .name = field_name,
        .length = field_len,
        .node = field,
        .kind = SYM_FIELD,
        .e.field = {
            .typ = field_typ,
            .offset = (i32)aligned_offset,
        },
    });
    /* Update record size to include this new field */
    rec_typ->size = aligned_offset + field_typ->size;

    /* Update record alignment to be the maximum of its current alignment
     * and the new field's alignment */
    rec_typ->align =
        (rec_typ->align > field_typ->align) ? rec_typ->align : field_typ->align;
    /* Add field to record type.
     * NOTE(review): no bounds check on nfields against the fields
     * array's capacity -- presumably validated by the caller; confirm. */
    rec_typ->info.srt.fields[rec_typ->info.srt.nfields++] = field->sym;

    return true;
}
681 +
682 +
/* Store `value` into `*dst`, reporting whether the stored value
 * actually changed. Used to detect layout-fixpoint convergence. */
static bool update_i32(i32 *dst, i32 value) {
    bool changed = (*dst != value);

    *dst = value;
    return changed;
}
688 +
689 +
/* Store `value` into `*dst`, reporting whether the stored value
 * actually changed. Used to detect layout-fixpoint convergence. */
static bool update_bool(bool *dst, bool value) {
    bool changed = (*dst != value);

    *dst = value;
    return changed;
}
695 +
696 +
/* Recompute a record type's field offsets, total size, alignment and
 * packed size from its fields' current layouts. Returns true if any
 * value changed (used by finalize_type_layout's fixpoint loop). */
static bool update_record_layout(type_t *strct_typ) {
    i32  size         = 0;
    i32  record_align = 1;
    u32  packedsize   = 0;
    bool changed      = false;

    for (usize i = 0; i < strct_typ->info.srt.nfields; i++) {
        symbol_t *field_sym  = strct_typ->info.srt.fields[i];
        type_t   *field_type = field_sym->e.field.typ;

        /* Fields whose types are not yet laid out (align == 0) are
         * placed with the default alignment for this pass. */
        i32 field_align = field_type->align ? field_type->align : DEFAULT_ALIGN;
        i32 field_size  = field_type->size;
        i32 offset      = align(size, field_align);

        if (field_sym->e.field.offset != offset) {
            field_sym->e.field.offset = offset;
            changed                   = true;
        }

        size = offset + field_size;
        if (field_align > record_align)
            record_align = field_align;

        /* Packed size: sum of field sizes with no padding. */
        packedsize += (u32)field_size;
    }
    /* Round overall size up to record alignment to match C layout. */
    size = align(size, record_align);

    changed |= update_i32(&strct_typ->size, size);
    changed |= update_i32(&strct_typ->align, record_align);
    if (strct_typ->info.srt.packedsize != packedsize) {
        strct_typ->info.srt.packedsize = packedsize;
        changed                        = true;
    }

    return changed;
}
733 +
734 +
/* Recompute an array type's size (element size x length) and alignment
 * (the element's alignment, or DEFAULT_ALIGN while the element is not
 * yet laid out). Returns true if either value changed. */
static bool update_array_layout(type_t *typ) {
    type_t *elem = typ->info.ary.elem;

    if (elem == NULL)
        return false;

    i32 alignment = elem->align ? elem->align : DEFAULT_ALIGN;
    i32 total     = elem->size * (i32)typ->info.ary.length;

    bool resized   = update_i32(&typ->size, total);
    bool realigned = update_i32(&typ->align, alignment);

    return resized || realigned;
}
748 +
749 +
/* Recompute an optional type's layout: a tag of TAG_SIZE bytes
 * followed by the element, with the element aligned within the
 * optional and the whole rounded up to the combined alignment.
 * Returns true if size or alignment changed. */
static bool update_opt_layout(type_t *typ) {
    type_t *elem = typ->info.opt.elem;
    if (!elem)
        return false;

    i32  elem_align = elem->align ? elem->align : DEFAULT_ALIGN;
    i32  alignment  = max(elem_align, TAG_SIZE);
    /* Element starts after the tag, at its own alignment. */
    i32  val_offset = align(TAG_SIZE, elem_align);
    i32  size       = align(val_offset + elem->size, alignment);
    bool changed    = false;

    changed |= update_i32(&typ->size, size);
    changed |= update_i32(&typ->align, alignment);

    return changed;
}
765 +
766 +
/* Recompute a result type's layout: a tag of TAG_SIZE bytes followed
 * by a value slot large enough for whichever of the payload or error
 * arm is bigger. A void arm contributes zero size and tag-sized
 * alignment. Returns true if size or alignment changed. */
static bool update_result_layout(resolve_t *t, type_t *typ) {
    type_t *payload = typ->info.res.payload;
    type_t *err     = typ->info.res.err;

    i32 payload_align =
        payload == t->types.type_void ? TAG_SIZE : payload->align;
    i32 err_align = err == t->types.type_void ? TAG_SIZE : err->align;
    i32 alignment = max(max(payload_align, err_align), TAG_SIZE);

    i32  payload_size = payload == t->types.type_void ? 0 : payload->size;
    i32  err_size     = err == t->types.type_void ? 0 : err->size;
    /* Shared value slot starts after the tag, at the combined
     * alignment; its size covers the larger arm, rounded up. */
    i32  val_offset   = align(TAG_SIZE, alignment);
    i32  value_size   = align(max(payload_size, err_size), alignment);
    i32  size         = val_offset + value_size;
    bool changed      = false;

    changed |= update_i32(&typ->size, size);
    changed |= update_i32(&typ->align, alignment);

    return changed;
}
787 +
788 +
/* Recompute a union (enum) type's layout.
 *
 * Scans the variants for payloads: a payload-free union is just its
 * base integer type (or TAG_SIZE bytes); a payload-carrying union is a
 * tag followed by a value slot sized for the largest payload. Updates
 * has_payload, variantsize, align and size, returning true if any of
 * them changed. */
static bool update_enum_layout(type_t *typ) {
    i32  new_align   = typ->info.uni.base ? typ->info.uni.base->align : 0;
    bool has_payload = false;
    i32  variantsize = 0;
    bool changed     = false;

    if (new_align <= 0)
        new_align = TAG_SIZE;

    /* Find the widest payload and the strictest payload alignment. */
    for (usize i = 0; i < typ->info.uni.nvariants; i++) {
        symbol_t *variant_sym = typ->info.uni.variants[i];
        if (!variant_sym || !variant_sym->node)
            continue;

        node_t *variant_node = variant_sym->node;
        type_t *payload      = variant_node->type;

        /* Variants without a resolved type, or with a void payload,
         * contribute nothing to the value slot. */
        if (!payload || payload->cls == TYPE_VOID)
            continue;

        has_payload = true;
        if (payload->size > variantsize)
            variantsize = payload->size;
        if (payload->align > new_align)
            new_align = payload->align;
    }

    if (new_align <= 0)
        new_align = TAG_SIZE;

    i32 size = typ->info.uni.base ? typ->info.uni.base->size : TAG_SIZE;
    if (has_payload) {
        /* Tag first, then the payload slot at the union's alignment. */
        i32 val_offset = align(TAG_SIZE, new_align);
        i32 aligned_payload =
            variantsize > 0 ? align(variantsize, new_align) : 0;
        size = val_offset + aligned_payload;
    }
    changed |= update_bool(&typ->info.uni.has_payload, has_payload);
    changed |= update_i32(&typ->info.uni.variantsize, variantsize);
    changed |= update_i32(&typ->align, new_align);
    changed |= update_i32(&typ->size, size);

    return changed;
}
832 +
833 +
/* Recompute the layout of one type, dispatching on its class.
 * Scalar, pointer and function types have fixed layouts and report no
 * change. Returns true if the type's layout was modified. */
static bool update_type_layout(resolve_t *t, type_t *typ) {
    switch (typ->cls) {
    case TYPE_RECORD:
        return update_record_layout(typ);
    case TYPE_ARRAY:
        return update_array_layout(typ);
    case TYPE_OPT:
        return update_opt_layout(typ);
    case TYPE_RESULT:
        return update_result_layout(t, typ);
    case TYPE_UNION:
        return update_enum_layout(typ);
    default:
        return false;
    }
}
849 +
850 +
/* Iterate layout recomputation over all known types until a full pass
 * makes no change (a fixpoint). Dependencies between types are at most
 * one type deep per pass, so `nobjects` passes always suffice; failing
 * to converge within that bound indicates a cyclic layout and aborts. */
static void finalize_type_layout(resolve_t *t) {
    usize passes = t->types.nobjects ? t->types.nobjects : 1;

    for (usize pass = 0; pass < passes; pass++) {
        bool dirty = false;

        for (usize i = 0; i < t->types.nobjects; i++) {
            dirty |= update_type_layout(t, &t->types.objects[i]);
        }
        if (!dirty)
            return;
    }
    bail("type layout failed to stabilize");
}
865 +
866 +
/* Declare a union (enum) type: register its symbol in the current
 * scope and attach a freshly allocated union type. Idempotent -- a
 * node that already has a symbol and/or type only fills in whichever
 * half is missing. Returns false if the symbol cannot be added. */
static bool declare_enum(resolve_t *t, node_t *n) {
    union_decl_t *decl = &n->val.union_decl;

    if (!n->sym) {
        if (!symbol_add(t, decl->name, n))
            return false;
        /* symbol_add is expected to set n->sym; guard regardless. */
        if (!n->sym)
            return false;
    }
    if (!n->type) {
        type_t *typ        = alloc_union_type(t, decl);
        n->sym->e.typ.info = n->type = typ;
    } else if (!n->sym->e.typ.info) {
        /* Type existed but the symbol hadn't been linked to it yet. */
        n->sym->e.typ.info = n->type;
    }
    return true;
}
883 +
884 +
/* Declare a record type: register its symbol in the current scope and
 * attach a freshly allocated record type. Idempotent -- a node that
 * already has a symbol and/or type only fills in whichever half is
 * missing. Mirrors declare_enum. Returns false if the symbol cannot
 * be added. */
static bool declare_record(resolve_t *t, node_t *n) {
    record_decl_t *decl = &n->val.record_decl;

    if (!n->sym) {
        if (!symbol_add(t, decl->name, n))
            return false;
        /* symbol_add is expected to set n->sym; guard regardless. */
        if (!n->sym)
            return false;
    }
    if (!n->type) {
        type_t *strct_typ  = alloc_record_type(t, decl);
        n->sym->e.typ.info = n->type = strct_typ;
    } else if (!n->sym->e.typ.info) {
        /* Type existed but the symbol hadn't been linked to it yet. */
        n->sym->e.typ.info = n->type;
    }
    return true;
}
901 +
902 +
/* Evaluate `expr` as a compile-time usize (e.g. an array length).
 *
 * Accepts either a number literal directly, or an identifier/scope
 * path that resolves to a `const` whose value is a number literal.
 * On success stores the value in `*value` and returns true; any other
 * expression form returns false. */
static bool resolve_const_usize(resolve_t *t, node_t *expr, usize *value) {
    if (expr->cls == NODE_NUMBER) {
        *value = expr->val.number.value.u;
        return true;
    }
    symbol_t *sym = expr->sym;

    /* Names not yet resolved are looked up as constants on demand. */
    if (!sym && (expr->cls == NODE_IDENT || expr->cls == NODE_SCOPE)) {
        sym = resolve_name(t, expr, SYM_CONSTANT);

        if (!sym)
            return false;
    }

    /* Only constants backed by a `const` declaration qualify. */
    if (!sym || sym->kind != SYM_CONSTANT || !sym->node ||
        sym->node->cls != NODE_CONST)
        return false;

    /* The constant's initializer must itself be a number literal;
     * nested constant references are not followed. */
    node_t *value_node = sym->node->val.constant.value;
    if (!value_node || value_node->cls != NODE_NUMBER)
        return false;
    *value = value_node->val.number.value.u;

    return true;
}
927 +
928 +
static bool resolve_record_literal_fields(
929 +
    resolve_t *t, node_t *lit, type_t *record_type
930 +
) {
931 +
    node_t **lit_fields =
932 +
        nodespan_ptrs(&t->module->parser, lit->val.record_lit.fields);
933 +
    for (usize i = 0; i < lit->val.record_lit.fields.len; i++) {
934 +
        node_t             *field_init = lit_fields[i];
935 +
        record_lit_field_t *init       = &field_init->val.record_lit_field;
936 +
937 +
        symbol_t *field_sym = record_field_lookup(record_type, init->name);
938 +
        if (!field_sym)
939 +
            return false;
940 +
941 +
        type_t *field_typ = field_sym->e.field.typ;
942 +
943 +
        if (!resolve_node(t, init->value, field_typ))
944 +
            return false;
945 +
946 +
        field_init->sym = field_sym;
947 +
    }
948 +
    return true;
949 +
}
950 +
951 +
/* Determine the record type a record literal constructs, and the type the
 * expression evaluates to. These differ for union-variant literals
 * (`Enum::Variant { ... }`): the fields come from the variant's record
 * payload (`*out_record`) while the expression has the union type
 * (`*out_result`). For plain records the two are the same. `*out_variant`
 * (when requested) receives the variant symbol, or NULL for plain records.
 * Returns false when no record type can be determined. */
static bool resolve_record_literal_types(
    resolve_t *t,
    node_t    *type_node,
    type_t    *expected,
    type_t   **out_record,
    type_t   **out_result,
    symbol_t **out_variant
) {
    type_t   *record_type = NULL;
    type_t   *result_type = NULL;
    symbol_t *variant_sym = NULL;

    /* Explicit type annotation: either
     * `Type { ... }` or
     * `module::Type { ... }`, or
     * `Enum::Variant { ... }` */
    if (type_node) {
        switch (type_node->cls) {
        case NODE_SCOPE:
        case NODE_IDENT: {
            symbol_t *sym = resolve_name(t, type_node, SYM_ANY);
            if (!sym)
                return false;

            /* Prefer the type already memoized on the node; fall back to
             * the symbol's declaration node. */
            type_t *resolved = type_node->type;
            if (!resolved && sym->node)
                resolved = sym->node->type;

            /* `Enum::Variant { ... }`: the literal fills the variant's
             * record payload, but the expression is the union type. */
            if (type_node->cls == NODE_SCOPE && sym->kind == SYM_VARIANT &&
                sym->node->cls == NODE_UNION_VARIANT) {
                if (!resolved || resolved->cls != TYPE_UNION)
                    return false;

                /* Only variants with a record payload can be built with
                 * a braced literal. */
                type_t *variant_type = sym->node->type;
                if (!variant_type || variant_type->cls != TYPE_RECORD)
                    return false;

                record_type = variant_type;
                result_type = resolved;
                variant_sym = sym;

                break;
            }

            /* Plain named type: resolve lazily if not yet memoized. */
            if (!resolved) {
                resolved = resolve_type(t, type_node);
                if (!resolved)
                    return false;
            }

            if (resolved->cls != TYPE_RECORD)
                return false;

            record_type = resolved;
            result_type = record_type;

            break;
        }
        case NODE_RECORD_TYPE: {
            /* Inline (anonymous) record type annotation. */
            type_t *resolved = resolve_type(t, type_node);
            if (!resolved)
                return false;

            if (resolved->cls != TYPE_RECORD)
                return false;

            record_type = resolved;
            result_type = record_type;

            break;
        }
        default:
            return false;
        }
    } else {
        /* No explicit type: fall back to the expected type from context */
        if (!expected)
            return false;

        /* Allow `?T` context: the literal builds the wrapped record. */
        if (expected->cls == TYPE_OPT)
            expected = expected->info.opt.elem;

        if (expected->cls != TYPE_RECORD)
            return false;

        record_type = expected;
        result_type = record_type;
    }

    *out_record = record_type;
    *out_result = result_type;
    if (out_variant)
        *out_variant = variant_sym;

    return true;
}
1047 +
1048 +
static bool anonymous_record_equals(
1049 +
    resolve_t *t, type_t *typ, record_type_t *stype
1050 +
) {
1051 +
    if (typ->info.srt.nfields != stype->fields.len)
1052 +
        return false;
1053 +
1054 +
    node_t **fields = nodespan_ptrs(&t->module->parser, stype->fields);
1055 +
    for (usize i = 0; i < stype->fields.len; i++) {
1056 +
        node_t   *field_node = fields[i];
1057 +
        symbol_t *field_sym  = typ->info.srt.fields[i];
1058 +
1059 +
        if (field_node->type != field_sym->e.field.typ)
1060 +
            return false;
1061 +
1062 +
        if (!ident_eq(
1063 +
                field_node->val.var.ident, field_sym->name, field_sym->length
1064 +
            ))
1065 +
            return false;
1066 +
    }
1067 +
    return true;
1068 +
}
1069 +
1070 +
/* Search the type pool for an anonymous record structurally equal to
 * `stype`. Returns NULL when no such type has been created yet. */
static type_t *anonymous_record_lookup(resolve_t *t, record_type_t *stype) {
    for (usize i = 0; i < t->types.nobjects; i++) {
        type_t *candidate = &t->types.objects[i];

        /* Only anonymous records are candidates for deduplication. */
        if (candidate->cls != TYPE_RECORD)
            continue;
        if (!candidate->info.srt.anonymous)
            continue;

        if (anonymous_record_equals(t, candidate, stype))
            return candidate;
    }
    return NULL;
}
1081 +
1082 +
/* Add one variant to a union type: allocates its symbol, resolves an
 * optional payload type, assigns the variant's discriminant value, and
 * refreshes the union's layout. `*iota` is the running auto-increment
 * counter for discriminants (C-enum style); an explicit `= expr` value
 * resets it so later variants continue from there. */
static bool union_variant_add(
    resolve_t *t, type_t *typ, node_t *v, usize idx, i32 *iota
) {
    (void)idx;
    union_variant_t *variant = &v->val.union_variant;
    const char      *name    = variant->name->val.ident.name;
    const usize      length  = variant->name->val.ident.length;

    symbol_t *sym = alloc_symbol((symbol_t){
        .name   = name,
        .length = length,
        .node   = v,
        .kind   = SYM_VARIANT,
    });

    if (variant->type) {
        /* Payload-carrying variant: the node's type is the payload type,
         * and the discriminant comes from the iota counter. */
        type_t *payload = resolve_type(t, variant->type);
        if (!payload)
            return false;

        v->type        = payload;
        variant->value = *iota;
        *iota          = variant->value + 1;
    } else {
        /* Payload-less variant: node type is void. */
        v->type = t->types.type_void;

        if (variant->value_expr) {
            /* Explicit discriminant (`= expr`): evaluate as i32 and
             * restart the iota counter after it. */
            if (!resolve_number(t, variant->value_expr, t->types.type_i32))
                return false;

            variant->value = variant->value_expr->val.number.value.i;
            *iota          = variant->value + 1;
        } else {
            /* Implicit discriminant: next iota value. */
            variant->value = *iota;
            *iota          = variant->value + 1;
        }
    }
    assert(typ->info.uni.nvariants < MAX_UNION_VARIANTS);
    typ->info.uni.variants[typ->info.uni.nvariants++] = sym;
    /* Recompute size/alignment as each variant is added. */
    update_enum_layout(typ);

    return true;
}
1125 +
1126 +
/* Allocate a type from the fixed-size type pool.
 * Initializes the identity fields and clears the derived-type caches
 * (pointer/slice variants), which are filled lazily. Returns NULL only
 * on pool exhaustion (after bail). */
static type_t *alloc_type(
    resolve_t  *t,
    typeclass_t kind,
    const char *name,
    usize       namelen,
    i32         size,
    i32         align
) {
    if (t->types.nobjects >= MAX_TYPES) {
        bail("type overflow: too many types");
        return NULL;
    }
    type_t *slot = &t->types.objects[t->types.nobjects++];

    slot->name      = name;
    slot->namelen   = namelen;
    slot->cls       = kind;
    slot->size      = size;
    slot->align     = align;
    slot->ptr       = NULL;
    slot->ptr_mut   = NULL;
    slot->slice     = NULL;
    slot->slice_mut = NULL;

    /* For non-pointer types, allocate a pointer type and
     * link it to the target type. */
    /* NOTE(review): alloc_ptr_type calls back into alloc_type with
     * TYPE_PTR (the guard above stops the recursion), so every
     * non-pointer type consumes two pool slots. */
    if (kind != TYPE_PTR) {
        slot->ptr = alloc_ptr_type(t, slot, false);
    }
    return slot;
}
1158 +
1159 +
/* Allocate a slice type.
1160 +
 * `base` can be `NULL` for things like `*[u8]` from string literals. */
1161 +
static type_t *alloc_slice_type(
1162 +
    resolve_t *t, type_t *elem, type_t *base, bool mut
1163 +
) {
1164 +
    if (base) {
1165 +
        if (!mut && base->slice) {
1166 +
            return base->slice;
1167 +
        }
1168 +
        if (mut && base->slice_mut) {
1169 +
            return base->slice_mut;
1170 +
        }
1171 +
    } else {
1172 +
        if (!mut && elem->slice) {
1173 +
            return elem->slice;
1174 +
        }
1175 +
        if (mut && elem->slice_mut) {
1176 +
            return elem->slice_mut;
1177 +
        }
1178 +
    }
1179 +
1180 +
    char buf[MAX_STRING_LEN] = { 0 };
1181 +
    if (mut) {
1182 +
        snprintf(
1183 +
            buf, MAX_STRING_LEN, "*mut [%.*s]", (int)elem->namelen, elem->name
1184 +
        );
1185 +
    } else {
1186 +
        snprintf(
1187 +
            buf, MAX_STRING_LEN, "*[%.*s]", (int)elem->namelen, elem->name
1188 +
        );
1189 +
    }
1190 +
    const char *name = strings_alloc(buf);
1191 +
1192 +
    type_t *typ =
1193 +
        alloc_type(t, TYPE_SLICE, name, strlen(name), WORD_SIZE * 2, WORD_SIZE);
1194 +
    typ->info.slc.elem = elem;
1195 +
    typ->info.slc.base = base;
1196 +
    typ->info.slc.mut  = mut;
1197 +
1198 +
    if (base) {
1199 +
        if (!mut) {
1200 +
            base->slice = typ;
1201 +
        } else {
1202 +
            base->slice_mut = typ;
1203 +
        }
1204 +
    } else {
1205 +
        if (!mut) {
1206 +
            elem->slice = typ;
1207 +
        } else {
1208 +
            elem->slice_mut = typ;
1209 +
        }
1210 +
    }
1211 +
    return typ;
1212 +
}
1213 +
1214 +
/* Allocate a pointer type. */
1215 +
static type_t *alloc_ptr_type(resolve_t *t, type_t *base, bool mut) {
1216 +
    if (!mut && base->ptr) {
1217 +
        return base->ptr;
1218 +
    }
1219 +
    if (mut && base->ptr_mut) {
1220 +
        return base->ptr_mut;
1221 +
    }
1222 +
1223 +
    char buf[MAX_STRING_LEN] = { 0 };
1224 +
    if (mut) {
1225 +
        snprintf(
1226 +
            buf, MAX_STRING_LEN, "*mut %.*s", (int)base->namelen, base->name
1227 +
        );
1228 +
    } else {
1229 +
        snprintf(buf, MAX_STRING_LEN, "*%.*s", (int)base->namelen, base->name);
1230 +
    }
1231 +
    const char *name = strings_alloc(buf);
1232 +
1233 +
    type_t *typ =
1234 +
        alloc_type(t, TYPE_PTR, name, strlen(name), WORD_SIZE, WORD_SIZE);
1235 +
    typ->info.ptr.target = base;
1236 +
    typ->info.ptr.mut    = mut;
1237 +
1238 +
    if (!mut) {
1239 +
        base->ptr = typ;
1240 +
    } else {
1241 +
        base->ptr_mut = typ;
1242 +
    }
1243 +
    return typ;
1244 +
}
1245 +
1246 +
/* Allocate an array type. */
1247 +
static type_t *alloc_array_type(resolve_t *t, type_t *elem, usize length) {
1248 +
    /* First check if we already have this array type */
1249 +
    for (usize i = 0; i < t->types.nobjects; i++) {
1250 +
        type_t *typ = &t->types.objects[i];
1251 +
1252 +
        if (typ->cls == TYPE_ARRAY && typ->info.ary.elem == elem &&
1253 +
            typ->info.ary.length == length) {
1254 +
            return typ;
1255 +
        }
1256 +
    }
1257 +
    char buf[MAX_STRING_LEN] = { 0 };
1258 +
    snprintf(
1259 +
        buf,
1260 +
        MAX_STRING_LEN,
1261 +
        "[%.*s; %ld]",
1262 +
        (int)elem->namelen,
1263 +
        elem->name,
1264 +
        length
1265 +
    );
1266 +
    const char *name = strings_alloc(buf);
1267 +
1268 +
    type_t *array_type = alloc_type(t, TYPE_ARRAY, name, strlen(name), 0, 0);
1269 +
1270 +
    array_type->info.ary.elem   = elem;
1271 +
    array_type->info.ary.length = length;
1272 +
    update_array_layout(array_type);
1273 +
1274 +
    array_type->slice = alloc_slice_type(t, elem, array_type, false);
1275 +
    array_type->ptr   = alloc_ptr_type(t, array_type, false);
1276 +
1277 +
    return array_type;
1278 +
}
1279 +
1280 +
/* Allocate a fresh union (enum) type for the declaration `uni`, with an
 * empty variant pool; variants are added later via union_variant_add. */
static type_t *alloc_union_type(resolve_t *t, union_decl_t *uni) {
    node_t *ident = uni->name;

    type_t *typ = alloc_type(
        t,
        TYPE_UNION,
        ident->val.ident.name,
        ident->val.ident.length,
        WORD_SIZE,
        WORD_SIZE
    );

    /* TODO: use correct type based on union variants.
     * For now, default all enums to an `i32` base type. */
    typ->info.uni.decl        = uni;
    typ->info.uni.base        = t->types.type_i32;
    typ->info.uni.variants    = types_alloc_sympool(&t->types, MAX_UNION_VARIANTS);
    typ->info.uni.nvariants   = 0;
    typ->info.uni.variantsize = 0;
    typ->info.uni.has_payload = false;

    return typ;
}
1300 +
1301 +
static type_t *alloc_fn_type(
1302 +
    resolve_t *t, node_t *n, type_t *ret, usize nparams
1303 +
) {
1304 +
    type_t *type = alloc_type(
1305 +
        t,
1306 +
        TYPE_FN,
1307 +
        n->sym ? n->sym->name : "#fn",
1308 +
        n->sym ? n->sym->length : 3,
1309 +
        DEFAULT_SIZE,
1310 +
        DEFAULT_ALIGN
1311 +
    );
1312 +
    type->info.fun.ret     = ret ? ret : t->types.type_void;
1313 +
    type->info.fun.params  = types_alloc_typepool(&t->types, MAX_FN_PARAMS);
1314 +
    type->info.fun.throws  = types_alloc_typepool(&t->types, MAX_FN_THROWS);
1315 +
    type->info.fun.nparams = nparams;
1316 +
    type->info.fun.nthrows = 0;
1317 +
1318 +
    return (n->type = type);
1319 +
}
1320 +
1321 +
/* Allocate a named record type for the declaration `srt`; its layout is
 * computed later as fields are added. */
static type_t *alloc_record_type(resolve_t *t, record_decl_t *srt) {
    node_t *ident = srt->name;

    type_t *typ = alloc_type(
        t,
        TYPE_RECORD,
        ident->val.ident.name,
        ident->val.ident.length,
        0, /* Size will be updated when we add fields */
        DEFAULT_ALIGN
    );

    typ->info.srt.fields     = types_alloc_sympool(&t->types, MAX_RECORD_FIELDS);
    typ->info.srt.nfields    = 0;
    typ->info.srt.packedsize = 0;
    typ->info.srt.anonymous  = false;
    typ->info.srt.tuple      = srt->tuple;

    return typ;
}
1338 +
1339 +
/* Allocate an anonymous record type with a unique generated name
 * ("record#N"); its layout is computed later as fields are added. */
static type_t *alloc_anonymous_record_type(resolve_t *t) {
    char namebuf[32];
    snprintf(namebuf, sizeof namebuf, "record#%u", (unsigned)t->recordid++);

    const char *name = strings_alloc(namebuf);

    type_t *typ = alloc_type(
        t,
        TYPE_RECORD,
        name,
        strlen(name),
        0, /* Size will be updated when we add fields */
        DEFAULT_ALIGN
    );

    typ->info.srt.fields     = types_alloc_sympool(&t->types, MAX_RECORD_FIELDS);
    typ->info.srt.nfields    = 0;
    typ->info.srt.packedsize = 0;
    typ->info.srt.anonymous  = true;

    return typ;
}
1359 +
1360 +
/* Allocate an optional type. */
1361 +
static type_t *alloc_opt_type(resolve_t *t, type_t *elem) {
1362 +
    /* First check if we already have this optional type */
1363 +
    for (usize i = 0; i < t->types.nobjects; i++) {
1364 +
        type_t *typ = &t->types.objects[i];
1365 +
1366 +
        if (typ->cls == TYPE_OPT && typ->info.opt.elem == elem) {
1367 +
            return typ;
1368 +
        }
1369 +
    }
1370 +
    char buf[MAX_STRING_LEN] = { 0 };
1371 +
    snprintf(buf, MAX_STRING_LEN, "?%.*s", (int)elem->namelen, elem->name);
1372 +
    const char *name = strings_alloc(buf);
1373 +
1374 +
    type_t *opt_type = alloc_type(t, TYPE_OPT, name, strlen(name), 0, 0);
1375 +
1376 +
    opt_type->info.opt.elem = elem;
1377 +
    update_opt_layout(opt_type);
1378 +
1379 +
    return opt_type;
1380 +
}
1381 +
1382 +
/* Allocate a result type over `payload` and `err`, reusing a matching
 * existing one when possible. */
static type_t *alloc_result_type(resolve_t *t, type_t *payload, type_t *err) {
    /* Results are deduplicated by (payload, err) pair. */
    for (usize i = 0; i < t->types.nobjects; i++) {
        type_t *existing = &t->types.objects[i];

        if (existing->cls != TYPE_RESULT)
            continue;
        if (existing->info.res.payload == payload &&
            existing->info.res.err == err)
            return existing;
    }

    /* Note: the printed name lists the error type first. */
    char buf[MAX_STRING_LEN] = { 0 };
    snprintf(
        buf,
        MAX_STRING_LEN,
        "result<%.*s, %.*s>",
        (int)err->namelen,
        err->name,
        (int)payload->namelen,
        payload->name
    );

    const char *name = strings_alloc(buf);
    type_t     *typ  = alloc_type(t, TYPE_RESULT, name, strlen(name), 0, 0);

    typ->info.res.err     = err;
    typ->info.res.payload = payload;
    update_result_layout(t, typ);

    return typ;
}
1413 +
1414 +
static bool resolve_fn_throws(
1415 +
    resolve_t *t, type_t *fn_type, nodespan_t throws, type_t *ret_payload
1416 +
) {
1417 +
    usize nthrows = throws.len;
1418 +
    if (nthrows == 0) {
1419 +
        fn_type->info.fun.ret = ret_payload;
1420 +
        return true;
1421 +
    }
1422 +
    if (nthrows > MAX_FN_THROWS)
1423 +
        bail("too many throw types");
1424 +
1425 +
    node_t **throw_nodes = nodespan_ptrs(&t->module->parser, throws);
1426 +
    for (usize i = 0; i < nthrows; i++) {
1427 +
        node_t *thrown     = throw_nodes[i];
1428 +
        type_t *thrown_typ = resolve_type(t, thrown);
1429 +
1430 +
        if (!thrown_typ)
1431 +
            return false;
1432 +
        fn_type->info.fun.throws[i] = thrown_typ;
1433 +
        fn_type->info.fun.nthrows++;
1434 +
    }
1435 +
    type_t *thrown_typ = fn_type->info.fun.throws[0];
1436 +
    type_t *result_typ = alloc_result_type(t, ret_payload, thrown_typ);
1437 +
1438 +
    fn_type->info.fun.ret = result_typ;
1439 +
1440 +
    return true;
1441 +
}
1442 +
1443 +
static bool union_variant_validate_args(
1444 +
    resolve_t *t, node_t *call, symbol_t *variant_sym, node_t **out_arg_expr
1445 +
) {
1446 +
    (void)t;
1447 +
    type_t *variant_type = variant_sym->node->type;
1448 +
    usize   nargs        = call->val.call.args.len;
1449 +
1450 +
    if (variant_type->cls == TYPE_VOID) {
1451 +
        if (out_arg_expr)
1452 +
            *out_arg_expr = NULL;
1453 +
        return nargs == 0;
1454 +
    }
1455 +
    if (nargs != 1)
1456 +
        return false;
1457 +
1458 +
    if (out_arg_expr)
1459 +
        *out_arg_expr =
1460 +
            nodespan_ptrs(&t->module->parser, call->val.call.args)[0]
1461 +
                ->val.call_arg.expr;
1462 +
1463 +
    return true;
1464 +
}
1465 +
1466 +
/* Check a union constructor call like `Expr::number(42)`. */
1467 +
static type_t *resolve_enum_constructor(
1468 +
    resolve_t *t, node_t *call, type_t *union_type, symbol_t *variant_sym
1469 +
) {
1470 +
    type_t *variant_type = variant_sym->node->type;
1471 +
    node_t *arg_expr     = NULL;
1472 +
1473 +
    if (!union_variant_validate_args(t, call, variant_sym, &arg_expr))
1474 +
        return NULL;
1475 +
1476 +
    if (arg_expr) {
1477 +
        if (!resolve_node(t, arg_expr, variant_type))
1478 +
            return NULL;
1479 +
    }
1480 +
1481 +
    call->sym  = variant_sym;
1482 +
    call->type = union_type;
1483 +
1484 +
    return union_type;
1485 +
}
1486 +
1487 +
/* Check tuple record constructor call */
1488 +
static type_t *resolve_tuple_record_constructor(
1489 +
    resolve_t *t, node_t *call, type_t *record_type
1490 +
) {
1491 +
    usize nfields = record_type->info.srt.nfields;
1492 +
    usize nargs   = call->val.call.args.len;
1493 +
1494 +
    if (nargs != nfields)
1495 +
        return NULL;
1496 +
1497 +
    /* Type check each argument against the corresponding field type. */
1498 +
    for (usize i = 0; i < nargs; i++) {
1499 +
        node_t *arg = nodespan_ptrs(&t->module->parser, call->val.call.args)[i];
1500 +
        symbol_t *field_sym = record_type->info.srt.fields[i];
1501 +
        type_t   *field_typ = field_sym->e.field.typ;
1502 +
1503 +
        if (!resolve_node(t, arg, field_typ))
1504 +
            return NULL;
1505 +
    }
1506 +
    call->sym = NULL;
1507 +
1508 +
    return (call->type = record_type);
1509 +
}
1510 +
1511 +
/* Add `n` to the current scope under `ident`. Placeholder identifiers
 * (`_`) are accepted without creating a symbol. */
static bool symbol_add(resolve_t *t, node_t *ident, node_t *n) {
    return ident->cls == NODE_PLACEHOLDER
               ? true
               : symtab_add_ident(t->scope, ident, n);
}
1517 +
1518 +
/* Resolve an identifier or scoped name (`a::b`) to a symbol of `kind`
 * (SYM_ANY matches any kind). On success the symbol is attached to the
 * node and the node inherits the declaration's type when it has none.
 * Returns NULL when no matching symbol is found. */
static symbol_t *resolve_name(resolve_t *t, node_t *n, symkind_t kind) {
    n->sym = NULL;

    /* Scoped names go through the scope-resolution machinery, which
     * attaches the symbol itself. */
    if (n->cls == NODE_SCOPE) {
        if (!resolve_scope(t, n) || !n->sym)
            return NULL;
        if (kind != SYM_ANY && n->sym->kind != kind)
            return NULL;
        return n->sym;
    }

    symbol_t *sym =
        symtab_lookup(t->scope, n->val.ident.name, n->val.ident.length, kind);

    /* Fixed: removed a dead retry — the original re-ran a byte-identical
     * SYM_ANY lookup when the first lookup (already with SYM_ANY) failed,
     * which could never produce a different result. */

    if (sym) {
        n->sym = sym;

        /* Propagate the declaration's type onto the use site. */
        if (sym->node && sym->node->type && !n->type)
            n->type = sym->node->type;

        return sym;
    }
    return NULL;
}
1548 +
1549 +
/* Resolve a type by looking up its definition if necessary, eg. for custom
 * types defined in the source code. The result is memoized on `n->type`;
 * returns NULL on failure (bail may abort on internal errors). */
static type_t *resolve_type(resolve_t *t, node_t *n) {
    /* Already resolved on a previous visit. */
    if (n->type)
        return n->type;

    switch (n->cls) {
    case NODE_TYPE:
        switch (n->val.type.tclass) {
        /* Primitives map directly onto the preallocated singletons. */
        case TYPE_U8:
            return (n->type = t->types.type_u8);
        case TYPE_U16:
            return (n->type = t->types.type_u16);
        case TYPE_U32:
            return (n->type = t->types.type_u32);
        case TYPE_I8:
            return (n->type = t->types.type_i8);
        case TYPE_I16:
            return (n->type = t->types.type_i16);
        case TYPE_I32:
            return (n->type = t->types.type_i32);
        case TYPE_BOOL:
            return (n->type = t->types.type_bool);
        case TYPE_VOID:
            return (n->type = t->types.type_void);
        case TYPE_OPAQUE:
            return (n->type = t->types.type_opaque);
        case TYPE_FN: {
            /* Resolve return type */
            type_t *ret_type = n->val.type.info.fn.ret
                                   ? resolve_type(t, n->val.type.info.fn.ret)
                                   : t->types.type_void;
            if (!ret_type)
                return NULL;

            /* Allocate (and memoize) the fn type before resolving the
             * parameters, so the slots exist to be filled below. */
            n->type =
                alloc_fn_type(t, n, ret_type, n->val.type.info.fn.params.len);

            /* Resolve parameter types */
            for (usize i = 0; i < n->val.type.info.fn.params.len; i++) {
                type_t *param_typ = resolve_type(
                    t,
                    nodespan_ptrs(
                        &t->module->parser, n->val.type.info.fn.params
                    )[i]
                );
                if (!param_typ)
                    return NULL;

                n->type->info.fun.params[i] = param_typ;
            }
            /* Throws fold into the return type (result<...>). */
            if (!resolve_fn_throws(
                    t, n->type, n->val.type.info.fn.throws, ret_type
                ))
                return NULL;

            return n->type;
        }
        case TYPE_ARRAY: {
            type_t *elem_typ = resolve_type(t, n->val.type.elem_type);

            if (!elem_typ)
                return NULL;

            /* The length expression must type-check as u32 and reduce
             * to a compile-time constant. */
            node_t *len_node = n->val.type.info.array.length;
            if (!resolve_node(t, len_node, t->types.type_u32))
                return NULL;

            usize len = 0;
            if (!resolve_const_usize(t, len_node, &len))
                return NULL;
            return (n->type = alloc_array_type(t, elem_typ, len));
        }
        case TYPE_SLICE: {
            type_t *elem_typ = resolve_type(t, n->val.type.elem_type);
            if (!elem_typ)
                return NULL;

            bool mut = n->val.type.info.slice.mut;
            return (n->type = alloc_slice_type(t, elem_typ, NULL, mut));
        }
        case TYPE_UNION:
        case TYPE_RESULT:
        case TYPE_RECORD:
            /* These classes are never spelled as bare NODE_TYPE nodes;
             * reaching here is a front-end invariant violation. */
            abort();
        case TYPE_PTR: {
            type_t *elem_typ = resolve_type(t, n->val.type.elem_type);

            if (!elem_typ)
                return NULL;
            bool mut = n->val.type.info.ptr.mut;

            return (n->type = alloc_ptr_type(t, elem_typ, mut));
        }
        case TYPE_OPT: {
            type_t *elem_typ = resolve_type(t, n->val.type.elem_type);

            if (!elem_typ)
                return NULL;

            return (n->type = alloc_opt_type(t, elem_typ));
        }
        default:
            break;
        }
        break;
    case NODE_RECORD_TYPE: {
        /* Inline (anonymous) record type: resolve all field types first,
         * then deduplicate against previously-created anonymous records. */
        record_type_t *stype = &n->val.record_type;
        node_t **fields      = nodespan_ptrs(&t->module->parser, stype->fields);

        for (usize i = 0; i < stype->fields.len; i++) {
            node_t *field = fields[i];
            type_t *typ   = resolve_type(t, field->val.var.type);

            if (!typ)
                return NULL;

            field->type = typ;
        }
        type_t *existing = anonymous_record_lookup(t, stype);
        if (existing)
            return (n->type = existing);

        /* No structural match: build a fresh anonymous record. */
        type_t *typ = alloc_anonymous_record_type(t);
        for (usize i = 0; i < stype->fields.len; i++) {
            node_t *field = fields[i];

            if (!record_field_add(
                    t, typ, field, field->val.var.ident, field->type
                ))
                return NULL;
        }

        return (n->type = typ);
    }
    case NODE_SCOPE:
    case NODE_IDENT: {
        /* Named type: resolve through the symbol table. */
        symbol_t *sym = resolve_name(t, n, SYM_TYPE);

        if (!sym)
            return NULL;

        if (!sym->node || !sym->node->type)
            bail("type symbol missing type information");

        return (n->type = sym->node->type);
    }
    default:
        bail("node is not a kind of type, class is %d", n->cls);
    }
    return NULL;
}
1701 +
1702 +
/* Type-check and evaluate a number literal against `expected` (defaults
 * to i32; `?T` contexts unwrap to T). Parses the literal text — decimal,
 * hex/octal via strto*, or 0b binary manually — and stores the value on
 * the node. Returns the resolved type, or NULL for non-numeric contexts. */
static type_t *resolve_number(resolve_t *t, node_t *n, type_t *expected) {
    if (!expected)
        expected = t->types.type_i32;
    if (expected->cls == TYPE_OPT)
        expected = expected->info.opt.elem;

    if (!expected || !type_is_numeric(expected->cls))
        return NULL;

    type_t     *result_type = expected;
    typeclass_t tclass      = expected->cls;
    imm_t       value       = { 0 };

    /* Create a null-terminated copy of the text for strto* functions.
     * Fixed: the old code memcpy'd into a `static char[16]` with no
     * bounds check — any literal of 16+ characters overflowed the
     * buffer. Use a larger stack buffer and clamp the copy length. */
    char  text[64] = { 0 };
    usize text_len = n->val.number.text_len;

    if (text_len >= sizeof text)
        text_len = sizeof text - 1;
    memcpy(text, n->val.number.text, text_len);
    text[text_len] = '\0';

    /* Manual binary literal parsing since `strtol` doesn't support 0b in this
     * environment. */
    bool is_binary = (text[0] == '0' && (text[1] == 'b' || text[1] == 'B'));
    u32  binval    = 0;

    if (is_binary) {
        for (usize i = 2; text[i]; i++) {
            binval = (binval << 1) + (text[i] - '0');
        }
    }

    /* Parse the number based on the type */
    switch (tclass) {
    case TYPE_I8:
    case TYPE_I16:
    case TYPE_I32: {
        i32 val;
        if (is_binary) {
            val = (i32)binval;
        } else {
            val = strtol(text, NULL, 0);
        }
        value.i = val;
        break;
    }
    case TYPE_U8:
    case TYPE_U16:
    case TYPE_U32: {
        u32 val;
        if (is_binary) {
            val = binval;
        } else {
            val = strtoul(text, NULL, 0);
        }
        value.u = val;
        break;
    }
    default:
        break;
    }
    n->val.number.value = value;

    return (n->type = result_type);
}
1764 +
1765 +
/* Resolve a builtin call (@sizeOf, @alignOf, @sliceOf). For the compile-
 * time builtins the node is rewritten IN PLACE into a NODE_NUMBER holding
 * the computed u32; @sliceOf stays a runtime construct typed as a slice.
 * NOTE(review): assumes the parser guarantees the argument counts
 * (args[0], and args[1] for @sliceOf) — confirm against the parser. */
static type_t *resolve_builtin(resolve_t *t, node_t *n, type_t *expected) {
    (void)expected;

    builtin_kind_t kind = n->val.builtin.kind;
    node_t **args = nodespan_ptrs(&t->module->parser, n->val.builtin.args);
    type_t  *typ;

    /* @sliceOf is handled separately since it takes two runtime arguments */
    if (kind == BUILTIN_SLICE_OF) {
        /* Check first argument (pointer) */
        type_t *ptr_type = resolve_node(t, args[0], NULL);
        if (!ptr_type)
            return NULL;

        /* Check second argument (length) */
        type_t *len_type = resolve_node(t, args[1], t->types.type_u32);
        if (!len_type)
            return NULL;

        /* Result is a slice of the pointer's element type */
        type_t *elem_type = ptr_type->info.ptr.target;
        bool    mut       = ptr_type->info.ptr.mut;

        return (n->type = alloc_slice_type(t, elem_type, NULL, mut));
    }

    /* The argument may be a type expression or a value expression;
     * either way we only need its type. */
    node_t *expr = args[0];
    switch (expr->cls) {
    case NODE_TYPE:
    case NODE_RECORD_TYPE:
        typ = resolve_type(t, expr);
        break;
    default:
        typ = resolve_node(t, expr, NULL);
        break;
    }
    if (!typ)
        return NULL;

    u32 value = 0;

    switch (kind) {
    case BUILTIN_SIZE_OF:
        value = (u32)typ->size;
        break;
    case BUILTIN_ALIGN_OF:
        value = (u32)typ->align;
        /* A variable's declared over-alignment wins over the type's
         * natural alignment. */
        if (expr->sym && expr->sym->kind == SYM_VARIABLE &&
            expr->sym->e.var.align > 0) {
            value = (u32)expr->sym->e.var.align;
        }
        break;
    case BUILTIN_SLICE_OF:
        /* Already handled above */
        break;
    }
    /* Fold: rewrite the builtin node into a plain number literal.
     * text is NULL since the value was computed, not parsed. */
    n->cls = NODE_NUMBER;

    n->val.number.text     = NULL;
    n->val.number.text_len = 0;
    n->val.number.value.u  = value;

    return (n->type = t->types.type_u32);
}
1829 +
1830 +
/* Bind a pattern variable to a field type. Handles identifiers and
1831 +
 * placeholders. If is_ref_match is true, the binding type is wrapped
1832 +
 * in a pointer type. */
1833 +
static bool bind_pattern_var(
1834 +
    resolve_t *t,
1835 +
    node_t    *binding,
1836 +
    type_t    *field_typ,
1837 +
    bool       is_ref_match,
1838 +
    bool       ref_mut
1839 +
) {
1840 +
    type_t *binding_type =
1841 +
        is_ref_match ? alloc_ptr_type(t, field_typ, ref_mut) : field_typ;
1842 +
    if (binding->cls == NODE_IDENT) {
1843 +
        binding->type = binding_type;
1844 +
        if (!symbol_add(t, binding, binding))
1845 +
            return false;
1846 +
        binding->sym->e.var.typ   = binding_type;
1847 +
        binding->sym->e.var.align = binding_type->align;
1848 +
        binding->sym->scope       = t->scope;
1849 +
    }
1850 +
    return true;
1851 +
}
1852 +
1853 +
/* Bind pattern variables to record fields. Works for both tuple-style S(x, y)
1854 +
 * and labeled T { x, y } patterns. Returns false on error.
1855 +
 * If is_ref_match is true, bindings are pointer types. */
1856 +
static bool resolve_record_pattern_bindings(
1857 +
    resolve_t *t,
1858 +
    node_t    *pattern,
1859 +
    type_t    *rec_type,
1860 +
    bool       is_ref_match,
1861 +
    bool       ref_mut
1862 +
) {
1863 +
    if (pattern->cls == NODE_CALL) {
1864 +
        usize nargs = pattern->val.call.args.len;
1865 +
        for (usize i = 0; i < nargs; i++) {
1866 +
            node_t *arg_node =
1867 +
                nodespan_ptrs(&t->module->parser, pattern->val.call.args)[i];
1868 +
            node_t   *arg       = (arg_node->cls == NODE_CALL_ARG)
1869 +
                                      ? arg_node->val.call_arg.expr
1870 +
                                      : arg_node;
1871 +
            symbol_t *field_sym = rec_type->info.srt.fields[i];
1872 +
1873 +
            if (!bind_pattern_var(
1874 +
                    t, arg, field_sym->e.field.typ, is_ref_match, ref_mut
1875 +
                ))
1876 +
                return false;
1877 +
            arg_node->sym = field_sym;
1878 +
        }
1879 +
    } else if (pattern->cls == NODE_RECORD_LIT) {
1880 +
        node_t **fields =
1881 +
            nodespan_ptrs(&t->module->parser, pattern->val.record_lit.fields);
1882 +
1883 +
        for (usize f = 0; f < pattern->val.record_lit.fields.len; f++) {
1884 +
            node_t *field_node  = fields[f];
1885 +
            node_t *binding     = field_node->val.record_lit_field.value;
1886 +
            node_t *name_node   = field_node->val.record_lit_field.name;
1887 +
            node_t *lookup_node = name_node ? name_node : binding;
1888 +
1889 +
            symbol_t *field_sym = record_field_lookup(rec_type, lookup_node);
1890 +
            if (!field_sym)
1891 +
                return false;
1892 +
1893 +
            field_node->sym = field_sym;
1894 +
1895 +
            if (!bind_pattern_var(
1896 +
                    t, binding, field_sym->e.field.typ, is_ref_match, ref_mut
1897 +
                ))
1898 +
                return false;
1899 +
        }
1900 +
    }
1901 +
    return true;
1902 +
}
1903 +
1904 +
/* Check a match statement case.
 *
 * Handles four pattern situations, dispatching on match_typ:
 *   1. default (`else`) case: no patterns, just check the body;
 *   2. payload-carrying union (possibly through a pointer): patterns are
 *      variant constructors; bound variables get a shared case scope;
 *   3. record: destructuring patterns bind fields in a case scope;
 *   4. anything else: patterns are plain expressions checked against the
 *      match subject's type under a pattern context.
 * On success the case node's type is set to void. Returns NULL on error. */
static type_t *resolve_match_case(resolve_t *t, node_t *n, type_t *match_typ) {
    /* If this is the default (else) case, there are no patterns to check. */
    if (!n->val.match_case.patterns.len) {
        if (!resolve_node(t, n->val.match_case.body, NULL))
            return NULL;

        return (n->type = t->types.type_void);
    }
    /* Saved so the case scope can be popped when we're done. */
    scope_t *prev = t->scope;

    /* Check if matching on a pointer to a union - bindings will be pointers */
    bool    is_ref_match = false;
    bool    ref_mut      = false;
    type_t *union_typ    = match_typ;

    if (match_typ->cls == TYPE_PTR &&
        type_is_union_with_payload(match_typ->info.ptr.target)) {
        is_ref_match = true;
        ref_mut      = match_typ->info.ptr.mut;
        union_typ    = match_typ->info.ptr.target;
    }

    if (type_is_union_with_payload((union_typ))) {
        /* Create a shared scope for all patterns in this case */
        scope_t *case_scope        = symtab_scope(t->scope, NULL);
        t->scope                   = case_scope;
        n->val.match_case.variable = NULL;

        /* Check each pattern in this case.
         * NOTE(review): the early `return NULL` paths below exit without
         * restoring t->scope to `prev` — presumably fine because errors
         * abort resolution, but confirm. */
        node_t **patterns =
            nodespan_ptrs(&t->module->parser, n->val.match_case.patterns);
        for (usize p = 0; p < n->val.match_case.patterns.len; p++) {
            node_t *pattern   = patterns[p];
            node_t *callee    = NULL;
            bool    is_call   = (pattern->cls == NODE_CALL);
            bool    is_reclit = (pattern->cls == NODE_RECORD_LIT);

            /* A union pattern is one of: `Variant(args)`, a record
             * literal `T::Variant { .. }`, or a bare scope `T::Variant`. */
            if (is_call) {
                callee = pattern->val.call.callee;
            } else if (is_reclit) {
                callee = pattern->val.record_lit.type;
                if (!callee)
                    return NULL;
            } else if (pattern->cls == NODE_SCOPE) {
                callee = pattern;
            } else {
                return NULL;
            }

            /* Resolve the scope path, then take its right-hand side as
             * the variant name. */
            type_t *parent  = resolve_scope(t, callee);
            node_t *variant = callee->val.access.rval;

            if (!parent)
                return NULL;

            symbol_t *variant_sym = union_variant_lookup(union_typ, variant);
            if (!variant_sym)
                return NULL;
            variant->sym = variant_sym;

            type_t *variant_type = variant_sym->node->type;

            if (variant_type->cls == TYPE_VOID) {
                /* Payload-less variant: a call form must still pass
                 * argument validation (i.e. have zero args). */
                if (is_call) {
                    if (!union_variant_validate_args(
                            t, pattern, variant_sym, NULL
                        ))
                        return NULL;
                }
            } else if (is_reclit) {
                /* Record-payload variant destructured field by field. */
                if (variant_type->cls != TYPE_RECORD)
                    return NULL;
                if (!resolve_record_pattern_bindings(
                        t, pattern, variant_type, is_ref_match, ref_mut
                    ))
                    return NULL;
            } else {
                /* Single-payload variant: `Variant(x)` binds x. */
                node_t *arg_expr = NULL;

                if (!union_variant_validate_args(
                        t, pattern, variant_sym, &arg_expr
                    ))
                    return NULL;

                node_t *variable           = arg_expr;
                n->val.match_case.variable = variable;

                /* Create scope for the bound variable.
                 * If matching on a pointer to union, binding is a pointer. */
                type_t *binding_type =
                    is_ref_match ? alloc_ptr_type(t, variant_type, ref_mut)
                                 : variant_type;
                variable->type = binding_type;

                if (variable->cls == NODE_IDENT) {
                    /* Add the bound variable to the scope */
                    if (!symbol_add(t, variable, variable))
                        return NULL;

                    variable->sym->e.var.typ   = binding_type;
                    variable->sym->e.var.align = binding_type->align;
                    variable->sym->scope       = t->scope;
                }
            }
            /* Set the pattern type to the union type. */
            pattern->type = match_typ;
        }
    } else if (match_typ->cls == TYPE_RECORD) {
        /* Record pattern matching: `match rec { case T(x) => ... }` */
        scope_t *case_scope        = symtab_scope(t->scope, NULL);
        t->scope                   = case_scope;
        n->val.match_case.variable = NULL;

        node_t **patterns =
            nodespan_ptrs(&t->module->parser, n->val.match_case.patterns);
        for (usize p = 0; p < n->val.match_case.patterns.len; p++) {
            node_t *pattern = patterns[p];
            /* Bindings are by value here (no ref-match for records). */
            if (!resolve_record_pattern_bindings(
                    t, pattern, match_typ, false, false
                ))
                return NULL;
            pattern->type = match_typ;
        }
    } else {
        /* Plain expression patterns (numbers, idents, ranges, ...).
         * NOTE(review): t->ctx is saved into a bool; if the context type
         * ever has values other than 0 and TC_CTX_PATTERN==1 this restore
         * truncates the saved context — confirm the ctx type. */
        bool pctx = t->ctx;
        t->ctx    = TC_CTX_PATTERN;

        /* Check each pattern in this case */
        node_t **patterns2 =
            nodespan_ptrs(&t->module->parser, n->val.match_case.patterns);
        for (usize p = 0; p < n->val.match_case.patterns.len; p++) {
            node_t *pattern = patterns2[p];
            if (!resolve_node(t, pattern, match_typ))
                return NULL;
        }
        t->ctx = pctx;
    }
    /* A guard expression, if present, must be boolean. It is resolved in
     * the case scope so it can see pattern bindings. */
    if (n->val.match_case.guard) {
        if (!resolve_node(t, n->val.match_case.guard, t->types.type_bool))
            return NULL;
    }
    /* Check case body */
    if (!resolve_node(t, n->val.match_case.body, NULL)) {
        t->scope = prev;
        return NULL;
    }
    t->scope = prev;

    return (n->type = t->types.type_void);
}
2055 +
2056 +
/* Resolve a call made through a function-pointer symbol. Each argument
 * expression is checked against the corresponding parameter type of the
 * pointed-to function type, and the call node takes on the function's
 * return type. Returns NULL on failure.
 * NOTE(review): assumes the argument count matches the parameter count
 * — confirm arity is validated before this point. */
static type_t *resolve_call_fn_ptr(resolve_t *t, symbol_t *sym, node_t *call) {
    type_t *fn_type = sym->node->type;

    if (fn_type->cls != TYPE_FN)
        return NULL;

    /* Type-check every argument against its declared parameter. */
    node_t **args = nodespan_ptrs(&t->module->parser, call->val.call.args);

    for (usize i = 0; i < call->val.call.args.len; i++) {
        type_t *want = fn_type->info.fun.params[i];

        if (!resolve_node(t, args[i]->val.call_arg.expr, want))
            return NULL;
    }
    call->sym = sym;

    return (call->type = fn_type->info.fun.ret);
}
2075 +
2076 +
/* Resolve a direct call to a declared function. Marks the callee as
 * used, resolves each declared parameter type in order, and checks the
 * matching call argument against it. On success the call node takes the
 * function's return type. Returns NULL on failure.
 * NOTE(review): assumes argument and parameter counts already agree —
 * confirm arity is validated before this point. */
static type_t *resolve_call_fn(resolve_t *t, symbol_t *sym, node_t *call) {
    node_t *fn = sym->node;

    if (fn->type->cls != TYPE_FN)
        return NULL;

    sym->e.fn.used = true;

    /* Argument nodes live in the calling module's parser; parameter
     * nodes live in the module that declared the function. */
    node_t **args   = nodespan_ptrs(&t->module->parser, call->val.call.args);
    node_t **params =
        nodespan_ptrs(&sym->scope->mod->parser, fn->val.fn_decl.params);

    for (usize i = 0; i < call->val.call.args.len; i++) {
        type_t *want = resolve_type(t, params[i]->val.param.type);

        if (!resolve_node(t, args[i]->val.call_arg.expr, want))
            return NULL;
    }
    call->sym = sym;

    return (call->type = fn->type->info.fun.ret);
}
2099 +
2100 +
/* Helper function to build a module path from NODE_ACCESS nodes */
2101 +
static void module_scope_path(node_t *node, char *path_str) {
2102 +
    if (node->cls == NODE_IDENT) {
2103 +
        strncat(path_str, node->val.ident.name, node->val.ident.length);
2104 +
    } else if (node->cls == NODE_SUPER) {
2105 +
        strlcat(path_str, "super", MAX_PATH_LEN);
2106 +
    } else if (node->cls == NODE_SCOPE) {
2107 +
        module_scope_path(node->val.access.lval, path_str);
2108 +
        strlcat(path_str, "::", MAX_PATH_LEN);
2109 +
        module_scope_path(node->val.access.rval, path_str);
2110 +
    } else {
2111 +
    }
2112 +
}
2113 +
2114 +
/* Resolve a `use` declaration.
 *
 * The path is first tried as a whole-module import (relative to the
 * current module). If that succeeds, either all public symbols are
 * re-exported into the current scope (wildcard import) or the module is
 * bound under its last path component. Otherwise, if the path has a
 * parent scope, the last component is tried as a single symbol imported
 * from the parent module. Returns void type on success, NULL on error. */
static type_t *resolve_use(resolve_t *t, node_t *n) {
    /* Extract the import path from the `use` node */
    node_t *path_node = n->val.use_decl.path;
    bool    wildcard  = n->val.use_decl.wildcard;

    /* Get the last component (symbol name) and parent scope */
    node_t *last   = path_node;
    node_t *parent = NULL;

    while (last && (last->cls == NODE_SCOPE)) {
        parent = last->val.access.lval;
        last   = last->val.access.rval;
    }

    /* Try to find as a module first */
    char filepath[MAX_PATH_LEN] = { 0 };
    module_scope_path(path_node, filepath);
    module_t *imported =
        module_manager_find_relative(t->mm, t->module->path, filepath);

    if (imported) {
        /* Module import: check both declarations and definitions */
        if (!resolve_decls(t, imported)) {
            return NULL;
        }
        /* Definitions are only resolved once the current module has
         * finished its own declaration phase, to avoid recursing into
         * definitions while declarations are still incomplete. */
        bool in_decl_phase = t->module && !t->module->declared;

        if (!in_decl_phase) {
            if (!resolve_mod_def(t, imported)) {
                return NULL;
            }
        }
        if (wildcard) {
            /* Re-export all public symbols from the imported module */
            for (usize i = 0; i < imported->scope->nsymbols; i++) {
                symbol_t *sym = imported->scope->symbols[i];
                if (!sym || !sym->node)
                    continue;

                /* Only re-export public symbols. Visibility attributes
                 * are stored differently per symbol kind, hence the
                 * switch below. */
                attrib_t sym_attribs = 0;
                switch (sym->kind) {
                case SYM_FUNCTION:
                    sym_attribs = sym->e.fn.attribs;
                    break;
                case SYM_TYPE:
                    /* Check if it's a record or union declaration */
                    if (sym->node->cls == NODE_RECORD) {
                        node_t *attribs_node =
                            sym->node->val.record_decl.attribs;
                        if (attribs_node &&
                            attribs_node->cls == NODE_ATTRIBUTE) {
                            sym_attribs = attribs_node->val.attrib;
                        }
                    } else if (sym->node->cls == NODE_UNION) {
                        node_t *attribs_node =
                            sym->node->val.union_decl.attribs;
                        if (attribs_node &&
                            attribs_node->cls == NODE_ATTRIBUTE) {
                            sym_attribs = attribs_node->val.attrib;
                        }
                    }
                    break;
                case SYM_MODULE:
                    /* Check module declaration attributes */
                    if (sym->node->cls == NODE_MOD) {
                        node_t *attribs_node = sym->node->val.mod_decl.attribs;
                        if (attribs_node &&
                            attribs_node->cls == NODE_ATTRIBUTE) {
                            sym_attribs = attribs_node->val.attrib;
                        }
                    }
                    break;
                default:
                    /* Skip other symbol types for now */
                    continue;
                }

                if (sym_attribs & ATTRIB_PUB) {
                    if (!symtab_add_alias(t->scope, sym->node, sym)) {
                        /* Symbol already exists, skip it */
                    }
                }
            }
            return (n->type = t->types.type_void);
        } else {
            /* Regular module import */
            if (!symbol_add(t, last, n)) {
                return NULL;
            }
            n->sym->e.mod = imported;
            n->sym->scope = imported->scope;

            return (n->type = t->types.type_void);
        }
    }

    /* Try function/symbol import if there's a parent scope */
    if (parent) {
        char modulepath[MAX_PATH_LEN] = { 0 };
        module_scope_path(parent, modulepath);
        module_t *parent_mod =
            module_manager_find_relative(t->mm, t->module->path, modulepath);

        if (parent_mod && resolve_mod_def(t, parent_mod)) {
            symbol_t *sym = symtab_scope_lookup(
                parent_mod->scope,
                last->val.ident.name,
                last->val.ident.length,
                SYM_ANY
            );
            if (sym) { /* Add alias with qualified name */
                symtab_add_alias(t->scope, last, sym);

                /* NOTE(review): this writes parent_mod->scope into the
                 * shared symbol's scope field — confirm the symbol's
                 * defining scope is always the same object, otherwise
                 * this mutation could affect other importers. */
                n->sym        = sym;
                n->sym->scope = parent_mod->scope;

                return (n->type = t->types.type_void);
            }
        }
    }
    return NULL;
}
2237 +
2238 +
/* Scope access, eg. `foo::bar`.
 *
 * Resolves a `::`-path node by case analysis on the left-hand side:
 * absolute paths (`::mod::sym`), `super::` references, direct module
 * access, nested scope chains, and finally union-variant access on an
 * ordinary type. On success the node's sym/type fields are filled in;
 * returns NULL on any lookup failure. */
static type_t *resolve_scope(resolve_t *t, node_t *n) {
    node_t *parent = n->val.access.lval;
    node_t *child  = n->val.access.rval;

    /* Handle absolute path from global root: ::module::symbol */
    if (parent == NULL) {
        /* Look up module in global scope */
        symbol_t *sym = symtab_lookup(
            t->global,
            child->val.ident.name,
            child->val.ident.length,
            SYM_MODULE
        );
        if (sym) {
            n->sym = sym;
            return (n->type = t->types.type_void);
        }
        return NULL;
    }

    /* Handle `super::` references */
    if (node_is_super(parent)) {
        if (!t->module)
            return NULL;

        /* One level up from the current module. */
        module_t *target = module_super_ancestor(t->module, 1);
        if (!target)
            return NULL;
        /* Declare the ancestor on demand, but never while it is already
         * mid-resolution (guards against declaration cycles). */
        if (!target->declared && target->state != MODULE_STATE_VISITING) {
            if (!resolve_decls(t, target)) {
                return NULL;
            }
        }
        if (target->ast && target->ast->sym) {
            parent->sym  = target->ast->sym;
            parent->type = t->types.type_void;
        }
        return module_lookup(t, n, child, target);
    }

    /* Handle direct module access: module::symbol */
    if (parent->cls == NODE_IDENT) {
        symbol_t *sym = resolve_name(t, parent, SYM_MODULE);
        if (sym) {
            return module_lookup(t, n, child, sym->e.mod);
        }
        /* Not a module name: fall through to the type-based path below. */
    } else if (parent->cls == NODE_SCOPE) {
        /* Handle recursive scope access: foo::bar::baz */
        type_t *parent_type = resolve_scope(t, parent);
        if (!parent_type)
            return NULL;

        /* If parent is a module, look up symbol in module scope */
        if (parent->sym && parent->sym->kind == SYM_MODULE) {
            return module_lookup(t, n, child, parent->sym->e.mod);
        }
        /* If parent is a union, handle union scope */
        if (parent_type->cls == TYPE_UNION) {
            symbol_t *sym = union_variant_lookup(parent_type, child);

            if (sym) {
                n->sym = sym;
                /* The scope node carries the union type itself, not the
                 * individual variant's type. */
                return (n->type = parent_type);
            }
        }
        return NULL;
    }

    /* If not a module, treat it as a normal type */
    type_t *parent_type = resolve_node(t, parent, NULL);
    if (!parent_type)
        return NULL;

    /* Handle union variant access */
    if (parent_type->cls == TYPE_UNION) {
        if ((n->sym = union_variant_lookup(parent_type, child))) {
            /* For unions we store the type of the enum, not the variant */
            return (n->type = parent_type);
        }
    }

    return NULL;
}
2322 +
2323 +
/* Resolve an array-repeat literal `[value; count]` against the context
 * type `expected` (an array or slice type, possibly wrapped in an
 * optional). The value is checked against the context's element type and
 * the count must be a compile-time u32 constant. In an array context the
 * literal adopts the expected type; otherwise a fresh array type of the
 * constant length is created. Returns NULL on error. */
static type_t *resolve_array_repeat(resolve_t *t, node_t *n, type_t *expected) {
    /* A repeat literal cannot be typed without a context to take the
     * element type from. */
    if (!expected)
        return NULL;

    if (expected->cls == TYPE_OPT) {
        expected = expected->info.opt.elem;
    }

    node_t *value = n->val.array_repeat_lit.value;
    node_t *count = n->val.array_repeat_lit.count;

    /* Type check the value expression. Slices and arrays keep their
     * element type in different union members; the previous code read
     * info.ary.elem unconditionally, which is the wrong member for a
     * slice context (cf. the NODE_ARRAY_LIT case in resolve_node). */
    type_t *expected_typ = expected->cls == TYPE_SLICE
                               ? expected->info.slc.elem
                               : expected->info.ary.elem;
    type_t *value_typ    = resolve_node(t, value, expected_typ);

    if (!value_typ)
        return NULL;

    /* Type check the count expression */
    if (!resolve_node(t, count, t->types.type_u32))
        return NULL;

    /* Ensure the count is a compile-time constant */
    usize length = 0;

    if (!resolve_const_usize(t, count, &length))
        return NULL;

    /* For array contexts, use expected type */
    if (expected->cls == TYPE_ARRAY) {
        n->type = expected;
    } else {
        /* For slice contexts, create a new array type */
        n->type = alloc_array_type(t, expected_typ, length);
    }
    return n->type;
}
2357 +
2358 +
/* Check expression types. */
2359 +
static type_t *resolve_node(resolve_t *t, node_t *n, type_t *expected) {
2360 +
    /* Short-circuit if we've already traversed this node. */
2361 +
    if (n->type && n->cls != NODE_RECORD && n->cls != NODE_UNION)
2362 +
        return n->type;
2363 +
2364 +
    switch (n->cls) {
2365 +
    case NODE_ARRAY_LIT: {
2366 +
        if (expected->cls == TYPE_OPT) {
2367 +
            expected = expected->info.opt.elem;
2368 +
        }
2369 +
2370 +
        usize length = n->val.array_lit.elems.len;
2371 +
        if (length == 0) {
2372 +
            /* Create an empty array type with the expected element type. */
2373 +
            type_t *elem_type = expected->info.slc.elem;
2374 +
            n->type           = alloc_array_type(t, elem_type, 0);
2375 +
            return n->type;
2376 +
        }
2377 +
        /* Get the expected element type */
2378 +
        type_t *expected_typ = expected->cls == TYPE_SLICE
2379 +
                                   ? expected->info.slc.elem
2380 +
                                   : expected->info.ary.elem;
2381 +
2382 +
        /* Check all elements */
2383 +
        node_t **elems =
2384 +
            nodespan_ptrs(&t->module->parser, n->val.array_lit.elems);
2385 +
        for (usize i = 0; i < length; i++) {
2386 +
            if (!resolve_node(t, elems[i], expected_typ))
2387 +
                return NULL;
2388 +
        }
2389 +
        if (expected->cls == TYPE_ARRAY) {
2390 +
            n->type = expected;
2391 +
        } else {
2392 +
            /* For slice contexts, create a new array type */
2393 +
            n->type = alloc_array_type(t, expected_typ, length);
2394 +
        }
2395 +
        return n->type;
2396 +
    }
2397 +
2398 +
    case NODE_ARRAY_REPEAT_LIT:
2399 +
        return resolve_array_repeat(t, n, expected);
2400 +
2401 +
    case NODE_ARRAY_INDEX: {
2402 +
        type_t *array_typ = resolve_node(t, n->val.access.lval, NULL);
2403 +
        if (!array_typ)
2404 +
            return NULL;
2405 +
2406 +
        if (array_typ->cls == TYPE_PTR)
2407 +
            array_typ = deref_type(array_typ);
2408 +
2409 +
        node_t *idx_node = n->val.access.rval;
2410 +
2411 +
        if (idx_node->cls == NODE_RANGE) {
2412 +
            if (array_typ->cls == TYPE_SLICE) {
2413 +
                n->type = array_typ;
2414 +
                if (array_typ->info.slc.base)
2415 +
                    array_typ = array_typ->info.slc.base;
2416 +
            }
2417 +
            if (idx_node->val.range.start) {
2418 +
                if (!resolve_node(
2419 +
                        t, idx_node->val.range.start, t->types.type_u32
2420 +
                    ))
2421 +
                    return NULL;
2422 +
            }
2423 +
            if (idx_node->val.range.end) {
2424 +
                if (!resolve_node(
2425 +
                        t, idx_node->val.range.end, t->types.type_u32
2426 +
                    ))
2427 +
                    return NULL;
2428 +
            }
2429 +
            return (n->type = n->type ? n->type : array_typ->slice);
2430 +
        } else {
2431 +
            type_t *elem_typ = array_typ->cls == TYPE_SLICE
2432 +
                                   ? array_typ->info.slc.elem
2433 +
                                   : array_typ->info.ary.elem;
2434 +
2435 +
            if (!resolve_node(t, idx_node, t->types.type_u32))
2436 +
                return NULL;
2437 +
2438 +
            return (n->type = elem_typ);
2439 +
        }
2440 +
    }
2441 +
2442 +
    case NODE_UNION: {
2443 +
        union_decl_t *decl = &n->val.union_decl;
2444 +
        node_t **variants  = nodespan_ptrs(&t->module->parser, decl->variants);
2445 +
        if (!declare_enum(t, n)) {
2446 +
            return NULL;
2447 +
        }
2448 +
        type_t *typ = n->type;
2449 +
2450 +
        /* Add each variant to the union's symbol table. */
2451 +
        i32 iota = 0;
2452 +
        for (usize i = 0; i < decl->variants.len; i++) {
2453 +
            node_t *v = variants[i];
2454 +
2455 +
            if (!union_variant_add(t, typ, v, i, &iota))
2456 +
                return NULL;
2457 +
        }
2458 +
        update_enum_layout(typ);
2459 +
        n->sym->e.typ.info = typ;
2460 +
2461 +
        return (n->type = typ);
2462 +
    }
2463 +
2464 +
    case NODE_RECORD: {
2465 +
        record_decl_t *decl   = &n->val.record_decl;
2466 +
        node_t       **fields = nodespan_ptrs(&t->module->parser, decl->fields);
2467 +
        if (!declare_record(t, n)) {
2468 +
            return NULL;
2469 +
        }
2470 +
        type_t *strct_typ = n->type;
2471 +
2472 +
        /* Add each field to the record. */
2473 +
        for (usize i = 0; i < decl->fields.len; i++) {
2474 +
            node_t     *f          = fields[i];
2475 +
            var_decl_t *field      = &f->val.var;
2476 +
            type_t     *field_type = resolve_type(t, field->type);
2477 +
2478 +
            if (!field_type)
2479 +
                return NULL;
2480 +
2481 +
            if (!record_field_add(t, strct_typ, f, field->ident, field_type)) {
2482 +
                return NULL;
2483 +
            }
2484 +
        }
2485 +
        n->sym->e.typ.info = strct_typ;
2486 +
2487 +
        return strct_typ;
2488 +
    }
2489 +
2490 +
    case NODE_RECORD_TYPE:
2491 +
        return resolve_type(t, n);
2492 +
2493 +
    case NODE_RECORD_FIELD:
2494 +
        /* Record fields are handled when processing the parent record. */
2495 +
        return n->type;
2496 +
2497 +
    case NODE_RECORD_LIT_FIELD:
2498 +
        return n->type;
2499 +
2500 +
    case NODE_RECORD_LIT: {
2501 +
        type_t   *record_type = NULL;
2502 +
        type_t   *result_type = NULL;
2503 +
        symbol_t *variant_sym = NULL;
2504 +
2505 +
        if (!resolve_record_literal_types(
2506 +
                t,
2507 +
                n->val.record_lit.type,
2508 +
                expected,
2509 +
                &record_type,
2510 +
                &result_type,
2511 +
                &variant_sym
2512 +
            ))
2513 +
            return NULL;
2514 +
2515 +
        if (!resolve_record_literal_fields(t, n, record_type))
2516 +
            return NULL;
2517 +
2518 +
        if (variant_sym)
2519 +
            n->sym = variant_sym;
2520 +
2521 +
        return (n->type = result_type);
2522 +
    }
2523 +
2524 +
    case NODE_NUMBER:
2525 +
        return resolve_number(t, n, expected);
2526 +
2527 +
    case NODE_CHAR:
2528 +
        return (n->type = t->types.type_u8);
2529 +
2530 +
    case NODE_STRING:
2531 +
        return (n->type = t->types.type_str);
2532 +
2533 +
    case NODE_BOOL:
2534 +
        return (n->type = t->types.type_bool);
2535 +
2536 +
    case NODE_UNDEF:
2537 +
        return (n->type = expected);
2538 +
2539 +
    case NODE_NIL:
2540 +
        if (expected) {
2541 +
            if (expected->cls == TYPE_OPT)
2542 +
                return (n->type = expected);
2543 +
            return (n->type = alloc_opt_type(t, expected));
2544 +
        }
2545 +
        return NULL;
2546 +
2547 +
    case NODE_SUPER:
2548 +
        return NULL;
2549 +
2550 +
    case NODE_IDENT:
2551 +
    case NODE_SCOPE: {
2552 +
        bool pattern_ctx = (t->ctx == TC_CTX_PATTERN && n->cls == NODE_IDENT);
2553 +
        symbol_t *sym    = resolve_name(t, n, SYM_ANY);
2554 +
2555 +
        if (!sym) {
2556 +
            if (pattern_ctx) {
2557 +
                type_t *bind_type = expected ? expected : t->types.type_void;
2558 +
                n->type           = bind_type;
2559 +
                n->sym            = NULL;
2560 +
                return bind_type;
2561 +
            }
2562 +
            return NULL;
2563 +
        }
2564 +
        type_t *scoped_type = n->type;
2565 +
2566 +
        switch (sym->kind) {
2567 +
        case SYM_VARIABLE:
2568 +
        case SYM_VARIANT:
2569 +
        case SYM_CONSTANT: {
2570 +
            if (!sym->node)
2571 +
                return NULL;
2572 +
            if (sym->node->cls == NODE_UNION_VARIANT) {
2573 +
                if (!scoped_type || scoped_type->cls != TYPE_UNION)
2574 +
                    return NULL;
2575 +
                type_t *variant_type = sym->node->type;
2576 +
                if (variant_type->cls == TYPE_VOID)
2577 +
                    return (n->type = scoped_type);
2578 +
                return NULL;
2579 +
            }
2580 +
            return (n->type = sym->node->type);
2581 +
        }
2582 +
        case SYM_FUNCTION:
2583 +
            if (!sym->node)
2584 +
                return NULL;
2585 +
            sym->e.fn.used = true;
2586 +
            n->type        = sym->node->type;
2587 +
            return n->type;
2588 +
        case SYM_TYPE:
2589 +
            if (!sym->node)
2590 +
                return NULL;
2591 +
            n->type = sym->node->type;
2592 +
            return n->type;
2593 +
        default:
2594 +
            return NULL;
2595 +
        }
2596 +
    }
2597 +
2598 +
    case NODE_REF: {
2599 +
        node_t *target     = n->val.ref.target;
2600 +
        type_t *target_typ = resolve_node(t, target, expected);
2601 +
2602 +
        if (!target_typ)
2603 +
            return NULL;
2604 +
2605 +
        bool    mut_ref = n->val.ref.mut;
2606 +
        type_t *exp     = expected;
2607 +
2608 +
        while (exp && exp->cls == TYPE_OPT) {
2609 +
            exp = exp->info.opt.elem;
2610 +
        }
2611 +
        switch (target->cls) {
2612 +
        case NODE_IDENT: {
2613 +
            return (n->type = alloc_ptr_type(t, target_typ, mut_ref));
2614 +
        }
2615 +
        case NODE_ARRAY_INDEX: {
2616 +
            node_t *idx = target->val.access.rval;
2617 +
            if (idx->cls == NODE_RANGE) {
2618 +
                /* Array slice reference (e.g., &ary[0..3]) */
2619 +
2620 +
                if (target_typ->info.slc.mut == mut_ref) {
2621 +
                    return (n->type = target_typ);
2622 +
                }
2623 +
                type_t *slice_type = alloc_slice_type(
2624 +
                    t,
2625 +
                    target_typ->info.slc.elem,
2626 +
                    target_typ->info.slc.base,
2627 +
                    mut_ref
2628 +
                );
2629 +
                return (n->type = slice_type);
2630 +
            } else {
2631 +
                /* Array element reference (e.g., &ary[3]) */
2632 +
                return (n->type = alloc_ptr_type(t, target_typ, mut_ref));
2633 +
            }
2634 +
        }
2635 +
        case NODE_ARRAY_LIT:
2636 +
        case NODE_ARRAY_REPEAT_LIT:
2637 +
            /* Slice literal. */
2638 +
            if (target_typ->cls == TYPE_ARRAY) {
2639 +
                type_t *slice_type = alloc_slice_type(
2640 +
                    t, target_typ->info.ary.elem, target_typ, mut_ref
2641 +
                );
2642 +
                return (n->type = slice_type);
2643 +
            } else if (target_typ->cls == TYPE_SLICE) {
2644 +
                type_t *slice_type = alloc_slice_type(
2645 +
                    t,
2646 +
                    target_typ->info.slc.elem,
2647 +
                    target_typ->info.slc.base,
2648 +
                    mut_ref
2649 +
                );
2650 +
                return (n->type = slice_type);
2651 +
            } else {
2652 +
                bail("unexpected slice literal type");
2653 +
            }
2654 +
        case NODE_ACCESS:
2655 +
            /* Field access. */
2656 +
            return (n->type = alloc_ptr_type(t, target_typ, mut_ref));
2657 +
        default:
2658 +
            bail("can't take reference of %s", node_names[target->cls]);
2659 +
        }
2660 +
    }
2661 +
2662 +
    case NODE_UNOP: {
2663 +
        switch (n->val.unop.op) {
2664 +
        case OP_NOT: {
2665 +
            type_t *typ = resolve_node(t, n->val.unop.expr, expected);
2666 +
            if (!typ)
2667 +
                return NULL;
2668 +
            return (n->type = typ);
2669 +
        }
2670 +
        case OP_NEG: {
2671 +
            type_t *typ = resolve_node(t, n->val.unop.expr, expected);
2672 +
            if (!typ)
2673 +
                return NULL;
2674 +
            return (n->type = typ);
2675 +
        }
2676 +
        case OP_DEREF: {
2677 +
            type_t *target_typ = resolve_node(t, n->val.unop.expr, NULL);
2678 +
            if (!target_typ)
2679 +
                return NULL;
2680 +
2681 +
            return (n->type = deref_type(target_typ));
2682 +
        }
2683 +
        case OP_BNOT: {
2684 +
            type_t *typ = resolve_node(t, n->val.unop.expr, expected);
2685 +
            if (!typ)
2686 +
                return NULL;
2687 +
            return (n->type = typ);
2688 +
        }
2689 +
        default:
2690 +
            abort();
2691 +
        }
2692 +
    }
2693 +
2694 +
    case NODE_BINOP: {
2695 +
        node_t *lhs         = n->val.binop.left;
2696 +
        node_t *rhs         = n->val.binop.right;
2697 +
        bool    left_is_nil = lhs && lhs->cls == NODE_NIL;
2698 +
2699 +
        /* Check operands without forcing specific expected types */
2700 +
        type_t *left  = NULL;
2701 +
        type_t *right = NULL;
2702 +
2703 +
        if (left_is_nil && rhs && rhs->cls != NODE_NIL) {
2704 +
            right = resolve_node(t, rhs, NULL);
2705 +
            left  = resolve_node(t, lhs, right);
2706 +
        } else {
2707 +
            left  = resolve_node(t, lhs, NULL);
2708 +
            right = resolve_node(t, rhs, left);
2709 +
        }
2710 +
        type_t *unified = NULL;
2711 +
2712 +
        if (!left && !right)
2713 +
            return NULL;
2714 +
2715 +
        /* Check for pointer arithmetic before type unification */
2716 +
        if (n->val.binop.op == OP_ADD || n->val.binop.op == OP_SUB) {
2717 +
            if (left && right) {
2718 +
                /* Allow pointer + integer or integer + pointer */
2719 +
                if (left->cls == TYPE_PTR && type_is_int(right->cls)) {
2720 +
                    return (n->type = left);
2721 +
                }
2722 +
                if (n->val.binop.op == OP_ADD && right->cls == TYPE_PTR &&
2723 +
                    type_is_int(left->cls)) {
2724 +
                    return (n->type = right);
2725 +
                }
2726 +
            }
2727 +
        }
2728 +
2729 +
        bool coerce = n->val.binop.op == OP_EQ || n->val.binop.op == OP_NE;
2730 +
        if (coerce && left && right) {
2731 +
            if (left->cls == TYPE_OPT && right->cls != TYPE_OPT) {
2732 +
                /* Flip arguments because coercion only applies to the rval */
2733 +
                unified =
2734 +
                    type_unify(t, right, left, n, coerce, "binary operation");
2735 +
            } else {
2736 +
                unified =
2737 +
                    type_unify(t, left, right, n, coerce, "binary operation");
2738 +
            }
2739 +
        } else {
2740 +
            unified = type_unify(t, left, right, n, coerce, "binary operation");
2741 +
        }
2742 +
        if (!unified)
2743 +
            return NULL;
2744 +
2745 +
        /* Set operand types to unified type if they were previously NULL */
2746 +
        if (!left)
2747 +
            n->val.binop.left->type = unified;
2748 +
        if (!right)
2749 +
            n->val.binop.right->type = unified;
2750 +
2751 +
        /* Check numeric operations. */
2752 +
        if (n->val.binop.op <= OP_MOD) {
2753 +
            if (expected) {
2754 +
                /* If we have an expected numeric type different from unified,
2755 +
                 * coerce to it. This will affect the instructions used by the
2756 +
                 * code generator. Note that we don't try to unify the two types
2757 +
                 * as this will promote the smaller type to the larger one. */
2758 +
                if (expected != unified)
2759 +
                    return (n->type = expected);
2760 +
            }
2761 +
            return (n->type = unified);
2762 +
        }
2763 +
2764 +
        /* Check comparison operations. */
2765 +
        switch (n->val.binop.op) {
2766 +
        case OP_EQ:
2767 +
        case OP_NE:
2768 +
        case OP_GT:
2769 +
        case OP_LT:
2770 +
        case OP_LE:
2771 +
        case OP_GE:
2772 +
            /* Update operand types to unified type for comparison */
2773 +
            n->val.binop.left->type  = unified;
2774 +
            n->val.binop.right->type = unified;
2775 +
            return (n->type = t->types.type_bool);
2776 +
        case OP_AND:
2777 +
        case OP_OR:
2778 +
            return (n->type = unified);
2779 +
        case OP_BAND:
2780 +
        case OP_BOR:
2781 +
        case OP_XOR:
2782 +
        case OP_SHL:
2783 +
        case OP_SHR:
2784 +
            return (n->type = unified);
2785 +
        case OP_ADD:
2786 +
        case OP_SUB:
2787 +
        case OP_MUL:
2788 +
        case OP_DIV:
2789 +
        case OP_MOD:
2790 +
            /* These are handled above in the numeric operations section */
2791 +
            abort();
2792 +
        }
2793 +
        return NULL;
2794 +
    }
2795 +
2796 +
    case NODE_ACCESS: {
2797 +
        node_t *expr  = n->val.access.lval;
2798 +
        node_t *field = n->val.access.rval;
2799 +
2800 +
        type_t *decl_type = resolve_node(t, expr, NULL);
2801 +
        if (!decl_type)
2802 +
            return NULL;
2803 +
2804 +
        while (decl_type->cls == TYPE_PTR)
2805 +
            decl_type = deref_type(decl_type);
2806 +
2807 +
        if (decl_type->cls == TYPE_RECORD) {
2808 +
            symbol_t *field_sym = record_field_lookup(decl_type, field);
2809 +
            if (!field_sym)
2810 +
                return NULL;
2811 +
2812 +
            n->sym = field_sym;
2813 +
            return (n->type = field_sym->e.field.typ);
2814 +
        } else if (decl_type->cls == TYPE_ARRAY) {
2815 +
            if (ident_eq(field, LEN_FIELD, LEN_FIELD_LEN)) {
2816 +
                n->cls                 = NODE_NUMBER;
2817 +
                n->type                = t->types.type_u32;
2818 +
                n->val.number.value.u  = decl_type->info.ary.length;
2819 +
                n->val.number.text     = NULL;
2820 +
                n->val.number.text_len = 0;
2821 +
                return n->type;
2822 +
            }
2823 +
            return NULL;
2824 +
        } else if (decl_type->cls == TYPE_SLICE) {
2825 +
            if (ident_eq(field, LEN_FIELD, LEN_FIELD_LEN))
2826 +
                return (n->type = t->types.type_u32);
2827 +
            if (ident_eq(field, PTR_FIELD, PTR_FIELD_LEN))
2828 +
                return (n->type = decl_type->info.slc.elem->ptr);
2829 +
            return NULL;
2830 +
        }
2831 +
        return NULL;
2832 +
    }
2833 +
2834 +
    case NODE_USE:
2835 +
        return resolve_use(t, n);
2836 +
2837 +
    case NODE_CALL: {
2838 +
        node_t   *callee = n->val.call.callee;
2839 +
        symbol_t *sym    = resolve_name(t, callee, SYM_ANY);
2840 +
2841 +
        if (!sym)
2842 +
            return NULL;
2843 +
2844 +
        /* Function call */
2845 +
        if (sym->kind == SYM_FUNCTION) {
2846 +
            n->sym = sym;
2847 +
2848 +
            return resolve_call_fn(t, sym, n);
2849 +
        }
2850 +
        /* Tuple record constructor call */
2851 +
        if (sym->kind == SYM_TYPE) {
2852 +
            type_t *typ = sym->e.typ.info;
2853 +
2854 +
            if (typ && typ->cls == TYPE_RECORD && typ->info.srt.tuple) {
2855 +
                return resolve_tuple_record_constructor(t, n, typ);
2856 +
            }
2857 +
        }
2858 +
        /* Function pointer call */
2859 +
        if (sym->kind == SYM_VARIABLE) {
2860 +
            if (callee->cls == NODE_IDENT) {
2861 +
                if (sym->node->type && sym->node->type->cls == TYPE_FN) {
2862 +
                    n->sym = sym;
2863 +
                    return resolve_call_fn_ptr(t, sym, n);
2864 +
                }
2865 +
                return NULL;
2866 +
            }
2867 +
        } else if (sym->kind == SYM_VARIANT) {
2868 +
            if (callee->cls == NODE_SCOPE) {
2869 +
                type_t *scope = resolve_scope(t, callee);
2870 +
2871 +
                if (scope && type_is_union_with_payload(scope))
2872 +
                    return resolve_enum_constructor(t, n, scope, sym);
2873 +
            }
2874 +
        }
2875 +
        return NULL;
2876 +
    }
2877 +
    case NODE_BUILTIN:
2878 +
        return resolve_builtin(t, n, expected);
2879 +
    case NODE_CALL_ARG:
2880 +
        return (n->type = resolve_node(t, n->val.call_arg.expr, expected));
2881 +
2882 +
    case NODE_THROW:
2883 +
        return resolve_throw(t, n);
2884 +
    case NODE_TRY:
2885 +
        return resolve_try_expr(t, n, expected);
2886 +
    case NODE_CATCH:
2887 +
        bail("cannot type check %s", node_names[n->cls]);
2888 +
2889 +
    case NODE_RETURN: {
2890 +
        type_t *fn_ret   = t->fn->node->type->info.fun.ret;
2891 +
        type_t *expected = fn_ret;
2892 +
2893 +
        if (fn_ret->cls == TYPE_RESULT)
2894 +
            expected = fn_ret->info.res.payload;
2895 +
2896 +
        if (expected == t->types.type_void) {
2897 +
            if (n->val.return_stmt.value)
2898 +
                return NULL;
2899 +
            return (n->type = fn_ret);
2900 +
        }
2901 +
        if (n->val.return_stmt.value) {
2902 +
            if (!resolve_node(t, n->val.return_stmt.value, expected))
2903 +
                return NULL;
2904 +
        }
2905 +
        return (n->type = fn_ret);
2906 +
    }
2907 +
2908 +
    case NODE_IF:
2909 +
        if (!resolve_node(t, n->val.if_stmt.cond, t->types.type_bool))
2910 +
            return NULL;
2911 +
2912 +
        type_t *result_typ  = expected ? expected : t->types.type_void;
2913 +
        type_t *lbranch_typ = resolve_node(t, n->val.if_stmt.lbranch, expected);
2914 +
        if (!lbranch_typ)
2915 +
            return NULL;
2916 +
2917 +
        if (n->val.if_stmt.rbranch) {
2918 +
            type_t *rbranch_typ =
2919 +
                resolve_node(t, n->val.if_stmt.rbranch, expected);
2920 +
            if (!rbranch_typ)
2921 +
                return NULL;
2922 +
2923 +
            if (!expected) {
2924 +
                type_t *unified =
2925 +
                    type_unify(t, lbranch_typ, rbranch_typ, n, false, NULL);
2926 +
                if (unified)
2927 +
                    result_typ = unified;
2928 +
            }
2929 +
        }
2930 +
        return (n->type = result_typ);
2931 +
2932 +
    case NODE_IF_LET: {
2933 +
        type_t *expr_type = resolve_node(t, n->val.if_let_stmt.expr, NULL);
2934 +
        if (!expr_type)
2935 +
            return NULL;
2936 +
        /* Create scope for the bound variable */
2937 +
        n->val.if_let_stmt.scope     = symtab_scope(t->scope, NULL);
2938 +
        n->val.if_let_stmt.var->type = expr_type->info.opt.elem;
2939 +
        t->scope                     = n->val.if_let_stmt.scope;
2940 +
2941 +
        /* Add the bound variable to the scope */
2942 +
        if (!symbol_add(t, n->val.if_let_stmt.var, n->val.if_let_stmt.var))
2943 +
            return NULL;
2944 +
2945 +
        /* Only set symbol data if not a placeholder */
2946 +
        if (n->val.if_let_stmt.var->cls != NODE_PLACEHOLDER) {
2947 +
            n->val.if_let_stmt.var->sym->e.var.typ = expr_type->info.opt.elem;
2948 +
            n->val.if_let_stmt.var->sym->e.var.align =
2949 +
                expr_type->info.opt.elem->align;
2950 +
            n->val.if_let_stmt.var->sym->scope = t->scope;
2951 +
        }
2952 +
2953 +
        if (n->val.if_let_stmt.guard) {
2954 +
            if (!resolve_node(t, n->val.if_let_stmt.guard, t->types.type_bool))
2955 +
                return NULL;
2956 +
        }
2957 +
2958 +
        if (!resolve_block(t, n->val.if_let_stmt.lbranch))
2959 +
            return NULL;
2960 +
2961 +
        t->scope = t->scope->parent;
2962 +
2963 +
        if (n->val.if_let_stmt.rbranch) {
2964 +
            if (!resolve_block(t, n->val.if_let_stmt.rbranch))
2965 +
                return NULL;
2966 +
        }
2967 +
        return (n->type = t->types.type_void);
2968 +
    }
2969 +
2970 +
    case NODE_MATCH: {
2971 +
        /* Check the match operand */
2972 +
        type_t *match_typ = resolve_node(t, n->val.match_stmt.expr, NULL);
2973 +
        if (!match_typ)
2974 +
            return NULL;
2975 +
2976 +
        /* Check each case to ensure patterns match the
2977 +
         * match operand type. */
2978 +
        node_t **cases =
2979 +
            nodespan_ptrs(&t->module->parser, n->val.match_stmt.cases);
2980 +
        bool all_diverge = n->val.match_stmt.cases.len > 0;
2981 +
2982 +
        for (usize i = 0; i < n->val.match_stmt.cases.len; i++) {
2983 +
            node_t *c = cases[i];
2984 +
2985 +
            type_t *case_typ = resolve_match_case(t, c, match_typ);
2986 +
            if (!case_typ)
2987 +
                return NULL;
2988 +
2989 +
            /* Check if this case diverges. */
2990 +
            if (!node_diverges(c->val.match_case.body))
2991 +
                all_diverge = false;
2992 +
        }
2993 +
        /* Match diverges if all cases diverge. */
2994 +
        if (all_diverge)
2995 +
            return (n->type = t->types.type_never);
2996 +
2997 +
        return (n->type = t->types.type_void);
2998 +
    }
2999 +
    case NODE_MATCH_CASE:
3000 +
        /* Handled in `NODE_MATCH` */
3001 +
    case NODE_BLOCK:
3002 +
        return (n->type = resolve_block(t, n));
3003 +
    case NODE_FN:
3004 +
        /* Handled at the module level */
3005 +
    case NODE_LOOP:
3006 +
        return (n->type = resolve_block(t, n->val.loop_stmt.body));
3007 +
    case NODE_BREAK:
3008 +
        return (n->type = t->types.type_never);
3009 +
    case NODE_VAR:
3010 +
        return resolve_var(t, n);
3011 +
    case NODE_CONST:
3012 +
        return resolve_const(t, n);
3013 +
    case NODE_STATIC:
3014 +
        return resolve_static(t, n);
3015 +
    case NODE_PARAM:
3016 +
        abort();
3017 +
    case NODE_ASSIGN: {
3018 +
        type_t *ltype = resolve_node(t, n->val.assign.lval, NULL);
3019 +
        if (!ltype)
3020 +
            return NULL;
3021 +
3022 +
        if (!resolve_node(t, n->val.assign.rval, ltype))
3023 +
            return NULL;
3024 +
3025 +
        return (n->type = ltype);
3026 +
    }
3027 +
3028 +
    case NODE_ATTRIBUTE:
3029 +
        return (n->type = t->types.type_void);
3030 +
3031 +
    case NODE_EXPR_STMT: {
3032 +
        /* Check the expression but don't use its result value. */
3033 +
        type_t *typ = resolve_node(t, n->val.expr_stmt, NULL);
3034 +
        if (!typ)
3035 +
            return NULL;
3036 +
3037 +
        /* Expression statements don't produce values. */
3038 +
        return (n->type = t->types.type_void);
3039 +
    }
3040 +
3041 +
    case NODE_MOD:
3042 +
        return resolve_mod_decl(t, n);
3043 +
3044 +
    case NODE_RANGE: {
3045 +
        /* Check range start expression if provided */
3046 +
        if (n->val.range.start) {
3047 +
            if (!resolve_node(t, n->val.range.start, t->types.type_u32))
3048 +
                return NULL;
3049 +
        }
3050 +
        /* Check range end expression if provided */
3051 +
        if (n->val.range.end) {
3052 +
            if (!resolve_node(t, n->val.range.end, t->types.type_u32))
3053 +
                return NULL;
3054 +
        }
3055 +
        /* Range nodes don't have a specific type, they're contextual */
3056 +
        return (n->type = t->types.type_void);
3057 +
    }
3058 +
3059 +
    case NODE_AS: {
3060 +
        if (!resolve_node(t, n->val.as_expr.expr, NULL))
3061 +
            return NULL;
3062 +
3063 +
        type_t *target_type = resolve_type(t, n->val.as_expr.type);
3064 +
        if (!target_type)
3065 +
            return NULL;
3066 +
3067 +
        return (n->type = target_type);
3068 +
    }
3069 +
    case NODE_PANIC:
3070 +
        return (n->type = t->types.type_never);
3071 +
3072 +
    case NODE_WHILE:
3073 +
    case NODE_WHILE_LET:
3074 +
    case NODE_IF_CASE:
3075 +
    case NODE_GUARD_CASE:
3076 +
    case NODE_GUARD_LET:
3077 +
    case NODE_FOR:
3078 +
3079 +
    case NODE_PLACEHOLDER:
3080 +
        /* Placeholders don't produce a value, so return NULL type */
3081 +
        return (n->type = NULL);
3082 +
3083 +
    case NODE_TYPE:
3084 +
    case NODE_UNION_VARIANT:
3085 +
    case NODE_PTR:
3086 +
    case NODE_MOD_BODY:
3087 +
    case NODE_ALIGN:
3088 +
        bail("unsupported node type %s", node_names[n->cls]);
3089 +
    }
3090 +
    return NULL;
3091 +
}
3092 +
3093 +
/* Return the identifier node of a binding declaration.
 * Accepts `var`, `const` and `static` declarations; anything else is a
 * resolver bug and bails. */
static node_t *binding_ident(node_t *n) {
    if (n->cls == NODE_VAR)
        return n->val.var.ident;
    if (n->cls == NODE_CONST)
        return n->val.constant.ident;
    if (n->cls == NODE_STATIC)
        return n->val.static_decl.ident;
    bail("unexpected binding node %s", node_names[n->cls]);
}
3105 +
3106 +
static type_t *resolve_binding(
3107 +
    resolve_t *t, node_t *n, node_t *val, node_t *typ
3108 +
) {
3109 +
    type_t *declared = NULL;
3110 +
    if (typ) {
3111 +
        /* Resolve the declared type before checking the value */
3112 +
        if (!(declared = resolve_type(t, typ)))
3113 +
            return NULL;
3114 +
    }
3115 +
    /* Check the value with the declared type as expected type */
3116 +
    type_t *inferred = resolve_node(t, val, declared);
3117 +
    if (!inferred)
3118 +
        return NULL;
3119 +
3120 +
    type_t *final_type = inferred;
3121 +
3122 +
    if (declared) {
3123 +
        final_type =
3124 +
            type_unify(t, inferred, declared, val, true, "variable binding");
3125 +
3126 +
        if (!final_type)
3127 +
            return NULL;
3128 +
    }
3129 +
3130 +
    node_t *ident = binding_ident(n);
3131 +
3132 +
    /* symbol_add handles placeholders internally */
3133 +
    if (!symbol_add(t, ident, n))
3134 +
        return NULL;
3135 +
3136 +
    /* Only set symbol data if not a placeholder */
3137 +
    if (ident->cls != NODE_PLACEHOLDER) {
3138 +
        n->sym->scope       = t->scope;
3139 +
        n->sym->e.var.typ   = final_type;
3140 +
        n->sym->e.var.align = final_type->align;
3141 +
    }
3142 +
3143 +
    return (n->type = final_type);
3144 +
}
3145 +
3146 +
/* Check if a `const` declaration is valid. */
3147 +
static type_t *resolve_const(resolve_t *t, node_t *n) {
3148 +
    return resolve_binding(t, n, n->val.constant.value, n->val.constant.type);
3149 +
}
3150 +
3151 +
/* Check if a `static` declaration is valid. */
static type_t *resolve_static(resolve_t *t, node_t *n) {
    node_t *value = n->val.static_decl.value;
    node_t *type  = n->val.static_decl.type;

    return resolve_binding(t, n, value, type);
}
3156 +
3157 +
/* Check if a `let` or `mut` declaration is valid. */
3158 +
static type_t *resolve_var(resolve_t *t, node_t *n) {
3159 +
    node_t *type  = n->val.var.type;
3160 +
    node_t *value = n->val.var.value;
3161 +
3162 +
    if (!value)
3163 +
        return NULL;
3164 +
3165 +
    type_t *var_type = resolve_binding(t, n, value, type);
3166 +
3167 +
    if (!var_type)
3168 +
        return NULL;
3169 +
3170 +
    if (n->val.var.align) {
3171 +
        node_t *align = n->val.var.align->val.align;
3172 +
3173 +
        if (!resolve_node(t, align, t->types.type_u32))
3174 +
            return NULL;
3175 +
3176 +
        usize c = 0;
3177 +
3178 +
        if (!resolve_const_usize(t, align, &c))
3179 +
            return NULL;
3180 +
3181 +
        n->sym->e.var.align = (i32)c;
3182 +
    }
3183 +
    return var_type;
3184 +
}
3185 +
3186 +
/* Whether executing `n` never falls through to the following statement
 * (it returns, throws, panics, or ends in a construct typed `never`). */
static bool node_diverges(node_t *n) {
    if (!n)
        return false;

    switch (n->cls) {
    case NODE_RETURN:
    case NODE_THROW:
    case NODE_PANIC:
        /* Unconditional transfers of control always diverge. */
        return true;

    case NODE_BLOCK:
        /* Blocks record divergence in their resolved type. */
        return n->type && n->type->cls == TYPE_NEVER;

    case NODE_IF: {
        node_t *yes = n->val.if_stmt.lbranch;
        node_t *no  = n->val.if_stmt.rbranch;

        /* Without both arms, control may fall through the `if`. */
        return yes && no && node_diverges(yes) && node_diverges(no);
    }
    case NODE_IF_LET:
    case NODE_IF_CASE: {
        node_t *yes;
        node_t *no;

        if (n->cls == NODE_IF_LET) {
            yes = n->val.if_let_stmt.lbranch;
            no  = n->val.if_let_stmt.rbranch;
        } else {
            yes = n->val.if_case_stmt.lbranch;
            no  = n->val.if_case_stmt.rbranch;
        }
        return yes && no && node_diverges(yes) && node_diverges(no);
    }
    case NODE_EXPR_STMT: {
        node_t *expr = n->val.expr_stmt;

        if (!expr)
            return false;
        /* The expression itself may already be typed `never`. */
        if (expr->type && expr->type->cls == TYPE_NEVER)
            return true;

        /* Calls to the ebreak intrinsic are treated as diverging. */
        if (expr->cls == NODE_CALL && expr->sym &&
            expr->sym->kind == SYM_FUNCTION) {
            const char *qualified = expr->sym->qualified;

            if (qualified &&
                strcmp(qualified, "core::intrinsics::ebreak") == 0)
                return true;
        }
        return false;
    }
    case NODE_MATCH:
        /* Match diverges if its type is TYPE_NEVER (all cases diverge). */
        return n->type && n->type->cls == TYPE_NEVER;

    default:
        return false;
    }
}
3250 +
3251 +
/* Check a code block. */
3252 +
static type_t *resolve_block(resolve_t *t, node_t *n) {
3253 +
    /* Create a new scope for this block. */
3254 +
    scope_t *parent    = t->scope;
3255 +
    n->val.block.scope = symtab_scope(parent, NULL);
3256 +
    t->scope           = n->val.block.scope;
3257 +
3258 +
    /* Check each statement in the block. */
3259 +
    node_t **stmts = nodespan_ptrs(&t->module->parser, n->val.block.stmts);
3260 +
    for (usize i = 0; i < n->val.block.stmts.len; i++) {
3261 +
        if (!resolve_node(t, stmts[i], NULL))
3262 +
            return NULL;
3263 +
    }
3264 +
    /* Return to parent scope. */
3265 +
    t->scope = parent;
3266 +
3267 +
    type_t *block_type = t->types.type_void;
3268 +
3269 +
    if (n->val.block.stmts.len > 0) {
3270 +
        node_t *last = stmts[n->val.block.stmts.len - 1];
3271 +
3272 +
        if (node_diverges(last))
3273 +
            block_type = t->types.type_never;
3274 +
    }
3275 +
    return (n->type = block_type);
3276 +
}
3277 +
3278 +
/* Type check a complete AST, starting from the root module. */
3279 +
bool resolve_run(resolve_t *t, module_t *root) {
3280 +
    if (!resolve_mod_def(t, root))
3281 +
        return false;
3282 +
3283 +
    for (usize i = 0; i < t->mm->nmodules; i++) {
3284 +
        module_t *mod = &t->mm->modules[i];
3285 +
3286 +
        if (!mod->checked) {
3287 +
            if (!resolve_mod_def(t, mod)) {
3288 +
                return false;
3289 +
            }
3290 +
        }
3291 +
    }
3292 +
    return true;
3293 +
}
3294 +
3295 +
/* Type check a module.
 *
 * Second resolution phase: after resolve_decls() has registered all
 * signatures and named types, this walks the module's top-level
 * statements, checks each function body, and records the module's
 * `default` entry point. The resolver's current module and scope are
 * saved and restored around the walk.
 *
 * Returns false on any failure, including a circular module dependency
 * (detected via MODULE_STATE_VISITING).
 *
 * NOTE(review): the early `return false` paths taken after the state is
 * set to MODULE_STATE_VISITING do not restore t->module/t->scope or the
 * module state — presumably resolution aborts entirely on failure;
 * confirm. */
static bool resolve_mod_def(resolve_t *t, module_t *module) {
    /* First check all function signatures and type declarations */
    if (!resolve_decls(t, module)) {
        return false;
    }
    /* Re-entering a module that is still being visited means a
     * circular dependency. */
    if (module->state == MODULE_STATE_VISITING)
        return false;
    /* Already fully checked: nothing to do. */
    if (module->state == MODULE_STATE_VISITED && module->checked) {
        return true;
    }

    /* Save the resolver's current module/scope for restoration below. */
    module_t *pmodule = t->module;
    scope_t  *pscope  = t->scope;

    module->state = MODULE_STATE_VISITING;
    t->module     = module;
    t->scope      = module->scope;

    /* Type check function bodies */
    node_t **mod_stmts =
        nodespan_ptrs(&module->parser, module->ast->val.block.stmts);
    for (usize i = 0; i < module->ast->val.block.stmts.len; i++) {
        node_t *stmt = mod_stmts[i];

        if (stmt->cls == NODE_FN) {
            if (!resolve_fn_def(t, stmt)) {
                return false;
            }
            /* Record the first function carrying the `default`
             * attribute as the module's entry point. */
            if (stmt->val.fn_decl.attribs &&
                stmt->val.fn_decl.attribs->val.attrib & ATTRIB_DEFAULT) {
                if (module->default_fn == NULL) {
                    module->default_fn = stmt->sym;
                }
            }
        }
    }
    /* Fallback scan for a `default` function.
     * NOTE(review): this looks redundant with the loop above, which
     * already records the first ATTRIB_DEFAULT function — unless
     * stmt->sym can become non-NULL only after resolve_fn_def of a
     * later statement; confirm whether this second pass is needed. */
    if (!module->default_fn) {
        for (usize i = 0; i < module->ast->val.block.stmts.len; i++) {
            node_t *stmt = mod_stmts[i];
            if (stmt->cls == NODE_FN && stmt->val.fn_decl.attribs &&
                stmt->val.fn_decl.attribs->val.attrib & ATTRIB_DEFAULT &&
                stmt->sym) {
                module->default_fn = stmt->sym;
                break;
            }
        }
    }
    /* Mark the module as fully checked before restoring context. */
    module->checked   = true;
    module->state     = MODULE_STATE_VISITED;
    module->ast->type = t->types.type_void;

    t->module = pmodule;
    t->scope  = pscope;

    return true;
}
3352 +
3353 +
/* Check function and type declarations.
 *
 * First resolution phase for a module: opens the module's scope, then
 *   1. predeclares child modules (so their symbols resolve early),
 *   2. predeclares named record/union types (for mutual recursion),
 *   3. resolves uses, function signatures, type bodies, consts, statics,
 *   4. recursively declares and then fully checks submodules,
 *   5. finalizes type layout.
 *
 * Uses MODULE_STATE_VISITING as a cycle guard; returns false on any
 * failure or circular dependency.
 *
 * NOTE(review): the early `return false` paths leave the module in
 * MODULE_STATE_VISITING and do not restore t->module/t->scope —
 * presumably resolution aborts entirely on failure; confirm. */
static bool resolve_decls(resolve_t *t, module_t *module) {
    /* Re-entering a module being visited means a circular dependency. */
    if (module->state == MODULE_STATE_VISITING)
        return false;
    /* Already declared: nothing to do. */
    if (module->state == MODULE_STATE_VISITED && module->declared) {
        return true;
    }

    /* Save the current module; the scope is restored via its parent
     * link at the end. */
    module_t *parent = t->module;

    module->state = MODULE_STATE_VISITING;
    module->scope = symtab_scope(t->scope, module);
    t->module     = module;
    t->scope      = module->scope;

    /* Child-module statements and their resolved modules, collected in
     * pass 1 and consumed by passes 4 and 5 below. */
    node_t   *module_stmts[MAX_BLOCK_STATEMENTS] = { 0 };
    module_t *module_refs[MAX_BLOCK_STATEMENTS]  = { 0 };
    usize     nmodules                           = 0;

    /* Predeclare child modules so their symbols are available early. */
    node_t **decl_stmts =
        nodespan_ptrs(&module->parser, module->ast->val.block.stmts);
    for (usize i = 0; i < module->ast->val.block.stmts.len; i++) {
        node_t *stmt = decl_stmts[i];

        if (stmt->cls != NODE_MOD)
            continue;

        node_t *name = stmt->val.mod_decl.ident;

        /* Copy the (non NUL-terminated) identifier into a path buffer.
         * NOTE(review): strncpy does not terminate if ident.length >=
         * MAX_PATH_LEN; the buffer is pre-zeroed, so this is safe only
         * while ident.length < MAX_PATH_LEN — confirm that the parser
         * bounds identifier lengths. */
        char rel[MAX_PATH_LEN] = { 0 };
        strncpy(rel, name->val.ident.name, name->val.ident.length);

        module_t *submod =
            module_manager_find_relative(t->mm, module->path, rel);
        if (!submod) {
            /* Missing test-only modules are tolerated; anything else
             * is an error. */
            if (stmt->val.mod_decl.attribs &&
                (stmt->val.mod_decl.attribs->val.attrib & ATTRIB_TEST))
                continue;
            return false;
        }
        /* Reuse an existing module symbol if one was already added
         * (e.g. by an earlier `use`); otherwise register a new one. */
        symbol_t *sym = symtab_scope_lookup(
            module->scope,
            name->val.ident.name,
            name->val.ident.length,
            SYM_MODULE
        );
        if (!sym) {
            if (!symbol_add(t, name, stmt)) {
                return false;
            }
            sym = stmt->sym;
        } else {
            stmt->sym = sym;
        }
        /* Wire the symbol to the submodule and propagate attributes
         * and the qualified path (parent::child). */
        sym->e.mod      = submod;
        sym->scope      = submod->scope;
        submod->attribs = stmt->val.mod_decl.attribs
                              ? stmt->val.mod_decl.attribs->val.attrib
                              : ATTRIB_NONE;
        module_path(submod->qualified, module->qualified);
        module_qualify(submod->qualified, name);

        module_stmts[nmodules]  = stmt;
        module_refs[nmodules++] = submod;
    }

    /* Predeclare named types so mutually recursive definitions can resolve. */
    for (usize i = 0; i < module->ast->val.block.stmts.len; i++) {
        node_t *stmt = decl_stmts[i];

        if (stmt->cls == NODE_RECORD) {
            if (!declare_record(t, stmt)) {
                return false;
            }
        } else if (stmt->cls == NODE_UNION) {
            if (!declare_enum(t, stmt)) {
                return false;
            }
        }
    }

    /* Resolve all top-level declarations (signatures only for
     * functions; bodies are checked later by resolve_mod_def). */
    for (usize i = 0; i < module->ast->val.block.stmts.len; i++) {
        node_t *stmt = decl_stmts[i];

        switch (stmt->cls) {
        case NODE_USE:
            if (!resolve_use(t, stmt)) {
                return false;
            }
            break;
        case NODE_FN:
            if (!resolve_fn_decl(t, stmt)) {
                return false;
            }
            break;
        case NODE_RECORD:
        case NODE_UNION:
            if (!resolve_node(t, stmt, NULL)) {
                return false;
            }
            break;
        case NODE_MOD:
            /* Already handled in the predeclaration pass above. */
            stmt->type = t->types.type_void;
            break;
        case NODE_CONST:
            if (!resolve_const(t, stmt)) {
                return false;
            }
            break;
        case NODE_STATIC:
            if (!resolve_static(t, stmt)) {
                return false;
            }
            break;
        default:
            break;
        }
    }

    /* Check submodule declarations after parent types are sized,
     * so that `super::` references can resolve to fully-typed symbols.
     * Skip submodules that are already being visited to avoid false
     * circular dependency errors when `use X::Y` directly imports a
     * submodule and that submodule uses `super::`. */
    for (usize i = 0; i < nmodules; i++) {
        module_t *submod = module_refs[i];

        if (!submod->declared && submod->state != MODULE_STATE_VISITING) {
            if (!resolve_decls(t, submod)) {
                return false;
            }
        }
        /* Re-point the module statement's symbol at the (now
         * populated) submodule scope. */
        if (module_stmts[i] && module_stmts[i]->sym) {
            module_stmts[i]->sym->scope = submod->scope;
        }
    }

    /* Fully type-check each submodule (function bodies etc.). */
    for (usize i = 0; i < nmodules; i++) {
        module_t *submod = module_refs[i];

        if (!submod)
            return false;
        /* Skip submodules that are already being visited to avoid false
         * circular dependency errors. They will be checked by their
         * original caller. */
        if (submod->state == MODULE_STATE_VISITING) {
            continue;
        }
        if (!resolve_mod_def(t, submod)) {
            return false;
        }
    }
    finalize_type_layout(t);

    /* Mark this module declared and restore the resolver context. */
    module->declared  = true;
    module->state     = MODULE_STATE_VISITED;
    module->ast->type = t->types.type_void;

    t->scope  = t->scope->parent;
    t->module = parent;

    return true;
}
3517 +
3518 +
/* Register a function signature without checking its body */
3519 +
static type_t *resolve_fn_decl(resolve_t *t, node_t *n) {
3520 +
    fn_decl_t *fn = &n->val.fn_decl;
3521 +
3522 +
    /* Check attributes. */
3523 +
    if (fn->attribs && !resolve_node(t, fn->attribs, NULL))
3524 +
        return NULL;
3525 +
3526 +
    attrib_t attrs = fn->attribs ? fn->attribs->val.attrib : ATTRIB_NONE;
3527 +
3528 +
    /* Add function to symbol table */
3529 +
    if (!symbol_add(t, fn->ident, n)) {
3530 +
        return NULL;
3531 +
    }
3532 +
    n->sym->e.fn.attribs = attrs;
3533 +
3534 +
    /* Set up the qualified name for the function */
3535 +
    module_path(n->sym->qualified, t->module->qualified);
3536 +
    module_qualify(n->sym->qualified, fn->ident);
3537 +
3538 +
    /* Initialize usage tracking - mark as used if it's a default function */
3539 +
    n->sym->e.fn.used = (attrs & ATTRIB_DEFAULT) || (attrs & ATTRIB_TEST);
3540 +
3541 +
    /* Initialize function type and scope */
3542 +
    type_t *ret_typ    = n->val.fn_decl.return_type
3543 +
                             ? resolve_type(t, n->val.fn_decl.return_type)
3544 +
                             : t->types.type_void;
3545 +
    n->sym->e.fn.scope = symtab_scope(t->scope, NULL);
3546 +
    n->type = alloc_fn_type(t, n, ret_typ, n->val.fn_decl.params.len);
3547 +
3548 +
    /* Enter function scope temporarily to register parameters */
3549 +
    scope_t *parent = t->scope;
3550 +
    t->scope        = n->sym->e.fn.scope;
3551 +
3552 +
    /* Add parameters to function scope */
3553 +
    for (usize i = 0; i < n->val.fn_decl.params.len; i++) {
3554 +
        node_t *param =
3555 +
            nodespan_ptrs(&t->module->parser, n->val.fn_decl.params)[i];
3556 +
3557 +
        /* Assign declared type to identifier node. */
3558 +
        node_t *type     = param->val.param.type;
3559 +
        type_t *declared = resolve_type(t, type);
3560 +
3561 +
        if (!declared) {
3562 +
            return NULL;
3563 +
        }
3564 +
        param->type = declared;
3565 +
3566 +
        /* Store parameter type in function type for function pointer
3567 +
         * compatibility */
3568 +
        n->type->info.fun.params[i] = declared;
3569 +
3570 +
        if (!symbol_add(t, param->val.param.ident, param)) {
3571 +
            return NULL;
3572 +
        }
3573 +
        param->sym->e.var.typ   = declared;
3574 +
        param->sym->e.var.align = declared->align;
3575 +
    }
3576 +
    t->scope = parent;
3577 +
3578 +
    if (!resolve_fn_throws(t, n->type, fn->throws, ret_typ))
3579 +
        return NULL;
3580 +
3581 +
    return n->type;
3582 +
}
3583 +
3584 +
/* Type check function body (assumes signature is already registered) */
3585 +
static type_t *resolve_fn_def(resolve_t *t, node_t *n) {
3586 +
    /* Set current function and enter function scope */
3587 +
    t->fn    = n->sym;
3588 +
    t->scope = n->sym->e.fn.scope;
3589 +
3590 +
    /* For extern functions, body will be NULL */
3591 +
    if (n->val.fn_decl.body && !resolve_block(t, n->val.fn_decl.body)) {
3592 +
        t->fn    = NULL;
3593 +
        t->scope = t->scope->parent;
3594 +
        return NULL;
3595 +
    }
3596 +
    t->fn    = NULL;
3597 +
    t->scope = t->scope->parent;
3598 +
3599 +
    return n->type;
3600 +
}
resolver.h added +168 -0
1 +
#ifndef RESOLVER_H
#define RESOLVER_H

#include "ast.h"
#include "limits.h"
#include "module.h"
#include "parser.h"
#include "riscv.h"
#include "symtab.h"
#include "types.h"

/* Built-in slice/array fields */
#define LEN_FIELD     "len"
#define LEN_FIELD_LEN 3
#define PTR_FIELD     "ptr"
#define PTR_FIELD_LEN 3

/* A resolved type. Exactly one variant of `info` is active, selected by
 * `cls`. */
typedef struct type_t {
    typeclass_t cls;

    const char *name;    /* Type name */
    u16         namelen; /* Type name length */

    union {
        /* Union (sum) types. */
        struct {
            union_decl_t *decl; /* AST node for the union. */
            symbol_t    **variants;
            u8            nvariants;
            struct type_t
                *base;        /* Underlying scalar type for fieldless unions */
            i32  variantsize; /* Largest payload size (bytes) */
            bool has_payload; /* Whether any variant carries data */
        } uni;
        /* Result (error-or-value) types. */
        struct {
            struct type_t *err;     /* Error set */
            struct type_t *payload; /* Success payload type */
        } res;
        /* Record (struct) types. */
        struct {
            symbol_t **fields;     /* Fields of the record */
            u8         nfields;    /* Number of fields */
            u32        packedsize; /* Size if packed */
            bool       anonymous;  /* Anonymous record */
            bool       tuple;      /* Tuple-style record */
        } srt;
        /* Pointer types. */
        struct {
            struct type_t *target; /* Target type. */
            bool           mut;    /* Mutable pointer. */
        } ptr;
        /* Function types. */
        struct {
            struct type_t  *ret;    /* Return type */
            struct type_t **params; /* Parameter types */
            struct type_t **throws; /* Throwable error types */
            u8              nparams;
            u8              nthrows;
        } fun;
        /* Fixed-length array types. */
        struct {
            struct type_t *elem;   /* Type of array elements */
            u32            length; /* Length for arrays (fixed size) */
        } ary;
        /* Slice types. */
        struct {
            struct type_t *elem; /* Type of slice elements */
            struct type_t *base; /* Base array type */
            bool           mut;  /* Mutable slice pointer. */
        } slc;
        /* Optional types. */
        struct {
            struct type_t *elem; /* Type of the optional value */
        } opt;
    } info;

    /* Cached derived types, filled in lazily. */
    struct type_t *ptr;       /* Pointer type, eg. `*T` for `T`. */
    struct type_t *ptr_mut;   /* Mutable pointer type, eg. `*mut T`. */
    struct type_t *slice;     /* Slice type, eg. *[T] for [T]. */
    struct type_t *slice_mut; /* Mutable slice type, eg. *mut [T]. */

    i32 size;  /* Calculated size in bytes. */
    i32 align; /* Alignment requirements. */
} type_t;

/* Global type context. */
typedef struct {
    /* Built-in types.
     * These point into the `objects` array. */
    type_t *type_i8;
    type_t *type_u8;
    type_t *type_i16;
    type_t *type_u16;
    type_t *type_i32;
    type_t *type_u32;
    type_t *type_bool;
    type_t *type_char;
    type_t *type_str;

    /* For statements, which have no type. */
    type_t *type_void;
    /* Opaque type that can only be used behind pointers. */
    type_t *type_opaque;
    /* For expressions that never produce a value. */
    type_t *type_never;

    /* Type storage across all modules.
     * Fixed-capacity bump allocation; no frees. */
    type_t objects[MAX_TYPES];
    u16    nobjects;

    /* Pools for pointer arrays inside type_t (variants, fields, params,
     * throws). Each type_t stores a pointer into one of these pools. */
    symbol_t      *sympool[MAX_SYMPTR_POOL];
    u16            nsympool;
    struct type_t *typepool[MAX_TYPEPTR_POOL];
    u16            ntypepool;
} types_t;

/* Resolution context, controlling how identifiers are treated. */
typedef enum {
    TC_CTX_NORMAL  = 0,
    TC_CTX_PATTERN = 1,
    TC_CTX_TRY     = 2,
} resolve_ctx_t;

/* Type checker state. */
typedef struct {
    scope_t          *global;
    types_t           types;
    symbol_t         *fn;     /* Track the current function. */
    scope_t          *scope;  /* Track the current scope. */
    module_manager_t *mm;     /* Reference to module manager for imports */
    module_t         *module; /* Currently being resolved module */
    u32               flags;
    u16               recordid; /* Next anonymous record ID */
    resolve_ctx_t     ctx;      /* Allow unbound identifiers in patterns */
} resolve_t;

/* Allocate `n` symbol_t* slots from the type pool. */
symbol_t **types_alloc_sympool(types_t *t, u8 n);
/* Allocate `n` type_t* slots from the type pool. */
type_t **types_alloc_typepool(types_t *t, u8 n);

/* Dereference a type. */
type_t *deref_type(type_t *ref);

/* Initialize type checker. */
void resolve_init(resolve_t *t, module_manager_t *mm);
/* Typecheck a complete AST. */
bool resolve_run(resolve_t *t, module_t *root);

/* Check if a type is numeric, eg. integer or float. */
bool type_is_numeric(typeclass_t t);
/* Check if a type is compound, ie. is made of multiple sub-types. */
bool type_is_compound(type_t *t);
/* Check if a type is an address, eg. a pointer or slice. */
bool type_is_address(typeclass_t t);
/* Check if a type is an integer */
bool type_is_int(typeclass_t t);
/* Check if a type is an unsigned integer */
bool type_is_unsigned(typeclass_t t);
/* Check if a type is primitive, ie. not compound. */
bool type_is_primitive(type_t *t);
/* Check if a type is passed by reference automatically. */
bool type_is_passed_by_ref(type_t *t);
/* Check if a type is a tagged value */
bool type_is_tagged_value(type_t *ty);
/* Check if a type is a union with a payload value */
bool type_is_union_with_payload(type_t *ty);
/* Check if a type is packed, ie. it has no padding */
bool type_is_packed(type_t *t);
/* Check if type `a` can be coerced to type `b`. This handles cases like
 * *mut [T] -> *[T] where the types are structurally compatible. */
bool type_coercible(type_t *a, type_t *b);

#endif
riscv.c added +280 -0
1 +
/* RISC-V 64-bit (RV64I) instruction builder. */
#include <stdlib.h>

#include "riscv.h"
#include "types.h"

/* ABI mnemonic for each register, indexed by `reg_t` (x0..x31). */
const char *reg_names[] = {
    "zero", "ra", "sp", "gp", "tp",  "t0",  "t1", "t2", "fp", "s1", "a0",
    "a1",   "a2", "a3", "a4", "a5",  "a6",  "a7", "s2", "s3", "s4", "s5",
    "s6",   "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"
};

/* Indexed by `reg_t`: true for registers this backend treats as
 * caller-saved (the t0-t6 temporaries and a0-a7 argument registers).
 * NOTE(review): `ra` is marked false here even though the ABI treats it
 * as caller-saved — presumably it is handled by the prologue; confirm. */
const bool caller_saved_registers[] = {
    false, false, false, false, false, true,  true, true,  false, false, true,
    true,  true,  true,  true,  true,  true,  true, false, false, false, false,
    false, false, false, false, false, false, true, true,  true,  true
};

/* Scratch registers handed out for temporaries.
 * T0 is excluded — presumably reserved as a scratch/alternate link
 * register elsewhere in the backend; confirm against the codegen. */
const reg_t temp_registers[] = { T1, T2, T3, T4, T5, T6 };
20 +
21 +
/* Sign-extend the low `bit_width` bits of `value` to a full 32-bit
 * signed integer.
 *
 * Fix: widths outside the range (0, 32) previously invoked undefined
 * behavior (`value >> -1` or `~0u << 32`, shifts by >= the type width).
 * Such widths now return the value unchanged, which is the correct
 * result for a full 32-bit width. */
i32 sign_extend(u32 value, int bit_width) {
    if (bit_width <= 0 || bit_width >= 32) {
        /* Nothing to extend: value already occupies the full width. */
        return (i32)value;
    }
    if ((value >> (bit_width - 1)) & 1) {
        /* Sign bit is 1, so extend with 1s. */
        return (i32)(value | (~0u << bit_width));
    }
    return (i32)value;
}
28 +
29 +
/* Round `size` up to the next multiple of `alignment`.
 * `alignment` must be a power of two (this is assumed, not checked). */
i32 align(i32 size, i32 alignment) {
    i32 mask = alignment - 1;
    return (size + mask) & ~mask;
}
34 +
35 +
/* Creates an I-type instruction struct.
36 +
 * Used for immediate operations like ADDI, SLTI and loads like LW. */
37 +
static instr_t instr_i(
38 +
    opcode_t opcode, funct3_t fn3, reg_t rd, reg_t rs1, i32 imm
39 +
) {
40 +
    return (instr_t){ .i = { .opcode   = opcode,
41 +
                             .rd       = rd,
42 +
                             .rs1      = rs1,
43 +
                             .funct3   = fn3,
44 +
                             .imm_11_0 = (u32)imm } };
45 +
}
46 +
47 +
/* Creates a U-type instruction struct.
48 +
 * Used for LUI (Load Upper Immediate) and AUIPC. */
49 +
static instr_t instr_u(opcode_t opcode, reg_t rd, i32 imm) {
50 +
    return (instr_t){ .u = {
51 +
                          .opcode    = opcode,
52 +
                          .rd        = rd,
53 +
                          .imm_31_12 = (u32)imm & 0xFFFFF,
54 +
                      } };
55 +
}
56 +
57 +
/* Creates an R-type instruction struct.
58 +
 * Used for register-register operations like ADD, SUB, AND, OR. */
59 +
static instr_t instr_r(
60 +
    opcode_t opcode, funct3_t fn3, funct7_t fn7, reg_t rd, reg_t rs1, reg_t rs2
61 +
) {
62 +
    return (instr_t){ .r = { .opcode = opcode,
63 +
                             .rd     = rd,
64 +
                             .rs1    = rs1,
65 +
                             .rs2    = rs2,
66 +
                             .funct3 = fn3,
67 +
                             .funct7 = fn7 } };
68 +
}
69 +
70 +
/* Creates an S-type instruction struct.
71 +
 * Used for store instructions like SW, SH, SB. */
72 +
static instr_t instr_s(
73 +
    opcode_t opcode, funct3_t fn3, reg_t rs1, reg_t rs2, i32 imm
74 +
) {
75 +
    return (instr_t){ .s = { .opcode   = opcode,
76 +
                             .rs1      = rs1,
77 +
                             .rs2      = rs2,
78 +
                             .funct3   = fn3,
79 +
                             .imm_4_0  = (u32)imm & 0x1F,
80 +
                             .imm_11_5 = ((u32)imm >> 5) & 0x7F } };
81 +
}
82 +
83 +
/* Creates an SB-type (branch) instruction struct.
 * Modified S-type used for conditional branches like BEQ, BNE.
 * `imm` is a byte offset; bit 0 is implicitly zero (instructions are
 * 2-byte aligned at minimum) and the remaining bits are scattered
 * across the imm_* fields as the B-format encoding requires. */
static instr_t instr_sb(
    opcode_t opcode, funct3_t fn3, reg_t rs1, reg_t rs2, i32 imm
) {
    return (instr_t){ .b = { .opcode   = opcode,
                             .rs1      = rs1,
                             .rs2      = rs2,
                             .funct3   = fn3,
                             .imm_11   = (imm >> 11) & 0x1,
                             .imm_4_1  = (imm >> 1) & 0xF,
                             .imm_10_5 = (imm >> 5) & 0x3F,
                             .imm_12   = (imm >> 12) & 0x1 } };
}
97 +
98 +
/* Creates a UJ-type (jump) instruction struct.
 * Modified U-type used for JAL instruction.
 * `imm` is a byte offset covering +/-1MiB; bit 0 is implicitly zero and
 * the remaining bits are scattered per the J-format encoding. */
static instr_t instr_uj(opcode_t opcode, reg_t rd, i32 imm) {
    return (instr_t){ .j = { .opcode    = opcode,
                             .rd        = rd,
                             .imm_20    = (imm >> 20) & 0x1,
                             .imm_10_1  = (imm >> 1) & 0x3FF,
                             .imm_11    = (imm >> 11) & 0x1,
                             .imm_19_12 = (imm >> 12) & 0xFF } };
}
108 +
109 +
/* Instruction definitions table. */
/* Maps each instruction to its parameters for easy lookup. */
typedef struct {
    ifmt_t   type;    /* Instruction type */
    opcode_t opcode;  /* Opcode value */
    funct3_t funct3;  /* Function3 value (if applicable) */
    funct7_t funct7;  /* Function7 value (if applicable) */
    bool     special; /* Special handling flag: encoded by hand in `instr` */
} idef_t;

/* Instruction definition table.
 * NOTE: funct3 enumerator names are reused across unrelated groups for
 * their numeric values only (e.g. FUNCT3_BYTE == 0 also encodes BEQ);
 * do not read semantic meaning into the names below. */
static const idef_t idefs[] = {
    /* Upper immediate instructions. */
    [I_LUI]   = { IFMT_U, OP_LUI, 0, 0, true },
    [I_AUIPC] = { IFMT_U, OP_AUIPC, 0, 0, true },
    /* Jump instructions. */
    [I_JAL]  = { IFMT_J, OP_JAL, 0, 0, true },
    [I_JALR] = { IFMT_I, OP_JALR, 0, 0, false },
    /* Branch instructions. */
    [I_BEQ]  = { IFMT_B, OP_BRANCH, FUNCT3_BYTE, 0, false },
    [I_BNE]  = { IFMT_B, OP_BRANCH, FUNCT3_HALF, 0, false },
    [I_BLT]  = { IFMT_B, OP_BRANCH, FUNCT3_BYTE_U, 0, false },
    [I_BGE]  = { IFMT_B, OP_BRANCH, FUNCT3_HALF_U, 0, false },
    [I_BLTU] = { IFMT_B, OP_BRANCH, FUNCT3_OR, 0, false },
    [I_BGEU] = { IFMT_B, OP_BRANCH, FUNCT3_AND, 0, false },
    /* Load instructions. */
    [I_LB]  = { IFMT_I, OP_LOAD, FUNCT3_BYTE, 0, false },
    [I_LH]  = { IFMT_I, OP_LOAD, FUNCT3_HALF, 0, false },
    [I_LW]  = { IFMT_I, OP_LOAD, FUNCT3_WORD, 0, false },
    [I_LBU] = { IFMT_I, OP_LOAD, FUNCT3_BYTE_U, 0, false },
    [I_LHU] = { IFMT_I, OP_LOAD, FUNCT3_HALF_U, 0, false },
    /* Store instructions. */
    [I_SB] = { IFMT_S, OP_STORE, FUNCT3_BYTE, 0, false },
    [I_SH] = { IFMT_S, OP_STORE, FUNCT3_HALF, 0, false },
    [I_SW] = { IFMT_S, OP_STORE, FUNCT3_WORD, 0, false },
    /* ALU immediate operations. */
    [I_ADDI]  = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, false },
    [I_SLTI]  = { IFMT_I, OP_IMM, FUNCT3_SLT, 0, false },
    [I_SLTIU] = { IFMT_I, OP_IMM, FUNCT3_SLTU, 0, false },
    [I_XORI]  = { IFMT_I, OP_IMM, FUNCT3_XOR, 0, false },
    [I_ORI]   = { IFMT_I, OP_IMM, FUNCT3_OR, 0, false },
    [I_ANDI]  = { IFMT_I, OP_IMM, FUNCT3_AND, 0, false },
    /* Shifts are `special`: the shift amount shares the imm field with
     * the funct7-style arithmetic bit (see `instr`). */
    [I_SLLI]  = { IFMT_I, OP_IMM, FUNCT3_SLL, 0, true },
    [I_SRLI]  = { IFMT_I, OP_IMM, FUNCT3_SRL, 0, true },
    [I_SRAI]  = { IFMT_I, OP_IMM, FUNCT3_SRL, 0, true },
    /* ALU register operations. */
    [I_ADD]  = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_NORMAL, false },
    [I_SUB]  = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_SUB, false },
    [I_SLL]  = { IFMT_R, OP_OP, FUNCT3_SLL, FUNCT7_NORMAL, false },
    [I_SLT]  = { IFMT_R, OP_OP, FUNCT3_SLT, FUNCT7_NORMAL, false },
    [I_SLTU] = { IFMT_R, OP_OP, FUNCT3_SLTU, FUNCT7_NORMAL, false },
    [I_XOR]  = { IFMT_R, OP_OP, FUNCT3_XOR, FUNCT7_NORMAL, false },
    [I_SRL]  = { IFMT_R, OP_OP, FUNCT3_SRL, FUNCT7_NORMAL, false },
    [I_AND]  = { IFMT_R, OP_OP, FUNCT3_AND, FUNCT7_NORMAL, false },
    [I_OR]   = { IFMT_R, OP_OP, FUNCT3_OR, FUNCT7_NORMAL, false },
    /* M extension - multiply and divide. */
    [I_MUL]    = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_MUL, false },
    [I_MULH]   = { IFMT_R, OP_OP, FUNCT3_SLL, FUNCT7_MUL, false },
    [I_MULHSU] = { IFMT_R, OP_OP, FUNCT3_SLT, FUNCT7_MUL, false },
    [I_MULHU]  = { IFMT_R, OP_OP, FUNCT3_SLTU, FUNCT7_MUL, false },
    [I_DIV]    = { IFMT_R, OP_OP, FUNCT3_XOR, FUNCT7_MUL, false },
    [I_DIVU]   = { IFMT_R, OP_OP, FUNCT3_SRL, FUNCT7_MUL, false },
    [I_REM]    = { IFMT_R, OP_OP, FUNCT3_OR, FUNCT7_MUL, false },
    [I_REMU]   = { IFMT_R, OP_OP, FUNCT3_AND, FUNCT7_MUL, false },
    /* Pseudo-instructions. All `special`: expanded in `instr`. */
    [I_MV]  = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, true },
    [I_JMP] = { IFMT_J, OP_JAL, 0, 0, true },
    [I_NOP] = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, true },
    [I_NOT] = { IFMT_I, OP_IMM, FUNCT3_XOR, 0, true },
    [I_NEG] = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_SUB, true },
    /* System instructions */
    [I_EBREAK] = { IFMT_I, OP_SYSTEM, 0, 0, true },
    [I_ECALL]  = { IFMT_I, OP_SYSTEM, 0, 0, true },
    /* F Extension floating-point instructions */
    [I_FADD_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FADD_S, false },
    [I_FSUB_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FSUB_S, false },
    [I_FMUL_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FMUL_S, false },
    [I_FDIV_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FDIV_S, false },
    [I_FEQ_S]  = { IFMT_R, OP_OP_FP, FUNCT3_FEQ, FUNCT7_FEQ_S, false },
    [I_FLT_S]  = { IFMT_R, OP_OP_FP, FUNCT3_FLT, FUNCT7_FLT_S, false },
    [I_FLE_S]  = { IFMT_R, OP_OP_FP, FUNCT3_FLE, FUNCT7_FLE_S, false },
    [I_FLW]    = { IFMT_I, OP_LOAD_FP, FUNCT3_WORD_FP, 0, false },
    [I_FSW]    = { IFMT_S, OP_STORE_FP, FUNCT3_WORD_FP, 0, false },
    /* RV64I load/store */
    [I_LWU] = { IFMT_I, OP_LOAD, FUNCT3_WORD_U, 0, false },
    [I_LD]  = { IFMT_I, OP_LOAD, FUNCT3_DOUBLE, 0, false },
    [I_SD]  = { IFMT_S, OP_STORE, FUNCT3_DOUBLE, 0, false },
    /* RV64I immediate W-ops */
    [I_ADDIW] = { IFMT_I, OP_IMM_32, FUNCT3_ADD, 0, false },
    [I_SLLIW] = { IFMT_I, OP_IMM_32, FUNCT3_SLL, 0, true },
    [I_SRLIW] = { IFMT_I, OP_IMM_32, FUNCT3_SRL, 0, true },
    [I_SRAIW] = { IFMT_I, OP_IMM_32, FUNCT3_SRL, 0, true },
    /* RV64I register W-ops */
    [I_ADDW] = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_NORMAL, false },
    [I_SUBW] = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_SUB, false },
    [I_SLLW] = { IFMT_R, OP_OP_32, FUNCT3_SLL, FUNCT7_NORMAL, false },
    [I_SRLW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_NORMAL, false },
    [I_SRAW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_SRA, false },
    /* RV64M W-ops */
    [I_MULW]  = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_MUL, false },
    [I_DIVW]  = { IFMT_R, OP_OP_32, FUNCT3_XOR, FUNCT7_MUL, false },
    [I_DIVUW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_MUL, false },
    [I_REMW]  = { IFMT_R, OP_OP_32, FUNCT3_OR, FUNCT7_MUL, false },
    [I_REMUW] = { IFMT_R, OP_OP_32, FUNCT3_AND, FUNCT7_MUL, false },
};
214 +
215 +
/* Generates a RISC-V instruction based on the instruction definition.
 * Unused operands (e.g. `rs2` for I-type) are ignored by the chosen
 * format. */
instr_t instr(iname_t iop, reg_t rd, reg_t rs1, reg_t rs2, i32 imm) {
    const idef_t *def = &idefs[iop];

    /* Handle special cases that need specific processing. */
    if (def->special) {
        switch (iop) {
        case I_LUI:
            return instr_u(OP_LUI, rd, imm);
        case I_AUIPC:
            return instr_u(OP_AUIPC, rd, imm);
        case I_JAL:
            return instr_uj(OP_JAL, rd, imm);
        case I_SLLI:
            /* RV64 shifts take a 6-bit shift amount (shamt). */
            return instr_i(OP_IMM, FUNCT3_SLL, rd, rs1, imm & 0x3F);
        case I_SRLI:
            return instr_i(OP_IMM, FUNCT3_SRL, rd, rs1, imm & 0x3F);
        case I_SRAI:
            /* Adding 0x400 sets imm bit 10, the "arithmetic" bit that
             * distinguishes SRAI from SRLI. */
            return instr_i(OP_IMM, FUNCT3_SRL, rd, rs1, (imm & 0x3F) + 0x400);
        case I_SLLIW:
            /* W-ops operate on 32 bits, so shamt is 5 bits. */
            return instr_i(OP_IMM_32, FUNCT3_SLL, rd, rs1, imm & 0x1F);
        case I_SRLIW:
            return instr_i(OP_IMM_32, FUNCT3_SRL, rd, rs1, imm & 0x1F);
        case I_SRAIW:
            return instr_i(
                OP_IMM_32, FUNCT3_SRL, rd, rs1, (imm & 0x1F) + 0x400
            );
        case I_MV:
            /* mv rd, rs1 == addi rd, rs1, 0 */
            return instr_i(OP_IMM, FUNCT3_ADD, rd, rs1, 0);
        case I_JMP:
            /* j imm == jal zero, imm (link register discarded) */
            return instr_uj(OP_JAL, ZERO, imm);
        case I_NOP:
            /* nop == addi zero, zero, 0 */
            return instr_i(OP_IMM, FUNCT3_ADD, ZERO, ZERO, 0);
        case I_NOT:
            /* NOTE(review): encoded as `xori rd, rs1, 1` — a boolean not
             * that flips bit 0 only. The standard RISC-V `not` pseudo is
             * `xori rd, rs1, -1`; confirm this is intentional. */
            return instr_i(OP_IMM, FUNCT3_XOR, rd, rs1, 1);
        case I_NEG:
            /* neg rd, rs1 == sub rd, zero, rs1 */
            return instr_r(OP_OP, FUNCT3_ADD, FUNCT7_SUB, rd, ZERO, rs1);
        case I_EBREAK:
            /* EBREAK is the SYSTEM opcode with imm = 1. */
            return instr_i(OP_SYSTEM, 0, 0, 0, 1);
        case I_ECALL:
            /* ECALL is encoded as all zeros including immediate */
            return instr_i(OP_SYSTEM, 0, 0, 0, 0);
        default:
            /* Remaining `special` entries have no hand-coding; fall
             * through to the generic format dispatch below. */
            break;
        }
    }

    /* Regular instructions by type. */
    switch (def->type) {
    case IFMT_I:
        return instr_i(def->opcode, def->funct3, rd, rs1, imm);
    case IFMT_R:
        return instr_r(def->opcode, def->funct3, def->funct7, rd, rs1, rs2);
    case IFMT_S:
        return instr_s(def->opcode, def->funct3, rs1, rs2, imm);
    case IFMT_B:
        return instr_sb(def->opcode, def->funct3, rs1, rs2, imm);
    case IFMT_U:
        return instr_u(def->opcode, rd, imm);
    case IFMT_J:
        return instr_uj(def->opcode, rd, imm);
    default:
        /* Unknown format: internal encoding-table bug, fail hard. */
        abort();
    }
}
riscv.h added +396 -0
1 +
#ifndef OP_H
2 +
#define OP_H
3 +
4 +
#include "types.h"
5 +
6 +
/* Total number of registers. */
7 +
#define REGISTERS       32
8 +
/* Word size of target architecture (RISCV64). */
9 +
#define WORD_SIZE       8
10 +
/* Tag size for optional/union discriminants. */
11 +
#define TAG_SIZE        1
12 +
/* Instruction size in bytes (always 32-bit, even on RV64). */
13 +
#define INSTR_SIZE      4
14 +
/* Stack alignment requirement. */
15 +
#define STACK_ALIGNMENT 16
16 +
/* The frame pointer register is set as an alias of `S0`. */
17 +
#define FP              S0
18 +
19 +
/* Convenient macro wrappers for `instr`.
20 +
 * Some of these, such as BLE and BGT are implemented by swapping the operands
21 +
 * of other instructions. */
22 +
#define ADDI(rd, rs1, imm)   __instr(I_ADDI, rd, rs1, 0, imm)
23 +
#define SLTI(rd, rs1, imm)   __instr(I_SLTI, rd, rs1, 0, imm)
24 +
#define SLTIU(rd, rs1, imm)  __instr(I_SLTIU, rd, rs1, 0, imm)
25 +
#define XORI(rd, rs1, imm)   __instr(I_XORI, rd, rs1, 0, imm)
26 +
#define ORI(rd, rs1, imm)    __instr(I_ORI, rd, rs1, 0, imm)
27 +
#define ANDI(rd, rs1, imm)   __instr(I_ANDI, rd, rs1, 0, imm)
28 +
#define SLLI(rd, rs1, imm)   __instr(I_SLLI, rd, rs1, 0, imm)
29 +
#define SRLI(rd, rs1, imm)   __instr(I_SRLI, rd, rs1, 0, imm)
30 +
#define SRAI(rd, rs1, imm)   __instr(I_SRAI, rd, rs1, 0, imm)
31 +
#define JALR(rd, rs1, imm)   __instr(I_JALR, rd, rs1, 0, imm)
32 +
#define LB(rd, rs1, imm)     __instr(I_LB, rd, rs1, 0, imm)
33 +
#define LH(rd, rs1, imm)     __instr(I_LH, rd, rs1, 0, imm)
34 +
#define LW(rd, rs1, imm)     __instr(I_LW, rd, rs1, 0, imm)
35 +
#define LWU(rd, rs1, imm)    __instr(I_LWU, rd, rs1, 0, imm)
36 +
#define LD(rd, rs1, imm)     __instr(I_LD, rd, rs1, 0, imm)
37 +
#define LBU(rd, rs1, imm)    __instr(I_LBU, rd, rs1, 0, imm)
38 +
#define LHU(rd, rs1, imm)    __instr(I_LHU, rd, rs1, 0, imm)
39 +
#define SB(rs2, rs1, imm)    __instr(I_SB, 0, rs1, rs2, imm)
40 +
#define SH(rs2, rs1, imm)    __instr(I_SH, 0, rs1, rs2, imm)
41 +
#define SW(rs2, rs1, imm)    __instr(I_SW, 0, rs1, rs2, imm)
42 +
#define SD(rs2, rs1, imm)    __instr(I_SD, 0, rs1, rs2, imm)
43 +
#define BEQ(rs1, rs2, imm)   __instr(I_BEQ, 0, rs1, rs2, imm)
44 +
#define BNE(rs1, rs2, imm)   __instr(I_BNE, 0, rs1, rs2, imm)
45 +
#define BLT(rs1, rs2, imm)   __instr(I_BLT, 0, rs1, rs2, imm)
46 +
#define BGE(rs1, rs2, imm)   __instr(I_BGE, 0, rs1, rs2, imm)
47 +
#define BLTU(rs1, rs2, imm)  __instr(I_BLTU, 0, rs1, rs2, imm)
48 +
#define BGEU(rs1, rs2, imm)  __instr(I_BGEU, 0, rs1, rs2, imm)
49 +
#define BLE(rs1, rs2, imm)   __instr(I_BGE, 0, rs2, rs1, imm)
50 +
#define BGT(rs1, rs2, imm)   __instr(I_BLT, 0, rs2, rs1, imm)
51 +
#define ADD(rd, rs1, rs2)    __instr(I_ADD, rd, rs1, rs2, 0)
52 +
#define SUB(rd, rs1, rs2)    __instr(I_SUB, rd, rs1, rs2, 0)
53 +
#define DIV(rd, rs1, rs2)    __instr(I_DIV, rd, rs1, rs2, 0)
54 +
#define DIVU(rd, rs1, rs2)   __instr(I_DIVU, rd, rs1, rs2, 0)
55 +
#define REM(rd, rs1, rs2)    __instr(I_REM, rd, rs1, rs2, 0)
56 +
#define REMU(rd, rs1, rs2)   __instr(I_REMU, rd, rs1, rs2, 0)
57 +
#define MUL(rd, rs1, rs2)    __instr(I_MUL, rd, rs1, rs2, 0)
58 +
#define SLL(rd, rs1, rs2)    __instr(I_SLL, rd, rs1, rs2, 0)
59 +
#define SLT(rd, rs1, rs2)    __instr(I_SLT, rd, rs1, rs2, 0)
60 +
#define SLTU(rd, rs1, rs2)   __instr(I_SLTU, rd, rs1, rs2, 0)
61 +
#define XOR(rd, rs1, rs2)    __instr(I_XOR, rd, rs1, rs2, 0)
62 +
#define SRL(rd, rs1, rs2)    __instr(I_SRL, rd, rs1, rs2, 0)
63 +
#define AND(rd, rs1, rs2)    __instr(I_AND, rd, rs1, rs2, 0)
64 +
#define OR(rd, rs1, rs2)     __instr(I_OR, rd, rs1, rs2, 0)
65 +
#define LUI(rd, imm)         __instr(I_LUI, rd, 0, 0, imm)
66 +
#define AUIPC(rd, imm)       __instr(I_AUIPC, rd, 0, 0, imm)
67 +
#define JAL(rd, imm)         __instr(I_JAL, rd, 0, 0, imm)
68 +
#define JMP(imm)             __instr(I_JMP, 0, 0, 0, imm)
69 +
/* Assembler-style constructors for pseudo-instructions; unused operand
 * slots are passed as 0 and ignored by instr(). */
#define MV(rd, rs1)          __instr(I_MV, rd, rs1, 0, 0)
#define NOT(rd, rs1)         __instr(I_NOT, rd, rs1, 0, 0)
#define NEG(rd, rs1)         __instr(I_NEG, rd, rs1, 0, 0)
#define NOP                  __instr(I_NOP, 0, 0, 0, 0)
/* RET expands to `JALR zero, ra, 0`: jump to the return address. */
#define RET                  __instr(I_JALR, ZERO, RA, 0, 0)
#define EBREAK               __instr(I_EBREAK, 0, 0, 0, 0)
#define ECALL                __instr(I_ECALL, 0, 0, 0, 0)
/* RV64I word-width (32-bit) operations */
#define ADDIW(rd, rs1, imm)  __instr(I_ADDIW, rd, rs1, 0, imm)
#define ADDW(rd, rs1, rs2)   __instr(I_ADDW, rd, rs1, rs2, 0)
#define SUBW(rd, rs1, rs2)   __instr(I_SUBW, rd, rs1, rs2, 0)
#define MULW(rd, rs1, rs2)   __instr(I_MULW, rd, rs1, rs2, 0)
#define DIVW(rd, rs1, rs2)   __instr(I_DIVW, rd, rs1, rs2, 0)
#define DIVUW(rd, rs1, rs2)  __instr(I_DIVUW, rd, rs1, rs2, 0)
#define REMW(rd, rs1, rs2)   __instr(I_REMW, rd, rs1, rs2, 0)
#define REMUW(rd, rs1, rs2)  __instr(I_REMUW, rd, rs1, rs2, 0)
#define SLLIW(rd, rs1, imm)  __instr(I_SLLIW, rd, rs1, 0, imm)
#define SRLIW(rd, rs1, imm)  __instr(I_SRLIW, rd, rs1, 0, imm)
#define SRAIW(rd, rs1, imm)  __instr(I_SRAIW, rd, rs1, 0, imm)
#define SLLW(rd, rs1, rs2)   __instr(I_SLLW, rd, rs1, rs2, 0)
#define SRLW(rd, rs1, rs2)   __instr(I_SRLW, rd, rs1, rs2, 0)
#define SRAW(rd, rs1, rs2)   __instr(I_SRAW, rd, rs1, rs2, 0)
/* F Extension - Floating-point instructions */
#define FADD_S(rd, rs1, rs2) __instr(I_FADD_S, rd, rs1, rs2, 0)
#define FSUB_S(rd, rs1, rs2) __instr(I_FSUB_S, rd, rs1, rs2, 0)
#define FMUL_S(rd, rs1, rs2) __instr(I_FMUL_S, rd, rs1, rs2, 0)
#define FDIV_S(rd, rs1, rs2) __instr(I_FDIV_S, rd, rs1, rs2, 0)
#define FEQ_S(rd, rs1, rs2)  __instr(I_FEQ_S, rd, rs1, rs2, 0)
#define FLT_S(rd, rs1, rs2)  __instr(I_FLT_S, rd, rs1, rs2, 0)
#define FLE_S(rd, rs1, rs2)  __instr(I_FLE_S, rd, rs1, rs2, 0)
#define FLW(rd, rs1, imm)    __instr(I_FLW, rd, rs1, 0, imm)
/* Store: `rd` is unused, the stored value travels in rs2. */
#define FSW(rs2, rs1, imm)   __instr(I_FSW, 0, rs1, rs2, imm)
101 +
102 +
/* String representations of register names, indexed by reg_t. */
extern const char *reg_names[];

/* Boolean map of caller-saved registers.
 * True for registers that need to be saved by the caller
 * before a function call. Indexed by reg_t. */
extern const bool caller_saved_registers[REGISTERS];

/* RISC-V register names (standard ABI mnemonics, x0-x31). */
typedef enum {
    ZERO = 0,  /* Hard-wired zero */
    RA   = 1,  /* Return address */
    SP   = 2,  /* Stack pointer */
    GP   = 3,  /* Global pointer */
    TP   = 4,  /* Thread pointer */
    T0   = 5,  /* Temporary/alternate link register */
    T1   = 6,  /* Temporary */
    T2   = 7,  /* Temporary */
    S0   = 8,  /* Saved register/frame pointer */
    S1   = 9,  /* Saved register */
    A0   = 10, /* Function arguments/returns */
    A1   = 11,
    A2   = 12, /* Function arguments */
    A3   = 13,
    A4   = 14,
    A5   = 15,
    A6   = 16,
    A7   = 17,
    S2   = 18, /* Saved registers */
    S3   = 19,
    S4   = 20,
    S5   = 21,
    S6   = 22,
    S7   = 23,
    S8   = 24,
    S9   = 25,
    S10  = 26,
    S11  = 27,
    T3   = 28, /* Temporaries */
    T4   = 29,
    T5   = 30,
    T6   = 31
} reg_t;

/* Temporary registers (T1-T6).
 * T0 is excluded -- it doubles as the alternate link register above. */
extern const reg_t temp_registers[6];
148 +
149 +
/* Opcodes for RISC-V base instruction set (the low 7 bits of every
 * 32-bit instruction word). */
typedef enum {
    OP_LOAD   = 0x03,
    OP_STORE  = 0x23,
    OP_BRANCH = 0x63,
    OP_JALR   = 0x67,
    OP_JAL    = 0x6F,
    OP_OP     = 0x33, /* Register-register ALU ops */
    OP_IMM    = 0x13, /* Register-immediate ALU ops */
    OP_AUIPC  = 0x17,
    OP_IMM_32 = 0x1B, /* RV64I: ADDIW, SLLIW, SRLIW, SRAIW */
    OP_OP_32 = 0x3B, /* RV64I: ADDW, SUBW, SLLW, SRLW, SRAW, MULW, DIVW, REMW */
    OP_LUI   = 0x37,
    OP_SYSTEM = 0x73, /* ECALL/EBREAK */
    OP_FENCE  = 0x0F,
    /* F Extension opcodes */
    OP_LOAD_FP  = 0x07,
    OP_STORE_FP = 0x27,
    OP_OP_FP    = 0x53
} opcode_t;
169 +
170 +
/* Function3 values (bits 14:12 of the instruction word).
 * Several constants deliberately share an encoding: funct3 is only
 * meaningful relative to the major opcode (e.g. FUNCT3_WORD and
 * FUNCT3_SLT are both 0x2 but belong to different opcodes). */
typedef enum {
    /* Memory operations */
    FUNCT3_BYTE   = 0x0, /* LB/SB - Load/Store Byte */
    FUNCT3_HALF   = 0x1, /* LH/SH - Load/Store Halfword */
    FUNCT3_WORD   = 0x2, /* LW/SW - Load/Store Word */
    FUNCT3_DOUBLE = 0x3, /* LD/SD - Load/Store Doubleword */
    FUNCT3_BYTE_U = 0x4, /* LBU - Load Byte Unsigned */
    FUNCT3_HALF_U = 0x5, /* LHU - Load Halfword Unsigned */
    FUNCT3_WORD_U = 0x6, /* LWU - Load Word Unsigned */

    /* ALU operations */
    FUNCT3_ADD  = 0x0, /* ADD/SUB/ADDI */
    FUNCT3_SLL  = 0x1, /* SLL/SLLI */
    FUNCT3_SLT  = 0x2, /* SLT/SLTI */
    FUNCT3_SLTU = 0x3, /* SLTU/SLTIU */
    FUNCT3_XOR  = 0x4, /* XOR/XORI */
    FUNCT3_SRL  = 0x5, /* SRL/SRA/SRLI/SRAI */
    FUNCT3_OR   = 0x6, /* OR/ORI */
    FUNCT3_AND  = 0x7, /* AND/ANDI */
    /* F Extension function3 codes */
    FUNCT3_WORD_FP = 0x2, /* FLW/FSW - Load/Store Single */
    FUNCT3_FEQ     = 0x2, /* FEQ.S */
    FUNCT3_FLT     = 0x1, /* FLT.S */
    FUNCT3_FLE     = 0x0  /* FLE.S */
} funct3_t;

/* Function7 values (bits 31:25 of the instruction word).
 * The FP comparisons (FEQ/FLT/FLE) share funct7 0x50 and are
 * distinguished by funct3 above. */
typedef enum {
    FUNCT7_NORMAL = 0x00,
    FUNCT7_SUB    = 0x20,
    FUNCT7_SRA    = 0x20,
    FUNCT7_MUL    = 0x01, /* M extension ops */
    /* F Extension function codes */
    FUNCT7_FADD_S = 0x00,
    FUNCT7_FSUB_S = 0x04,
    FUNCT7_FMUL_S = 0x08,
    FUNCT7_FDIV_S = 0x0C,
    FUNCT7_FEQ_S  = 0x50,
    FUNCT7_FLT_S  = 0x50,
    FUNCT7_FLE_S  = 0x50
} funct7_t;
212 +
213 +
/* Represents a RISC-V instruction in its various formats.
 * Fields are listed from the least-significant bit upward.
 *
 * NOTE(review): the order in which bit-fields are packed is
 * implementation-defined (C99 6.7.2.1); this layout matches the RISC-V
 * encoding only with LSB-first allocation into a little-endian u32
 * (the GCC/Clang behavior) -- confirm before porting to another
 * toolchain. */
typedef union {
    struct {
        u32 opcode : 7;
        u32 rd     : 5;
        u32 funct3 : 3;
        u32 rs1    : 5;
        u32 rs2    : 5;
        u32 funct7 : 7;
    } r; /* Register format */

    struct {
        u32 opcode   : 7;
        u32 rd       : 5;
        u32 funct3   : 3;
        u32 rs1      : 5;
        u32 imm_11_0 : 12;
    } i; /* Immediate format */

    struct {
        u32 opcode   : 7;
        u32 imm_4_0  : 5;
        u32 funct3   : 3;
        u32 rs1      : 5;
        u32 rs2      : 5;
        u32 imm_11_5 : 7;
    } s; /* Store format */

    /* B/J formats scatter their immediates; see get_b_imm/get_j_imm. */
    struct {
        u32 opcode   : 7;
        u32 imm_11   : 1;
        u32 imm_4_1  : 4;
        u32 funct3   : 3;
        u32 rs1      : 5;
        u32 rs2      : 5;
        u32 imm_10_5 : 6;
        u32 imm_12   : 1;
    } b; /* Branch format */

    struct {
        u32 opcode    : 7;
        u32 rd        : 5;
        u32 imm_31_12 : 20;
    } u; /* Upper immediate format */

    struct {
        u32 opcode    : 7;
        u32 rd        : 5;
        u32 imm_19_12 : 8;
        u32 imm_11    : 1;
        u32 imm_10_1  : 10;
        u32 imm_20    : 1;
    } j; /* Jump format */

    u32 raw; /* Raw 32-bit instruction */
} instr_t;
269 +
270 +
/* Instruction encoding format. */
typedef enum {
    IFMT_I, /* I-type (immediate) */
    IFMT_R, /* R-type (register) */
    IFMT_S, /* S-type (store) */
    IFMT_B, /* B-type (branch) */
    IFMT_U, /* U-type (upper immediate) */
    IFMT_J, /* J-type (jump) */
} ifmt_t;

/* RISC-V instruction name, including pseudo-instructions
 * (I_MV, I_JMP, I_NOP, I_NOT, I_NEG) that lower to base instructions. */
typedef enum {
    I_LUI,
    I_AUIPC,
    I_JAL,
    I_JALR,
    I_BEQ,
    I_BNE,
    I_BLT,
    I_BGE,
    I_BLTU,
    I_BGEU,
    I_LB,
    I_LH,
    I_LW,
    I_LBU,
    I_LHU,
    I_SB,
    I_SH,
    I_SW,
    I_ADDI,
    I_SLTI,
    I_SLTIU,
    I_XORI,
    I_ORI,
    I_ANDI,
    I_SLLI,
    I_SRLI,
    I_SRAI,
    I_ADD,
    I_SUB,
    I_SLL,
    I_SLT,
    I_SLTU,
    I_XOR,
    I_SRL,
    I_SRA,
    I_OR,
    I_AND,
    /* M extension */
    I_MUL,
    I_MULH,
    I_MULHSU,
    I_MULHU,
    I_DIV,
    I_DIVU,
    I_REM,
    I_REMU,
    /* Pseudo-instructions */
    I_MV,
    I_JMP,
    I_NOP,
    I_NOT,
    I_NEG,
    I_EBREAK,
    I_ECALL,
    /* F Extension - Floating-point instructions */
    I_FADD_S,
    I_FSUB_S,
    I_FMUL_S,
    I_FDIV_S,
    I_FEQ_S,
    I_FLT_S,
    I_FLE_S,
    I_FLW,
    I_FSW,
    /* RV64I extensions */
    I_LWU,
    I_LD,
    I_SD,
    I_ADDIW,
    I_SLLIW,
    I_SRLIW,
    I_SRAIW,
    I_ADDW,
    I_SUBW,
    I_SLLW,
    I_SRLW,
    I_SRAW,
    I_MULW,
    I_DIVW,
    I_DIVUW,
    I_REMW,
    I_REMUW
} iname_t;
363 +
364 +
/* Returns a RISC-V instruction based on the instruction type.
 * Operands that an instruction does not use are passed as 0. */
instr_t instr(iname_t op, reg_t rd, reg_t rs1, reg_t rs2, i32 imm);

/* Thin wrapper used by the instruction macros above.
 * NOTE(review): identifiers starting with a double underscore are
 * reserved for the implementation (C99 7.1.3); consider renaming
 * together with the macro bodies that reference it. */
static inline instr_t __instr(
    iname_t op, reg_t rd, reg_t rs1, reg_t rs2, i32 imm
) {
    return instr(op, rd, rs1, rs2, imm);
}
372 +
373 +
/* Return true when a signed 12-bit immediate can encode `value`. */
374 +
static inline bool is_small(i32 value) {
375 +
    return value >= -2048 && value <= 2047;
376 +
}
377 +
378 +
/* Return true when `value` fits a B-type branch offset:
 * a 13-bit signed, even displacement. */
static inline bool is_branch_imm(i32 value) {
    if (value & 1)
        return false; /* Branch targets must be 2-byte aligned. */
    return -(1 << 12) <= value && value <= (1 << 12) - 2;
}
381 +
382 +
/* Return true when `value` fits a J-type jump offset:
 * a 21-bit signed, even displacement. */
static inline bool is_jump_imm(i32 value) {
    if (value & 1)
        return false; /* Jump targets must be 2-byte aligned. */
    return -(1 << 20) <= value && value <= (1 << 20) - 2;
}
385 +
386 +
/* Helper function to sign-extend the low `bit_width` bits of a value. */
i32 sign_extend(u32 value, int bit_width);
/* Aligns a size to the specified alignment boundary. */
i32 align(i32 size, i32 alignment);
/* Functions to decode the (scattered) immediate field out of an
 * instruction, one per encoding format. */
i32 get_i_imm(instr_t instr);
i32 get_s_imm(instr_t instr);
i32 get_b_imm(instr_t instr);
i32 get_j_imm(instr_t instr);
395 +
396 +
#endif
scanner.c added +372 -0
1 +
#include <ctype.h>
2 +
#include <string.h>
3 +
4 +
#include "scanner.h"
5 +
#include "types.h"
6 +
7 +
/* Keyword lookup table mapping spelling -> token class.
 * `length` must equal strlen(name); keyword_or_ident() compares by
 * length first, then memcmp.
 * NOTE(review): scanner.h declares T_CONTINUE and T_DEFAULT, but no
 * `continue`/`default` entries appear here -- confirm whether those
 * keywords are intentionally unimplemented. */
static const struct {
    const char  *name;
    usize        length;
    tokenclass_t tok;
} keywords[] = {
    { "fn", 2, T_FN },           { "pub", 3, T_PUB },
    { "return", 6, T_RETURN },   { "while", 5, T_WHILE },
    { "mut", 3, T_MUT },         { "let", 3, T_LET },
    { "static", 6, T_STATIC },   { "if", 2, T_IF },
    { "else", 4, T_ELSE },       { "i8", 2, T_I8 },
    { "i16", 3, T_I16 },         { "i32", 3, T_I32 },
    { "i64", 3, T_I64 },         { "u8", 2, T_U8 },
    { "u16", 3, T_U16 },         { "u32", 3, T_U32 },
    { "u64", 3, T_U64 },         { "f32", 3, T_F32 },
    { "bool", 4, T_BOOL },       { "void", 4, T_VOID },
    { "true", 4, T_TRUE },       { "false", 5, T_FALSE },
    { "nil", 3, T_NIL },         { "loop", 4, T_LOOP },
    { "try", 3, T_TRY },         { "catch", 5, T_CATCH },
    { "for", 3, T_FOR },         { "in", 2, T_IN },
    { "const", 5, T_CONST },     { "break", 5, T_BREAK },
    { "throw", 5, T_THROW },     { "union", 5, T_UNION },
    { "and", 3, T_AND },         { "or", 2, T_OR },
    { "not", 3, T_NOT },         { "match", 5, T_MATCH },
    { "use", 3, T_USE },         { "case", 4, T_CASE },
    { "extern", 6, T_EXTERN },   { "mod", 3, T_MOD },
    { "as", 2, T_AS },           { "record", 6, T_RECORD },
    { "undefined", 9, T_UNDEF }, { "align", 5, T_ALIGN },
    { "throws", 6, T_THROWS },   { "super", 5, T_SUPER },
    { "panic", 5, T_PANIC },     { "opaque", 6, T_OPAQUE },
};
38 +
39 +
/* Initialize scanner with source text. */
40 +
void scanner_init(scanner_t *s, const char *file, const char *source) {
41 +
    s->file   = file;
42 +
    s->source = source;
43 +
    s->token  = source;
44 +
    s->cursor = source;
45 +
}
46 +
47 +
/* Check if we've reached the end. */
48 +
static bool is_eof(scanner_t *s) {
49 +
    return *s->cursor == '\0';
50 +
}
51 +
52 +
/* Look one character PAST the current unconsumed character, i.e.
 * s->cursor[1]. (`*s->cursor` is the next char advance() would return.)
 * Returns '\0' at end of input. */
static char peek(scanner_t *s) {
    if (is_eof(s))
        return '\0';
    return s->cursor[1];
}
58 +
59 +
/* Advance current position and return previous char. */
60 +
static char advance(scanner_t *s) {
61 +
    s->cursor++;
62 +
    return s->cursor[-1];
63 +
}
64 +
65 +
/* Match expected character. */
66 +
static bool consume(scanner_t *s, char expected) {
67 +
    if (is_eof(s))
68 +
        return false;
69 +
    if (*s->cursor != expected)
70 +
        return false;
71 +
    s->cursor++;
72 +
73 +
    return true;
74 +
}
75 +
76 +
/* Create a token of given class. */
77 +
static token_t tok(scanner_t *s, tokenclass_t cls) {
78 +
    token_t t = { .cls      = cls,
79 +
                  .start    = s->token,
80 +
                  .length   = (usize)(s->cursor - s->token),
81 +
                  .position = (usize)(s->token - s->source) };
82 +
    return t;
83 +
}
84 +
85 +
/* Create an error token.
 * Note: `start`/`length` carry the error MESSAGE (a static string),
 * not source text; `position` records where in the source the error
 * occurred. Consumers must not treat `start` as a source pointer. */
static token_t error_tok(
    scanner_t *s, const char *offset, const char *message
) {
    token_t t = { .cls      = T_INVALID,
                  .start    = message,
                  .length   = strlen(message),
                  .position = (usize)(offset - s->source) };
    return t;
}
95 +
96 +
/* Skip whitespace and comments. */
97 +
static void skip_whitespace(scanner_t *s) {
98 +
    for (;;) {
99 +
        switch (*s->cursor) {
100 +
        case ' ':
101 +
        case '\r':
102 +
        case '\t':
103 +
            advance(s);
104 +
            break;
105 +
        case '\n':
106 +
            advance(s);
107 +
            break;
108 +
        case '/':
109 +
            if (peek(s) == '/') {
110 +
                /* Comment goes until end of line. */
111 +
                while (*s->cursor != '\n' && !is_eof(s))
112 +
                    advance(s);
113 +
            } else {
114 +
                return;
115 +
            }
116 +
            break;
117 +
        default:
118 +
            return;
119 +
        }
120 +
    }
121 +
}
122 +
123 +
/* Check if character is a decimal digit. */
static bool is_digit(char c) {
    /* Values outside '0'..'9' wrap to >= 10 under unsigned arithmetic. */
    return (unsigned char)(c - '0') < 10u;
}
127 +
128 +
/* Check if character is a hexadecimal digit (0-9, a-f, A-F). */
static bool is_hex_digit(char c) {
    /* isxdigit() matches exactly this set in every locale; the cast
     * avoids UB for negative char values. */
    return isxdigit((unsigned char)c) != 0;
}
133 +
134 +
/* Check if character is a binary digit ('0' or '1'). */
static bool is_bin_digit(char c) {
    return (unsigned char)(c - '0') < 2u;
}
138 +
139 +
/* Check if character is an ASCII letter. */
static bool is_alpha(char c) {
    /* Setting bit 5 folds uppercase into lowercase; only ASCII letters
     * land inside 'a'..'z' afterwards. */
    char folded = (char)(c | 0x20);
    return folded >= 'a' && folded <= 'z';
}
143 +
144 +
/* Scan a number token: decimal, hex (0x), binary (0b), or a decimal
 * with a fractional part. scanner_next() has already consumed either
 * the first digit or a leading '+'/'-' sign before calling this. */
static token_t scan_number(scanner_t *s) {
    bool signed_token = (s->token[0] == '-') || (s->token[0] == '+');

    /* The sign itself was consumed by scanner_next(); consume the first
     * digit here so that s->cursor[-1] always holds the number's first
     * digit below, for both signed and unsigned tokens. */
    if (signed_token)
        advance(s);

    /* Check for hex literal (0x or 0X prefix) */
    if (s->cursor[-1] == '0' && (*s->cursor == 'x' || *s->cursor == 'X')) {
        advance(s); /* Consume the 'x' or 'X' */
        /* Must have at least one hex digit after 0x */
        if (!is_hex_digit(*s->cursor))
            return error_tok(s, s->token, "invalid hex literal");

        while (is_hex_digit(*s->cursor))
            advance(s);

        return tok(s, T_NUMBER);
    }

    /* Check for binary literal (0b or 0B prefix) */
    if (s->cursor[-1] == '0' && (*s->cursor == 'b' || *s->cursor == 'B')) {
        advance(s); /* Consume the 'b' or 'B' */
        /* Must have at least one binary digit after 0b */
        if (!is_bin_digit(*s->cursor))
            return error_tok(s, s->token, "invalid binary literal");

        while (is_bin_digit(*s->cursor))
            advance(s);

        return tok(s, T_NUMBER);
    }

    /* Regular decimal number */
    while (is_digit(*s->cursor))
        advance(s);

    /* Look for decimal part; the '.' is only consumed when a digit
     * follows, so ranges like `1..2` still tokenize as NUMBER DOT_DOT. */
    if (*s->cursor == '.' && is_digit(peek(s))) {
        advance(s); /* Consume the "." */
        while (is_digit(*s->cursor))
            advance(s);
    }
    return tok(s, T_NUMBER);
}
189 +
190 +
/* Scan a string. */
191 +
static token_t scan_string(scanner_t *s) {
192 +
    while (*s->cursor != '"' && !is_eof(s)) {
193 +
        consume(s, '\\'); // Consume escapes.
194 +
        advance(s);
195 +
    }
196 +
    if (!consume(s, '"'))
197 +
        return error_tok(s, s->token, "unterminated string");
198 +
199 +
    return tok(s, T_STRING);
200 +
}
201 +
202 +
/* Scan a character, such as: 'z' */
203 +
static token_t scan_char(scanner_t *s) {
204 +
    while (*s->cursor != '\'' && !is_eof(s)) {
205 +
        if (!isprint(*s->cursor))
206 +
            return error_tok(s, s->token, "invalid character");
207 +
208 +
        consume(s, '\\');
209 +
        advance(s);
210 +
    }
211 +
    if (!consume(s, '\''))
212 +
        return error_tok(s, s->token, "unterminated character");
213 +
214 +
    return tok(s, T_CHAR);
215 +
}
216 +
217 +
/* Return a keyword or identifier token. */
218 +
static tokenclass_t keyword_or_ident(const char *start, usize length) {
219 +
    for (usize i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
220 +
        if (length == keywords[i].length &&
221 +
            memcmp(start, keywords[i].name, length) == 0) {
222 +
            return keywords[i].tok;
223 +
        }
224 +
    }
225 +
    return T_IDENT;
226 +
}
227 +
228 +
/* Scan an identifier, label or keyword. */
229 +
static token_t scan_identifier(scanner_t *s) {
230 +
    while (is_alpha(*s->cursor) || is_digit(*s->cursor) || *s->cursor == '_' ||
231 +
           *s->cursor == '#')
232 +
        advance(s);
233 +
234 +
    return tok(s, keyword_or_ident(s->token, (usize)(s->cursor - s->token)));
235 +
}
236 +
237 +
/* Scan the next token. Returns T_EOF at end of input and a T_INVALID
 * token (carrying an error message) on malformed input. */
token_t scanner_next(scanner_t *s) {
    skip_whitespace(s);
    s->token = s->cursor; /* Token starts after any skipped whitespace. */

    if (is_eof(s))
        return tok(s, T_EOF);

    /* The first character is consumed up front; the scan_* helpers and
     * the cases below therefore look at *s->cursor for what follows. */
    char c = advance(s);

    if (is_digit(c))
        return scan_number(s);
    if (is_alpha(c))
        return scan_identifier(s);

    switch (c) {
    case '\'':
        return scan_char(s);
    case '"':
        return scan_string(s);
    case '(':
        return tok(s, T_LPAREN);
    case ')':
        return tok(s, T_RPAREN);
    case '{':
        return tok(s, T_LBRACE);
    case '}':
        return tok(s, T_RBRACE);
    case '[':
        return tok(s, T_LBRACKET);
    case ']':
        return tok(s, T_RBRACKET);
    case ';':
        return tok(s, T_SEMICOLON);
    case ',':
        return tok(s, T_COMMA);
    case '.':
        if (*s->cursor == '.') {
            advance(s);
            return tok(s, T_DOT_DOT);
        }
        return tok(s, T_DOT);
    case ':':
        if (*s->cursor == ':') {
            advance(s);
            return tok(s, T_COLON_COLON);
        }
        return tok(s, T_COLON);
    case '-':
        /* `->` takes priority over a negative number literal. */
        if (*s->cursor == '>') {
            advance(s);
            return tok(s, T_ARROW);
        }
        /* If followed by a digit, scan as negative number */
        if (is_digit(*s->cursor)) {
            return scan_number(s);
        }
        return tok(s, T_MINUS);
    case '+':
        /* `+5` scans as a signed number literal, like `-5` above. */
        if (is_digit(*s->cursor)) {
            return scan_number(s);
        }
        return tok(s, T_PLUS);
    case '/':
        /* `//` comments were already consumed by skip_whitespace(). */
        return tok(s, T_SLASH);
    case '*':
        return tok(s, T_STAR);
    case '%':
        return tok(s, T_PERCENT);
    case '&':
        return tok(s, T_AMP);
    case '?':
        return tok(s, T_QUESTION);
    case '!':
        return tok(s, consume(s, '=') ? T_BANG_EQ : T_BANG);
    case '=':
        if (*s->cursor == '>') {
            advance(s);
            return tok(s, T_FAT_ARROW);
        }
        return tok(s, consume(s, '=') ? T_EQ_EQ : T_EQ);
    case '<':
        if (*s->cursor == '<') {
            advance(s);
            return tok(s, T_LSHIFT);
        }
        return tok(s, consume(s, '=') ? T_LT_EQ : T_LT);
    case '>':
        if (*s->cursor == '>') {
            advance(s);
            return tok(s, T_RSHIFT);
        }
        return tok(s, consume(s, '=') ? T_GT_EQ : T_GT);
    case '|':
        return tok(s, T_PIPE);
    case '^':
        return tok(s, T_CARET);
    case '~':
        return tok(s, T_TILDE);
    case '@':
        /* Scan @identifier as a single token (letters only). */
        if (!is_alpha(*s->cursor))
            return error_tok(s, s->token, "expected identifier after `@`");
        while (is_alpha(*s->cursor))
            advance(s);
        return tok(s, T_AT_IDENT);
    case '_':
        if (is_alpha(*s->cursor) || is_digit(*s->cursor) || *s->cursor == '_') {
            /* This is part of an identifier like `_foo` or `__start` */
            return scan_identifier(s);
        }
        /* A bare `_` is the placeholder/wildcard token. */
        return tok(s, T_UNDERSCORE);
    }
    return error_tok(s, s->token, "unexpected character");
}
352 +
353 +
/* Get the source code location from a byte offset. */
354 +
location_t scanner_get_location(scanner_t *s, u32 position) {
355 +
    u32 l = 1;
356 +
    u32 c = 1;
357 +
358 +
    for (u32 i = 0; i < position; i++) {
359 +
        if (s->source[i] == '\n') {
360 +
            l++;
361 +
            c = 1;
362 +
        } else {
363 +
            c++;
364 +
        }
365 +
    }
366 +
    return (location_t){
367 +
        .file = s->file,
368 +
        .src  = s->source + position,
369 +
        .line = l,
370 +
        .col  = c,
371 +
    };
372 +
}
scanner.h added +151 -0
1 +
#ifndef SCANNER_H
2 +
#define SCANNER_H
3 +
4 +
#include "types.h"
5 +
6 +
/* Token kinds produced by scanner_next(). */
typedef enum {
    /* End of file token generated by the scanner
     * when the input is exhausted. */
    T_EOF,

    /* Special "error" token; its token_t carries the error message
     * in `start`/`length` (see error_tok in scanner.c). */
    T_INVALID,

    /* Single-char tokens. */
    T_LPAREN,     /* ( */
    T_RPAREN,     /* ) */
    T_LBRACE,     /* { */
    T_RBRACE,     /* } */
    T_LBRACKET,   /* [ */
    T_RBRACKET,   /* ] */
    T_COMMA,      /* , */
    T_DOT,        /* . */
    T_DOT_DOT,    /* .. */
    T_MINUS,      /* - */
    T_PLUS,       /* + */
    T_SEMICOLON,  /* ; */
    T_SLASH,      /* / */
    T_STAR,       /* * */
    T_PERCENT,    /* % */
    T_AMP,        /* & */
    T_PIPE,       /* | */
    T_CARET,      /* ^ */
    T_TILDE,      /* ~ */
    T_UNDERSCORE, /* _ */

    /* One or two char tokens. */
    T_QUESTION, /* ? */
    T_BANG,     /* ! */
    T_BANG_EQ,  /* != */
    T_EQ,       /* = */
    T_EQ_EQ,    /* == */
    T_GT,       /* > */
    T_GT_EQ,    /* >= */
    T_LT,       /* < */
    T_LT_EQ,    /* <= */
    T_LSHIFT,   /* << */
    T_RSHIFT,   /* >> */

    /* Literals. */
    T_IDENT,    /* fnord */
    T_AT_IDENT, /* @sizeOf */
    T_STRING,   /* "fnord" */
    T_CHAR,     /* 'f' */
    T_NUMBER,   /* 42 */
    T_TRUE,     /* true */
    T_FALSE,    /* false */
    T_NIL,      /* nil */
    T_UNDEF,    /* undefined */

    /* Keywords. */
    T_IF,
    T_ELSE,
    T_RETURN,
    T_BREAK,
    T_CONTINUE,
    T_THROW,
    T_PANIC,
    T_WHILE,
    T_FOR,
    T_LOOP,
    T_TRY,
    T_CATCH,
    T_IN,
    T_FN,
    T_UNION,
    T_RECORD,
    T_DEFAULT,
    T_PUB,
    T_MUT,
    T_CONST,
    T_STATIC,
    T_LET,
    T_AND,
    T_OR,
    T_NOT,
    T_MATCH,
    T_CASE,
    T_USE,
    T_SUPER,  /* super */
    T_EXTERN, /* extern */
    T_MOD,    /* mod */
    T_AS,     /* as */
    T_ALIGN,  /* align */
    T_THROWS, /* throws */

    /* Type-related tokens. */
    T_COLON,       /* : */
    T_COLON_COLON, /* :: */
    T_ARROW,       /* -> */
    T_FAT_ARROW,   /* => */

    /* Builtin type names. */
    T_I8,
    T_I16,
    T_I32,
    T_I64,
    T_U8,
    T_U16,
    T_U32,
    T_U64,
    T_F32,
    T_BOOL,
    T_VOID,
    T_OPAQUE
} tokenclass_t;
117 +
118 +
/* Code location (resolved from a byte offset by scanner_get_location). */
typedef struct {
    const char *src;  /* Pointer into the source buffer at this location. */
    const char *file; /* File path. */
    u32         line; /* Line number (1-based). */
    u32         col;  /* Column number (1-based). */
} location_t;

/* Token structure. */
typedef struct {
    tokenclass_t cls;      /* Token class. */
    const char  *start;    /* Start of the token in the source code.
                            * For T_INVALID this points at a static error
                            * message instead of source text. */
    u32          length;   /* Byte length of token in source code. */
    u32          position; /* Byte offset in source. */
} token_t;

/* Scanner state. */
typedef struct {
    const char *file;   /* File path (for diagnostics). */
    const char *source; /* Start of source buffer (NUL-terminated). */
    const char *token;  /* Start of the token currently being scanned. */
    const char *cursor; /* Current position (next unconsumed char). */
} scanner_t;

/* Initialize scanner with source text.
 * `source` must be NUL-terminated and outlive the scanner. */
void scanner_init(scanner_t *s, const char *file, const char *source);

/* Get next token from scanner; T_EOF at end of input. */
token_t scanner_next(scanner_t *s);

/* Get line and column information for a byte offset (O(n) scan). */
location_t scanner_get_location(scanner_t *s, u32 position);
150 +
151 +
#endif
strings.c added +100 -0
1 +
#include <assert.h>
#include <string.h>

#include "strings.h"
#include "types.h"
5 +
6 +
/* Global string interning system: a fixed pool of NUL-terminated
 * strings. Interned pointers remain stable for the program's lifetime.
 * NOTE(review): `initialized` is set by strings_init() but never read
 * in this file -- confirm it is used elsewhere or drop it. */
static struct {
    char  strings[MAX_STRINGS][MAX_STRING_LEN];
    usize nstrings;
    bool  initialized;
} table = { 0 };
12 +
13 +
/* Initialize the global string interning system */
14 +
void strings_init(void) {
15 +
    table.nstrings    = 0;
16 +
    table.initialized = true;
17 +
}
18 +
19 +
/* Process escape sequences in a string */
20 +
static usize escape(const char *src, usize src_len, char *dst, usize dst_size) {
21 +
    usize dst_idx = 0;
22 +
23 +
    for (usize i = 0; i < src_len && dst_idx < dst_size - 1; i++) {
24 +
        if (src[i] == '\\' && i + 1 < src_len) {
25 +
            switch (src[i + 1]) {
26 +
            case 'n':
27 +
                dst[dst_idx++] = '\n';
28 +
                i++; /* Skip the next character */
29 +
                break;
30 +
            case 't':
31 +
                dst[dst_idx++] = '\t';
32 +
                i++;
33 +
                break;
34 +
            case 'r':
35 +
                dst[dst_idx++] = '\r';
36 +
                i++;
37 +
                break;
38 +
            case '\\':
39 +
                dst[dst_idx++] = '\\';
40 +
                i++;
41 +
                break;
42 +
            case '"':
43 +
                dst[dst_idx++] = '"';
44 +
                i++;
45 +
                break;
46 +
            case '0':
47 +
                dst[dst_idx++] = '\0';
48 +
                i++;
49 +
                break;
50 +
            default:
51 +
                /* Unknown escape sequence, keep the backslash */
52 +
                dst[dst_idx++] = src[i];
53 +
                break;
54 +
            }
55 +
        } else {
56 +
            dst[dst_idx++] = src[i];
57 +
        }
58 +
    }
59 +
    dst[dst_idx] = '\0';
60 +
61 +
    return dst_idx;
62 +
}
63 +
64 +
/* Find an existing interned string */
65 +
static const char *find(const char *str) {
66 +
    for (usize i = 0; i < table.nstrings; i++) {
67 +
        if (!strcmp(table.strings[i], str)) {
68 +
            return table.strings[i];
69 +
        }
70 +
    }
71 +
    return NULL;
72 +
}
73 +
74 +
/* Intern a raw string without escape sequence processing */
75 +
static const char *strings_alloc_raw(const char *str) {
76 +
    /* Check if already interned */
77 +
    const char *existing = find(str);
78 +
    if (existing) {
79 +
        return existing;
80 +
    }
81 +
82 +
    char *slot = table.strings[table.nstrings++];
83 +
    strncpy(slot, str, MAX_STRING_LEN);
84 +
85 +
    return slot;
86 +
}
87 +
88 +
/* Intern a string with escape sequence processing */
89 +
const char *strings_alloc_len(const char *str, u16 len) {
90 +
    /* Process escape sequences first */
91 +
    char escaped[MAX_STRING_LEN];
92 +
    escape(str, len, escaped, MAX_STRING_LEN);
93 +
94 +
    return strings_alloc_raw(escaped);
95 +
}
96 +
97 +
/* Intern a string with escape sequence processing */
98 +
const char *strings_alloc(const char *str) {
99 +
    return strings_alloc_len(str, strlen(str));
100 +
}
strings.h added +17 -0
1 +
#ifndef STRINGS_H
2 +
#define STRINGS_H
3 +
4 +
#include "types.h"
5 +
6 +
#define MAX_STRINGS    4096
7 +
#define MAX_STRING_LEN 64
8 +
9 +
/* Initialize the global string interning system */
10 +
void strings_init(void);
11 +
12 +
/* Intern a string with escape sequence processing */
13 +
const char *strings_alloc(const char *str);
14 +
/* Intern a string with escape sequence processing */
15 +
const char *strings_alloc_len(const char *str, u16 len);
16 +
17 +
#endif
symtab.c added +224 -0
1 +
#include <assert.h>
2 +
#include <string.h>
3 +
4 +
#include "ast.h"
5 +
#include "io.h"
6 +
#include "module.h"
7 +
#include "resolver.h"
8 +
#include "scanner.h"
9 +
#include "symtab.h"
10 +
11 +
/* Symbol storage across all scopes.
 * Fixed-capacity global pool; alloc_symbol() hands out slots. */
static symbol_t SYMBOLS[MAX_SYMBOLS] = { 0 };
static usize    NSYMBOLS             = 0;

/* Scope storage across all modules/functions.
 * Fixed-capacity global pool; symtab_scope() hands out slots. */
static scope_t SCOPES[MAX_SCOPES] = { 0 };
static usize   NSCOPES            = 0;
18 +
19 +
/* Map an AST node class to the kind of symbol-table entry it declares.
 * Node classes that cannot declare a symbol fall through to bail(). */
static symkind_t symbol_entry(node_t *n) {
    switch (n->cls) {
    case NODE_IDENT:
    case NODE_VAR:
    case NODE_STATIC:
    case NODE_PARAM:
    case NODE_UNION_VARIANT: /* This should use SYM_VARIANT */
    case NODE_RECORD_FIELD:
        return SYM_VARIABLE;
    case NODE_CONST:
        return SYM_CONSTANT;
    case NODE_FN:
        return SYM_FUNCTION;
    case NODE_UNION:
    case NODE_RECORD:
    case NODE_PTR:
        return SYM_TYPE;
    case NODE_MOD:
    case NODE_USE:
        return SYM_MODULE;
    case NODE_REF:
        /* A reference is classified by whatever it points at. */
        return symbol_entry(n->val.ref.target);
    /* Everything below is an expression/statement node that cannot
     * introduce a symbol; listed exhaustively (no default) so the
     * compiler warns when a new node class is added. */
    case NODE_RETURN:
    case NODE_BLOCK:
    case NODE_LOOP:
    case NODE_WHILE:
    case NODE_WHILE_LET:
    case NODE_FOR:
    case NODE_IF:
    case NODE_IF_LET:
    case NODE_IF_CASE:
    case NODE_GUARD_CASE:
    case NODE_GUARD_LET:
    case NODE_BINOP:
    case NODE_UNOP:
    case NODE_TYPE:
    case NODE_NUMBER:
    case NODE_CHAR:
    case NODE_NIL:
    case NODE_UNDEF:
    case NODE_STRING:
    case NODE_BOOL:
    case NODE_ASSIGN:
    case NODE_CALL:
    case NODE_CALL_ARG:
    case NODE_BUILTIN:
    case NODE_ATTRIBUTE:
    case NODE_BREAK:
    case NODE_RECORD_TYPE:
    case NODE_RECORD_LIT:
    case NODE_ARRAY_LIT:
    case NODE_ARRAY_REPEAT_LIT:
    case NODE_ACCESS:
    case NODE_RECORD_LIT_FIELD:
    case NODE_EXPR_STMT:
    case NODE_ARRAY_INDEX:
    case NODE_RANGE:
    case NODE_MATCH_CASE:
    case NODE_MATCH:
    case NODE_SCOPE:
    case NODE_MOD_BODY:
    case NODE_AS:
    case NODE_PLACEHOLDER:
    case NODE_ALIGN:
    case NODE_THROW:
    case NODE_TRY:
    case NODE_CATCH:
    case NODE_PANIC:
    case NODE_SUPER:
        break;
    }
    /* NOTE(review): if bail() can return, control falls off the end of
     * a non-void function (undefined behavior) -- confirm bail() is
     * noreturn. */
    bail("node of class %d cannot have a symbol table entry", n->cls);
}
92 +
93 +
/* Allocate a scope from the global pool and link it to its parent.
 * NOTE(review): if bail() is noreturn the `return NULL` is dead code;
 * otherwise callers must be prepared for a NULL scope -- confirm. */
scope_t *symtab_scope(scope_t *parent, module_t *mod) {
    if (NSCOPES >= MAX_SCOPES) {
        bail("scope overflow: too many scopes");
        return NULL;
    }
    scope_t *slot = &SCOPES[NSCOPES++];

    *slot = (scope_t){
        .mod      = mod,
        .parent   = parent,
        .symbols  = { 0 },
        .nsymbols = 0,
    };
    return slot;
}
109 +
110 +
/* Search for a symbol in the given scope only. */
111 +
symbol_t *symtab_scope_lookup(
112 +
    scope_t *s, const char *name, u16 length, symkind_t kind
113 +
) {
114 +
    for (usize i = 0; i < s->nsymbols; i++) {
115 +
        symbol_t *sym = s->symbols[i];
116 +
117 +
        if ((kind == SYM_ANY || sym->kind == kind) && sym->length == length &&
118 +
            memcmp(sym->name, name, length) == 0) {
119 +
            return sym;
120 +
        }
121 +
    }
122 +
    return NULL;
123 +
}
124 +
125 +
/* Search for a symbol from the current to the top-level scope. */
126 +
symbol_t *symtab_lookup(
127 +
    scope_t *s, const char *name, u16 length, symkind_t kind
128 +
) {
129 +
    for (scope_t *scope = s; scope != NULL; scope = scope->parent) {
130 +
        symbol_t *sym = NULL;
131 +
132 +
        if ((sym = symtab_scope_lookup(scope, name, length, kind)))
133 +
            return sym;
134 +
    }
135 +
    return NULL;
136 +
}
137 +
138 +
/* Add a symbol for identifier node `ident`, declaring node `n`, to
 * scope `s`. Returns false when the name (of the same kind) already
 * exists in this exact scope; shadowing of outer scopes is allowed
 * here, unlike symtab_insert() which searches the whole scope chain. */
bool symtab_add_ident(scope_t *s, node_t *ident, node_t *n) {
    symkind_t   kind   = symbol_entry(n);
    const char *name   = ident->val.ident.name;
    u16         length = ident->val.ident.length;

    if (symtab_scope_lookup(s, name, length, kind))
        return false; /* Variable already defined in this scope. */

    symbol_t *sym = alloc_symbol((symbol_t){
        .name   = name,
        .length = length,
        .node   = n,
        .kind   = kind,
        .scope  = s,
    });

    /* Nb. static variables are never exposed. */
    if (kind == SYM_FUNCTION || kind == SYM_CONSTANT) {
        /* Build the fully qualified name (module path + symbol name)
         * for top-level symbols. */

        module_path(sym->qualified, s->mod->qualified);
        module_qualify_str(sym->qualified, sym->name, sym->length);
    }
    assert(s->nsymbols < MAX_SCOPE_SYMBOLS);
    s->symbols[s->nsymbols++] = sym;
    n->sym                    = sym; /* Back-link from AST node to symbol. */

    return true;
}
168 +
169 +
/* Add a symbol to the current scope. */
170 +
bool symtab_insert(scope_t *s, const char *name, u16 length, node_t *n) {
171 +
    symkind_t kind = symbol_entry(n);
172 +
173 +
    if (symtab_lookup(s, name, length, kind)) {
174 +
        return false;
175 +
    }
176 +
    symbol_t *sym = alloc_symbol((symbol_t){
177 +
        .name   = name,
178 +
        .length = length,
179 +
        .node   = n,
180 +
        .kind   = kind,
181 +
    });
182 +
183 +
    assert(s->nsymbols < MAX_SCOPE_SYMBOLS);
184 +
    s->symbols[s->nsymbols++] = sym;
185 +
    n->sym                    = sym;
186 +
187 +
    return true;
188 +
}
189 +
190 +
/* Allocate a symbol. */
191 +
symbol_t *alloc_symbol(symbol_t sym) {
192 +
    if (NSYMBOLS >= MAX_SYMBOLS) {
193 +
        bail("symbol table overflow: too many symbols");
194 +
        return NULL;
195 +
    }
196 +
    symbol_t *slot = &SYMBOLS[NSYMBOLS++];
197 +
    *slot          = sym;
198 +
199 +
    strncpy(slot->qualified, slot->name, slot->length);
200 +
201 +
    return slot;
202 +
}
203 +
204 +
/* Add an imported symbol as an alias in the current scope. */
205 +
bool symtab_add_alias(scope_t *s, node_t *ident, symbol_t *original) {
206 +
    /* Check for conflicts */
207 +
    if (symtab_scope_lookup(
208 +
            s, ident->val.ident.name, ident->val.ident.length, original->kind
209 +
        )) {
210 +
        return false;
211 +
    }
212 +
    /* Create alias that points to the original symbol instead of copying it */
213 +
    assert(s->nsymbols < MAX_SCOPE_SYMBOLS);
214 +
    s->symbols[s->nsymbols++] = original;
215 +
    ident->sym                = original;
216 +
217 +
    return true;
218 +
}
219 +
220 +
/* Add a symbol directly to a scope. */
221 +
void symtab_add_symbol(scope_t *s, symbol_t *sym) {
222 +
    assert(s->nsymbols < MAX_SCOPE_SYMBOLS);
223 +
    s->symbols[s->nsymbols++] = sym;
224 +
}
symtab.h added +170 -0
1 +
#ifndef SYMTAB_H
2 +
#define SYMTAB_H
3 +
4 +
#include <stdio.h>
5 +
6 +
#include "limits.h"
7 +
#include "riscv.h"
8 +
#include "types.h"
9 +
10 +
struct symbol_t;
11 +
struct node_t;
12 +
struct scope_t;
13 +
struct type_t;
14 +
15 +
/* Type classes for values. Values are assigned explicitly so the
 * encoding stays stable if entries are reordered. */
typedef enum {
    TYPE_VOID   = 0, /* Special value for nodes without a type. */
    TYPE_I8     = 1, /* Primitive types. */
    TYPE_I16    = 2,
    TYPE_I32    = 3,
    TYPE_U8     = 4,
    TYPE_U16    = 5,
    TYPE_U32    = 6,
    TYPE_F32    = 7,
    TYPE_BOOL   = 8,
    TYPE_FN     = 9, /* Complex types. */
    TYPE_UNION  = 10,
    TYPE_RESULT = 11,
    TYPE_RECORD = 12,
    TYPE_ARRAY  = 13,
    TYPE_PTR    = 14,
    TYPE_SLICE  = 15,
    TYPE_OPT    = 16,
    TYPE_NEVER  = 17,
    TYPE_OPAQUE = 18
} typeclass_t;
37 +
38 +
/* Kinds of symbol table entry; selects the active arm of symbol_t.e. */
typedef enum {
    SYM_ANY = 0, /* Match any symbol type */
    SYM_VARIABLE,
    SYM_CONSTANT, /* Constant value */
    SYM_FIELD,    /* Record field. */
    SYM_VARIANT,  /* Union variant. */
    SYM_FUNCTION,
    SYM_TYPE,
    SYM_MODULE,
} symkind_t;
48 +
49 +
/* Symbol table scope. Scopes form a chain through `parent`. */
typedef struct scope_t {
    struct module_t *mod;    /* Module this scope belongs to. */
    struct scope_t  *parent; /* Enclosing scope; NULL at the top level. */
    struct symbol_t *symbols[MAX_SCOPE_SYMBOLS]; /* Fixed-capacity entries. */
    u16              nsymbols; /* Number of live entries in symbols[]. */
} scope_t;
56 +
57 +
/* Field, function or module attributes. Bit flags; combine with `|`. */
typedef enum {
    ATTRIB_NONE      = 0,
    ATTRIB_PUB       = 1 << 0,
    ATTRIB_DEFAULT   = 1 << 1,
    ATTRIB_EXTERN    = 1 << 2,
    ATTRIB_TEST      = 1 << 3,
    ATTRIB_INTRINSIC = 1 << 4,
} attrib_t;

/* Stack frame. */
typedef struct frame_t {
    i32 size; /* Maximum temporary stack usage (includes frame header). */
    i32 sp;   /* Temporary allocation cursor. */
} frame_t;
72 +
73 +
/* Memory location for a value: none, register, stack slot, immediate,
 * or static address (matches the arms of value_t.as). */
typedef enum { LOC_NONE = 0, LOC_REG, LOC_STACK, LOC_IMM, LOC_ADDR } memloc_t;

/* Offset from register, on stack. */
typedef struct {
    reg_t base;   /* Base register. */
    int   offset; /* Offset from base. */
} offset_t;

/* Address in memory with optional offset */
typedef struct {
    usize base;   /* Static base address. */
    int   offset; /* Offset from base. */
} addr_t;

/* Immediates. One arm per primitive value class. */
typedef union {
    bool b;
    i32  i;
    u32  u;
    f32  f;
} imm_t;
95 +
96 +
/* Value handled by code generator. The `loc` tag selects the active
 * member of `as`. */
typedef struct value_t {
    struct type_t *type; /* Static type of the value. */
    memloc_t       loc;  /* Where the value currently lives. */
    bool           temp; /* NOTE(review): presumably marks a temporary that
                          * can be released after use — confirm at call sites. */
    union {
        reg_t    reg; /* Register. */
        offset_t off; /* Offset from a base register. */
        imm_t    imm; /* Stored as an immediate. */
        addr_t   adr; /* Stored at a static address */
    } as;
} value_t;
108 +
109 +
/* A named entity tracked by the compiler. The `e` union is discriminated
 * by `kind`. */
typedef struct symbol_t {
    const char     *name;                          /* Symbol name. */
    u16             length;                        /* Symbol name length. */
    char            qualified[MAX_QUALIFIED_NAME]; /* Fully qualified name */
    struct node_t  *node;  /* Node defining the symbol. */
    struct scope_t *scope; /* Scope where the symbol is defined */
    symkind_t       kind;  /* Kind of symbol. */
    union {
        struct {                  /* Variable entry. */
            struct type_t *typ;   /* Variable type. */
            struct value_t val;   /* Variable memory value. */
            i32            align; /* Alignment override (bytes). */
        } var;

        struct {                   /* Field entry. */
            struct type_t *typ;    /* Field type. */
            i32            offset; /* Offset from start of object. */
        } field;

        struct {                /* Variant entry. */
            struct type_t *typ; /* Variant type. */
            i32            tag; /* Variant tag value. */
        } variant;

        struct {              /* Function entry. */
            scope_t *scope;   /* Inner scope. */
            usize    addr;    /* Address in memory. */
            frame_t  frame;   /* Stack frame information. */
            attrib_t attribs; /* Attributes */
            bool     used;    /* Whether function is called/used */
        } fn;

        struct { /* Type entry. */
            struct type_t *info;
        } typ;

        struct module_t *mod; /* Module entry. */
    } e;
} symbol_t;
148 +
149 +
/* Allocate a symbol from the global symbol pool. */
symbol_t *alloc_symbol(symbol_t);
/* Insert a symbol into a scope. Returns false if a same-kind symbol with
 * this name is already visible. */
bool symtab_insert(scope_t *s, const char *name, u16 length, struct node_t *n);
/* Insert an identifier into a scope. Returns false on redefinition in `s`. */
bool symtab_add_ident(scope_t *s, struct node_t *ident, struct node_t *n);
/* Add an imported symbol as an alias in the current scope. Returns false
 * if a same-kind symbol with this name already exists in `s`. */
bool symtab_add_alias(scope_t *s, struct node_t *ident, symbol_t *original);
/* Add a symbol directly to a scope. */
void symtab_add_symbol(scope_t *s, symbol_t *sym);
/* Lookup a symbol in the given scope only. */
symbol_t *symtab_scope_lookup(
    scope_t *s, const char *name, u16 length, symkind_t kind
);
/* Lookup a symbol in current and parent scopes. */
symbol_t *symtab_lookup(
    scope_t *s, const char *name, u16 length, symkind_t kind
);
/* Create a new scope with the given parent. */
scope_t *symtab_scope(scope_t *parent, struct module_t *mod);
169 +
170 +
#endif
types.h added +49 -0
1 +
#ifndef TYPES_H
#define TYPES_H

/* Fixed-width scalar aliases used throughout the compiler. */
typedef unsigned char      u8;
typedef unsigned short     u16;
typedef unsigned int       u32;
typedef unsigned long long u64;
typedef signed char        i8;
typedef short              i16;
typedef int                i32;
typedef long long          i64;
typedef float              f32;
typedef double             f64;

/* Pointer-sized integers (LP64: 64-bit; 32-bit on ILP32 targets). */
typedef unsigned long usize;
typedef long          isize;

/* NOTE(review): this typedef conflicts with C23's `bool` keyword and with
 * <stdbool.h>; it is fine for C99/C11 as long as <stdbool.h> is never
 * included in the same translation unit. */
typedef u8 bool;

#define true  1
#define false 0

#ifndef NULL
#define NULL ((void *)0)
#endif

#define U8_MIN  0
#define U8_MAX  255
#define U16_MIN 0
#define U16_MAX 65535
#define U32_MIN 0
#define U32_MAX 4294967295U

#define I8_MIN  (-128)
#define I8_MAX  127
#define I16_MIN (-32768)
#define I16_MAX 32767
/* Written as (-2147483647 - 1): the literal 2147483648 does not fit in an
 * int, so the previous `-2147483648` was a long long expression, which
 * breaks printf("%d") and signed/unsigned comparison semantics. This form
 * has type int and needs no extra parentheses. */
#define I32_MIN (-2147483647 - 1)
#define I32_MAX 2147483647

/* Use appropriate syntax for no-discard function attribute, depending
 * on C standard used. [[nodiscard]] is a C23 attribute; 201710L is C17,
 * which has no attribute syntax, so gate on the C23 version macro.
 * NOTE(review): identifiers starting with a double underscore are reserved
 * for the implementation; the name is kept for source compatibility. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define __nodiscard [[nodiscard]]
#else
#define __nodiscard __attribute__((warn_unused_result))
#endif

#endif
util.h added +59 -0
1 +
#ifndef UTIL_H
2 +
#define UTIL_H
3 +
4 +
#include <stdlib.h>
5 +
#include <string.h>
6 +
7 +
#include "types.h"
8 +
9 +
/**
10 +
 * Concatenate string src to the end of dst.
11 +
 */
12 +
static inline usize strlcat(char *dst, const char *src, usize dsize) {
13 +
    usize dst_len = strlen(dst);
14 +
    usize src_len = strlen(src);
15 +
16 +
    /* If destination buffer is already full or too small, can't append */
17 +
    if (dst_len >= dsize) {
18 +
        return dst_len + src_len; /* Return what length would be */
19 +
    }
20 +
21 +
    /* Calculate remaining space in destination */
22 +
    usize remaining = dsize - dst_len - 1; /* -1 for null terminator */
23 +
24 +
    if (remaining > 0) {
25 +
        /* Use strncat to append, but limit to remaining space */
26 +
        strncat(dst, src, remaining);
27 +
    }
28 +
    /* Return total length that would be created */
29 +
    return dst_len + src_len;
30 +
}
31 +
32 +
/* Copy a string safely into dst, truncating to at most maxlen - 1
 * characters and always NUL-terminating (when maxlen > 0).
 *
 * NOTE(review): the name shadows POSIX strndup(3), which has a different
 * signature (it allocates and returns char *); consider renaming. */
static inline void strndup(char *dst, const char *src, size_t maxlen) {
    /* Guard maxlen == 0: previously `maxlen - 1` wrapped to SIZE_MAX,
     * making copylen == strlen(src) and overflowing dst. */
    if (!dst || !src || maxlen == 0)
        return;

    size_t srclen  = strlen(src);
    size_t copylen = srclen < maxlen - 1 ? srclen : maxlen - 1;

    memcpy(dst, src, copylen);
    dst[copylen] = '\0';
}
43 +
44 +
/* Like `strstr` but find the _last_ occurrence of needle in haystack.
 * An empty needle matches at the end of haystack, mirroring strstr's
 * empty-needle convention. Returns NULL when there is no match. */
static inline char *strrstr(const char *haystack, const char *needle) {
    if (*needle == '\0')
        return (char *)haystack + strlen(haystack);

    char *last = NULL;

    /* Scan forward, remembering the most recent hit. */
    for (char *hit = strstr(haystack, needle); hit != NULL;
         hit = strstr(hit + 1, needle)) {
        last = hit;
    }
    return last;
}
58 +
59 +
#endif