Initial commit
7834d3a9d44fb48ae3d3c06da992922f3e46b580b3d92df36372081b2fe475c3
Radiance bootstrapping compiler written in C99. Licensed under the MIT license.
.clang-format
added
+19 -0
| 1 | + | IndentWidth: 4 |
|
| 2 | + | ColumnLimit: 80 |
|
| 3 | + | UseTab: Never |
|
| 4 | + | AllowShortBlocksOnASingleLine: Always # or use 'Empty' for only empty blocks |
|
| 5 | + | AllowShortFunctionsOnASingleLine: Empty |
|
| 6 | + | AlignConsecutiveMacros: AcrossComments |
|
| 7 | + | AlignAfterOpenBracket: BlockIndent |
|
| 8 | + | AlignConsecutiveBitFields: AcrossEmptyLinesAndComments |
|
| 9 | + | AlignConsecutiveDeclarations: |
|
| 10 | + | Enabled: true |
|
| 11 | + | AlignConsecutiveAssignments: |
|
| 12 | + | Enabled: true |
|
| 13 | + | AlignCompound: true |
|
| 14 | + | PadOperators: true |
|
| 15 | + | BinPackParameters: false |
|
| 16 | + | BinPackArguments: false |
|
| 17 | + | BreakAfterReturnType: Automatic |
|
| 18 | + | PenaltyReturnTypeOnItsOwnLine: 999 |
|
| 19 | + | Cpp11BracedListStyle: false |
.gitignore
added
+4 -0
| 1 | + | /bin |
|
| 2 | + | /radiance |
|
| 3 | + | /bootstrap |
|
| 4 | + | *.o |
.gitsigners
added
+1 -0
| 1 | + | alexis ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICpDRmIwBm4ajzW+METm9tBdK4CG2/v0qmO4bPfi+s+c alexis@radiant.computer |
LICENSE
added
+19 -0
| 1 | + | Copyright (c) 2025-2026 Radiant Computer (https://radiant.computer) |
|
| 2 | + | ||
| 3 | + | Permission is hereby granted, free of charge, to any person obtaining a copy of |
|
| 4 | + | this software and associated documentation files (the "Software"), to deal in |
|
| 5 | + | the Software without restriction, including without limitation the rights to |
|
| 6 | + | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
|
| 7 | + | of the Software, and to permit persons to whom the Software is furnished to do |
|
| 8 | + | so, subject to the following conditions: |
|
| 9 | + | ||
| 10 | + | The above copyright notice and this permission notice shall be included in all |
|
| 11 | + | copies or substantial portions of the Software. |
|
| 12 | + | ||
| 13 | + | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
| 14 | + | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
| 15 | + | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
| 16 | + | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
| 17 | + | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
| 18 | + | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
| 19 | + | SOFTWARE. |
Makefile
added
+39 -0
| 1 | + | # Builds the C bootstrap compiler. |
|
| 2 | + | ||
| 3 | + | CC := clang |
|
| 4 | + | CFLAGS := -fvisibility=hidden -std=c99 -Os \ |
|
| 5 | + | -Wall -Wextra -Wpedantic \ |
|
| 6 | + | -Wformat=2 -Wformat-security \ |
|
| 7 | + | -Wnull-dereference \ |
|
| 8 | + | -Wno-format-nonliteral \ |
|
| 9 | + | -Wcast-align \ |
|
| 10 | + | -Wunused -Wuninitialized \ |
|
| 11 | + | -Wmissing-field-initializers \ |
|
| 12 | + | -fno-common -fstack-protector-all \ |
|
| 13 | + | -mcmodel=medium |
|
| 14 | + | LDFLAGS := -fuse-ld=lld -Wl,-z,stack-size=33554432 |
|
| 15 | + | ||
| 16 | + | SRC := $(wildcard *.c) $(wildcard gen/*.c) |
|
| 17 | + | HDR := $(wildcard *.h) $(wildcard gen/*.h) |
|
| 18 | + | OBJ := $(SRC:.c=.o) |
|
| 19 | + | BIN := bin/radiance.s0 |
|
| 20 | + | ||
| 21 | + | default: $(BIN) |
|
| 22 | + | ||
| 23 | + | $(BIN): $(OBJ) |
|
| 24 | + | @echo "ld $^ => $@" |
|
| 25 | + | @mkdir -p bin |
|
| 26 | + | @$(CC) $(LDFLAGS) $(OBJ) -o $@ |
|
| 27 | + | @echo "ok $@" |
|
| 28 | + | ||
| 29 | + | %.o: %.c $(HDR) |
|
| 30 | + | @echo "cc $< => $@" |
|
| 31 | + | @$(CC) $(CFLAGS) -c $< -o $@ |
|
| 32 | + | ||
| 33 | + | clean: |
|
| 34 | + | @rm -f $(OBJ) $(BIN) |
|
| 35 | + | ||
| 36 | + | fmt: |
|
| 37 | + | git ls-files "*.c" "*.h" | xargs clang-format -i |
|
| 38 | + | ||
| 39 | + | .PHONY: default clean fmt |
README
added
+66 -0
| 1 | + | ||
| 2 | + | RADIANCE BOOTSTRAPPING COMPILER |
|
| 3 | + | ||
| 4 | + | Bootstrap compiler for the Radiance programming language, written in C99. |
|
| 5 | + | ||
| 6 | + | This is the Stage 0 compiler used to bootstrap Radiance from scratch. It |
|
| 7 | + | compiles Radiance source code to RISC-V machine code. Once built, it can be |
|
| 8 | + | used to compile the self-hosted Radiance compiler, which then recompiles |
|
| 9 | + | itself until a fixed point is reached. |
|
| 10 | + | ||
| 11 | + | BUILDING |
|
| 12 | + | ||
| 13 | + | Requirements: |
|
| 14 | + | ||
| 15 | + | * clang (or another C99 compiler) |
|
| 16 | + | * lld (the LLVM linker) |
|
| 17 | + | ||
| 18 | + | To build the compiler: |
|
| 19 | + | ||
| 20 | + | make |
|
| 21 | + | ||
| 22 | + | This produces `bin/radiance.s0`, the Stage 0 compiler binary. |
|
| 23 | + | You can specify a different C compiler with: |
|
| 24 | + | ||
| 25 | + | make CC=gcc |
|
| 26 | + | ||
| 27 | + | USAGE |
|
| 28 | + | ||
| 29 | + | bin/radiance.s0 [options] <input.rad> |
|
| 30 | + | ||
| 31 | + | Options: |
|
| 32 | + | ||
| 33 | + | -o <path> Output file path (required) |
|
| 34 | + | -mod <path> Register an additional module |
|
| 35 | + | ||
| 36 | + | COMPILER PIPELINE |
|
| 37 | + | ||
| 38 | + | The compiler is structured as a series of passes: |
|
| 39 | + | ||
| 40 | + | Scanner (scanner.c) Tokenizes source text |
|
| 41 | + | Parser (parser.c) Builds an AST from tokens |
|
| 42 | + | Desugar (desugar.c) Syntactic transformations on the AST |
|
| 43 | + | Resolver (resolver.c) Name resolution and type checking |
|
| 44 | + | Gen (gen.c) Code generation targeting RISC-V 64-bit |
|
| 45 | + | RISC-V (riscv.c) Instruction encoding |
|
| 46 | + | ||
| 47 | + | Supporting modules: |
|
| 48 | + | ||
| 49 | + | ast.c AST node definitions and utilities |
|
| 50 | + | module.c Module loading and management |
|
| 51 | + | symtab.c Symbol table |
|
| 52 | + | strings.c Interned string table |
|
| 53 | + | ralloc.c Region-based memory allocator |
|
| 54 | + | io.c File I/O helpers |
|
| 55 | + | options.c Command line option parsing |
|
| 56 | + | gen/emit.c Binary emission |
|
| 57 | + | gen/data.c Read-only and read-write data sections |
|
| 58 | + | ||
| 59 | + | FORMATTING |
|
| 60 | + | ||
| 61 | + | To format source files before committing, run `make fmt`. |
|
| 62 | + | ||
| 63 | + | LICENSE |
|
| 64 | + | ||
| 65 | + | Licensed under the MIT License, |
|
| 66 | + | Copyright (c) 2025-2026 Radiant Computer (https://radiant.computer) |
ast.c
added
+159 -0
| 1 | + | #include <stdio.h> |
|
| 2 | + | #include <stdlib.h> |
|
| 3 | + | #include <string.h> |
|
| 4 | + | ||
| 5 | + | #include "ast.h" |
|
| 6 | + | #include "module.h" |
|
| 7 | + | #include "parser.h" |
|
| 8 | + | #include "resolver.h" |
|
| 9 | + | #include "symtab.h" |
|
| 10 | + | ||
| 11 | + | /* String representations of value types. */ |
|
| 12 | + | const char *type_names[] = { |
|
| 13 | + | [TYPE_VOID] = "void", [TYPE_I8] = "i8", [TYPE_I16] = "i16", |
|
| 14 | + | [TYPE_I32] = "i32", [TYPE_U8] = "u8", [TYPE_U16] = "u16", |
|
| 15 | + | [TYPE_U32] = "u32", [TYPE_F32] = "f32", [TYPE_BOOL] = "bool", |
|
| 16 | + | [TYPE_FN] = "fn", [TYPE_UNION] = "union", [TYPE_RESULT] = "result", |
|
| 17 | + | [TYPE_RECORD] = "record", [TYPE_ARRAY] = "array", [TYPE_SLICE] = "slice", |
|
| 18 | + | [TYPE_PTR] = "pointer", [TYPE_OPT] = "optional", [TYPE_NEVER] = "never", |
|
| 19 | + | [TYPE_OPAQUE] = "opaque" |
|
| 20 | + | }; |
|
| 21 | + | ||
| 22 | + | /* String representations of node classes. */ |
|
| 23 | + | const char *node_names[] = { |
|
| 24 | + | [NODE_TYPE] = "TYPE", |
|
| 25 | + | [NODE_NUMBER] = "NUMBER", |
|
| 26 | + | [NODE_BOOL] = "BOOL", |
|
| 27 | + | [NODE_CHAR] = "CHAR", |
|
| 28 | + | [NODE_STRING] = "STRING", |
|
| 29 | + | [NODE_UNDEF] = "UNDEFINED", |
|
| 30 | + | [NODE_NIL] = "NIL", |
|
| 31 | + | [NODE_IDENT] = "IDENT", |
|
| 32 | + | [NODE_SUPER] = "SUPER", |
|
| 33 | + | [NODE_BINOP] = "BINOP", |
|
| 34 | + | [NODE_UNOP] = "UNOP", |
|
| 35 | + | [NODE_BLOCK] = "BLOCK", |
|
| 36 | + | [NODE_CALL] = "CALL", |
|
| 37 | + | [NODE_BUILTIN] = "BUILTIN", |
|
| 38 | + | [NODE_CALL_ARG] = "ARG", |
|
| 39 | + | [NODE_ASSIGN] = "ASSIGN", |
|
| 40 | + | [NODE_PTR] = "PTR", |
|
| 41 | + | [NODE_MOD] = "MOD", |
|
| 42 | + | [NODE_MOD_BODY] = "MODULE", |
|
| 43 | + | ||
| 44 | + | [NODE_PANIC] = "PANIC", |
|
| 45 | + | [NODE_RETURN] = "RETURN", |
|
| 46 | + | [NODE_THROW] = "THROW", |
|
| 47 | + | [NODE_WHILE] = "WHILE", |
|
| 48 | + | [NODE_WHILE_LET] = "WHILE_LET", |
|
| 49 | + | [NODE_FOR] = "FOR", |
|
| 50 | + | [NODE_LOOP] = "LOOP", |
|
| 51 | + | [NODE_TRY] = "TRY", |
|
| 52 | + | [NODE_IF] = "IF", |
|
| 53 | + | [NODE_IF_LET] = "IF_LET", |
|
| 54 | + | [NODE_IF_CASE] = "IF_CASE", |
|
| 55 | + | [NODE_GUARD_CASE] = "GUARD_CASE", |
|
| 56 | + | [NODE_GUARD_LET] = "GUARD_LET", |
|
| 57 | + | [NODE_MATCH] = "SWITCH", |
|
| 58 | + | [NODE_MATCH_CASE] = "SWITCH_CASE", |
|
| 59 | + | [NODE_CATCH] = "CATCH", |
|
| 60 | + | [NODE_FN] = "FUNCTION", |
|
| 61 | + | [NODE_VAR] = "VAR", |
|
| 62 | + | [NODE_CONST] = "CONST", |
|
| 63 | + | [NODE_STATIC] = "STATIC", |
|
| 64 | + | [NODE_REF] = "REF", |
|
| 65 | + | [NODE_PARAM] = "PARAM", |
|
| 66 | + | [NODE_ATTRIBUTE] = "ATTRIBUTE", |
|
| 67 | + | [NODE_BREAK] = "BREAK", |
|
| 68 | + | [NODE_EXPR_STMT] = "EXPR_STMT", |
|
| 69 | + | [NODE_RECORD] = "RECORD", |
|
| 70 | + | [NODE_RECORD_FIELD] = "RECORD_FIELD", |
|
| 71 | + | [NODE_RECORD_TYPE] = "RECORD_TYPE", |
|
| 72 | + | [NODE_UNION] = "UNION", |
|
| 73 | + | [NODE_UNION_VARIANT] = "UNION_VARIANT", |
|
| 74 | + | [NODE_RECORD_LIT] = "RECORD_LIT", |
|
| 75 | + | [NODE_RECORD_LIT_FIELD] = "RECORD_LIT_FIELD", |
|
| 76 | + | [NODE_ARRAY_LIT] = "ARRAY_LIT", |
|
| 77 | + | [NODE_ARRAY_INDEX] = "ARRAY_INDEX", |
|
| 78 | + | [NODE_ACCESS] = "ACCESS", |
|
| 79 | + | [NODE_SCOPE] = "SCOPE", |
|
| 80 | + | [NODE_AS] = "AS", |
|
| 81 | + | [NODE_RANGE] = "RANGE", |
|
| 82 | + | [NODE_USE] = "USE", |
|
| 83 | + | [NODE_PLACEHOLDER] = "PLACEHOLDER", |
|
| 84 | + | }; |
|
| 85 | + | ||
| 86 | + | /* Check if the node is a binary comparison operation. */ |
|
| 87 | + | bool node_is_comp(node_t *n) { |
|
| 88 | + | if (n->cls != NODE_BINOP) |
|
| 89 | + | return false; |
|
| 90 | + | ||
| 91 | + | binop_t op = n->val.binop.op; |
|
| 92 | + | ||
| 93 | + | return op == OP_EQ || op == OP_NE || op == OP_LT || op == OP_GT || |
|
| 94 | + | op == OP_LE || op == OP_GE || op == OP_AND || op == OP_OR; |
|
| 95 | + | } |
|
| 96 | + | ||
| 97 | + | /* Check if the node is a literal value. */ |
|
| 98 | + | bool node_is_literal(node_t *n) { |
|
| 99 | + | switch (n->cls) { |
|
| 100 | + | case NODE_ARRAY_LIT: |
|
| 101 | + | case NODE_ARRAY_REPEAT_LIT: |
|
| 102 | + | case NODE_RECORD_LIT: |
|
| 103 | + | case NODE_UNION_VARIANT: |
|
| 104 | + | case NODE_NUMBER: |
|
| 105 | + | case NODE_BOOL: |
|
| 106 | + | case NODE_STRING: |
|
| 107 | + | case NODE_CHAR: |
|
| 108 | + | case NODE_NIL: |
|
| 109 | + | case NODE_UNDEF: |
|
| 110 | + | return true; |
|
| 111 | + | default: |
|
| 112 | + | return false; |
|
| 113 | + | } |
|
| 114 | + | } |
|
| 115 | + | ||
| 116 | + | /* Check if the binary operator is a logical one. */ |
|
| 117 | + | bool op_is_logical(binop_t op) { |
|
| 118 | + | return op == OP_AND || op == OP_OR; |
|
| 119 | + | } |
|
| 120 | + | ||
| 121 | + | /* Access node pointers from a span. */ |
|
| 122 | + | node_t **nodespan_ptrs(parser_t *p, nodespan_t span) { |
|
| 123 | + | return &p->ptrs[span.idx]; |
|
| 124 | + | } |
|
| 125 | + | ||
| 126 | + | /* Allocate a new span from the parser's pointer pool. */ |
|
| 127 | + | nodespan_t nodespan_alloc(parser_t *p, u16 cap) { |
|
| 128 | + | nodespan_t span = { .idx = (u32)p->nptrs, .len = 0, .cap = cap }; |
|
| 129 | + | p->nptrs += cap; |
|
| 130 | + | return span; |
|
| 131 | + | } |
|
| 132 | + | ||
| 133 | + | /* Append a node to a span. Returns false on overflow. */ |
|
| 134 | + | bool nodespan_push(parser_t *p, nodespan_t *span, node_t *node) { |
|
| 135 | + | if (span->len >= span->cap) { |
|
| 136 | + | u16 newcap = span->cap == 0 ? 8 : span->cap * 2; |
|
| 137 | + | if (p->nptrs + newcap > MAX_NODEPTR_POOL) { |
|
| 138 | + | return false; |
|
| 139 | + | } |
|
| 140 | + | u32 newidx = (u32)p->nptrs; |
|
| 141 | + | for (u16 i = 0; i < span->len; i++) { |
|
| 142 | + | p->ptrs[newidx + i] = p->ptrs[span->idx + i]; |
|
| 143 | + | } |
|
| 144 | + | span->idx = newidx; |
|
| 145 | + | span->cap = newcap; |
|
| 146 | + | p->nptrs += newcap; |
|
| 147 | + | } |
|
| 148 | + | p->ptrs[span->idx + span->len++] = node; |
|
| 149 | + | return true; |
|
| 150 | + | } |
|
| 151 | + | ||
| 152 | + | /* Add a statement to a block node using module's parser. */ |
|
| 153 | + | void node_block_add_stmt(module_t *mod, node_t *block, node_t *stmt) { |
|
| 154 | + | nodespan_push(&mod->parser, &block->val.block.stmts, stmt); |
|
| 155 | + | } |
|
| 156 | + | ||
| 157 | + | void node_fn_add_param(parser_t *p, node_t *fn, node_t *param) { |
|
| 158 | + | nodespan_push(p, &fn->val.fn_decl.params, param); |
|
| 159 | + | } |
ast.h
added
+512 -0
| 1 | + | #ifndef AST_H |
|
| 2 | + | #define AST_H |
|
| 3 | + | ||
| 4 | + | #include <stdio.h> |
|
| 5 | + | ||
| 6 | + | #include "limits.h" |
|
| 7 | + | #include "symtab.h" |
|
| 8 | + | #include "types.h" |
|
| 9 | + | ||
| 10 | + | /* String representations of value types. */ |
|
| 11 | + | extern const char *type_names[]; |
|
| 12 | + | /* String representations of node classes. */ |
|
| 13 | + | extern const char *node_names[]; |
|
| 14 | + | ||
| 15 | + | /* Span into parser's pointer pool. Used instead of embedded arrays |
|
| 16 | + | * to keep node_t small. Access via nodespan_ptrs(). |
|
| 17 | + | typedef struct { |
|
| 18 | + | u32 idx; /* Starting index into parser_t.ptrs */ |
|
| 19 | + | u16 len; /* Number of elements */ |
|
| 20 | + | u16 cap; /* Capacity (for growable spans) */ |
|
| 21 | + | } nodespan_t; |
|
| 22 | + | ||
| 23 | + | /* Variable declaration */ |
|
| 24 | + | typedef struct { |
|
| 25 | + | struct node_t *ident; |
|
| 26 | + | struct node_t *type; |
|
| 27 | + | struct node_t *value; |
|
| 28 | + | struct node_t *align; |
|
| 29 | + | bool mutable; |
|
| 30 | + | } var_decl_t; |
|
| 31 | + | ||
| 32 | + | /* Constant declaration */ |
|
| 33 | + | typedef struct { |
|
| 34 | + | struct node_t *ident; |
|
| 35 | + | struct node_t *type; |
|
| 36 | + | struct node_t *value; |
|
| 37 | + | } const_decl_t; |
|
| 38 | + | ||
| 39 | + | typedef struct { |
|
| 40 | + | struct node_t *ident; |
|
| 41 | + | struct node_t *type; |
|
| 42 | + | struct node_t *value; |
|
| 43 | + | } static_decl_t; |
|
| 44 | + | ||
| 45 | + | /* Record type declaration. */ |
|
| 46 | + | typedef struct { |
|
| 47 | + | struct node_t *name; /* Name of the record */ |
|
| 48 | + | nodespan_t fields; /* Fields of the record */ |
|
| 49 | + | struct node_t *attribs; /* Attributes (e.g. pub) */ |
|
| 50 | + | bool tuple; /* Unlabeled fields */ |
|
| 51 | + | } record_decl_t; |
|
| 52 | + | ||
| 53 | + | typedef struct { |
|
| 54 | + | nodespan_t fields; /* Fields of the record */ |
|
| 55 | + | } record_type_t; |
|
| 56 | + | ||
| 57 | + | typedef struct { |
|
| 58 | + | struct node_t *name; /* Field name */ |
|
| 59 | + | struct node_t *value; /* Field value expression */ |
|
| 60 | + | } record_lit_field_t; |
|
| 61 | + | ||
| 62 | + | /* Union variant definition. */ |
|
| 63 | + | typedef struct { |
|
| 64 | + | struct node_t *name; /* Name of the variant */ |
|
| 65 | + | struct node_t *type; /* Optional payload type */ |
|
| 66 | + | i32 value; /* Value or tag for the variant */ |
|
| 67 | + | struct node_t *value_expr; /* Literal expression when explicitly assigned */ |
|
| 68 | + | } union_variant_t; |
|
| 69 | + | ||
| 70 | + | /* Union type declaration. */ |
|
| 71 | + | typedef struct { |
|
| 72 | + | struct node_t *name; /* Name of the union */ |
|
| 73 | + | nodespan_t variants; /* Variants of the union */ |
|
| 74 | + | struct node_t *attribs; /* Attributes (e.g. pub) */ |
|
| 75 | + | } union_decl_t; |
|
| 76 | + | ||
| 77 | + | /* Function definition. */ |
|
| 78 | + | typedef struct { |
|
| 79 | + | struct node_t *ident; |
|
| 80 | + | nodespan_t params; |
|
| 81 | + | struct node_t *return_type; |
|
| 82 | + | nodespan_t throws; |
|
| 83 | + | struct node_t *body; /* Will be NULL for `extern` functions */ |
|
| 84 | + | struct node_t *attribs; |
|
| 85 | + | } fn_decl_t; |
|
| 86 | + | ||
| 87 | + | /* Node type. */ |
|
| 88 | + | typedef enum { |
|
| 89 | + | /* Literals. */ |
|
| 90 | + | NODE_NUMBER, |
|
| 91 | + | NODE_CHAR, |
|
| 92 | + | NODE_STRING, |
|
| 93 | + | NODE_BOOL, |
|
| 94 | + | NODE_NIL, /* Nil literal */ |
|
| 95 | + | NODE_UNDEF, /* Undefined literal */ |
|
| 96 | + | NODE_RECORD_LIT, /* Record literal (e.g. Foo { x: 1, y: 2 }) */ |
|
| 97 | + | NODE_RECORD_LIT_FIELD, /* Record literal field */ |
|
| 98 | + | NODE_ARRAY_LIT, /* Array literal (e.g. [1, 2, 3]) */ |
|
| 99 | + | NODE_ARRAY_REPEAT_LIT, /* Array repeat literal (e.g. [0; 24]) */ |
|
| 100 | + | NODE_ARRAY_INDEX, /* Array indexing (e.g. arr[0]) */ |
|
| 101 | + | NODE_RANGE, /* Range expression (e.g. 0..5 or ..) */ |
|
| 102 | + | ||
| 103 | + | /* Expressions. */ |
|
| 104 | + | NODE_IDENT, |
|
| 105 | + | NODE_SUPER, /* `super` path segment */ |
|
| 106 | + | NODE_BINOP, |
|
| 107 | + | NODE_UNOP, /* Unary operation (e.g., not x) */ |
|
| 108 | + | NODE_BLOCK, |
|
| 109 | + | NODE_CALL, |
|
| 110 | + | NODE_BUILTIN, |
|
| 111 | + | NODE_CALL_ARG, /* Function call argument, optionally labeled (e.g. x: 1) */ |
|
| 112 | + | NODE_ACCESS, /* (eg. foo.bar) */ |
|
| 113 | + | NODE_SCOPE, /* (eg. foo::bar) */ |
|
| 114 | + | NODE_REF, /* (eg., &foo) */ |
|
| 115 | + | NODE_AS, /* (eg. x as i32) */ |
|
| 116 | + | ||
| 117 | + | /* Statements. */ |
|
| 118 | + | NODE_MOD, /* Module declaration: mod foo; */ |
|
| 119 | + | NODE_MOD_BODY, /* Module body content */ |
|
| 120 | + | NODE_RETURN, |
|
| 121 | + | NODE_THROW, |
|
| 122 | + | NODE_WHILE, |
|
| 123 | + | NODE_WHILE_LET, /* While let statement */ |
|
| 124 | + | NODE_LOOP, |
|
| 125 | + | NODE_TRY, |
|
| 126 | + | NODE_IF, |
|
| 127 | + | NODE_IF_LET, /* If let statement (eg. if let x in (opt) { ... }) */ |
|
| 128 | + | NODE_IF_CASE, /* If case statement (eg. if case Foo::Bar(x) = expr) */ |
|
| 129 | + | NODE_GUARD_CASE, /* let case else statement */ |
|
| 130 | + | NODE_GUARD_LET, /* let else statement */ |
|
| 131 | + | NODE_MATCH, /* Switch statement (eg. switch (x) { ... }) */ |
|
| 132 | + | NODE_MATCH_CASE, /* Switch case (eg. case 1 => { ... }) */ |
|
| 133 | + | NODE_CATCH, |
|
| 134 | + | NODE_FN, |
|
| 135 | + | NODE_VAR, /* Variable declaration */ |
|
| 136 | + | NODE_CONST, |
|
| 137 | + | NODE_STATIC, |
|
| 138 | + | NODE_PARAM, /* Function parameter */ |
|
| 139 | + | NODE_BREAK, |
|
| 140 | + | NODE_FOR, |
|
| 141 | + | NODE_ASSIGN, |
|
| 142 | + | NODE_EXPR_STMT, |
|
| 143 | + | NODE_USE, /* Module use declaration (e.g. use std::net) */ |
|
| 144 | + | NODE_PANIC, /* Panic statement */ |
|
| 145 | + | ||
| 146 | + | /* Type declarations. */ |
|
| 147 | + | NODE_TYPE, |
|
| 148 | + | NODE_PTR, /* Pointer type */ |
|
| 149 | + | NODE_ALIGN, |
|
| 150 | + | NODE_ATTRIBUTE, |
|
| 151 | + | NODE_RECORD, /* Record type declaration. */ |
|
| 152 | + | NODE_RECORD_FIELD, /* Field in record declaration */ |
|
| 153 | + | NODE_RECORD_TYPE, /* Anonymous record type */ |
|
| 154 | + | NODE_UNION, |
|
| 155 | + | NODE_UNION_VARIANT, |
|
| 156 | + | NODE_PLACEHOLDER, /* Placeholder `_` for unused bindings */ |
|
| 157 | + | } nodeclass_t; |
|
| 158 | + | ||
| 159 | + | /* Compiler built-ins */ |
|
| 160 | + | typedef enum { |
|
| 161 | + | BUILTIN_SIZE_OF, |
|
| 162 | + | BUILTIN_ALIGN_OF, |
|
| 163 | + | BUILTIN_SLICE_OF |
|
| 164 | + | } builtin_kind_t; |
|
| 165 | + | ||
| 166 | + | /* Binary operators. */ |
|
| 167 | + | typedef enum { |
|
| 168 | + | OP_ADD, |
|
| 169 | + | OP_SUB, |
|
| 170 | + | OP_MUL, |
|
| 171 | + | OP_DIV, |
|
| 172 | + | OP_MOD, |
|
| 173 | + | OP_EQ, |
|
| 174 | + | OP_NE, |
|
| 175 | + | OP_LT, |
|
| 176 | + | OP_GT, |
|
| 177 | + | OP_LE, |
|
| 178 | + | OP_GE, |
|
| 179 | + | OP_AND, |
|
| 180 | + | OP_OR, |
|
| 181 | + | OP_BAND, /* Bitwise AND (&) */ |
|
| 182 | + | OP_BOR, /* Bitwise OR (|) */ |
|
| 183 | + | OP_XOR, /* Bitwise XOR (^) */ |
|
| 184 | + | OP_SHL, /* Shift left (<<) */ |
|
| 185 | + | OP_SHR, /* Shift right (>>) */ |
|
| 186 | + | } binop_t; |
|
| 187 | + | ||
| 188 | + | /* Unary operators. */ |
|
| 189 | + | typedef enum { |
|
| 190 | + | OP_NOT, |
|
| 191 | + | OP_NEG, /* Numeric negation */ |
|
| 192 | + | OP_DEREF, |
|
| 193 | + | OP_BNOT, /* Bitwise NOT (~) */ |
|
| 194 | + | } unop_t; |
|
| 195 | + | ||
| 196 | + | /* A node in the abstract syntax tree. */ |
|
| 197 | + | typedef struct node_t { |
|
| 198 | + | /* Fields set by parser. */ |
|
| 199 | + | nodeclass_t cls; /* Node class. */ |
|
| 200 | + | u32 offset; /* Byte offset into source code for this node. */ |
|
| 201 | + | u32 length; /* Length of source code for this node. */ |
|
| 202 | + | const char *file; /* Source file this node was parsed from. */ |
|
| 203 | + | ||
| 204 | + | /* Fields set by resolver. */ |
|
| 205 | + | symbol_t *sym; /* Symbol table entry, if any. */ |
|
| 206 | + | struct type_t *type; /* Type context, if any. */ |
|
| 207 | + | ||
| 208 | + | /* Node value, set during parsing. */ |
|
| 209 | + | union { |
|
| 210 | + | /* Boolean literal. */ |
|
| 211 | + | bool bool_lit; |
|
| 212 | + | /* Character literal. */ |
|
| 213 | + | char char_lit; |
|
| 214 | + | ||
| 215 | + | struct { |
|
| 216 | + | const char *data; |
|
| 217 | + | u16 length; |
|
| 218 | + | } string_lit; |
|
| 219 | + | ||
| 220 | + | /* Expression statement. */ |
|
| 221 | + | struct node_t *expr_stmt; |
|
| 222 | + | ||
| 223 | + | /* Attribute node. */ |
|
| 224 | + | attrib_t attrib; |
|
| 225 | + | ||
| 226 | + | /* Type node. */ |
|
| 227 | + | struct { |
|
| 228 | + | /* The type represented by this node. If a complex type, |
|
| 229 | + | * additional information is found in the `info` union. */ |
|
| 230 | + | typeclass_t tclass; |
|
| 231 | + | struct node_t *elem_type; |
|
| 232 | + | ||
| 233 | + | union { |
|
| 234 | + | struct { |
|
| 235 | + | struct node_t *length; |
|
| 236 | + | } array; |
|
| 237 | + | ||
| 238 | + | struct { |
|
| 239 | + | bool mut; |
|
| 240 | + | } ptr; |
|
| 241 | + | ||
| 242 | + | struct { |
|
| 243 | + | bool mut; |
|
| 244 | + | } slice; |
|
| 245 | + | ||
| 246 | + | struct { |
|
| 247 | + | nodespan_t params; /* Parameter types */ |
|
| 248 | + | struct node_t *ret; /* Return type */ |
|
| 249 | + | nodespan_t throws; |
|
| 250 | + | } fn; |
|
| 251 | + | } info; |
|
| 252 | + | } type; |
|
| 253 | + | ||
| 254 | + | /* Reference node. */ |
|
| 255 | + | struct { |
|
| 256 | + | struct node_t *target; /* Target of the reference. */ |
|
| 257 | + | bool mut; /* If this is a mutable reference. */ |
|
| 258 | + | } ref; |
|
| 259 | + | ||
| 260 | + | /* Align expression */ |
|
| 261 | + | struct node_t *align; |
|
| 262 | + | ||
| 263 | + | /* Expression nodes. */ |
|
| 264 | + | struct { |
|
| 265 | + | const char *text; /* Original text of the number literal */ |
|
| 266 | + | u16 text_len; /* Length of the original text */ |
|
| 267 | + | imm_t value; /* Parsed value based on type context */ |
|
| 268 | + | } number; |
|
| 269 | + | ||
| 270 | + | struct { |
|
| 271 | + | const char *name; |
|
| 272 | + | u16 length; |
|
| 273 | + | } ident; |
|
| 274 | + | ||
| 275 | + | struct { |
|
| 276 | + | binop_t op; |
|
| 277 | + | struct node_t *left; |
|
| 278 | + | struct node_t *right; |
|
| 279 | + | } binop; |
|
| 280 | + | ||
| 281 | + | struct { |
|
| 282 | + | unop_t op; |
|
| 283 | + | struct node_t *expr; |
|
| 284 | + | } unop; |
|
| 285 | + | ||
| 286 | + | struct { |
|
| 287 | + | struct node_t *expr; /* Expression to cast */ |
|
| 288 | + | struct node_t *type; /* Target type */ |
|
| 289 | + | } as_expr; |
|
| 290 | + | ||
| 291 | + | struct { |
|
| 292 | + | nodespan_t stmts; |
|
| 293 | + | struct scope_t *scope; /* Scope for this block */ |
|
| 294 | + | } block; |
|
| 295 | + | ||
| 296 | + | struct { |
|
| 297 | + | struct node_t *callee; |
|
| 298 | + | nodespan_t args; |
|
| 299 | + | } call; |
|
| 300 | + | ||
| 301 | + | struct { |
|
| 302 | + | builtin_kind_t kind; |
|
| 303 | + | nodespan_t args; /* Arguments to builtin */ |
|
| 304 | + | } builtin; |
|
| 305 | + | ||
| 306 | + | struct { |
|
| 307 | + | struct node_t *label; /* Optional label, or `NULL`. */ |
|
| 308 | + | struct node_t *expr; /* Argument expression. */ |
|
| 309 | + | } call_arg; |
|
| 310 | + | ||
| 311 | + | struct { |
|
| 312 | + | struct node_t *lval; |
|
| 313 | + | struct node_t *rval; |
|
| 314 | + | } assign; |
|
| 315 | + | ||
| 316 | + | struct { |
|
| 317 | + | struct node_t *value; |
|
| 318 | + | } return_stmt; |
|
| 319 | + | ||
| 320 | + | struct { |
|
| 321 | + | struct node_t *expr; |
|
| 322 | + | } throw_stmt; |
|
| 323 | + | ||
| 324 | + | struct { |
|
| 325 | + | struct node_t *message; /* Optional message expression */ |
|
| 326 | + | } panic_stmt; |
|
| 327 | + | ||
| 328 | + | struct { |
|
| 329 | + | struct node_t *cond; |
|
| 330 | + | struct node_t *body; |
|
| 331 | + | struct node_t *rbranch; /* Optional else clause */ |
|
| 332 | + | } while_stmt; |
|
| 333 | + | ||
| 334 | + | struct { |
|
| 335 | + | struct node_t *var; /* Variable to bind */ |
|
| 336 | + | struct node_t *expr; /* Optional expression to unwrap */ |
|
| 337 | + | struct node_t *guard; /* Optional guard condition */ |
|
| 338 | + | struct node_t *body; /* Loop body */ |
|
| 339 | + | struct node_t *rbranch; /* Optional else clause */ |
|
| 340 | + | struct scope_t *scope; /* Holds the bound variable */ |
|
| 341 | + | } while_let_stmt; |
|
| 342 | + | ||
| 343 | + | struct { |
|
| 344 | + | struct node_t *var; |
|
| 345 | + | struct node_t *idx; /* Optional index variable */ |
|
| 346 | + | struct node_t *iter; |
|
| 347 | + | struct node_t *body; |
|
| 348 | + | struct node_t *rbranch; /* Optional else clause */ |
|
| 349 | + | struct scope_t *scope; /* Holds the temporary variable. */ |
|
| 350 | + | } for_stmt; |
|
| 351 | + | ||
| 352 | + | struct { |
|
| 353 | + | struct node_t *body; |
|
| 354 | + | } loop_stmt; |
|
| 355 | + | ||
| 356 | + | struct { |
|
| 357 | + | struct node_t *expr; /* Expression guarded by try */ |
|
| 358 | + | struct node_t *catch_expr; /* Fallback expression for catch */ |
|
| 359 | + | nodespan_t handlers; |
|
| 360 | + | bool panic; /* Emit ebreak on error when true */ |
|
| 361 | + | bool optional; /* Return optional instead of propagating */ |
|
| 362 | + | } try_expr; |
|
| 363 | + | ||
| 364 | + | struct { |
|
| 365 | + | struct node_t *cond; |
|
| 366 | + | struct node_t *lbranch; |
|
| 367 | + | struct node_t *rbranch; |
|
| 368 | + | } if_stmt; |
|
| 369 | + | ||
| 370 | + | struct { |
|
| 371 | + | struct node_t *var; /* Variable to bind */ |
|
| 372 | + | struct node_t *expr; /* Optional expression to unwrap */ |
|
| 373 | + | struct node_t *guard; /* Optional guard */ |
|
| 374 | + | struct node_t *lbranch; /* Then branch */ |
|
| 375 | + | struct node_t *rbranch; /* Else branch (optional) */ |
|
| 376 | + | struct scope_t *scope; /* Holds the bound variable */ |
|
| 377 | + | } if_let_stmt; |
|
| 378 | + | ||
| 379 | + | struct { |
|
| 380 | + | struct node_t *pattern; /* Pattern to match */ |
|
| 381 | + | struct node_t *expr; /* Expression being tested */ |
|
| 382 | + | struct node_t *guard; /* Optional guard */ |
|
| 383 | + | struct node_t *lbranch; /* Then branch */ |
|
| 384 | + | struct node_t *rbranch; /* Else branch (optional) */ |
|
| 385 | + | } if_case_stmt; |
|
| 386 | + | ||
| 387 | + | struct { |
|
| 388 | + | struct node_t *pattern; /* Pattern to match */ |
|
| 389 | + | struct node_t *expr; /* Expression being tested */ |
|
| 390 | + | struct node_t *guard; /* Optional guard */ |
|
| 391 | + | struct node_t *rbranch; /* Else branch */ |
|
| 392 | + | } guard_case_stmt; |
|
| 393 | + | ||
| 394 | + | struct { |
|
| 395 | + | struct node_t *var; /* Bound variable */ |
|
| 396 | + | struct node_t *expr; /* Optional expression to unwrap */ |
|
| 397 | + | struct node_t *rbranch; /* Else branch */ |
|
| 398 | + | } guard_let_stmt; |
|
| 399 | + | ||
| 400 | + | struct { |
|
| 401 | + | struct node_t *expr; |
|
| 402 | + | nodespan_t cases; /* Switch cases */ |
|
| 403 | + | } match_stmt; |
|
| 404 | + | ||
| 405 | + | struct { |
|
| 406 | + | nodespan_t patterns; |
|
| 407 | + | struct node_t *body; /* Case body */ |
|
| 408 | + | struct node_t *guard; /* Optional guard condition */ |
|
| 409 | + | struct node_t *variable; /* Bound variable */ |
|
| 410 | + | } match_case; |
|
| 411 | + | ||
| 412 | + | struct { |
|
| 413 | + | struct node_t *binding; /* Optional bound identifier */ |
|
| 414 | + | struct node_t *body; /* Catch handler body */ |
|
| 415 | + | struct scope_t *scope; /* Scope for the bound variable */ |
|
| 416 | + | } catch_clause; |
|
| 417 | + | ||
| 418 | + | var_decl_t var; |
|
| 419 | + | const_decl_t constant; |
|
| 420 | + | static_decl_t static_decl; |
|
| 421 | + | ||
| 422 | + | struct { |
|
| 423 | + | struct node_t *ident; |
|
| 424 | + | struct node_t *type; |
|
| 425 | + | } param; |
|
| 426 | + | ||
| 427 | + | union_decl_t union_decl; |
|
| 428 | + | union_variant_t union_variant; |
|
| 429 | + | ||
| 430 | + | struct { |
|
| 431 | + | struct node_t *type; /* Type identifier */ |
|
| 432 | + | nodespan_t fields; /* Field initializers */ |
|
| 433 | + | bool etc; /* Pattern discards remaining fields */ |
|
| 434 | + | } record_lit; |
|
| 435 | + | ||
| 436 | + | /* Field initialization in a record literal. */ |
|
| 437 | + | record_lit_field_t record_lit_field; |
|
| 438 | + | ||
| 439 | + | /* Record declarations. */ |
|
| 440 | + | record_decl_t record_decl; |
|
| 441 | + | /* Anonymous record */ |
|
| 442 | + | record_type_t record_type; |
|
| 443 | + | ||
| 444 | + | /* Array indexing and record field access, eg. `x.y` or `x[y]`. */ |
|
| 445 | + | struct { |
|
| 446 | + | struct node_t *lval; |
|
| 447 | + | struct node_t *rval; |
|
| 448 | + | } access; |
|
| 449 | + | ||
| 450 | + | /* Range expression, e.g. `0..5` or `..` */ |
|
| 451 | + | struct { |
|
| 452 | + | struct node_t *start; /* Start expression (NULL if omitted) */ |
|
| 453 | + | struct node_t *end; /* End expression (NULL if omitted) */ |
|
| 454 | + | } range; |
|
| 455 | + | ||
| 456 | + | /* Array literal, e.g. `[1, 2, 3]` */ |
|
| 457 | + | struct { |
|
| 458 | + | nodespan_t elems; |
|
| 459 | + | } array_lit; |
|
| 460 | + | ||
| 461 | + | /* Array repeat literal, e.g. `[0; 24]` */ |
|
| 462 | + | struct { |
|
| 463 | + | struct node_t *value; /* Value to repeat */ |
|
| 464 | + | struct node_t *count; /* Number of repetitions */ |
|
| 465 | + | } array_repeat_lit; |
|
| 466 | + | ||
| 467 | + | /* Use declaration, e.g. `use std::net::tcp;` */ |
|
| 468 | + | struct { |
|
| 469 | + | struct node_t *path; /* Path to the module being used */ |
|
| 470 | + | struct node_t *attribs; /* Attributes (e.g. pub) */ |
|
| 471 | + | bool wildcard; /* Whether this is a wildcard import */ |
|
| 472 | + | } use_decl; |
|
| 473 | + | ||
| 474 | + | /* Module declaration, e.g. `mod util;` */ |
|
| 475 | + | struct { |
|
| 476 | + | struct node_t *ident; /* Name of the module */ |
|
| 477 | + | struct node_t *attribs; /* Attributes (e.g. pub) */ |
|
| 478 | + | } mod_decl; |
|
| 479 | + | ||
| 480 | + | fn_decl_t fn_decl; |
|
| 481 | + | } val; |
|
| 482 | + | } node_t; |
|
| 483 | + | ||
| 484 | + | /* Forward declaration for parser_t */ |
|
| 485 | + | struct parser_t; |
|
| 486 | + | ||
| 487 | + | /* Check if the node is a binary comparison operation. */ |
|
| 488 | + | bool node_is_comp(node_t *n); |
|
| 489 | + | /* Check if the node is a literal */ |
|
| 490 | + | bool node_is_literal(node_t *n); |
|
| 491 | + | /* Check if the binary operator is a logical one. */ |
|
| 492 | + | bool op_is_logical(binop_t); |
|
| 493 | + | /* Add a parameter to a function. */ |
|
| 494 | + | void node_fn_add_param(struct parser_t *p, node_t *fn, node_t *param); |
|
| 495 | + | ||
| 496 | + | /* Access node pointers from a span. Requires parser context. */ |
|
| 497 | + | struct parser_t; |
|
| 498 | + | node_t **nodespan_ptrs(struct parser_t *p, nodespan_t span); |
|
| 499 | + | ||
| 500 | + | /* Allocate a new span from the parser's pointer pool. */ |
|
| 501 | + | nodespan_t nodespan_alloc(struct parser_t *p, u16 cap); |
|
| 502 | + | ||
| 503 | + | /* Append a node to a span, growing if needed. Returns false on overflow. */ |
|
| 504 | + | bool nodespan_push(struct parser_t *p, nodespan_t *span, node_t *node); |
|
| 505 | + | ||
| 506 | + | /* Forward declaration for module_t */ |
|
| 507 | + | struct module_t; |
|
| 508 | + | ||
| 509 | + | /* Add a statement to a block node using module's parser. */ |
|
| 510 | + | void node_block_add_stmt(struct module_t *mod, node_t *block, node_t *stmt); |
|
| 511 | + | ||
| 512 | + | #endif |
desugar.c
added
+744 -0
| 1 | + | #include <stdio.h> |
|
| 2 | + | #include <stdlib.h> |
|
| 3 | + | #include <string.h> |
|
| 4 | + | ||
| 5 | + | #include "ast.h" |
|
| 6 | + | #include "desugar.h" |
|
| 7 | + | #include "io.h" |
|
| 8 | + | #include "module.h" |
|
| 9 | + | #include "parser.h" |
|
| 10 | + | #include "resolver.h" |
|
| 11 | + | #include "symtab.h" |
|
| 12 | + | ||
| 13 | + | /* |
|
| 14 | + | * AST desugaring pass |
|
| 15 | + | * |
|
| 16 | + | * This pass runs before resolving and transforms the AST. |
|
| 17 | + | */ |
|
| 18 | + | ||
| 19 | + | /* Forward declarations */ |
|
| 20 | + | static node_t *desugar_node(desugar_t *d, module_t *mod, node_t *n); |
|
| 21 | + | static node_t *desugar_and_operator(desugar_t *d, module_t *mod, node_t *binop); |
|
| 22 | + | static node_t *desugar_or_operator(desugar_t *d, module_t *mod, node_t *binop); |
|
| 23 | + | static node_t *desugar_guard_stmt( |
|
| 24 | + | desugar_t *d, module_t *mod, node_t *block, usize ix, node_t *guard |
|
| 25 | + | ); |
|
| 26 | + | static node_t *desugar_block(desugar_t *d, module_t *mod, node_t *block); |
|
| 27 | + | ||
| 28 | + | /* Allocate a new AST node using the module's parser */ |
|
| 29 | + | static node_t *node(module_t *mod, nodeclass_t cls, node_t *original) { |
|
| 30 | + | parser_t *p = &mod->parser; |
|
| 31 | + | if (p->nnodes >= MAX_NODES) { |
|
| 32 | + | bail("maximum number of AST nodes reached"); |
|
| 33 | + | } |
|
| 34 | + | node_t *n = &p->nodes[p->nnodes++]; |
|
| 35 | + | n->cls = cls; |
|
| 36 | + | n->type = NULL; |
|
| 37 | + | n->sym = NULL; |
|
| 38 | + | n->offset = original ? original->offset : 0; |
|
| 39 | + | n->length = original ? original->length : 0; |
|
| 40 | + | ||
| 41 | + | return n; |
|
| 42 | + | } |
|
| 43 | + | ||
| 44 | + | static node_t *node_bool(module_t *mod, bool b, node_t *loc) { |
|
| 45 | + | node_t *lit = node(mod, NODE_BOOL, loc); |
|
| 46 | + | lit->val.bool_lit = b; |
|
| 47 | + | ||
| 48 | + | return lit; |
|
| 49 | + | } |
|
| 50 | + | ||
| 51 | + | /* Create an empty block node. */ |
|
| 52 | + | static node_t *node_block(module_t *mod, node_t *original) { |
|
| 53 | + | node_t *block = node(mod, NODE_BLOCK, original); |
|
| 54 | + | block->val.block.stmts = (nodespan_t){ 0 }; |
|
| 55 | + | block->val.block.scope = NULL; |
|
| 56 | + | ||
| 57 | + | return block; |
|
| 58 | + | } |
|
| 59 | + | ||
| 60 | + | /* Transform a while-let loop into: |
|
| 61 | + | * |
|
| 62 | + | * loop { |
|
| 63 | + | * if let var = expr; guard { |
|
| 64 | + | * body; |
|
| 65 | + | * } else { |
|
| 66 | + | * rbranch; |
|
| 67 | + | * break; |
|
| 68 | + | * } |
|
| 69 | + | * } |
|
| 70 | + | */ |
|
| 71 | + | static node_t *desugar_while_let( |
|
| 72 | + | desugar_t *d, module_t *mod, node_t *while_let_node |
|
| 73 | + | ) { |
|
| 74 | + | /* Create the loop node */ |
|
| 75 | + | node_t *loop_node = node(mod, NODE_LOOP, while_let_node); |
|
| 76 | + | /* Create the if-let node */ |
|
| 77 | + | node_t *if_let_node = node(mod, NODE_IF_LET, while_let_node); |
|
| 78 | + | /* Create the break statement */ |
|
| 79 | + | node_t *break_node = node(mod, NODE_BREAK, while_let_node); |
|
| 80 | + | ||
| 81 | + | /* Handle the else clause */ |
|
| 82 | + | node_t *else_clause = node_block(mod, while_let_node); |
|
| 83 | + | if (while_let_node->val.while_let_stmt.rbranch) { |
|
| 84 | + | node_block_add_stmt( |
|
| 85 | + | mod, |
|
| 86 | + | else_clause, |
|
| 87 | + | desugar_node(d, mod, while_let_node->val.while_let_stmt.rbranch) |
|
| 88 | + | ); |
|
| 89 | + | } |
|
| 90 | + | node_block_add_stmt(mod, else_clause, break_node); |
|
| 91 | + | ||
| 92 | + | node_t *loop_body = node_block(mod, while_let_node); |
|
| 93 | + | node_block_add_stmt(mod, loop_body, if_let_node); |
|
| 94 | + | ||
| 95 | + | /* Set up the if-let statement */ |
|
| 96 | + | if_let_node->val.if_let_stmt.var = while_let_node->val.while_let_stmt.var; |
|
| 97 | + | if_let_node->val.if_let_stmt.expr = |
|
| 98 | + | desugar_node(d, mod, while_let_node->val.while_let_stmt.expr); |
|
| 99 | + | if_let_node->val.if_let_stmt.guard = |
|
| 100 | + | while_let_node->val.while_let_stmt.guard |
|
| 101 | + | ? desugar_node(d, mod, while_let_node->val.while_let_stmt.guard) |
|
| 102 | + | : NULL; |
|
| 103 | + | if_let_node->val.if_let_stmt.lbranch = |
|
| 104 | + | desugar_node(d, mod, while_let_node->val.while_let_stmt.body); |
|
| 105 | + | if_let_node->val.if_let_stmt.rbranch = else_clause; |
|
| 106 | + | if_let_node->val.if_let_stmt.scope = |
|
| 107 | + | NULL; /* Will be set during resolving */ |
|
| 108 | + | ||
| 109 | + | /* Set up the loop statement */ |
|
| 110 | + | loop_node->val.loop_stmt.body = loop_body; |
|
| 111 | + | ||
| 112 | + | return loop_node; |
|
| 113 | + | } |
|
| 114 | + | ||
| 115 | + | /* Transform a while loop into: |
|
| 116 | + | * |
|
| 117 | + | * loop { |
|
| 118 | + | * if (condition) { |
|
| 119 | + | * body; |
|
| 120 | + | * } else { |
|
| 121 | + | * else_clause; |
|
| 122 | + | * break |
|
| 123 | + | * } |
|
| 124 | + | * } |
|
| 125 | + | */ |
|
| 126 | + | static node_t *desugar_while(desugar_t *d, module_t *mod, node_t *while_node) { |
|
| 127 | + | /* Create the loop node */ |
|
| 128 | + | node_t *loop_node = node(mod, NODE_LOOP, while_node); |
|
| 129 | + | /* Create the condition check */ |
|
| 130 | + | node_t *if_node = node(mod, NODE_IF, while_node); |
|
| 131 | + | /* Create the break statement */ |
|
| 132 | + | node_t *break_node = node(mod, NODE_BREAK, while_node); |
|
| 133 | + | ||
| 134 | + | /* Handle the else clause */ |
|
| 135 | + | node_t *else_clause = node_block(mod, while_node); |
|
| 136 | + | if (while_node->val.while_stmt.rbranch) { |
|
| 137 | + | node_block_add_stmt( |
|
| 138 | + | mod, |
|
| 139 | + | else_clause, |
|
| 140 | + | desugar_node(d, mod, while_node->val.while_stmt.rbranch) |
|
| 141 | + | ); |
|
| 142 | + | } |
|
| 143 | + | node_block_add_stmt(mod, else_clause, break_node); |
|
| 144 | + | ||
| 145 | + | node_t *loop_body = node_block(mod, while_node->val.while_stmt.body); |
|
| 146 | + | node_block_add_stmt(mod, loop_body, if_node); |
|
| 147 | + | ||
| 148 | + | /* Set up the if statement. */ |
|
| 149 | + | if_node->val.if_stmt.cond = |
|
| 150 | + | desugar_node(d, mod, while_node->val.while_stmt.cond); |
|
| 151 | + | if_node->val.if_stmt.lbranch = |
|
| 152 | + | desugar_node(d, mod, while_node->val.while_stmt.body); |
|
| 153 | + | if_node->val.if_stmt.rbranch = else_clause; |
|
| 154 | + | ||
| 155 | + | /* Set up the loop statement */ |
|
| 156 | + | loop_node->val.loop_stmt.body = loop_body; |
|
| 157 | + | ||
| 158 | + | return loop_node; |
|
| 159 | + | } |
|
| 160 | + | ||
| 161 | + | static node_t *node_ident(module_t *mod, const char *name, node_t *loc) { |
|
| 162 | + | node_t *ident = node(mod, NODE_IDENT, loc); |
|
| 163 | + | ident->val.ident.name = name; |
|
| 164 | + | ident->val.ident.length = strlen(name); |
|
| 165 | + | ||
| 166 | + | return ident; |
|
| 167 | + | } |
|
| 168 | + | ||
| 169 | + | static node_t *node_var( |
|
| 170 | + | module_t *mod, |
|
| 171 | + | node_t *ident, |
|
| 172 | + | node_t *typ, |
|
| 173 | + | node_t *val, |
|
| 174 | + | bool mut, |
|
| 175 | + | node_t *loc |
|
| 176 | + | ) { |
|
| 177 | + | node_t *var = node(mod, NODE_VAR, loc); |
|
| 178 | + | ||
| 179 | + | var->val.var.ident = ident; |
|
| 180 | + | var->val.var.type = typ; |
|
| 181 | + | var->val.var.value = val; |
|
| 182 | + | var->val.var.mutable = mut; |
|
| 183 | + | ||
| 184 | + | return var; |
|
| 185 | + | } |
|
| 186 | + | ||
| 187 | + | static node_t *node_number(module_t *mod, const char *text, node_t *loc) { |
|
| 188 | + | node_t *n = node(mod, NODE_NUMBER, loc); |
|
| 189 | + | n->val.number.text = text; |
|
| 190 | + | n->val.number.text_len = strlen(text); |
|
| 191 | + | ||
| 192 | + | return n; |
|
| 193 | + | } |
|
| 194 | + | ||
| 195 | + | static node_t *node_type(module_t *mod, typeclass_t tc, node_t *loc) { |
|
| 196 | + | node_t *typ = node(mod, NODE_TYPE, loc); |
|
| 197 | + | typ->val.type.tclass = tc; |
|
| 198 | + | ||
| 199 | + | return typ; |
|
| 200 | + | } |
|
| 201 | + | ||
| 202 | + | static node_t *node_access( |
|
| 203 | + | module_t *mod, node_t *lval, node_t *rval, node_t *loc |
|
| 204 | + | ) { |
|
| 205 | + | node_t *access = node(mod, NODE_ACCESS, loc); |
|
| 206 | + | access->val.access.lval = lval; |
|
| 207 | + | access->val.access.rval = rval; |
|
| 208 | + | ||
| 209 | + | return access; |
|
| 210 | + | } |
|
| 211 | + | ||
| 212 | + | static node_t *node_access_str( |
|
| 213 | + | module_t *mod, node_t *lval, const char *field, node_t *loc |
|
| 214 | + | ) { |
|
| 215 | + | return node_access(mod, lval, node_ident(mod, field, loc), loc); |
|
| 216 | + | } |
|
| 217 | + | ||
| 218 | + | static node_t *node_binop( |
|
| 219 | + | module_t *mod, binop_t op, node_t *left, node_t *right, node_t *loc |
|
| 220 | + | ) { |
|
| 221 | + | node_t *binop = node(mod, NODE_BINOP, loc); |
|
| 222 | + | binop->val.binop.op = op; |
|
| 223 | + | binop->val.binop.left = left; |
|
| 224 | + | binop->val.binop.right = right; |
|
| 225 | + | ||
| 226 | + | return binop; |
|
| 227 | + | } |
|
| 228 | + | ||
| 229 | + | static node_t *node_increment( |
|
| 230 | + | module_t *mod, node_t *lval_ident, node_t *expr_ident, node_t *loc |
|
| 231 | + | ) { |
|
| 232 | + | node_t *assign = node(mod, NODE_ASSIGN, loc); |
|
| 233 | + | assign->val.assign.lval = lval_ident; |
|
| 234 | + | assign->val.assign.rval = |
|
| 235 | + | node_binop(mod, OP_ADD, expr_ident, node_number(mod, "1", loc), loc); |
|
| 236 | + | ||
| 237 | + | return assign; |
|
| 238 | + | } |
|
| 239 | + | ||
| 240 | + | static node_t *node_match(module_t *mod, node_t *expr, node_t *loc) { |
|
| 241 | + | node_t *swtch = node(mod, NODE_MATCH, loc); |
|
| 242 | + | swtch->val.match_stmt.expr = expr; |
|
| 243 | + | swtch->val.match_stmt.cases = (nodespan_t){ 0 }; |
|
| 244 | + | ||
| 245 | + | return swtch; |
|
| 246 | + | } |
|
| 247 | + | ||
| 248 | + | static node_t *node_match_case( |
|
| 249 | + | module_t *mod, node_t *pattern, node_t *guard, node_t *body, node_t *loc |
|
| 250 | + | ) { |
|
| 251 | + | node_t *swtch_case = node(mod, NODE_MATCH_CASE, loc); |
|
| 252 | + | swtch_case->val.match_case.patterns = (nodespan_t){ 0 }; |
|
| 253 | + | if (pattern != NULL) { |
|
| 254 | + | nodespan_push( |
|
| 255 | + | &mod->parser, &swtch_case->val.match_case.patterns, pattern |
|
| 256 | + | ); |
|
| 257 | + | } |
|
| 258 | + | swtch_case->val.match_case.body = body; |
|
| 259 | + | swtch_case->val.match_case.guard = guard; |
|
| 260 | + | swtch_case->val.match_case.variable = NULL; |
|
| 261 | + | ||
| 262 | + | return swtch_case; |
|
| 263 | + | } |
|
| 264 | + | ||
| 265 | + | /* |
|
| 266 | + | * Transform guard statements into their desugared control flow. |
|
| 267 | + | * |
|
| 268 | + | * For example, `let value = opt else { handle(); }; rest;` becomes: |
|
| 269 | + | * |
|
| 270 | + | * if let value = opt { |
|
| 271 | + | * rest; |
|
| 272 | + | * } else { |
|
| 273 | + | * handle(); |
|
| 274 | + | * } |
|
| 275 | + | * |
|
| 276 | + | * Likewise, `let case Pattern(x) = expr else { ... };` becomes an |
|
| 277 | + | * equivalent `if case` construct with the suffix statements placed in |
|
| 278 | + | * the success branch so they retain access to bound names. |
|
| 279 | + | */ |
|
| 280 | + | static node_t *desugar_guard_stmt( |
|
| 281 | + | desugar_t *d, module_t *mod, node_t *block, usize index, node_t *guard |
|
| 282 | + | ) { |
|
| 283 | + | node_t *success = node_block(mod, guard); |
|
| 284 | + | node_t *if_stmt = NULL; |
|
| 285 | + | ||
| 286 | + | /* Add the rest of the surrounding block into the success branch. */ |
|
| 287 | + | node_t **stmts = nodespan_ptrs(&mod->parser, block->val.block.stmts); |
|
| 288 | + | for (usize j = index + 1; j < block->val.block.stmts.len; j++) { |
|
| 289 | + | node_block_add_stmt(mod, success, stmts[j]); |
|
| 290 | + | } |
|
| 291 | + | if (guard->cls == NODE_GUARD_LET) { |
|
| 292 | + | if_stmt = node(mod, NODE_IF_LET, guard); |
|
| 293 | + | if_stmt->val.if_let_stmt.var = guard->val.guard_let_stmt.var; |
|
| 294 | + | if_stmt->val.if_let_stmt.expr = guard->val.guard_let_stmt.expr; |
|
| 295 | + | if_stmt->val.if_let_stmt.guard = NULL; |
|
| 296 | + | if_stmt->val.if_let_stmt.lbranch = success; |
|
| 297 | + | if_stmt->val.if_let_stmt.rbranch = guard->val.guard_let_stmt.rbranch; |
|
| 298 | + | if_stmt->val.if_let_stmt.scope = NULL; |
|
| 299 | + | } else { |
|
| 300 | + | if_stmt = node(mod, NODE_IF_CASE, guard); |
|
| 301 | + | if_stmt->val.if_case_stmt.pattern = guard->val.guard_case_stmt.pattern; |
|
| 302 | + | if_stmt->val.if_case_stmt.expr = guard->val.guard_case_stmt.expr; |
|
| 303 | + | if_stmt->val.if_case_stmt.guard = guard->val.guard_case_stmt.guard; |
|
| 304 | + | if_stmt->val.if_case_stmt.lbranch = success; |
|
| 305 | + | if_stmt->val.if_case_stmt.rbranch = guard->val.guard_case_stmt.rbranch; |
|
| 306 | + | } |
|
| 307 | + | block->val.block.stmts.len = index + 1; |
|
| 308 | + | ||
| 309 | + | return desugar_node(d, mod, if_stmt); |
|
| 310 | + | } |
|
| 311 | + | ||
| 312 | + | static node_t *desugar_block(desugar_t *d, module_t *mod, node_t *block) { |
|
| 313 | + | node_t **stmts = nodespan_ptrs(&mod->parser, block->val.block.stmts); |
|
| 314 | + | for (usize i = 0; i < block->val.block.stmts.len; i++) { |
|
| 315 | + | node_t *stmt = stmts[i]; |
|
| 316 | + | /* Guard statements fold the rest of the block under the success |
|
| 317 | + | * branch of the generated `if` statement, therefore we continue |
|
| 318 | + | * processing the block inside the guard statement desugar. */ |
|
| 319 | + | if (stmt->cls == NODE_GUARD_LET || stmt->cls == NODE_GUARD_CASE) { |
|
| 320 | + | stmts[i] = desugar_guard_stmt(d, mod, block, i, stmt); |
|
| 321 | + | return block; |
|
| 322 | + | } |
|
| 323 | + | stmts[i] = desugar_node(d, mod, stmt); |
|
| 324 | + | } |
|
| 325 | + | return block; |
|
| 326 | + | } |
|
| 327 | + | ||
| 328 | + | static void node_match_add_case( |
|
| 329 | + | module_t *mod, node_t *swtch, node_t *swtch_case |
|
| 330 | + | ) { |
|
| 331 | + | nodespan_push(&mod->parser, &swtch->val.match_stmt.cases, swtch_case); |
|
| 332 | + | } |
|
| 333 | + | ||
| 334 | + | static node_t *desugar_for_range( |
|
| 335 | + | desugar_t *d, module_t *mod, node_t *for_node |
|
| 336 | + | ) { |
|
| 337 | + | node_t *range = for_node->val.for_stmt.iter; |
|
| 338 | + | ||
| 339 | + | node_t *index_name = for_node->val.for_stmt.idx |
|
| 340 | + | ? for_node->val.for_stmt.idx |
|
| 341 | + | : node_ident(mod, "$i", for_node); |
|
| 342 | + | node_t *end_name = node_ident(mod, "$end", for_node); |
|
| 343 | + | node_t *start_expr = range->val.range.start |
|
| 344 | + | ? desugar_node(d, mod, range->val.range.start) |
|
| 345 | + | : node_number(mod, "0", range); |
|
| 346 | + | node_t *index_typ = node_type(mod, TYPE_U32, for_node); |
|
| 347 | + | node_t *index_var = |
|
| 348 | + | node_var(mod, index_name, index_typ, start_expr, true, for_node); |
|
| 349 | + | ||
| 350 | + | node_t *end_expr = desugar_node(d, mod, range->val.range.end); |
|
| 351 | + | node_t *end_typ = node_type(mod, TYPE_U32, for_node); |
|
| 352 | + | node_t *end_var = node_var(mod, end_name, end_typ, end_expr, false, range); |
|
| 353 | + | ||
| 354 | + | node_t *cond = node_binop( |
|
| 355 | + | mod, OP_LT, index_var->val.var.ident, end_var->val.var.ident, for_node |
|
| 356 | + | ); |
|
| 357 | + | node_t *loop_body = node_block(mod, for_node); |
|
| 358 | + | node_t *loop_var = node_var( |
|
| 359 | + | mod, |
|
| 360 | + | for_node->val.for_stmt.var, |
|
| 361 | + | NULL, |
|
| 362 | + | index_var->val.var.ident, |
|
| 363 | + | false, |
|
| 364 | + | for_node->val.for_stmt.var |
|
| 365 | + | ); |
|
| 366 | + | node_block_add_stmt(mod, loop_body, loop_var); |
|
| 367 | + | node_block_add_stmt( |
|
| 368 | + | mod, loop_body, desugar_node(d, mod, for_node->val.for_stmt.body) |
|
| 369 | + | ); |
|
| 370 | + | ||
| 371 | + | node_t *increment = node_increment( |
|
| 372 | + | mod, index_var->val.var.ident, index_var->val.var.ident, for_node |
|
| 373 | + | ); |
|
| 374 | + | node_block_add_stmt(mod, loop_body, increment); |
|
| 375 | + | ||
| 376 | + | node_t *while_node = node(mod, NODE_WHILE, for_node); |
|
| 377 | + | while_node->val.while_stmt.cond = cond; |
|
| 378 | + | while_node->val.while_stmt.body = loop_body; |
|
| 379 | + | while_node->val.while_stmt.rbranch = |
|
| 380 | + | for_node->val.for_stmt.rbranch |
|
| 381 | + | ? desugar_node(d, mod, for_node->val.for_stmt.rbranch) |
|
| 382 | + | : NULL; |
|
| 383 | + | ||
| 384 | + | node_t *wrapper = node_block(mod, for_node); |
|
| 385 | + | node_block_add_stmt(mod, wrapper, index_var); |
|
| 386 | + | node_block_add_stmt(mod, wrapper, end_var); |
|
| 387 | + | node_block_add_stmt(mod, wrapper, desugar_while(d, mod, while_node)); |
|
| 388 | + | ||
| 389 | + | return wrapper; |
|
| 390 | + | } |
|
| 391 | + | ||
| 392 | + | /* Transform a for loop into a while loop: |
|
| 393 | + | * |
|
| 394 | + | * for var in (iter) { |
|
| 395 | + | * body; |
|
| 396 | + | * } else { |
|
| 397 | + | * rbranch; |
|
| 398 | + | * } |
|
| 399 | + | * |
|
| 400 | + | * becomes: |
|
| 401 | + | * |
|
| 402 | + | * { |
|
| 403 | + | * let $i: u32 = 0; |
|
| 404 | + | * let $len: u32 = iter.len; |
|
| 405 | + | * while ($i < $len) { |
|
| 406 | + | * let var = iter[$i]; |
|
| 407 | + | * body; |
|
| 408 | + | * $i = $i + 1; |
|
| 409 | + | * } else { |
|
| 410 | + | * rbranch; |
|
| 411 | + | * } |
|
| 412 | + | * } |
|
| 413 | + | */ |
|
| 414 | + | static node_t *desugar_for(desugar_t *d, module_t *mod, node_t *for_node) { |
|
| 415 | + | if (for_node->val.for_stmt.iter->cls == NODE_RANGE) { |
|
| 416 | + | return desugar_for_range(d, mod, for_node); |
|
| 417 | + | } |
|
| 418 | + | /* Use simple temporary variable names or user-provided index variable */ |
|
| 419 | + | node_t *index_name = for_node->val.for_stmt.idx |
|
| 420 | + | ? for_node->val.for_stmt.idx |
|
| 421 | + | : node_ident(mod, "$i", for_node); |
|
| 422 | + | node_t *length_name = node_ident(mod, "$len", for_node); |
|
| 423 | + | ||
| 424 | + | /* Create index variable: let $i: u32 = 0; */ |
|
| 425 | + | node_t *index_val = node_number(mod, "0", for_node); |
|
| 426 | + | node_t *index_typ = node_type(mod, TYPE_U32, for_node); |
|
| 427 | + | node_t *index_var = |
|
| 428 | + | node_var(mod, index_name, index_typ, index_val, true, for_node); |
|
| 429 | + | ||
| 430 | + | /* Create length variable: let $len: u32 = iter.len; */ |
|
| 431 | + | node_t *len_field = node_access_str( |
|
| 432 | + | mod, |
|
| 433 | + | desugar_node(d, mod, for_node->val.for_stmt.iter), |
|
| 434 | + | "len", |
|
| 435 | + | for_node->val.for_stmt.iter |
|
| 436 | + | ); |
|
| 437 | + | node_t *length_typ = node_type(mod, TYPE_U32, for_node); |
|
| 438 | + | node_t *length_var = |
|
| 439 | + | node_var(mod, length_name, length_typ, len_field, false, for_node); |
|
| 440 | + | ||
| 441 | + | /* Create while condition: $i < $len */ |
|
| 442 | + | node_t *cond = node_binop( |
|
| 443 | + | mod, |
|
| 444 | + | OP_LT, |
|
| 445 | + | index_var->val.var.ident, |
|
| 446 | + | length_var->val.var.ident, |
|
| 447 | + | for_node |
|
| 448 | + | ); |
|
| 449 | + | ||
| 450 | + | /* Create array index access: iter[$i] */ |
|
| 451 | + | node_t *array_idx = node(mod, NODE_ARRAY_INDEX, for_node); |
|
| 452 | + | array_idx->val.access.lval = |
|
| 453 | + | desugar_node(d, mod, for_node->val.for_stmt.iter); |
|
| 454 | + | array_idx->val.access.rval = index_var->val.var.ident; |
|
| 455 | + | ||
| 456 | + | /* Create loop variable assignment: let var = iter[$i]; */ |
|
| 457 | + | node_t *var_name = for_node->val.for_stmt.var; |
|
| 458 | + | node_t *loop_var = node_var( |
|
| 459 | + | mod, var_name, NULL, array_idx, false, for_node->val.for_stmt.var |
|
| 460 | + | ); |
|
| 461 | + | ||
| 462 | + | /* Create increment statement: $i = $i + 1; */ |
|
| 463 | + | node_t *increment = node_increment( |
|
| 464 | + | mod, index_var->val.var.ident, index_var->val.var.ident, for_node |
|
| 465 | + | ); |
|
| 466 | + | ||
| 467 | + | /* Create while body */ |
|
| 468 | + | node_t *body = node_block(mod, for_node); |
|
| 469 | + | node_block_add_stmt(mod, body, loop_var); |
|
| 470 | + | node_block_add_stmt( |
|
| 471 | + | mod, body, desugar_node(d, mod, for_node->val.for_stmt.body) |
|
| 472 | + | ); |
|
| 473 | + | node_block_add_stmt(mod, body, increment); |
|
| 474 | + | ||
| 475 | + | /* Create while node */ |
|
| 476 | + | node_t *while_node = node(mod, NODE_WHILE, for_node); |
|
| 477 | + | while_node->val.while_stmt.cond = cond; |
|
| 478 | + | while_node->val.while_stmt.body = body; |
|
| 479 | + | while_node->val.while_stmt.rbranch = |
|
| 480 | + | for_node->val.for_stmt.rbranch |
|
| 481 | + | ? desugar_node(d, mod, for_node->val.for_stmt.rbranch) |
|
| 482 | + | : NULL; |
|
| 483 | + | ||
| 484 | + | /* Create wrapper block containing the initialization and while loop */ |
|
| 485 | + | node_t *wrapper = node_block(mod, for_node); |
|
| 486 | + | node_block_add_stmt(mod, wrapper, index_var); |
|
| 487 | + | node_block_add_stmt(mod, wrapper, length_var); |
|
| 488 | + | node_block_add_stmt(mod, wrapper, desugar_while(d, mod, while_node)); |
|
| 489 | + | ||
| 490 | + | return wrapper; |
|
| 491 | + | } |
|
| 492 | + | ||
| 493 | + | /* Transform `x and y` into: |
|
| 494 | + | * |
|
| 495 | + | * if (x) { |
|
| 496 | + | * y |
|
| 497 | + | * } else { |
|
| 498 | + | * false |
|
| 499 | + | * } |
|
| 500 | + | */ |
|
| 501 | + | static node_t *desugar_and_operator( |
|
| 502 | + | desugar_t *d, module_t *mod, node_t *binop |
|
| 503 | + | ) { |
|
| 504 | + | node_t *if_node = node(mod, NODE_IF, binop); |
|
| 505 | + | node_t *false_lit = node_bool(mod, false, binop); |
|
| 506 | + | ||
| 507 | + | if_node->val.if_stmt.cond = desugar_node(d, mod, binop->val.binop.left); |
|
| 508 | + | if_node->val.if_stmt.lbranch = desugar_node(d, mod, binop->val.binop.right); |
|
| 509 | + | if_node->val.if_stmt.rbranch = false_lit; |
|
| 510 | + | ||
| 511 | + | return if_node; |
|
| 512 | + | } |
|
| 513 | + | ||
| 514 | + | /* Transform `x or y` into: |
|
| 515 | + | * |
|
| 516 | + | * if (x) { |
|
| 517 | + | * true |
|
| 518 | + | * } else { |
|
| 519 | + | * y |
|
| 520 | + | * } |
|
| 521 | + | */ |
|
| 522 | + | static node_t *desugar_or_operator(desugar_t *d, module_t *mod, node_t *binop) { |
|
| 523 | + | node_t *if_node = node(mod, NODE_IF, binop); |
|
| 524 | + | node_t *true_lit = node_bool(mod, true, binop); |
|
| 525 | + | ||
| 526 | + | if_node->val.if_stmt.cond = desugar_node(d, mod, binop->val.binop.left); |
|
| 527 | + | if_node->val.if_stmt.lbranch = true_lit; |
|
| 528 | + | if_node->val.if_stmt.rbranch = desugar_node(d, mod, binop->val.binop.right); |
|
| 529 | + | ||
| 530 | + | return if_node; |
|
| 531 | + | } |
|
| 532 | + | ||
| 533 | + | /* Recursively desugar a node and its children */ |
|
| 534 | + | static node_t *desugar_node(desugar_t *d, module_t *mod, node_t *n) { |
|
| 535 | + | if (!n) |
|
| 536 | + | return NULL; |
|
| 537 | + | ||
| 538 | + | switch (n->cls) { |
|
| 539 | + | case NODE_WHILE: |
|
| 540 | + | return desugar_while(d, mod, n); |
|
| 541 | + | ||
| 542 | + | case NODE_WHILE_LET: |
|
| 543 | + | return desugar_while_let(d, mod, n); |
|
| 544 | + | ||
| 545 | + | case NODE_MOD_BODY: |
|
| 546 | + | case NODE_BLOCK: |
|
| 547 | + | return desugar_block(d, mod, n); |
|
| 548 | + | ||
| 549 | + | case NODE_IF: |
|
| 550 | + | n->val.if_stmt.cond = desugar_node(d, mod, n->val.if_stmt.cond); |
|
| 551 | + | n->val.if_stmt.lbranch = desugar_node(d, mod, n->val.if_stmt.lbranch); |
|
| 552 | + | if (n->val.if_stmt.rbranch) { |
|
| 553 | + | n->val.if_stmt.rbranch = |
|
| 554 | + | desugar_node(d, mod, n->val.if_stmt.rbranch); |
|
| 555 | + | } |
|
| 556 | + | return n; |
|
| 557 | + | ||
| 558 | + | case NODE_IF_LET: |
|
| 559 | + | n->val.if_let_stmt.expr = desugar_node(d, mod, n->val.if_let_stmt.expr); |
|
| 560 | + | if (n->val.if_let_stmt.guard) { |
|
| 561 | + | n->val.if_let_stmt.guard = |
|
| 562 | + | desugar_node(d, mod, n->val.if_let_stmt.guard); |
|
| 563 | + | } |
|
| 564 | + | n->val.if_let_stmt.lbranch = |
|
| 565 | + | desugar_node(d, mod, n->val.if_let_stmt.lbranch); |
|
| 566 | + | if (n->val.if_let_stmt.rbranch) { |
|
| 567 | + | n->val.if_let_stmt.rbranch = |
|
| 568 | + | desugar_node(d, mod, n->val.if_let_stmt.rbranch); |
|
| 569 | + | } |
|
| 570 | + | return n; |
|
| 571 | + | ||
| 572 | + | case NODE_IF_CASE: { |
|
| 573 | + | node_t *pattern = desugar_node(d, mod, n->val.if_case_stmt.pattern); |
|
| 574 | + | node_t *expr = desugar_node(d, mod, n->val.if_case_stmt.expr); |
|
| 575 | + | node_t *guard = NULL; |
|
| 576 | + | ||
| 577 | + | if (n->val.if_case_stmt.guard) { |
|
| 578 | + | guard = desugar_node(d, mod, n->val.if_case_stmt.guard); |
|
| 579 | + | } |
|
| 580 | + | node_t *then_block = desugar_node(d, mod, n->val.if_case_stmt.lbranch); |
|
| 581 | + | node_t *swtch = node_match(mod, expr, n); |
|
| 582 | + | ||
| 583 | + | node_t *case_node = node_match_case(mod, pattern, guard, then_block, n); |
|
| 584 | + | ||
| 585 | + | node_match_add_case(mod, swtch, case_node); |
|
| 586 | + | ||
| 587 | + | if (n->val.if_case_stmt.rbranch) { |
|
| 588 | + | node_t *else_body = |
|
| 589 | + | desugar_node(d, mod, n->val.if_case_stmt.rbranch); |
|
| 590 | + | node_t *default_case = |
|
| 591 | + | node_match_case(mod, NULL, NULL, else_body, n); |
|
| 592 | + | ||
| 593 | + | node_match_add_case(mod, swtch, default_case); |
|
| 594 | + | } |
|
| 595 | + | return swtch; |
|
| 596 | + | } |
|
| 597 | + | ||
| 598 | + | case NODE_LOOP: |
|
| 599 | + | n->val.loop_stmt.body = desugar_node(d, mod, n->val.loop_stmt.body); |
|
| 600 | + | return n; |
|
| 601 | + | ||
| 602 | + | case NODE_FN: |
|
| 603 | + | n->val.fn_decl.body = desugar_node(d, mod, n->val.fn_decl.body); |
|
| 604 | + | return n; |
|
| 605 | + | ||
| 606 | + | case NODE_BINOP: |
|
| 607 | + | /* Handle logical operators with short-circuit evaluation */ |
|
| 608 | + | if (n->val.binop.op == OP_AND) { |
|
| 609 | + | return desugar_and_operator(d, mod, n); |
|
| 610 | + | } |
|
| 611 | + | if (n->val.binop.op == OP_OR) { |
|
| 612 | + | return desugar_or_operator(d, mod, n); |
|
| 613 | + | } |
|
| 614 | + | /* For other binary operators, recursively desugar operands */ |
|
| 615 | + | n->val.binop.left = desugar_node(d, mod, n->val.binop.left); |
|
| 616 | + | n->val.binop.right = desugar_node(d, mod, n->val.binop.right); |
|
| 617 | + | return n; |
|
| 618 | + | ||
| 619 | + | case NODE_UNOP: |
|
| 620 | + | n->val.unop.expr = desugar_node(d, mod, n->val.unop.expr); |
|
| 621 | + | return n; |
|
| 622 | + | ||
| 623 | + | case NODE_CALL: { |
|
| 624 | + | node_t **args = nodespan_ptrs(&mod->parser, n->val.call.args); |
|
| 625 | + | for (usize i = 0; i < n->val.call.args.len; i++) { |
|
| 626 | + | args[i] = desugar_node(d, mod, args[i]); |
|
| 627 | + | } |
|
| 628 | + | return n; |
|
| 629 | + | } |
|
| 630 | + | ||
| 631 | + | case NODE_BUILTIN: { |
|
| 632 | + | node_t **args = nodespan_ptrs(&mod->parser, n->val.builtin.args); |
|
| 633 | + | for (usize i = 0; i < n->val.builtin.args.len; i++) { |
|
| 634 | + | args[i] = desugar_node(d, mod, args[i]); |
|
| 635 | + | } |
|
| 636 | + | return n; |
|
| 637 | + | } |
|
| 638 | + | ||
| 639 | + | case NODE_RETURN: |
|
| 640 | + | if (n->val.return_stmt.value) { |
|
| 641 | + | n->val.return_stmt.value = |
|
| 642 | + | desugar_node(d, mod, n->val.return_stmt.value); |
|
| 643 | + | } |
|
| 644 | + | return n; |
|
| 645 | + | ||
| 646 | + | case NODE_VAR: |
|
| 647 | + | if (n->val.var.value) { |
|
| 648 | + | n->val.var.value = desugar_node(d, mod, n->val.var.value); |
|
| 649 | + | } |
|
| 650 | + | return n; |
|
| 651 | + | ||
| 652 | + | case NODE_ASSIGN: |
|
| 653 | + | n->val.assign.lval = desugar_node(d, mod, n->val.assign.lval); |
|
| 654 | + | n->val.assign.rval = desugar_node(d, mod, n->val.assign.rval); |
|
| 655 | + | return n; |
|
| 656 | + | ||
| 657 | + | case NODE_EXPR_STMT: |
|
| 658 | + | n->val.expr_stmt = desugar_node(d, mod, n->val.expr_stmt); |
|
| 659 | + | return n; |
|
| 660 | + | ||
| 661 | + | case NODE_FOR: |
|
| 662 | + | return desugar_for(d, mod, n); |
|
| 663 | + | ||
| 664 | + | case NODE_MATCH: { |
|
| 665 | + | n->val.match_stmt.expr = desugar_node(d, mod, n->val.match_stmt.expr); |
|
| 666 | + | node_t **cases = nodespan_ptrs(&mod->parser, n->val.match_stmt.cases); |
|
| 667 | + | for (usize i = 0; i < n->val.match_stmt.cases.len; i++) { |
|
| 668 | + | cases[i] = desugar_node(d, mod, cases[i]); |
|
| 669 | + | } |
|
| 670 | + | return n; |
|
| 671 | + | } |
|
| 672 | + | ||
| 673 | + | case NODE_MATCH_CASE: { |
|
| 674 | + | node_t **patterns = |
|
| 675 | + | nodespan_ptrs(&mod->parser, n->val.match_case.patterns); |
|
| 676 | + | for (usize i = 0; i < n->val.match_case.patterns.len; i++) { |
|
| 677 | + | patterns[i] = desugar_node(d, mod, patterns[i]); |
|
| 678 | + | } |
|
| 679 | + | if (n->val.match_case.guard) { |
|
| 680 | + | n->val.match_case.guard = |
|
| 681 | + | desugar_node(d, mod, n->val.match_case.guard); |
|
| 682 | + | } |
|
| 683 | + | n->val.match_case.body = desugar_node(d, mod, n->val.match_case.body); |
|
| 684 | + | return n; |
|
| 685 | + | } |
|
| 686 | + | ||
| 687 | + | case NODE_ARRAY_INDEX: |
|
| 688 | + | case NODE_ARRAY_LIT: |
|
| 689 | + | case NODE_ARRAY_REPEAT_LIT: |
|
| 690 | + | case NODE_RECORD_LIT: |
|
| 691 | + | case NODE_CALL_ARG: |
|
| 692 | + | case NODE_REF: |
|
| 693 | + | case NODE_ACCESS: |
|
| 694 | + | case NODE_NUMBER: |
|
| 695 | + | case NODE_CHAR: |
|
| 696 | + | case NODE_STRING: |
|
| 697 | + | case NODE_BOOL: |
|
| 698 | + | case NODE_NIL: |
|
| 699 | + | case NODE_UNDEF: |
|
| 700 | + | case NODE_SCOPE: |
|
| 701 | + | case NODE_IDENT: |
|
| 702 | + | case NODE_PLACEHOLDER: |
|
| 703 | + | case NODE_BREAK: |
|
| 704 | + | case NODE_USE: |
|
| 705 | + | case NODE_AS: |
|
| 706 | + | case NODE_CONST: |
|
| 707 | + | case NODE_STATIC: |
|
| 708 | + | case NODE_MOD: |
|
| 709 | + | case NODE_UNION: |
|
| 710 | + | case NODE_RECORD: |
|
| 711 | + | case NODE_PANIC: |
|
| 712 | + | case NODE_TYPE: |
|
| 713 | + | case NODE_RECORD_TYPE: |
|
| 714 | + | return n; |
|
| 715 | + | ||
| 716 | + | case NODE_THROW: |
|
| 717 | + | n->val.throw_stmt.expr = desugar_node(d, mod, n->val.throw_stmt.expr); |
|
| 718 | + | return n; |
|
| 719 | + | ||
| 720 | + | case NODE_TRY: { |
|
| 721 | + | n->val.try_expr.expr = desugar_node(d, mod, n->val.try_expr.expr); |
|
| 722 | + | node_t **handlers = |
|
| 723 | + | nodespan_ptrs(&mod->parser, n->val.try_expr.handlers); |
|
| 724 | + | for (usize i = 0; i < n->val.try_expr.handlers.len; i++) { |
|
| 725 | + | handlers[i] = desugar_node(d, mod, handlers[i]); |
|
| 726 | + | } |
|
| 727 | + | n->val.try_expr.catch_expr = |
|
| 728 | + | desugar_node(d, mod, n->val.try_expr.catch_expr); |
|
| 729 | + | return n; |
|
| 730 | + | } |
|
| 731 | + | case NODE_CATCH: |
|
| 732 | + | n->val.catch_clause.body = |
|
| 733 | + | desugar_node(d, mod, n->val.catch_clause.body); |
|
| 734 | + | return n; |
|
| 735 | + | ||
| 736 | + | default: |
|
| 737 | + | bail("unsupported node type %s", node_names[n->cls]); |
|
| 738 | + | return NULL; |
|
| 739 | + | } |
|
| 740 | + | } |
|
| 741 | + | ||
| 742 | + | node_t *desugar_run(desugar_t *d, module_t *mod, node_t *ast) { |
|
| 743 | + | return desugar_node(d, mod, ast); |
|
| 744 | + | } |
desugar.h
added
+15 -0
#ifndef DESUGAR_H
#define DESUGAR_H

#include "ast.h"
#include "module.h"

/* Desugar pass context; passed to every desugar_run() call. */
typedef struct {
    u32 flags; /* NOTE(review): meaning of the bits is not visible here */
} desugar_t;

/* AST desugaring pass; runs after parsing to transform the AST.
 * Takes the pass context, the module the tree belongs to and the root node;
 * returns the node to use as the tree's root afterwards. */
node_t *desugar_run(desugar_t *d, module_t *mod, node_t *ast);

#endif /* DESUGAR_H */
gen.c
added
+3233 -0
| 1 | + | #include <assert.h> |
|
| 2 | + | #include <stdlib.h> |
|
| 3 | + | #include <string.h> |
|
| 4 | + | ||
| 5 | + | #include "ast.h" |
|
| 6 | + | #include "gen.h" |
|
| 7 | + | #include "gen/data.h" |
|
| 8 | + | #include "gen/emit.h" |
|
| 9 | + | #include "io.h" |
|
| 10 | + | #include "limits.h" |
|
| 11 | + | #include "module.h" |
|
| 12 | + | #include "options.h" |
|
| 13 | + | #include "ralloc.h" |
|
| 14 | + | #include "riscv.h" |
|
| 15 | + | ||
| 16 | + | #include "resolver.h" |
|
| 17 | + | #include "symtab.h" |
|
| 18 | + | #include "types.h" |
|
| 19 | + | ||
/* Shorthand: get node pointer array from span in current module's parser.
 * `g` is the generator context (its `mod` field is dereferenced), `s` the
 * span to resolve. */
#define SPAN(g, s) nodespan_ptrs(&(g)->mod->parser, (s))

/* Forward declarations, so the routines in this file can reference each
 * other regardless of definition order. */
static void    gen_assign(gen_t *g, node_t *n);
static void    gen_return(gen_t *g, node_t *n);
static void    gen_fn(gen_t *g, node_t *n);
static value_t gen_array_index(gen_t *g, node_t *n, bool ref);
static value_t gen_array_slice(gen_t *g, value_t array_val, node_t *range);
static value_t gen_array_literal(gen_t *g, node_t *n);
static value_t gen_array_repeat(gen_t *g, node_t *n);
static value_t gen_expr(gen_t *g, node_t *n, bool lvalue);
static void    gen_expr_stmt(gen_t *g, node_t *n);
static void    gen_match(gen_t *g, node_t *n);
static void    gen_block(gen_t *g, node_t *n);
static void    gen_if(gen_t *g, node_t *n);
static void    gen_if_let(gen_t *g, node_t *n);
static value_t gen_if_expr(gen_t *g, node_t *n);
static void    gen_loop(gen_t *g, node_t *n);
static void    gen_break(gen_t *g, node_t *n);
static void    gen_var(gen_t *g, node_t *n);
static void    gen_const(gen_t *g, node_t *n);
static void    gen_static(gen_t *g, node_t *n);
static void    gen_nop(gen_t *g, node_t *n);
static value_t gen_deref(gen_t *g, node_t *n, value_t ref_val, bool lval);
static void    gen_ecall(gen_t *g, node_t *n);
static void    gen_ebreak(gen_t *g, node_t *n);
static void    gen_panic(gen_t *g, node_t *n);
static void    gen_mod(gen_t *g, node_t *n);
static void    gen_use(gen_t *g, node_t *n);
static value_t gen_as_cast(gen_t *g, node_t *n);
static value_t gen_union_constructor(gen_t *g, node_t *n);
static value_t gen_record_lit(gen_t *g, node_t *n);
static void    gen_throw(gen_t *g, node_t *n);
static value_t gen_try(gen_t *g, node_t *n);
static value_t gen_union_store(
    gen_t *g, type_t *union_type, symbol_t *variant_sym, value_t payload
);
static void    useval(gen_t *g, value_t val);
static void    freeval(gen_t *g, value_t val);
static value_t value_none(void);
/* Not static: this one has external linkage, unlike the helpers above. */
i32            tval_payload_offset(type_t *container);
|
| 61 | + | ||
| 62 | + | /* Convert a value into a tagged value by calculating its offsets. */ |
|
| 63 | + | tval_t tval_from_val(gen_t *g, value_t val) { |
|
| 64 | + | /* For unions with payloads, we don't know the value type in advance. */ |
|
| 65 | + | type_t *val_typ = NULL; |
|
| 66 | + | ||
| 67 | + | if (val.type->cls == TYPE_OPT) { |
|
| 68 | + | val_typ = val.type->info.opt.elem; |
|
| 69 | + | } else if (val.type->cls == TYPE_RESULT) { |
|
| 70 | + | val_typ = val.type->info.res.payload; |
|
| 71 | + | } |
|
| 72 | + | i32 val_off = tval_payload_offset(val.type); |
|
| 73 | + | ||
| 74 | + | tval_t tval = { 0 }; |
|
| 75 | + | tval.tag = |
|
| 76 | + | (value_t){ .type = g->types->type_u8, .loc = val.loc, .as = val.as }; |
|
| 77 | + | tval.typ = val.type; |
|
| 78 | + | tval.val = (value_t){ |
|
| 79 | + | .type = val_typ, |
|
| 80 | + | .loc = val.loc, |
|
| 81 | + | .as = val.as, |
|
| 82 | + | }; |
|
| 83 | + | ||
| 84 | + | if (val.loc == LOC_STACK) { |
|
| 85 | + | tval.val.as.off.offset = val.as.off.offset + val_off; |
|
| 86 | + | } else if (val.loc == LOC_ADDR) { |
|
| 87 | + | tval.val.as.adr.offset = val.as.adr.offset + val_off; |
|
| 88 | + | } else if (val.loc == LOC_REG) { |
|
| 89 | + | /* Register contains the address of the optional in memory */ |
|
| 90 | + | tval.tag.loc = LOC_STACK; |
|
| 91 | + | tval.tag.as.off.base = val.as.reg; |
|
| 92 | + | tval.tag.as.off.offset = 0; |
|
| 93 | + | ||
| 94 | + | tval.val.loc = LOC_STACK; |
|
| 95 | + | tval.val.as.off.base = val.as.reg; |
|
| 96 | + | tval.val.as.off.offset = val_off; |
|
| 97 | + | } else { |
|
| 98 | + | bail("cannot load tagged value from location %d", val.loc); |
|
| 99 | + | } |
|
| 100 | + | return tval; |
|
| 101 | + | } |
|
| 102 | + | ||
| 103 | + | /* Return the byte offset of the payload within a tagged value. */ |
|
| 104 | + | i32 tval_payload_offset(type_t *container) { |
|
| 105 | + | return container->size > TAG_SIZE ? align(TAG_SIZE, container->align) |
|
| 106 | + | : TAG_SIZE; |
|
| 107 | + | } |
|
| 108 | + | ||
| 109 | + | /* Return the number of payload bytes to zero before writing a new value. */ |
|
| 110 | + | i32 tval_payload_zero_size(type_t *container) { |
|
| 111 | + | switch (container->cls) { |
|
| 112 | + | case TYPE_OPT: |
|
| 113 | + | return container->size - tval_payload_offset(container); |
|
| 114 | + | case TYPE_UNION: |
|
| 115 | + | return container->size - tval_payload_offset(container); |
|
| 116 | + | default: |
|
| 117 | + | return 0; |
|
| 118 | + | } |
|
| 119 | + | } |
|
| 120 | + | ||
/* Write a tagged value into `dest`.
 *
 * Emits, in order: the one-byte tag, zeroing of any padding between the tag
 * and the payload, zeroing of the payload area, and finally a store of
 * `value` as the payload. The payload store is skipped when `dest` is an
 * optional with tag 0, or when `value` has no type or a void type.
 *
 *   dest:  destination; must be LOC_STACK, LOC_ADDR or LOC_REG (a register
 *          holding the destination address), anything else bails
 *   value: payload to store
 *   tag:   tag byte to write */
void tval_store(gen_t *g, value_t dest, value_t value, i32 tag) {
    /* Optional values treat tag 0 as nil; everything else always stores a
     * payload area. */
    bool nil = (dest.type->cls == TYPE_OPT && tag == 0);

    /* Compute base/offset for tag and payload. For addresses, materialize a
     * temporary base register so regstore/memzero can operate safely. */
    reg_t base      = ZERO;
    i32   tag_off   = 0;
    bool  base_temp = false; /* true when `base` was allocated here */

    switch (dest.loc) {
    case LOC_STACK:
        base    = dest.as.off.base;
        tag_off = dest.as.off.offset;
        break;
    case LOC_ADDR:
        base      = nextreg(g);
        base_temp = true;
        emit_li(g, base, dest.as.adr.base);
        tag_off = dest.as.adr.offset;
        break;
    case LOC_REG: {
        /* Register holds the address; copy into a reserved temp. */
        base      = nextreg(g);
        base_temp = true;
        emit_mv(g, base, dest.as.reg);
        tag_off = 0;
        break;
    }
    default:
        bail("cannot store tagged value at location %d", dest.loc);
    }
    i32 payload_off = tag_off + tval_payload_offset(dest.type);

    /* Store tag (1 byte) */
    reg_t rd = nextreg(g);
    emit_li(g, rd, tag);
    emit_regstore(g, rd, base, tag_off, g->types->type_u8);
    freereg(g, rd);

    /* Zero padding between tag byte and payload start so that byte-level
     * equality comparisons of tagged values work correctly. */
    i32 pad_off  = tag_off + TAG_SIZE;
    i32 pad_size = payload_off - pad_off;
    if (pad_size > 0) {
        emit_memzero(g, OFFSET(base, pad_off), pad_size);
    }

    /* Clear payload region before writing a new value (or when nil).
     * NOTE(review): unlike the padding above, this is issued even when
     * payload_size is 0; confirm emit_memzero treats a zero length as a
     * no-op. */
    i32 payload_size = tval_payload_zero_size(dest.type);
    emit_memzero(g, OFFSET(base, payload_off), payload_size);

    if (!nil && value.type && value.type->cls != TYPE_VOID) {
        emit_store(g, value, base, payload_off);
    }
    /* Only release the base register if it was allocated by this call. */
    if (base_temp)
        freereg(g, base);
}
|
| 180 | + | ||
| 181 | + | /* Helper function to create an optional value from a primitive immediate. */ |
|
| 182 | + | static value_t optval_from_prim(gen_t *g, type_t *opt_type, value_t prim_val) { |
|
| 183 | + | i32 offset = reserve(g, opt_type); |
|
| 184 | + | value_t opt_val = value_stack(OFFSET(FP, offset), opt_type); |
|
| 185 | + | tval_store(g, opt_val, prim_val, 1); |
|
| 186 | + | ||
| 187 | + | return opt_val; |
|
| 188 | + | } |
|
| 189 | + | ||
| 190 | + | static value_t optval_from_value(gen_t *g, type_t *opt_type, value_t value) { |
|
| 191 | + | if (value.type == opt_type) |
|
| 192 | + | return value; |
|
| 193 | + | ||
| 194 | + | i32 offset = reserve(g, opt_type); |
|
| 195 | + | value_t opt_val = value_stack(OFFSET(FP, offset), opt_type); |
|
| 196 | + | ||
| 197 | + | tval_store(g, opt_val, value, 1); |
|
| 198 | + | ||
| 199 | + | return opt_val; |
|
| 200 | + | } |
|
| 201 | + | ||
| 202 | + | /* Load the tag of tagged value into a register. */ |
|
| 203 | + | static reg_t tval_load_tag(gen_t *g, value_t opt_val) { |
|
| 204 | + | tval_t opt = tval_from_val(g, opt_val); |
|
| 205 | + | ||
| 206 | + | return emit_load(g, opt.tag); |
|
| 207 | + | } |
|
| 208 | + | ||
| 209 | + | /* Helper to bind a union variant payload to a variable. */ |
|
| 210 | + | static void bind_union_value(gen_t *g, value_t union_src, node_t *bound_var) { |
|
| 211 | + | symbol_t *var_sym = bound_var->sym; |
|
| 212 | + | ||
| 213 | + | /* Allocate storage for the bound variable if not already allocated */ |
|
| 214 | + | if (var_sym->e.var.val.loc == LOC_NONE) { |
|
| 215 | + | i32 off = reserve_aligned(g, var_sym->e.var.typ, var_sym->e.var.align); |
|
| 216 | + | var_sym->e.var.val = value_stack(OFFSET(FP, off), var_sym->e.var.typ); |
|
| 217 | + | } |
|
| 218 | + | /* Create a value pointing to the value part of the union (after the tag) */ |
|
| 219 | + | type_t *union_type = union_src.type; |
|
| 220 | + | if (union_type->cls == TYPE_PTR) |
|
| 221 | + | union_type = union_type->info.ptr.target; |
|
| 222 | + | i32 val_off = tval_payload_offset(union_type); |
|
| 223 | + | value_t union_val_part = union_src; |
|
| 224 | + | union_val_part.type = bound_var->type; |
|
| 225 | + | ||
| 226 | + | if (union_src.loc == LOC_STACK) { |
|
| 227 | + | union_val_part.as.off.offset += val_off; |
|
| 228 | + | } else if (union_src.loc == LOC_ADDR) { |
|
| 229 | + | union_val_part.as.adr.offset += val_off; |
|
| 230 | + | } else { |
|
| 231 | + | bail("cannot bind union value from this location"); |
|
| 232 | + | } |
|
| 233 | + | /* Copy the union payload to the bound variable */ |
|
| 234 | + | emit_replace(g, var_sym->e.var.val, union_val_part); |
|
| 235 | + | } |
|
| 236 | + | ||
| 237 | + | /* Copy the value part of an optional to a destination */ |
|
| 238 | + | static void optval_copy_value(gen_t *g, value_t opt_src, value_t value_dest) { |
|
| 239 | + | tval_t opt = tval_from_val(g, opt_src); |
|
| 240 | + | emit_replace(g, value_dest, opt.val); |
|
| 241 | + | } |
|
| 242 | + | ||
| 243 | + | /* Generate a union constructor call like Expr::number(42) */ |
|
| 244 | + | static value_t gen_union_constructor(gen_t *g, node_t *call) { |
|
| 245 | + | type_t *variant_type = call->sym->node->type; |
|
| 246 | + | value_t payload = value_none(); |
|
| 247 | + | ||
| 248 | + | if (variant_type->cls != TYPE_VOID) { |
|
| 249 | + | node_t *arg_node = SPAN(g, call->val.call.args)[0]; |
|
| 250 | + | node_t *arg_expr = arg_node->val.call_arg.expr; |
|
| 251 | + | payload = gen_expr(g, arg_expr, false); |
|
| 252 | + | } |
|
| 253 | + | return gen_union_store(g, call->type, call->sym, payload); |
|
| 254 | + | } |
|
| 255 | + | ||
| 256 | + | static value_t gen_union_store( |
|
| 257 | + | gen_t *g, type_t *union_type, symbol_t *variant_sym, value_t payload |
|
| 258 | + | ) { |
|
| 259 | + | i32 tag = variant_sym->node->val.union_variant.value; |
|
| 260 | + | ||
| 261 | + | /* Allocate space for the union on the stack */ |
|
| 262 | + | i32 offset = reserve(g, union_type); |
|
| 263 | + | value_t union_val = value_stack(OFFSET(FP, offset), union_type); |
|
| 264 | + | ||
| 265 | + | /* Store the union value */ |
|
| 266 | + | useval(g, payload); |
|
| 267 | + | tval_store(g, union_val, payload, tag); |
|
| 268 | + | freeval(g, payload); |
|
| 269 | + | ||
| 270 | + | return union_val; |
|
| 271 | + | } |
|
| 272 | + | ||
/* Node type to generator function mapping, indexed by node class.
 * gen_node() dispatches through this table and bails with "unsupported node
 * type" on a NULL entry. */
static void (*GENERATORS[])(gen_t *, node_t *) = {
    [NODE_TYPE]       = NULL,
    [NODE_NUMBER]     = NULL,
    [NODE_BOOL]       = NULL,
    [NODE_STRING]     = NULL,
    [NODE_CHAR]       = NULL,
    [NODE_IDENT]      = NULL,
    [NODE_BINOP]      = NULL,
    [NODE_BLOCK]      = gen_block,
    [NODE_CALL]       = NULL,
    [NODE_CALL_ARG]   = NULL,
    [NODE_VAR]        = gen_var,
    [NODE_CONST]      = gen_const,
    [NODE_STATIC]     = gen_static,
    [NODE_ASSIGN]     = gen_assign,
    [NODE_RETURN]     = gen_return,
    [NODE_THROW]      = gen_throw,
    [NODE_PANIC]      = gen_panic,
    [NODE_WHILE]      = NULL,
    [NODE_WHILE_LET]  = NULL,
    [NODE_FOR]        = NULL,
    [NODE_LOOP]       = gen_loop,
    [NODE_IF]         = gen_if,
    [NODE_IF_LET]     = gen_if_let,
    [NODE_IF_CASE]    = NULL,
    [NODE_GUARD_CASE] = NULL,
    [NODE_GUARD_LET]  = NULL,
    [NODE_MATCH]      = gen_match,
    [NODE_MATCH_CASE] = gen_nop, /* Cases are handled by gen_match */
    [NODE_FN]         = gen_fn,
    [NODE_BREAK]      = gen_break,
    [NODE_RECORD]     = gen_nop,
    [NODE_UNION]      = gen_nop,
    [NODE_EXPR_STMT]  = gen_expr_stmt,
    [NODE_MOD]        = gen_mod,
    [NODE_USE]        = gen_use,
};
|
| 311 | + | ||
/* Built-in functions: fully qualified name, the name's length in characters
 * (without the terminating NUL), and the generator that emits the call.
 * The list is terminated by an all-NULL/zero sentinel entry. */
static const struct {
    const char *name;
    usize       length;
    void (*gen)(gen_t *, node_t *);
} BUILTINS[] = {
    { "std::intrinsics::ecall", 22, gen_ecall },
    { "std::intrinsics::ebreak", 23, gen_ebreak },
    { NULL, 0, NULL },
};
|
| 322 | + | ||
| 323 | + | /******************************************************************************/ |
|
| 324 | + | ||
| 325 | + | value_t value_addr(usize addr, i32 off, type_t *ty) { |
|
| 326 | + | return (value_t){ |
|
| 327 | + | .type = ty, |
|
| 328 | + | .loc = LOC_ADDR, |
|
| 329 | + | .as.adr.base = addr, |
|
| 330 | + | .as.adr.offset = off, |
|
| 331 | + | }; |
|
| 332 | + | } |
|
| 333 | + | ||
| 334 | + | value_t value_stack(offset_t off, type_t *ty) { |
|
| 335 | + | return (value_t){ |
|
| 336 | + | .type = ty, |
|
| 337 | + | .loc = LOC_STACK, |
|
| 338 | + | .as.off.base = off.base, |
|
| 339 | + | .as.off.offset = off.offset, |
|
| 340 | + | }; |
|
| 341 | + | } |
|
| 342 | + | ||
| 343 | + | value_t value_reg(reg_t r, type_t *ty) { |
|
| 344 | + | return (value_t){ |
|
| 345 | + | .temp = true, |
|
| 346 | + | .type = ty, |
|
| 347 | + | .loc = LOC_REG, |
|
| 348 | + | .as.reg = r, |
|
| 349 | + | }; |
|
| 350 | + | } |
|
| 351 | + | ||
| 352 | + | value_t value_imm(imm_t imm, type_t *ty) { |
|
| 353 | + | return (value_t){ |
|
| 354 | + | .type = ty, |
|
| 355 | + | .loc = LOC_IMM, |
|
| 356 | + | .as.imm = imm, |
|
| 357 | + | }; |
|
| 358 | + | } |
|
| 359 | + | ||
| 360 | + | static value_t value_none(void) { |
|
| 361 | + | return (value_t){ |
|
| 362 | + | .type = NULL, |
|
| 363 | + | .loc = LOC_NONE, |
|
| 364 | + | }; |
|
| 365 | + | } |
|
| 366 | + | ||
/* Round `addr` down to a multiple of `alignment`.
 *
 * `alignment` must be a positive power of two; this is checked with an
 * assertion because the mask trick below yields garbage otherwise.
 * For negative addresses (stack growth downward), rounding down moves the
 * address further from zero, to the next multiple of the alignment. */
i32 align_stack(i32 addr, i32 alignment) {
    /* Verify alignment is a power of 2. */
    assert(alignment > 0 && (alignment & (alignment - 1)) == 0);

    return addr & ~(alignment - 1);
}
|
| 374 | + | ||
| 375 | + | i32 jump_offset(usize from, usize to) { |
|
| 376 | + | return ((i32)to - (i32)from) * INSTR_SIZE; |
|
| 377 | + | } |
|
| 378 | + | ||
| 379 | + | /* Provide a sentinel patch so callers can keep a uniform interface. */ |
|
| 380 | + | static branch_patch_t branch_patch_invalid(void) { |
|
| 381 | + | return (branch_patch_t){ |
|
| 382 | + | .pc = (usize)-1, |
|
| 383 | + | .tramp_pc = (usize)-1, |
|
| 384 | + | .op = I_BEQ, |
|
| 385 | + | .rs1 = ZERO, |
|
| 386 | + | .rs2 = ZERO, |
|
| 387 | + | .valid = false, |
|
| 388 | + | }; |
|
| 389 | + | } |
|
| 390 | + | ||
| 391 | + | /* Reserve space for the branch and a fallback trampoline in one call. */ |
|
| 392 | + | static branch_patch_t branch_patch_make( |
|
| 393 | + | gen_t *g, iname_t op, reg_t rs1, reg_t rs2 |
|
| 394 | + | ) { |
|
| 395 | + | branch_patch_t patch = { |
|
| 396 | + | .pc = emit(g, NOP), |
|
| 397 | + | .tramp_pc = emit(g, NOP), |
|
| 398 | + | .op = op, |
|
| 399 | + | .rs1 = rs1, |
|
| 400 | + | .rs2 = rs2, |
|
| 401 | + | .valid = true, |
|
| 402 | + | }; |
|
| 403 | + | return patch; |
|
| 404 | + | } |
|
| 405 | + | ||
| 406 | + | /* Flip a branch opcode so the trampoline executes on the opposite outcome. */ |
|
| 407 | + | static iname_t branch_op_inverse(iname_t op) { |
|
| 408 | + | switch (op) { |
|
| 409 | + | case I_BEQ: |
|
| 410 | + | return I_BNE; |
|
| 411 | + | case I_BNE: |
|
| 412 | + | return I_BEQ; |
|
| 413 | + | case I_BLT: |
|
| 414 | + | return I_BGE; |
|
| 415 | + | case I_BGE: |
|
| 416 | + | return I_BLT; |
|
| 417 | + | case I_BLTU: |
|
| 418 | + | return I_BGEU; |
|
| 419 | + | case I_BGEU: |
|
| 420 | + | return I_BLTU; |
|
| 421 | + | default: |
|
| 422 | + | return 0; |
|
| 423 | + | } |
|
| 424 | + | } |
|
| 425 | + | ||
| 426 | + | /* Finalize the branch, rewriting to a long-range form when necessary. */ |
|
| 427 | + | static void branch_patch_apply(gen_t *g, branch_patch_t patch, usize target) { |
|
| 428 | + | if (!patch.valid) |
|
| 429 | + | return; |
|
| 430 | + | ||
| 431 | + | i32 imm = jump_offset(patch.pc, target); |
|
| 432 | + | if (is_branch_imm(imm)) { |
|
| 433 | + | g->instrs[patch.pc] = instr(patch.op, ZERO, patch.rs1, patch.rs2, imm); |
|
| 434 | + | g->instrs[patch.tramp_pc] = NOP; |
|
| 435 | + | return; |
|
| 436 | + | } |
|
| 437 | + | ||
| 438 | + | usize fallthrough = patch.tramp_pc + 1; |
|
| 439 | + | i32 skip_imm = jump_offset(patch.pc, fallthrough); |
|
| 440 | + | ||
| 441 | + | iname_t inv = branch_op_inverse(patch.op); |
|
| 442 | + | g->instrs[patch.pc] = instr(inv, ZERO, patch.rs1, patch.rs2, skip_imm); |
|
| 443 | + | ||
| 444 | + | i32 jmp_imm = jump_offset(patch.tramp_pc, target); |
|
| 445 | + | g->instrs[patch.tramp_pc] = JMP(jmp_imm); |
|
| 446 | + | } |
|
| 447 | + | ||
/* Reserve storage for a value of type `ty` using the type's own alignment.
 * Thin wrapper around reserve_aligned(); returns whatever it returns (used by
 * callers in this file as an FP-relative offset). */
i32 reserve(gen_t *g, type_t *ty) {
    return reserve_aligned(g, ty, ty->align);
}
|
| 451 | + | ||
| 452 | + | static void useval(gen_t *g, value_t val) { |
|
| 453 | + | if (val.loc == LOC_REG) { |
|
| 454 | + | usereg(g, val.as.reg); |
|
| 455 | + | } else if (val.loc == LOC_STACK) { |
|
| 456 | + | usereg(g, val.as.off.base); |
|
| 457 | + | } |
|
| 458 | + | } |
|
| 459 | + | ||
| 460 | + | static void freeval(gen_t *g, value_t val) { |
|
| 461 | + | if (val.loc == LOC_REG && val.temp) { |
|
| 462 | + | freereg(g, val.as.reg); |
|
| 463 | + | } |
|
| 464 | + | } |
|
| 465 | + | ||
| 466 | + | /******************************************************************************/ |
|
| 467 | + | ||
| 468 | + | /* Patch all break statements for a loop. */ |
|
| 469 | + | static void patch_break_stmts(gen_t *g) { |
|
| 470 | + | for (usize i = 0; i < g->fn.nbrkpatches; i++) { |
|
| 471 | + | ctpatch_t *p = &g->fn.brkpatches[i]; |
|
| 472 | + | if (!p->applied && p->loop == g->loop.current) { |
|
| 473 | + | /* Calculate jump offset to the loop end, and apply patch. */ |
|
| 474 | + | i32 offset = jump_offset(p->pc, g->loop.end); |
|
| 475 | + | g->instrs[p->pc] = JAL(ZERO, offset); |
|
| 476 | + | p->applied = true; |
|
| 477 | + | } |
|
| 478 | + | } |
|
| 479 | + | } |
|
| 480 | + | ||
| 481 | + | /******************************************************************************/ |
|
| 482 | + | ||
| 483 | + | /* Generate code for a node. */ |
|
| 484 | + | static void gen_node(gen_t *g, node_t *n) { |
|
| 485 | + | if (!n) |
|
| 486 | + | return; |
|
| 487 | + | ||
| 488 | + | if (!GENERATORS[n->cls]) |
|
| 489 | + | bail("unsupported node type '%s'", node_names[n->cls]); |
|
| 490 | + | ||
| 491 | + | /* Restore register allocation state between statements to avoid leaking */ |
|
| 492 | + | bool regs[RALLOC_NREGS] = { false }; |
|
| 493 | + | ||
| 494 | + | ralloc_save(&g->regs, regs); |
|
| 495 | + | GENERATORS[n->cls](g, n); |
|
| 496 | + | ralloc_restore(&g->regs, regs); |
|
| 497 | + | } |
|
| 498 | + | ||
/* System call (ecall): the call node carries five arguments. The first is
 * the syscall number (loaded into A7), the remaining four go to A0, A1, A2
 * and A3. All five argument expressions are evaluated in source order before
 * any of them is moved into its target register. */
static void gen_ecall(gen_t *g, node_t *n) {
    /* No length check: the span is indexed directly up to element 4. */
    node_t **cargs = SPAN(g, n->val.call.args);
    node_t  *num   = cargs[0];
    node_t  *arg0  = cargs[1];
    node_t  *arg1  = cargs[2];
    node_t  *arg2  = cargs[3];
    node_t  *arg3  = cargs[4];

    value_t numval  = gen_expr(g, num->val.call_arg.expr, false);
    value_t arg0val = gen_expr(g, arg0->val.call_arg.expr, false);
    value_t arg1val = gen_expr(g, arg1->val.call_arg.expr, false);
    value_t arg2val = gen_expr(g, arg2->val.call_arg.expr, false);
    value_t arg3val = gen_expr(g, arg3->val.call_arg.expr, false);

    /* Move the arguments to the appropriate registers. Load higher-numbered
     * argument registers first so we don't overwrite values that are still
     * needed for lower-numbered arguments (e.g. when the source value lives in
     * A0). */
    usereg(g, A7);
    emit_load_into(g, A7, numval); /* Syscall number is stored in A7 */

    usereg(g, A3);
    emit_load_into(g, A3, arg3val);

    usereg(g, A2);
    emit_load_into(g, A2, arg2val);

    usereg(g, A1);
    emit_load_into(g, A1, arg1val);

    usereg(g, A0);
    emit_load_into(g, A0, arg0val);

    emit(g, ECALL);

    /* NOTE(review): A0 is not released here, presumably because it carries
     * the syscall result back to the caller; confirm. */
    freereg(g, A3);
    freereg(g, A2);
    freereg(g, A1);
    freereg(g, A7);
}
|
| 540 | + | ||
| 541 | + | /* Emit an EBREAK instruction */ |
|
| 542 | + | static void gen_ebreak(gen_t *g, node_t *n) { |
|
| 543 | + | (void)n; |
|
| 544 | + | emit(g, EBREAK); |
|
| 545 | + | } |
|
| 546 | + | ||
| 547 | + | /* Generate panic statement */ |
|
| 548 | + | static void gen_panic(gen_t *g, node_t *n) { |
|
| 549 | + | (void)n; |
|
| 550 | + | emit(g, EBREAK); |
|
| 551 | + | } |
|
| 552 | + | ||
| 553 | + | static void gen_expr_stmt(gen_t *g, node_t *n) { |
|
| 554 | + | /* Generate the expression as a statement; result will be discarded. */ |
|
| 555 | + | value_t result = gen_expr(g, n->val.expr_stmt, false); |
|
| 556 | + | /* For non-void expressions, we free any allocated registers */ |
|
| 557 | + | if (result.loc == LOC_REG) { |
|
| 558 | + | freereg(g, result.as.reg); |
|
| 559 | + | } |
|
| 560 | + | } |
|
| 561 | + | ||
/* Generate conditional branch code.
 *
 * `cond` must be a comparison binop (==, !=, <, >, <=, >=); any other
 * operator aborts. Both operands are evaluated and loaded into registers, a
 * branch that is taken when the comparison is *false* is reserved, the block
 * `lbranch` is generated, and the branch is then patched to land right after
 * that block. Unsigned branch variants are chosen from the type of the left
 * operand, when it has one. */
static void gen_branch(gen_t *g, node_t *cond, node_t *lbranch) {
    binop_t op    = cond->val.binop.op;
    value_t lval  = gen_expr(g, cond->val.binop.left, false);
    value_t rval  = gen_expr(g, cond->val.binop.right, false);
    reg_t   left  = emit_load(g, lval);
    reg_t   right = emit_load(g, rval);

    iname_t branch_op   = I_BEQ;
    reg_t   rs1         = left;
    reg_t   rs2         = right;
    bool    is_unsigned = false;

    if (cond->val.binop.left->type) {
        is_unsigned = type_is_unsigned(cond->val.binop.left->type->cls);
    }

    /* Select the appropriate branch instruction based on the comparison
     * operator. Nb. we're branching if the condition is *false*, so we use the
     * opposite branch instruction. */
    switch (op) {
    case OP_EQ:
        branch_op = I_BNE;
        break;
    case OP_LT:
        /* !(l < r)  <=>  l >= r */
        branch_op = is_unsigned ? I_BGEU : I_BGE;
        break;
    case OP_GT:
        /* !(l > r)  <=>  r >= l, hence the swapped operands */
        branch_op = is_unsigned ? I_BGEU : I_BGE;
        rs1       = right;
        rs2       = left;
        break;
    case OP_LE:
        /* !(l <= r)  <=>  r < l, hence the swapped operands */
        branch_op = is_unsigned ? I_BLTU : I_BLT;
        rs1       = right;
        rs2       = left;
        break;
    case OP_GE:
        /* !(l >= r)  <=>  l < r */
        branch_op = is_unsigned ? I_BLTU : I_BLT;
        break;
    case OP_NE:
        /* For not equals, branch if they are equal. */
        branch_op = I_BEQ;
        break;
    case OP_AND:
    case OP_OR:
    case OP_ADD:
    case OP_SUB:
    case OP_DIV:
    case OP_MUL:
    case OP_MOD:
    case OP_BAND:
    case OP_BOR:
    case OP_XOR:
    case OP_SHL:
    case OP_SHR:
        /* Not a comparison: this function cannot branch on it. */
        abort();
    }

    branch_patch_t patch = branch_patch_make(g, branch_op, rs1, rs2);

    /* The register numbers are already recorded in the patch, so the
     * registers themselves can be released before the body is generated. */
    freereg(g, left);
    freereg(g, right);

    /* Generate code for the left (true) branch. */
    gen_block(g, lbranch);

    /* Patch the branch to jump past the left branch when false. */
    branch_patch_apply(g, patch, g->ninstrs);
}
|
| 632 | + | ||
| 633 | + | /* Generate code for an if/else condition with arbitrary condition and branches. |
|
| 634 | + | * This function is used both for regular if statements and for match cases. */ |
|
| 635 | + | static void gen_if_else( |
|
| 636 | + | gen_t *g, |
|
| 637 | + | value_t condition_val, /* Condition value to test */ |
|
| 638 | + | node_t *lbranch, /* Code to execute if condition is true */ |
|
| 639 | + | node_t *rbranch /* Code to execute if condition is false */ |
|
| 640 | + | ) { |
|
| 641 | + | /* Load the condition value into a register */ |
|
| 642 | + | reg_t condreg = emit_load(g, condition_val); |
|
| 643 | + | ||
| 644 | + | /* Emit a conditional branch: if condition is zero (false), |
|
| 645 | + | * jump past the left branch. */ |
|
| 646 | + | branch_patch_t lb_branch = branch_patch_make(g, I_BEQ, condreg, ZERO); |
|
| 647 | + | /* Nb. we free this register here even though the register _name_ is used |
|
| 648 | + | * lower, because it's only used for patching the instruction above. */ |
|
| 649 | + | freereg(g, condreg); |
|
| 650 | + | ||
| 651 | + | /* Generate code for the true branch. */ |
|
| 652 | + | gen_block(g, lbranch); |
|
| 653 | + | ||
| 654 | + | if (rbranch) { |
|
| 655 | + | /* If we have an else branch, emit jump to skip over it. */ |
|
| 656 | + | const usize lb_end = emit(g, NOP); |
|
| 657 | + | const usize rb_start = g->ninstrs; |
|
| 658 | + | ||
| 659 | + | /* Patch the branch instruction to jump to else. */ |
|
| 660 | + | branch_patch_apply(g, lb_branch, rb_start); |
|
| 661 | + | ||
| 662 | + | /* Generate code for the false branch. */ |
|
| 663 | + | gen_block(g, rbranch); |
|
| 664 | + | ||
| 665 | + | /* Patch the jump past else. */ |
|
| 666 | + | const usize rb_end = g->ninstrs; |
|
| 667 | + | g->instrs[lb_end] = JMP(jump_offset(lb_end, rb_end)); |
|
| 668 | + | } else { |
|
| 669 | + | /* No false branch, just patch the conditional branch to jump to the |
|
| 670 | + | * end. */ |
|
| 671 | + | const usize end = g->ninstrs; |
|
| 672 | + | branch_patch_apply(g, lb_branch, end); |
|
| 673 | + | } |
|
| 674 | + | } |
|
| 675 | + | ||
| 676 | + | /* Generate guard check for a match case. Updates ctrl->guard_branch if guard |
|
| 677 | + | * present. */ |
|
| 678 | + | static void gen_case_guard(gen_t *g, node_t *n, match_case_ctrl_t *ctrl) { |
|
| 679 | + | if (n->val.match_case.guard) { |
|
| 680 | + | value_t guard_val = gen_expr(g, n->val.match_case.guard, false); |
|
| 681 | + | reg_t guard_reg = emit_load(g, guard_val); |
|
| 682 | + | ctrl->guard_branch = branch_patch_make(g, I_BEQ, guard_reg, ZERO); |
|
| 683 | + | freereg(g, guard_reg); |
|
| 684 | + | } |
|
| 685 | + | } |
|
| 686 | + | ||
| 687 | + | /* Bind a pattern variable to a record field value. Allocates stack space for |
|
| 688 | + | * the variable if needed and copies the field value into it. |
|
| 689 | + | * For ref matches (variable type is pointer to field type), stores the address |
|
| 690 | + | * of the field instead of copying its value. */ |
|
| 691 | + | static void bind_var_to_field( |
|
| 692 | + | gen_t *g, value_t record_val, symbol_t *field_sym, symbol_t *var_sym |
|
| 693 | + | ) { |
|
| 694 | + | if (var_sym->e.var.val.loc == LOC_NONE) { |
|
| 695 | + | i32 off = reserve_aligned(g, var_sym->e.var.typ, var_sym->e.var.align); |
|
| 696 | + | var_sym->e.var.val = value_stack(OFFSET(FP, off), var_sym->e.var.typ); |
|
| 697 | + | } |
|
| 698 | + | value_t field_val = record_val; |
|
| 699 | + | field_val.type = field_sym->e.field.typ; |
|
| 700 | + | if (field_val.loc == LOC_STACK) |
|
| 701 | + | field_val.as.off.offset += field_sym->e.field.offset; |
|
| 702 | + | else if (field_val.loc == LOC_ADDR) |
|
| 703 | + | field_val.as.adr.offset += field_sym->e.field.offset; |
|
| 704 | + | else if (field_val.loc == LOC_REG) { |
|
| 705 | + | /* Register holds the address of the record. Convert to LOC_STACK |
|
| 706 | + | * so that the field offset is applied when loading. */ |
|
| 707 | + | reg_t base_reg = field_val.as.reg; |
|
| 708 | + | field_val.loc = LOC_STACK; |
|
| 709 | + | field_val.as.off.base = base_reg; |
|
| 710 | + | field_val.as.off.offset = field_sym->e.field.offset; |
|
| 711 | + | } |
|
| 712 | + | ||
| 713 | + | /* Check if this is a ref match (variable is pointer to field type) */ |
|
| 714 | + | type_t *var_typ = var_sym->e.var.typ; |
|
| 715 | + | if (var_typ->cls == TYPE_PTR && |
|
| 716 | + | var_typ->info.ptr.target == field_sym->e.field.typ) { |
|
| 717 | + | /* Store address of field instead of copying value */ |
|
| 718 | + | reg_t addr_reg = nextreg(g); |
|
| 719 | + | if (field_val.loc == LOC_STACK) { |
|
| 720 | + | emit_addr_offset( |
|
| 721 | + | g, addr_reg, field_val.as.off.base, field_val.as.off.offset |
|
| 722 | + | ); |
|
| 723 | + | } else if (field_val.loc == LOC_ADDR) { |
|
| 724 | + | emit_li( |
|
| 725 | + | g, addr_reg, field_val.as.adr.base + field_val.as.adr.offset |
|
| 726 | + | ); |
|
| 727 | + | } else if (field_val.loc == LOC_REG) { |
|
| 728 | + | emit( |
|
| 729 | + | g, ADDI(addr_reg, field_val.as.reg, field_sym->e.field.offset) |
|
| 730 | + | ); |
|
| 731 | + | } else { |
|
| 732 | + | bail("cannot take address of field for ref match"); |
|
| 733 | + | } |
|
| 734 | + | /* Store the address register into the variable's stack location */ |
|
| 735 | + | emit_regstore( |
|
| 736 | + | g, |
|
| 737 | + | addr_reg, |
|
| 738 | + | var_sym->e.var.val.as.off.base, |
|
| 739 | + | var_sym->e.var.val.as.off.offset, |
|
| 740 | + | var_sym->e.var.typ |
|
| 741 | + | ); |
|
| 742 | + | freereg(g, addr_reg); |
|
| 743 | + | } else { |
|
| 744 | + | emit_replace(g, var_sym->e.var.val, field_val); |
|
| 745 | + | } |
|
| 746 | + | } |
|
| 747 | + | ||
| 748 | + | /* Bind fields from a record value to pattern variables. Handles both |
|
| 749 | + | * tuple-style patterns like `S(x, y)` and labeled patterns like `T { x, y }`. |
|
| 750 | + | */ |
|
| 751 | + | static void gen_bind_record_fields( |
|
| 752 | + | gen_t *g, value_t record_val, node_t *pattern, type_t *record_type |
|
| 753 | + | ) { |
|
| 754 | + | if (pattern->cls == NODE_CALL) { |
|
| 755 | + | for (usize i = 0; i < pattern->val.call.args.len; i++) { |
|
| 756 | + | node_t *arg_node = SPAN(g, pattern->val.call.args)[i]; |
|
| 757 | + | node_t *arg = (arg_node->cls == NODE_CALL_ARG) |
|
| 758 | + | ? arg_node->val.call_arg.expr |
|
| 759 | + | : arg_node; |
|
| 760 | + | if (arg->cls == NODE_IDENT && arg->sym) { |
|
| 761 | + | bind_var_to_field( |
|
| 762 | + | g, record_val, record_type->info.srt.fields[i], arg->sym |
|
| 763 | + | ); |
|
| 764 | + | } |
|
| 765 | + | } |
|
| 766 | + | } else if (pattern->cls == NODE_RECORD_LIT) { |
|
| 767 | + | node_t **fields = |
|
| 768 | + | nodespan_ptrs(&g->mod->parser, pattern->val.record_lit.fields); |
|
| 769 | + | for (usize i = 0; i < pattern->val.record_lit.fields.len; i++) { |
|
| 770 | + | node_t *binding = fields[i]->val.record_lit_field.value; |
|
| 771 | + | if (binding->cls == NODE_IDENT && binding->sym) { |
|
| 772 | + | bind_var_to_field(g, record_val, fields[i]->sym, binding->sym); |
|
| 773 | + | } |
|
| 774 | + | } |
|
| 775 | + | } |
|
| 776 | + | } |
|
| 777 | + | ||
| 778 | + | static match_case_ctrl_t gen_match_case_union_payload( |
|
| 779 | + | gen_t *g, value_t match_val, node_t *n |
|
| 780 | + | ) { |
|
| 781 | + | match_case_ctrl_t ctrl = { |
|
| 782 | + | .skip_body = 0, |
|
| 783 | + | .guard_branch = branch_patch_invalid(), |
|
| 784 | + | }; |
|
| 785 | + | /* Array to store jumps to body when a pattern matches */ |
|
| 786 | + | branch_patch_t jumps[MAX_CASE_PATTERNS]; |
|
| 787 | + | usize njumps = 0; |
|
| 788 | + | ||
| 789 | + | /* union pattern matching - generate tag comparisons */ |
|
| 790 | + | node_t **patterns = |
|
| 791 | + | nodespan_ptrs(&g->mod->parser, n->val.match_case.patterns); |
|
| 792 | + | for (usize p = 0; p < n->val.match_case.patterns.len; p++) { |
|
| 793 | + | node_t *patt_node = patterns[p]; |
|
| 794 | + | node_t *callee = NULL; |
|
| 795 | + | ||
| 796 | + | if (patt_node->cls == NODE_CALL) { |
|
| 797 | + | callee = patt_node->val.call.callee; |
|
| 798 | + | } else if (patt_node->cls == NODE_RECORD_LIT) { |
|
| 799 | + | callee = patt_node->val.record_lit.type; |
|
| 800 | + | } else { |
|
| 801 | + | callee = patt_node; |
|
| 802 | + | } |
|
| 803 | + | ||
| 804 | + | /* Use the stored variant index */ |
|
| 805 | + | node_t *variant_ident = callee->val.access.rval; |
|
| 806 | + | usize variant_tag = variant_ident->sym->node->val.union_variant.value; |
|
| 807 | + | ||
| 808 | + | /* Generate tag comparison. |
|
| 809 | + | * For ref matching (pointer-to-union), the register holds an address |
|
| 810 | + | * we need to load from. */ |
|
| 811 | + | reg_t tag_reg; |
|
| 812 | + | if (match_val.loc == LOC_REG && match_val.type->cls == TYPE_PTR) { |
|
| 813 | + | /* Load tag byte from address in register */ |
|
| 814 | + | tag_reg = nextreg(g); |
|
| 815 | + | emit(g, LBU(tag_reg, match_val.as.reg, 0)); |
|
| 816 | + | } else { |
|
| 817 | + | value_t tag_val = match_val; |
|
| 818 | + | tag_val.type = g->types->type_u8; |
|
| 819 | + | tag_reg = emit_load(g, tag_val); |
|
| 820 | + | } |
|
| 821 | + | reg_t variant_idx_reg = nextreg(g); |
|
| 822 | + | emit(g, ADDI(variant_idx_reg, ZERO, variant_tag)); |
|
| 823 | + | jumps[njumps++] = branch_patch_make(g, I_BEQ, tag_reg, variant_idx_reg); |
|
| 824 | + | ||
| 825 | + | freereg(g, variant_idx_reg); |
|
| 826 | + | freereg(g, tag_reg); |
|
| 827 | + | } |
|
| 828 | + | ||
| 829 | + | /* If none of the patterns match, jump past the body */ |
|
| 830 | + | ctrl.skip_body = emit(g, NOP); /* Will be patched later */ |
|
| 831 | + | usize body_start = g->ninstrs; /* Body starts here */ |
|
| 832 | + | ||
| 833 | + | /* Patch all the pattern match jumps to point to the body start */ |
|
| 834 | + | for (usize p = 0; p < njumps; p++) { |
|
| 835 | + | branch_patch_apply(g, jumps[p], body_start); |
|
| 836 | + | } |
|
| 837 | + | /* Set up bound variable for payload binding */ |
|
| 838 | + | if (n->val.match_case.variable) { |
|
| 839 | + | /* If variable doesn't have a symbol, it's likely a placeholder, |
|
| 840 | + | * eg. `_`, so we don't bind anything. */ |
|
| 841 | + | if (n->val.match_case.variable->sym) { |
|
| 842 | + | bind_union_value(g, match_val, n->val.match_case.variable); |
|
| 843 | + | } |
|
| 844 | + | } |
|
| 845 | + | /* Handle record literal pattern field bindings */ |
|
| 846 | + | if (n->val.match_case.patterns.len == 1) { |
|
| 847 | + | node_t *patt = patterns[0]; |
|
| 848 | + | if (patt->cls == NODE_RECORD_LIT) { |
|
| 849 | + | node_t *callee = patt->val.record_lit.type; |
|
| 850 | + | node_t *variant_node = callee->val.access.rval; |
|
| 851 | + | type_t *payload_type = variant_node->sym->node->type; |
|
| 852 | + | ||
| 853 | + | /* Create a value pointing to the payload (after tag). |
|
| 854 | + | * When matching on a reference, match_val.type is a pointer; |
|
| 855 | + | * dereference to get the underlying union type. */ |
|
| 856 | + | type_t *union_type = match_val.type; |
|
| 857 | + | if (union_type->cls == TYPE_PTR) |
|
| 858 | + | union_type = union_type->info.ptr.target; |
|
| 859 | + | i32 val_off = tval_payload_offset(union_type); |
|
| 860 | + | value_t payload = match_val; |
|
| 861 | + | if (payload.loc == LOC_STACK) { |
|
| 862 | + | payload.as.off.offset += val_off; |
|
| 863 | + | } else if (payload.loc == LOC_ADDR) { |
|
| 864 | + | payload.as.adr.offset += val_off; |
|
| 865 | + | } else if (payload.loc == LOC_REG) { |
|
| 866 | + | /* Register contains union address; add offset to get payload */ |
|
| 867 | + | reg_t payload_reg = nextreg(g); |
|
| 868 | + | emit(g, ADDI(payload_reg, payload.as.reg, val_off)); |
|
| 869 | + | payload = value_reg(payload_reg, payload_type); |
|
| 870 | + | } |
|
| 871 | + | payload.type = payload_type; |
|
| 872 | + | ||
| 873 | + | gen_bind_record_fields(g, payload, patt, payload_type); |
|
| 874 | + | } |
|
| 875 | + | } |
|
| 876 | + | gen_case_guard(g, n, &ctrl); |
|
| 877 | + | return ctrl; |
|
| 878 | + | } |
|
| 879 | + | ||
| 880 | + | /* Generate code for a match case with a standalone record pattern. |
|
| 881 | + | * Record patterns always match (no tag comparison), so we just bind fields. */ |
|
| 882 | + | static match_case_ctrl_t gen_match_case_record( |
|
| 883 | + | gen_t *g, value_t match_val, node_t *n |
|
| 884 | + | ) { |
|
| 885 | + | match_case_ctrl_t ctrl = { 0, branch_patch_invalid() }; |
|
| 886 | + | node_t **patterns = |
|
| 887 | + | nodespan_ptrs(&g->mod->parser, n->val.match_case.patterns); |
|
| 888 | + | ||
| 889 | + | if (n->val.match_case.patterns.len >= 1) |
|
| 890 | + | gen_bind_record_fields(g, match_val, patterns[0], match_val.type); |
|
| 891 | + | ||
| 892 | + | gen_case_guard(g, n, &ctrl); |
|
| 893 | + | return ctrl; |
|
| 894 | + | } |
|
| 895 | + | ||
| 896 | + | static match_case_ctrl_t gen_match_case(gen_t *g, reg_t match_reg, node_t *n) { |
|
| 897 | + | match_case_ctrl_t ctrl = { |
|
| 898 | + | .skip_body = 0, |
|
| 899 | + | .guard_branch = branch_patch_invalid(), |
|
| 900 | + | }; |
|
| 901 | + | /* Array to store jumps to body when a pattern matches */ |
|
| 902 | + | branch_patch_t jumps[MAX_CASE_PATTERNS]; |
|
| 903 | + | usize njumps = 0; |
|
| 904 | + | ||
| 905 | + | /* Regular pattern matching (non-payload types) */ |
|
| 906 | + | node_t **patterns = |
|
| 907 | + | nodespan_ptrs(&g->mod->parser, n->val.match_case.patterns); |
|
| 908 | + | for (usize p = 0; p < n->val.match_case.patterns.len; p++) { |
|
| 909 | + | node_t *patt_node = patterns[p]; |
|
| 910 | + | value_t patt_val = gen_expr(g, patt_node, false); |
|
| 911 | + | reg_t patt_reg = emit_load(g, patt_val); |
|
| 912 | + | ||
| 913 | + | /* If this pattern matches, jump to the body |
|
| 914 | + | * (Will be patched later) */ |
|
| 915 | + | jumps[njumps++] = branch_patch_make(g, I_BEQ, match_reg, patt_reg); |
|
| 916 | + | freereg(g, patt_reg); |
|
| 917 | + | } |
|
| 918 | + | /* If none of the patterns match, jump past the body */ |
|
| 919 | + | ctrl.skip_body = emit(g, NOP); /* Will be patched later */ |
|
| 920 | + | usize body_start = g->ninstrs; /* Body starts here */ |
|
| 921 | + | ||
| 922 | + | /* Patch all the pattern match jumps to point to the body start */ |
|
| 923 | + | for (usize p = 0; p < njumps; p++) { |
|
| 924 | + | branch_patch_apply(g, jumps[p], body_start); |
|
| 925 | + | } |
|
| 926 | + | gen_case_guard(g, n, &ctrl); |
|
| 927 | + | return ctrl; |
|
| 928 | + | } |
|
| 929 | + | ||
| 930 | + | /* Generate code for a match statement by converting it to a series of |
|
| 931 | + | * equality comparisons */ |
|
| 932 | + | static void gen_match(gen_t *g, node_t *n) { |
|
| 933 | + | /* If there are no cases, nothing to do */ |
|
| 934 | + | if (n->val.match_stmt.cases.len == 0) |
|
| 935 | + | return; |
|
| 936 | + | ||
| 937 | + | /* Generate code for the match operand and load it into a register */ |
|
| 938 | + | value_t match_val = gen_expr(g, n->val.match_stmt.expr, false); |
|
| 939 | + | reg_t match_reg = emit_load(g, match_val); |
|
| 940 | + | ||
| 941 | + | /* Track jump locations to the end of the match */ |
|
| 942 | + | usize end_jumps[MAX_SWITCH_CASES]; |
|
| 943 | + | usize nend_jumps = 0; |
|
| 944 | + | ||
| 945 | + | /* Process each case from first to last */ |
|
| 946 | + | node_t **cases = nodespan_ptrs(&g->mod->parser, n->val.match_stmt.cases); |
|
| 947 | + | for (usize i = 0; i < n->val.match_stmt.cases.len; i++) { |
|
| 948 | + | node_t *cn = cases[i]; |
|
| 949 | + | ||
| 950 | + | if (!cn->val.match_case.patterns.len) { |
|
| 951 | + | /* Default/else case: generate block body */ |
|
| 952 | + | gen_node(g, cn->val.match_case.body); |
|
| 953 | + | break; |
|
| 954 | + | } |
|
| 955 | + | /* For cases with patterns, we need to: |
|
| 956 | + | * 1. Generate pattern tests with jumps to the body if matching |
|
| 957 | + | * 2. Jump to the next case if no patterns match |
|
| 958 | + | * 3. Generate the body |
|
| 959 | + | * 4. Jump to the end of the match after the body */ |
|
| 960 | + | type_t *match_type = n->val.match_stmt.expr->type; |
|
| 961 | + | match_case_ctrl_t ctrl; |
|
| 962 | + | ||
| 963 | + | /* Check if matching on a pointer to a union */ |
|
| 964 | + | type_t *union_type = match_type; |
|
| 965 | + | if (match_type->cls == TYPE_PTR && |
|
| 966 | + | type_is_union_with_payload(match_type->info.ptr.target)) { |
|
| 967 | + | union_type = match_type->info.ptr.target; |
|
| 968 | + | } |
|
| 969 | + | ||
| 970 | + | if (type_is_union_with_payload(union_type)) { |
|
| 971 | + | ctrl = gen_match_case_union_payload(g, match_val, cn); |
|
| 972 | + | } else if (union_type->cls == TYPE_RECORD) { |
|
| 973 | + | ctrl = gen_match_case_record(g, match_val, cn); |
|
| 974 | + | } else { |
|
| 975 | + | ctrl = gen_match_case(g, match_reg, cn); |
|
| 976 | + | } |
|
| 977 | + | /* Generate the case body */ |
|
| 978 | + | gen_node(g, cn->val.match_case.body); |
|
| 979 | + | /* Jump to end of the match after the body (patched later) */ |
|
| 980 | + | end_jumps[nend_jumps++] = emit(g, NOP); |
|
| 981 | + | /* Patch the jump over the body (skip_body=0 means no patching needed) |
|
| 982 | + | */ |
|
| 983 | + | if (ctrl.guard_branch.valid) { |
|
| 984 | + | branch_patch_apply(g, ctrl.guard_branch, g->ninstrs); |
|
| 985 | + | } |
|
| 986 | + | if (ctrl.skip_body) { |
|
| 987 | + | g->instrs[ctrl.skip_body] = |
|
| 988 | + | JMP(jump_offset(ctrl.skip_body, g->ninstrs)); |
|
| 989 | + | } |
|
| 990 | + | } |
|
| 991 | + | ||
| 992 | + | /* Patch all jumps to the end of the match */ |
|
| 993 | + | usize end = g->ninstrs; |
|
| 994 | + | for (usize i = 0; i < nend_jumps; i++) { |
|
| 995 | + | g->instrs[end_jumps[i]] = JMP(jump_offset(end_jumps[i], end)); |
|
| 996 | + | } |
|
| 997 | + | freeval(g, match_val); |
|
| 998 | + | } |
|
| 999 | + | ||
| 1000 | + | /* Generate code for an `if` statement. */ |
|
| 1001 | + | static void gen_if(gen_t *g, node_t *n) { |
|
| 1002 | + | node_t *cond = n->val.if_stmt.cond; |
|
| 1003 | + | node_t *lbranch = n->val.if_stmt.lbranch; |
|
| 1004 | + | node_t *rbranch = n->val.if_stmt.rbranch; |
|
| 1005 | + | ||
| 1006 | + | /* Special case for comparison operations. */ |
|
| 1007 | + | if (node_is_comp(cond)) { |
|
| 1008 | + | /* If there's no else branch, use the simple branch generation, |
|
| 1009 | + | * but only for primitive types that are compatible with BEQ or BNE. */ |
|
| 1010 | + | if (!rbranch && type_is_primitive(cond->val.binop.left->type)) { |
|
| 1011 | + | gen_branch(g, cond, lbranch); |
|
| 1012 | + | return; |
|
| 1013 | + | } |
|
| 1014 | + | } |
|
| 1015 | + | gen_if_else(g, gen_expr(g, cond, false), lbranch, rbranch); |
|
| 1016 | + | } |
|
| 1017 | + | ||
| 1018 | + | /* Generate code for an if expression */ |
|
| 1019 | + | static value_t gen_if_expr(gen_t *g, node_t *n) { |
|
| 1020 | + | /* Allocate space for the result value */ |
|
| 1021 | + | i32 result_off = reserve(g, n->type); |
|
| 1022 | + | value_t result_val = value_stack(OFFSET(FP, result_off), n->type); |
|
| 1023 | + | ||
| 1024 | + | /* Generate condition */ |
|
| 1025 | + | value_t cond_val = gen_expr(g, n->val.if_stmt.cond, false); |
|
| 1026 | + | reg_t cond_reg = emit_load(g, cond_val); |
|
| 1027 | + | ||
| 1028 | + | /* Branch to else if condition is false */ |
|
| 1029 | + | branch_patch_t else_branch = branch_patch_make(g, I_BEQ, cond_reg, ZERO); |
|
| 1030 | + | freereg(g, cond_reg); |
|
| 1031 | + | ||
| 1032 | + | /* Generate then branch and store result */ |
|
| 1033 | + | value_t then_val = gen_expr(g, n->val.if_stmt.lbranch, false); |
|
| 1034 | + | emit_store(g, then_val, result_val.as.off.base, result_val.as.off.offset); |
|
| 1035 | + | ||
| 1036 | + | /* Jump over else branch */ |
|
| 1037 | + | usize end_jump = emit(g, NOP); /* Placeholder for unconditional jump */ |
|
| 1038 | + | ||
| 1039 | + | /* Patch else branch jump */ |
|
| 1040 | + | usize else_start = g->ninstrs; |
|
| 1041 | + | branch_patch_apply(g, else_branch, else_start); |
|
| 1042 | + | ||
| 1043 | + | /* Generate else branch and store result */ |
|
| 1044 | + | value_t else_val = gen_expr(g, n->val.if_stmt.rbranch, false); |
|
| 1045 | + | emit_store(g, else_val, result_val.as.off.base, result_val.as.off.offset); |
|
| 1046 | + | ||
| 1047 | + | /* Patch end jump */ |
|
| 1048 | + | usize end = g->ninstrs; |
|
| 1049 | + | g->instrs[end_jump] = JMP(jump_offset(end_jump, end)); |
|
| 1050 | + | ||
| 1051 | + | return result_val; |
|
| 1052 | + | } |
|
| 1053 | + | ||
| 1054 | + | /* Generate code for an `if let` statement. |
|
| 1055 | + | * This checks if an optional value has content and binds it to a variable if |
|
| 1056 | + | * so. */ |
|
| 1057 | + | static void gen_if_let(gen_t *g, node_t *n) { |
|
| 1058 | + | /* Generate the optional expression */ |
|
| 1059 | + | value_t opt_val = gen_expr(g, n->val.if_let_stmt.expr, false); |
|
| 1060 | + | /* Load the tag to check if optional has a value */ |
|
| 1061 | + | reg_t tag_reg = tval_load_tag(g, opt_val); |
|
| 1062 | + | ||
| 1063 | + | /* Set up conditional branch: if `exists` is 0, skip the left branch */ |
|
| 1064 | + | branch_patch_t lb_branch = branch_patch_make(g, I_BEQ, tag_reg, ZERO); |
|
| 1065 | + | ||
| 1066 | + | /* Create and allocate the bound variable (unless it's a placeholder) */ |
|
| 1067 | + | if (n->val.if_let_stmt.var->cls != NODE_PLACEHOLDER) { |
|
| 1068 | + | symbol_t *val_sym = n->val.if_let_stmt.var->sym; |
|
| 1069 | + | i32 val_off = |
|
| 1070 | + | reserve_aligned(g, val_sym->e.var.typ, val_sym->e.var.align); |
|
| 1071 | + | val_sym->e.var.val = |
|
| 1072 | + | value_stack(OFFSET(FP, val_off), val_sym->e.var.typ); |
|
| 1073 | + | ||
| 1074 | + | /* Copy the value part from the optional to the local variable */ |
|
| 1075 | + | optval_copy_value(g, opt_val, val_sym->e.var.val); |
|
| 1076 | + | } |
|
| 1077 | + | ||
| 1078 | + | /* If there's a guard condition, evaluate it */ |
|
| 1079 | + | branch_patch_t guard_branch = branch_patch_invalid(); |
|
| 1080 | + | ||
| 1081 | + | if (n->val.if_let_stmt.guard) { |
|
| 1082 | + | value_t guard_val = gen_expr(g, n->val.if_let_stmt.guard, false); |
|
| 1083 | + | reg_t guard_reg = emit_load(g, guard_val); |
|
| 1084 | + | ||
| 1085 | + | /* If guard is false, jump to else branch */ |
|
| 1086 | + | guard_branch = |
|
| 1087 | + | branch_patch_make(g, I_BEQ, guard_reg, ZERO); /* Will patch later */ |
|
| 1088 | + | freereg(g, guard_reg); |
|
| 1089 | + | } |
|
| 1090 | + | ||
| 1091 | + | /* Generate code for the left branch */ |
|
| 1092 | + | gen_block(g, n->val.if_let_stmt.lbranch); |
|
| 1093 | + | ||
| 1094 | + | if (n->val.if_let_stmt.rbranch) { |
|
| 1095 | + | /* If we have an else branch, emit jump to skip over it */ |
|
| 1096 | + | const usize lb_end = emit(g, NOP); |
|
| 1097 | + | const usize rb_start = g->ninstrs; |
|
| 1098 | + | ||
| 1099 | + | /* Patch the branch instruction to jump to else in *none* case */ |
|
| 1100 | + | branch_patch_apply(g, lb_branch, rb_start); |
|
| 1101 | + | ||
| 1102 | + | /* Patch guard condition branch if it exists */ |
|
| 1103 | + | if (guard_branch.valid) { |
|
| 1104 | + | branch_patch_apply(g, guard_branch, rb_start); |
|
| 1105 | + | } |
|
| 1106 | + | freereg(g, tag_reg); |
|
| 1107 | + | ||
| 1108 | + | /* Generate code for the else branch */ |
|
| 1109 | + | gen_block(g, n->val.if_let_stmt.rbranch); |
|
| 1110 | + | ||
| 1111 | + | /* Patch the jump instruction to skip over the else branch */ |
|
| 1112 | + | usize rb_end = g->ninstrs; |
|
| 1113 | + | g->instrs[lb_end] = JMP(jump_offset(lb_end, rb_end)); |
|
| 1114 | + | } else { |
|
| 1115 | + | /* No else branch, just patch the branch to skip the then branch */ |
|
| 1116 | + | usize lb_end = g->ninstrs; |
|
| 1117 | + | branch_patch_apply(g, lb_branch, lb_end); |
|
| 1118 | + | ||
| 1119 | + | /* Patch guard condition branch if it exists */ |
|
| 1120 | + | if (guard_branch.valid) { |
|
| 1121 | + | branch_patch_apply(g, guard_branch, lb_end); |
|
| 1122 | + | } |
|
| 1123 | + | freereg(g, tag_reg); |
|
| 1124 | + | } |
|
| 1125 | + | } |
|
| 1126 | + | ||
| 1127 | + | /* Generate code for a forever loop. */ |
|
| 1128 | + | static void gen_loop(gen_t *g, node_t *n) { |
|
| 1129 | + | /* Save the outer loop context and setup new context with a new loop id. */ |
|
| 1130 | + | loop_t outer = g->loop; |
|
| 1131 | + | g->loop.current = n; |
|
| 1132 | + | g->loop.start = g->ninstrs; |
|
| 1133 | + | ||
| 1134 | + | /* Generate code for the loop body. */ |
|
| 1135 | + | gen_block(g, n->val.loop_stmt.body); |
|
| 1136 | + | /* Jump back to the beginning of the loop. */ |
|
| 1137 | + | emit_jump(g, g->loop.start); |
|
| 1138 | + | ||
| 1139 | + | /* Mark this position as the loop end for break statements */ |
|
| 1140 | + | g->loop.end = g->ninstrs; |
|
| 1141 | + | patch_break_stmts(g); |
|
| 1142 | + | g->loop = outer; |
|
| 1143 | + | } |
|
| 1144 | + | ||
| 1145 | + | /* Generate code for a break statement. */ |
|
| 1146 | + | static void gen_break(gen_t *g, node_t *n) { |
|
| 1147 | + | (void)n; |
|
| 1148 | + | ||
| 1149 | + | if (g->loop.current->cls != NODE_LOOP) { |
|
| 1150 | + | bail("`break` statement outside of loop"); |
|
| 1151 | + | } |
|
| 1152 | + | /* Instead of calculating the jump offset now, emit a placeholder |
|
| 1153 | + | * instruction that will be patched when we know where the loop ends. */ |
|
| 1154 | + | usize offset = emit(g, NOP); |
|
| 1155 | + | ||
| 1156 | + | /* Record this location for patching. */ |
|
| 1157 | + | g->fn.brkpatches[g->fn.nbrkpatches++] = (ctpatch_t){ |
|
| 1158 | + | .pc = offset, |
|
| 1159 | + | .loop = g->loop.current, |
|
| 1160 | + | .applied = false, |
|
| 1161 | + | }; |
|
| 1162 | + | } |
|
| 1163 | + | ||
| 1164 | + | static void gen_assign(gen_t *g, node_t *n) { |
|
| 1165 | + | node_t *lval = n->val.assign.lval; |
|
| 1166 | + | node_t *rval = n->val.assign.rval; |
|
| 1167 | + | ||
| 1168 | + | switch (lval->cls) { |
|
| 1169 | + | case NODE_IDENT: { /* Handle normal variable assignment. */ |
|
| 1170 | + | symbol_t *sym = lval->sym; |
|
| 1171 | + | ||
| 1172 | + | value_t left = sym->e.var.val; |
|
| 1173 | + | value_t right = gen_expr(g, rval, false); |
|
| 1174 | + | ||
| 1175 | + | /* Nb. frees the right value if it's in a register. */ |
|
| 1176 | + | emit_replace(g, left, right); |
|
| 1177 | + | break; |
|
| 1178 | + | } |
|
| 1179 | + | case NODE_ACCESS: { /* Handle record field assignment (e.g., x.y = 1). */ |
|
| 1180 | + | value_t left = gen_expr(g, lval, true); |
|
| 1181 | + | value_t right = gen_expr(g, rval, false); |
|
| 1182 | + | ||
| 1183 | + | /* Replace the field value with the right-hand side */ |
|
| 1184 | + | emit_replace(g, left, right); |
|
| 1185 | + | break; |
|
| 1186 | + | } |
|
| 1187 | + | case NODE_ARRAY_INDEX: { /* Array index assignment (e.g. `arr[0] = 1`). */ |
|
| 1188 | + | value_t left = gen_array_index(g, lval, true); |
|
| 1189 | + | value_t right = gen_expr(g, rval, false); |
|
| 1190 | + | /* Replace the array element value with the right-hand side. */ |
|
| 1191 | + | emit_replace(g, left, right); |
|
| 1192 | + | /* Free the address register from array indexing */ |
|
| 1193 | + | if (left.loc == LOC_STACK) { |
|
| 1194 | + | freereg(g, left.as.off.base); |
|
| 1195 | + | } |
|
| 1196 | + | break; |
|
| 1197 | + | } |
|
| 1198 | + | case NODE_UNOP: { /* Handle pointer dereference assignment */ |
|
| 1199 | + | if (lval->val.unop.op != OP_DEREF) { |
|
| 1200 | + | bail("unsupported unary operator in assignment target"); |
|
| 1201 | + | } |
|
| 1202 | + | value_t ptr_val = gen_expr(g, lval->val.unop.expr, true); |
|
| 1203 | + | value_t right = gen_expr(g, rval, false); |
|
| 1204 | + | /* `gen_deref` expects an lvalue when the pointer itself is the storage |
|
| 1205 | + | * we want to mutate (e.g., `*ptr = ...`). */ |
|
| 1206 | + | value_t left = gen_deref(g, lval, ptr_val, true); |
|
| 1207 | + | ||
| 1208 | + | emit_replace(g, left, right); |
|
| 1209 | + | break; |
|
| 1210 | + | } |
|
| 1211 | + | default: |
|
| 1212 | + | bail("unsupported assignment target %s", node_names[lval->cls]); |
|
| 1213 | + | } |
|
| 1214 | + | } |
|
| 1215 | + | ||
| 1216 | + | static void gen_return(gen_t *g, node_t *n) { |
|
| 1217 | + | type_t *ret_typ = g->fn.current->node->type->info.fun.ret; |
|
| 1218 | + | node_t *value = n->val.return_stmt.value; |
|
| 1219 | + | /* If there's a return value, evaluate the expression. |
|
| 1220 | + | * Then, store the expression, in the return register A0, |
|
| 1221 | + | * according to the RISC-V calling conventions. */ |
|
| 1222 | + | if (value) { |
|
| 1223 | + | value_t val = gen_expr(g, value, false); |
|
| 1224 | + | ||
| 1225 | + | if (ret_typ->cls == TYPE_RESULT) { |
|
| 1226 | + | value_t dest; |
|
| 1227 | + | if (type_is_passed_by_ref(ret_typ)) { |
|
| 1228 | + | usereg(g, A0); |
|
| 1229 | + | dest = value_stack(OFFSET(A0, 0), ret_typ); |
|
| 1230 | + | } else { |
|
| 1231 | + | dest = value_reg(A0, ret_typ); |
|
| 1232 | + | } |
|
| 1233 | + | /* Returns are always for the "success" case. */ |
|
| 1234 | + | emit_result_store_success(g, dest, val); |
|
| 1235 | + | } else if (ret_typ->cls == TYPE_OPT && |
|
| 1236 | + | type_coercible(val.type, ret_typ->info.opt.elem)) { |
|
| 1237 | + | /* Wrap value in an optional */ |
|
| 1238 | + | usereg(g, A0); |
|
| 1239 | + | tval_store(g, value_stack(OFFSET(A0, 0), ret_typ), val, 1); |
|
| 1240 | + | } else if (ret_typ->cls == TYPE_OPT && val.type->cls == TYPE_OPT) { |
|
| 1241 | + | /* Value is already optional, copy it */ |
|
| 1242 | + | usereg(g, A0); |
|
| 1243 | + | emit_replace(g, value_stack(OFFSET(A0, 0), ret_typ), val); |
|
| 1244 | + | } else if (type_is_passed_by_ref(val.type)) { |
|
| 1245 | + | /* Aggregate returns go through the hidden sret pointer. */ |
|
| 1246 | + | usereg(g, A0); |
|
| 1247 | + | emit_replace(g, value_stack(OFFSET(A0, 0), val.type), val); |
|
| 1248 | + | } else { |
|
| 1249 | + | emit_load_into(g, A0, val); |
|
| 1250 | + | } |
|
| 1251 | + | freeval(g, val); |
|
| 1252 | + | } else { |
|
| 1253 | + | if (ret_typ->cls == TYPE_RESULT) { |
|
| 1254 | + | value_t dest; |
|
| 1255 | + | if (type_is_passed_by_ref(ret_typ)) { |
|
| 1256 | + | usereg(g, A0); |
|
| 1257 | + | dest = value_stack(OFFSET(A0, 0), ret_typ); |
|
| 1258 | + | } else { |
|
| 1259 | + | dest = value_reg(A0, ret_typ); |
|
| 1260 | + | } |
|
| 1261 | + | emit_result_store_success(g, dest, value_none()); |
|
| 1262 | + | } else { |
|
| 1263 | + | /* If there's no return value, we just store zero in A0. */ |
|
| 1264 | + | emit_load_into( |
|
| 1265 | + | g, A0, value_imm((imm_t){ .i = 0 }, g->types->type_i32) |
|
| 1266 | + | ); |
|
| 1267 | + | } |
|
| 1268 | + | } |
|
| 1269 | + | ||
| 1270 | + | /* Instead of returning directly, emit a placeholder jump to the function |
|
| 1271 | + | * epilogue that will be patched later. This avoids duplicating epilogue |
|
| 1272 | + | * code for each return point. */ |
|
| 1273 | + | usize pc = emit(g, NOP); |
|
| 1274 | + | ||
| 1275 | + | if (g->fn.nretpatches >= MAX_RET_PATCHES) |
|
| 1276 | + | bail("too many return statements in function"); |
|
| 1277 | + | ||
| 1278 | + | /* Record this location for patching */ |
|
| 1279 | + | g->fn.retpatches[g->fn.nretpatches++] = (ctpatch_t){ |
|
| 1280 | + | .pc = pc, |
|
| 1281 | + | .applied = false, |
|
| 1282 | + | }; |
|
| 1283 | + | } |
|
| 1284 | + | ||
| 1285 | + | /* Emit the control flow for `throw` |
|
| 1286 | + | * |
|
| 1287 | + | * 1. Evaluate the error expression |
|
| 1288 | + | * 2. Lay it out in the caller-visible result slot (A0 or *A0) |
|
| 1289 | + | * 3. Queue a jump to the epilogue so every throw shares the same return path */ |
|
| 1290 | + | static void gen_throw(gen_t *g, node_t *n) { |
|
| 1291 | + | type_t *fn_ret = g->fn.current->node->type->info.fun.ret; |
|
| 1292 | + | ||
| 1293 | + | value_t err_val = gen_expr(g, n->val.throw_stmt.expr, false); |
|
| 1294 | + | value_t dest; |
|
| 1295 | + | ||
| 1296 | + | if (type_is_passed_by_ref(fn_ret)) { |
|
| 1297 | + | usereg(g, A0); |
|
| 1298 | + | dest = value_stack(OFFSET(A0, 0), fn_ret); |
|
| 1299 | + | } else { |
|
| 1300 | + | dest = value_reg(A0, fn_ret); |
|
| 1301 | + | } |
|
| 1302 | + | emit_result_store_error(g, dest, err_val); |
|
| 1303 | + | freeval(g, err_val); |
|
| 1304 | + | ||
| 1305 | + | /* Jump to function end (patch) */ |
|
| 1306 | + | usize pc = emit(g, NOP); |
|
| 1307 | + | ||
| 1308 | + | if (g->fn.nretpatches >= MAX_RET_PATCHES) |
|
| 1309 | + | bail("too many return statements in function"); |
|
| 1310 | + | ||
| 1311 | + | /* Patch to jump to function epilogue */ |
|
| 1312 | + | g->fn.retpatches[g->fn.nretpatches++] = (ctpatch_t){ |
|
| 1313 | + | .pc = pc, |
|
| 1314 | + | .applied = false, |
|
| 1315 | + | }; |
|
| 1316 | + | } |
|
| 1317 | + | ||
| 1318 | + | /* Emit `try` |
|
| 1319 | + | * |
|
| 1320 | + | * 1. Evaluate the expression result |
|
| 1321 | + | * 2. Load its tag and branch past the error path when the tag is zero |
|
| 1322 | + | * 3. On error, normalize the tag/value into the function result slot and |
|
| 1323 | + | * enqueue a jump to the epilogue (mirroring an early return) |
|
| 1324 | + | * 4. On success, expose the payload location for the caller |
|
| 1325 | + | * |
|
| 1326 | + | * With catch block: |
|
| 1327 | + | * 1. Evaluate the expression result |
|
| 1328 | + | * 2. Load its tag and branch based on success/error |
|
| 1329 | + | * 3. On error: execute catch block (must diverge or return void) |
|
| 1330 | + | * 4. On success: use payload value |
|
| 1331 | + | */ |
|
| 1332 | + | static value_t gen_try(gen_t *g, node_t *n) { |
|
| 1333 | + | /* 1. */ |
|
| 1334 | + | value_t res_val = gen_expr(g, n->val.try_expr.expr, false); |
|
| 1335 | + | tval_t res = tval_from_val(g, res_val); |
|
| 1336 | + | ||
| 1337 | + | /* Inspect the tag to determine whether the result is success or error. */ |
|
| 1338 | + | reg_t tag = nextreg(g); |
|
| 1339 | + | emit_regload( |
|
| 1340 | + | g, tag, res.tag.as.off.base, res.tag.as.off.offset, g->types->type_u8 |
|
| 1341 | + | ); |
|
| 1342 | + | type_t *payload = res_val.type->info.res.payload; |
|
| 1343 | + | type_t *result_type = n->type ? n->type : payload; |
|
| 1344 | + | ||
| 1345 | + | /* Handle `try?` expressions */ |
|
| 1346 | + | if (n->val.try_expr.optional) { |
|
| 1347 | + | /* Allocate stack space for the optional result. */ |
|
| 1348 | + | i32 result_offset = reserve(g, result_type); |
|
| 1349 | + | value_t result = value_stack(OFFSET(FP, result_offset), result_type); |
|
| 1350 | + | ||
| 1351 | + | /* Branch over the error path when the tag is zero (success). */ |
|
| 1352 | + | branch_patch_t success_branch = branch_patch_make(g, I_BEQ, tag, ZERO); |
|
| 1353 | + | freereg(g, tag); |
|
| 1354 | + | ||
| 1355 | + | /* Error path: store nil (tag = 0). */ |
|
| 1356 | + | tval_store(g, result, (value_t){ 0 }, 0); |
|
| 1357 | + | ||
| 1358 | + | /* Jump over success path. */ |
|
| 1359 | + | usize end_patch = emit(g, JMP(0)); |
|
| 1360 | + | ||
| 1361 | + | /* Success path: store Some(payload) (tag = 1). */ |
|
| 1362 | + | branch_patch_apply(g, success_branch, g->ninstrs); |
|
| 1363 | + | ||
| 1364 | + | value_t payload_val = res.val; |
|
| 1365 | + | payload_val.type = payload; |
|
| 1366 | + | tval_store(g, result, payload_val, 1); |
|
| 1367 | + | ||
| 1368 | + | /* End: both paths converge here. */ |
|
| 1369 | + | g->instrs[end_patch] = JMP(jump_offset(end_patch, g->ninstrs)); |
|
| 1370 | + | ||
| 1371 | + | return result; |
|
| 1372 | + | } |
|
| 1373 | + | ||
| 1374 | + | /* Handle catch block */ |
|
| 1375 | + | if (n->val.try_expr.catch_expr) { |
|
| 1376 | + | node_t *catch_node = n->val.try_expr.catch_expr; |
|
| 1377 | + | ||
| 1378 | + | node_t *catch_binding = catch_node->val.catch_clause.binding; |
|
| 1379 | + | node_t *catch_body = catch_node->val.catch_clause.body; |
|
| 1380 | + | ||
| 1381 | + | /* Branch over the error path when the tag is zero (success). */ |
|
| 1382 | + | branch_patch_t success_branch = branch_patch_make(g, I_BEQ, tag, ZERO); |
|
| 1383 | + | freereg(g, tag); |
|
| 1384 | + | ||
| 1385 | + | /* If there's a binding, store the error value to the variable. */ |
|
| 1386 | + | if (catch_binding && catch_binding->sym) { |
|
| 1387 | + | symbol_t *err_sym = catch_binding->sym; |
|
| 1388 | + | type_t *err_type = res_val.type->info.res.err; |
|
| 1389 | + | i32 err_off = reserve_aligned(g, err_type, err_sym->e.var.align); |
|
| 1390 | + | err_sym->e.var.val = value_stack(OFFSET(FP, err_off), err_type); |
|
| 1391 | + | ||
| 1392 | + | /* Create a value pointing to the error slot (same as payload). */ |
|
| 1393 | + | value_t err_slot = res.val; |
|
| 1394 | + | err_slot.type = err_type; |
|
| 1395 | + | ||
| 1396 | + | /* Copy the error to the bound variable. */ |
|
| 1397 | + | emit_replace(g, err_sym->e.var.val, err_slot); |
|
| 1398 | + | } |
|
| 1399 | + | gen_block(g, catch_body); |
|
| 1400 | + | branch_patch_apply(g, success_branch, g->ninstrs); |
|
| 1401 | + | ||
| 1402 | + | if (catch_body->type && catch_body->type->cls == TYPE_NEVER) { |
|
| 1403 | + | value_t payload_val = res.val; |
|
| 1404 | + | payload_val.type = payload; |
|
| 1405 | + | return payload_val; |
|
| 1406 | + | } |
|
| 1407 | + | return value_none(); |
|
| 1408 | + | } |
|
| 1409 | + | ||
| 1410 | + | /* Branch over the error path when the tag is zero (success). */ |
|
| 1411 | + | branch_patch_t success_branch = branch_patch_make(g, I_BEQ, tag, ZERO); |
|
| 1412 | + | if (n->val.try_expr.panic) { |
|
| 1413 | + | emit(g, EBREAK); |
|
| 1414 | + | branch_patch_apply(g, success_branch, g->ninstrs); |
|
| 1415 | + | freereg(g, tag); |
|
| 1416 | + | ||
| 1417 | + | if (n->val.try_expr.handlers.len > 0) |
|
| 1418 | + | bail("catch clauses not supported in code generation"); |
|
| 1419 | + | ||
| 1420 | + | if (!payload->size) { |
|
| 1421 | + | return value_none(); |
|
| 1422 | + | } |
|
| 1423 | + | value_t result = res.val; |
|
| 1424 | + | result.type = payload; |
|
| 1425 | + | ||
| 1426 | + | return result; |
|
| 1427 | + | } |
|
| 1428 | + | ||
| 1429 | + | type_t *fn_ret = g->fn.current->node->type->info.fun.ret; |
|
| 1430 | + | ||
| 1431 | + | /* Prepare the function result slot so we can store an error in-place. */ |
|
| 1432 | + | value_t dest; |
|
| 1433 | + | if (type_is_passed_by_ref(fn_ret)) { |
|
| 1434 | + | usereg(g, A0); |
|
| 1435 | + | dest = value_stack(OFFSET(A0, 0), fn_ret); |
|
| 1436 | + | } else { |
|
| 1437 | + | dest = value_reg(A0, fn_ret); |
|
| 1438 | + | } |
|
| 1439 | + | /* Copy the error payload into the function result slot. */ |
|
| 1440 | + | value_t err_slot = res.val; |
|
| 1441 | + | err_slot.type = res_val.type->info.res.err; |
|
| 1442 | + | emit_result_store_error(g, dest, err_slot); |
|
| 1443 | + | ||
| 1444 | + | usize ret_pc = emit(g, NOP); |
|
| 1445 | + | ||
| 1446 | + | if (g->fn.nretpatches >= MAX_RET_PATCHES) |
|
| 1447 | + | bail("too many return statements in function"); |
|
| 1448 | + | ||
| 1449 | + | g->fn.retpatches[g->fn.nretpatches++] = (ctpatch_t){ |
|
| 1450 | + | .pc = ret_pc, |
|
| 1451 | + | .applied = false, |
|
| 1452 | + | }; |
|
| 1453 | + | branch_patch_apply(g, success_branch, g->ninstrs); |
|
| 1454 | + | ||
| 1455 | + | freereg(g, tag); |
|
| 1456 | + | ||
| 1457 | + | if (n->val.try_expr.handlers.len > 0) |
|
| 1458 | + | bail("catch clauses not supported in code generation"); |
|
| 1459 | + | ||
| 1460 | + | if (!payload->size) { |
|
| 1461 | + | return value_none(); |
|
| 1462 | + | } |
|
| 1463 | + | value_t result = res.val; |
|
| 1464 | + | result.type = payload; /* Unwrap payload */ |
|
| 1465 | + | ||
| 1466 | + | return result; |
|
| 1467 | + | } |
|
| 1468 | + | ||
/* Generate code for a binary expression node.
 *
 * Both operands are evaluated with `gen_expr` and loaded into registers.
 * Most operators compute their result in place in the left operand's
 * register; equality on optional or non-primitive operands goes through
 * `emit_memequal` and writes to a freshly allocated result register.
 *
 * Returns a register value of type `n->type`, or, when `n->type` is an
 * optional, a stack value holding the result wrapped with tag = 1. The
 * `nil` comparison fast path returns early with its own result register. */
static value_t gen_binop(gen_t *g, node_t *n) {
    value_t lval = gen_expr(g, n->val.binop.left, false);
    reg_t left = emit_load(g, lval);

    /* Ensure generation for the rval does not overwrite the lval. */
    usereg(g, left);

    value_t rval = gen_expr(g, n->val.binop.right, false);
    reg_t right = emit_load(g, rval);
    /* By default the result is computed in place in the left register. */
    reg_t result = left;

    switch (n->val.binop.op) {
    case OP_ADD:
        /* Integer operands use the word-sized (`*W`) instruction form. */
        if (type_is_int(lval.type->cls)) {
            emit(g, ADDW(left, left, right));
        } else {
            emit(g, ADD(left, left, right));
        }
        break;
    case OP_SUB:
        if (type_is_int(lval.type->cls)) {
            emit(g, SUBW(left, left, right));
        } else {
            emit(g, SUB(left, left, right));
        }
        break;
    case OP_MUL:
        if (type_is_int(lval.type->cls)) {
            emit(g, MULW(left, left, right));
        } else {
            emit(g, MUL(left, left, right));
        }
        break;
    case OP_DIV:
        /* Check for division by zero (node is already set by gen_node).
         * The branch skips the EBREAK when the divisor is non-zero. */
        emit(g, BNE(right, ZERO, INSTR_SIZE * 2));
        emit(g, EBREAK);
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, DIVUW(left, left, right));
        } else {
            emit(g, DIVW(left, left, right));
        }
        break;
    case OP_MOD:
        if (type_is_int(lval.type->cls)) {
            /* Check for division by zero (node is already set by gen_node).
             * The branch skips the EBREAK when the divisor is non-zero. */
            emit(g, BNE(right, ZERO, INSTR_SIZE * 2));
            emit(g, EBREAK);
            if (type_is_unsigned(lval.type->cls)) {
                emit(g, REMUW(left, left, right));
            } else {
                emit(g, REMW(left, left, right));
            }
        } else {
            bail("modulo operator is only supported for integers");
        }
        break;
    case OP_EQ:
    case OP_NE: {
        /* `!=` is generated as `==` followed by an inversion. */
        bool invert = (n->val.binop.op == OP_NE);
        bool opt_left = (lval.type->cls == TYPE_OPT);
        bool opt_right = (rval.type->cls == TYPE_OPT);
        bool left_nil = (n->val.binop.left->cls == NODE_NIL);
        bool right_nil = (n->val.binop.right->cls == NODE_NIL);

        /* Fast-path for comparisons with `nil`. */
        if (opt_left && opt_right && (left_nil || right_nil)) {
            if (left_nil && right_nil) {
                /* `nil == nil` is a constant; neither operand is needed. */
                freereg(g, left);
                freereg(g, right);

                reg_t result_reg = nextreg(g);
                emit_li(g, result_reg, invert ? 0 : 1);

                return value_reg(result_reg, n->type);
            }
            reg_t opt_reg = left_nil ? right : left;
            reg_t nil_reg = left_nil ? left : right;

            freereg(g, nil_reg);

            /* Load the byte at offset 0 of the optional (its tag) and turn
             * it into `tag == 0`, i.e. 1 when the optional is nil. */
            reg_t tag_reg = nextreg(g);
            emit(g, LBU(tag_reg, opt_reg, 0));
            emit(g, SLTIU(tag_reg, tag_reg, 1));

            if (invert)
                emit(g, XORI(tag_reg, tag_reg, 1));

            freereg(g, opt_reg);

            return value_reg(tag_reg, n->type);
        }

        if (opt_left != opt_right) {
            /* Exactly one side is optional: wrap the plain side into the
             * optional type, reload it into its register, and compare the
             * two optionals with `emit_memequal`. */
            type_t *opt_type = opt_left ? lval.type : rval.type;
            value_t value_expr = opt_left ? rval : lval;
            value_t wrapped = optval_from_value(g, opt_type, value_expr);
            reg_t target_reg = opt_left ? right : left;

            emit_load_into(g, target_reg, wrapped);
            result = nextreg(g);
            emit_memequal(g, left, right, opt_type, result);

            if (invert)
                emit(g, XORI(result, result, 1));
        } else if (type_is_primitive(lval.type)) {
            if (invert) {
                /* XOR will be non-zero if values differ. */
                emit(g, XOR(left, left, right));
                /* Set to 1 if result is non-zero (different). */
                emit(g, SLTU(left, ZERO, left));
            } else {
                /* Emits `result = left - right` */
                if (type_is_int(lval.type->cls)) {
                    emit(g, SUBW(left, left, right));
                } else {
                    emit(g, SUB(left, left, right));
                }
                /* Emits `result = (result < 1) ? 1 : 0` */
                emit(g, SLTIU(left, left, 1));
            }
        } else {
            /* Non-primitive operands are compared via `emit_memequal`. */
            result = nextreg(g);
            emit_memequal(g, left, right, lval.type, result);
            if (invert)
                emit(g, XORI(result, result, 1));
        }
        break;
    }
    case OP_LT:
        /* Emits `result = (left < right) ? 1 : 0` */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, left, right));
        } else {
            emit(g, SLT(left, left, right));
        }
        break;
    case OP_GT:
        /* Emits `result = (right < left) ? 1 : 0` */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, right, left));
        } else {
            emit(g, SLT(left, right, left));
        }
        break;
    case OP_LE:
        /* For `x <= y`, we can compute `!(x > y)`, which is `!(y < x)`. */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, right, left));
        } else {
            emit(g, SLT(left, right, left));
        }
        emit(g, XORI(left, left, 1));
        break;
    case OP_GE:
        /* For `x >= y`, we can compute `!(x < y)`. */
        if (type_is_unsigned(lval.type->cls)) {
            emit(g, SLTU(left, left, right));
        } else {
            emit(g, SLT(left, left, right));
        }
        emit(g, XORI(left, left, 1));
        break;
    case OP_AND:
        /* Logical AND; both values must be 1 for the result to be 1. */
        emit(g, AND(left, left, right));
        break;
    case OP_OR:
        /* Logical OR; if either value is 1, the result is 1. */
        emit(g, OR(left, left, right));
        break;
    case OP_BAND:
        /* Bitwise AND */
        emit(g, AND(left, left, right));
        break;
    case OP_BOR:
        /* Bitwise OR */
        emit(g, OR(left, left, right));
        break;
    case OP_XOR:
        /* Bitwise XOR */
        emit(g, XOR(left, left, right));
        break;
    case OP_SHL:
        /* Left shift */
        if (type_is_int(lval.type->cls)) {
            emit(g, SLLW(left, left, right));
        } else {
            emit(g, SLL(left, left, right));
        }
        break;
    case OP_SHR:
        /* Right shift.
         * NOTE(review): the logical form (SRLW/SRL) is used for every
         * operand type, including signed integers -- confirm that this is
         * the intended semantics of `>>`. */
        if (type_is_int(lval.type->cls)) {
            emit(g, SRLW(left, left, right));
        } else {
            emit(g, SRL(left, left, right));
        }
        break;
    }
    /* Check if result needs to be coerced to optional type */
    if (n->type->cls == TYPE_OPT) {
        i32 offset = reserve(g, n->type);
        value_t opt_val = value_stack(OFFSET(FP, offset), n->type);
        value_t result_val = value_reg(result, n->type->info.opt.elem);

        /* Store the result as a present optional (tag = 1). */
        tval_store(g, opt_val, result_val, 1);
        lval = opt_val;

        /* Can free all registers since result is stored on stack.
         * NOTE(review): when `result == left` the same register is freed
         * twice here -- presumably harmless; confirm against freereg. */
        freereg(g, left);
        freereg(g, right);
        freereg(g, result);
    } else {
        lval = value_reg(result, n->type);

        /* Release the operand registers, keeping the one that holds the
         * result alive for the caller. */
        if (left != result)
            freereg(g, left);
        if (right != result)
            freereg(g, right);
    }
    return lval;
}
|
| 1692 | + | ||
| 1693 | + | /* Generate code for record construction. Handles both labeled syntax like |
|
| 1694 | + | * `Point { x: 1, y: 2 }` (NODE_RECORD_LIT) and tuple syntax like `Pair(1, 2)` |
|
| 1695 | + | * (NODE_CALL with tuple record type). */ |
|
| 1696 | + | static value_t gen_record_lit(gen_t *g, node_t *n) { |
|
| 1697 | + | type_t *stype = n->type; |
|
| 1698 | + | int strct_off = reserve(g, stype); |
|
| 1699 | + | ||
| 1700 | + | usize nfields = (n->cls == NODE_RECORD_LIT) ? n->val.record_lit.fields.len |
|
| 1701 | + | : n->val.call.args.len; |
|
| 1702 | + | node_t **fields = |
|
| 1703 | + | (n->cls == NODE_RECORD_LIT) |
|
| 1704 | + | ? nodespan_ptrs(&g->mod->parser, n->val.record_lit.fields) |
|
| 1705 | + | : NULL; |
|
| 1706 | + | ||
| 1707 | + | for (usize i = 0; i < nfields; i++) { |
|
| 1708 | + | symbol_t *field; |
|
| 1709 | + | node_t *expr; |
|
| 1710 | + | ||
| 1711 | + | if (n->cls == NODE_RECORD_LIT) { |
|
| 1712 | + | node_t *arg = fields[i]; |
|
| 1713 | + | field = arg->sym ? arg->sym : stype->info.srt.fields[i]; |
|
| 1714 | + | expr = arg->val.call_arg.expr; |
|
| 1715 | + | } else { |
|
| 1716 | + | node_t *arg = SPAN(g, n->val.call.args)[i]; |
|
| 1717 | + | field = stype->info.srt.fields[i]; |
|
| 1718 | + | expr = (arg->cls == NODE_CALL_ARG) ? arg->val.call_arg.expr : arg; |
|
| 1719 | + | } |
|
| 1720 | + | ||
| 1721 | + | value_t argval = gen_expr(g, expr, false); |
|
| 1722 | + | emit_record_field_set(g, argval, FP, strct_off, field); |
|
| 1723 | + | freeval(g, argval); |
|
| 1724 | + | } |
|
| 1725 | + | return value_stack(OFFSET(FP, strct_off), stype); |
|
| 1726 | + | } |
|
| 1727 | + | ||
| 1728 | + | static value_t gen_call_intrinsic( |
|
| 1729 | + | gen_t *g, node_t *n, void (*gen_intrinsic)(gen_t *, node_t *) |
|
| 1730 | + | ) { |
|
| 1731 | + | node_t *fn = n->sym->node; |
|
| 1732 | + | type_t *ret = |
|
| 1733 | + | fn->val.fn_decl.return_type ? fn->val.fn_decl.return_type->type : NULL; |
|
| 1734 | + | /* Call the specialized generator for this intrinsic. |
|
| 1735 | + | * It will handle argument processing in its own way. */ |
|
| 1736 | + | (*gen_intrinsic)(g, n); |
|
| 1737 | + | ||
| 1738 | + | /* For void functions, return a void value */ |
|
| 1739 | + | if (!ret) { |
|
| 1740 | + | return (value_t){ .type = NULL, .loc = LOC_NONE }; |
|
| 1741 | + | } |
|
| 1742 | + | return value_reg(A0, ret); |
|
| 1743 | + | } |
|
| 1744 | + | ||
| 1745 | + | static value_t gen_call(gen_t *g, node_t *n) { |
|
| 1746 | + | symbol_t *sym = n->sym; |
|
| 1747 | + | const char *name = sym->qualified; |
|
| 1748 | + | ||
| 1749 | + | /* Get the return type. Fall back to the call node type when the symbol |
|
| 1750 | + | * does not carry a resolved function signature (eg. indirect calls). */ |
|
| 1751 | + | type_t *return_type = sym->node->type->info.fun.ret; |
|
| 1752 | + | if (!return_type && n->type) { |
|
| 1753 | + | return_type = n->type; |
|
| 1754 | + | } |
|
| 1755 | + | ||
| 1756 | + | /* Keep track of registers we saved before the call. */ |
|
| 1757 | + | i32 saved_regs[REGISTERS] = { 0 }; |
|
| 1758 | + | value_t saved_vals[REGISTERS] = { 0 }; |
|
| 1759 | + | symbol_t *saved_syms[REGISTERS] = { 0 }; |
|
| 1760 | + | ||
| 1761 | + | /* Save live registers to the stack, in case they get clobbered by |
|
| 1762 | + | * the callee. */ |
|
| 1763 | + | for (usize i = 0; i < RALLOC_NREGS; i++) { |
|
| 1764 | + | reg_t r = ralloc_regs[i]; |
|
| 1765 | + | ||
| 1766 | + | /* Don't save registers that aren't caller-saved. */ |
|
| 1767 | + | if (!caller_saved_registers[r]) |
|
| 1768 | + | continue; |
|
| 1769 | + | ||
| 1770 | + | /* Don't save registers that aren't in use. */ |
|
| 1771 | + | if (ralloc_is_free(&g->regs, r)) |
|
| 1772 | + | continue; |
|
| 1773 | + | ||
| 1774 | + | /* Use a pointer-sized type for saving registers to the stack. */ |
|
| 1775 | + | static type_t dword = { .cls = TYPE_PTR, |
|
| 1776 | + | .size = WORD_SIZE, |
|
| 1777 | + | .align = WORD_SIZE }; |
|
| 1778 | + | saved_regs[r] = emit_regpush(g, r, &dword); |
|
| 1779 | + | /* We can free the register since it's on the stack. */ |
|
| 1780 | + | freereg(g, r); |
|
| 1781 | + | ||
| 1782 | + | /* Parameters arrive in caller-saved registers; if we let the allocator |
|
| 1783 | + | * reuse that register (e.g. in emit_memzero), the parameter value gets |
|
| 1784 | + | * clobbered. When we spill the register here, rewrite the symbol to |
|
| 1785 | + | * point at the spill slot so later loads grab the preserved copy. */ |
|
| 1786 | + | node_t *fn_node = g->fn.current->node; |
|
| 1787 | + | ||
| 1788 | + | for (usize p = 0; p < fn_node->val.fn_decl.params.len; p++) { |
|
| 1789 | + | node_t *param = SPAN(g, fn_node->val.fn_decl.params)[p]; |
|
| 1790 | + | symbol_t *param_sym = param->sym; |
|
| 1791 | + | value_t *param_val = ¶m_sym->e.var.val; |
|
| 1792 | + | ||
| 1793 | + | if (param_val->loc == LOC_REG && param_val->as.reg == r) { |
|
| 1794 | + | saved_syms[r] = param_sym; |
|
| 1795 | + | saved_vals[r] = *param_val; |
|
| 1796 | + | ||
| 1797 | + | param_sym->e.var.val = |
|
| 1798 | + | value_stack(OFFSET(FP, saved_regs[r]), param_val->type); |
|
| 1799 | + | param_sym->e.var.val.temp = false; |
|
| 1800 | + | ||
| 1801 | + | break; |
|
| 1802 | + | } |
|
| 1803 | + | } |
|
| 1804 | + | } |
|
| 1805 | + | ||
| 1806 | + | bool sret = type_is_passed_by_ref(return_type); |
|
| 1807 | + | reg_t arg0 = sret ? A1 : A0; |
|
| 1808 | + | usize avail_arg_regs = (usize)((A7 - arg0) + 1); |
|
| 1809 | + | ||
| 1810 | + | if (n->val.call.args.len > avail_arg_regs) { |
|
| 1811 | + | bail( |
|
| 1812 | + | "function call '%s' requires %zu argument registers but only %zu " |
|
| 1813 | + | "are available", |
|
| 1814 | + | name, |
|
| 1815 | + | n->val.call.args.len, |
|
| 1816 | + | avail_arg_regs |
|
| 1817 | + | ); |
|
| 1818 | + | } |
|
| 1819 | + | ||
| 1820 | + | /* Setup arguments in argument registers (A0..A7), shifting when a hidden |
|
| 1821 | + | * return pointer occupies A0. */ |
|
| 1822 | + | for (usize i = 0; i < n->val.call.args.len; i++) { |
|
| 1823 | + | /* Generate code for the expression part of the argument. */ |
|
| 1824 | + | node_t *arg = SPAN(g, n->val.call.args)[i]; |
|
| 1825 | + | value_t argval = gen_expr(g, arg->val.call_arg.expr, false); |
|
| 1826 | + | ||
| 1827 | + | type_t *param_type = sym->node->type->info.fun.params[i]; |
|
| 1828 | + | if (param_type->cls == TYPE_OPT && argval.type->cls != TYPE_OPT) { |
|
| 1829 | + | argval = optval_from_value(g, param_type, argval); |
|
| 1830 | + | } |
|
| 1831 | + | /* Mark this register as in use for the duration of the call. */ |
|
| 1832 | + | reg_t arg_reg = arg0 + (reg_t)i; |
|
| 1833 | + | emit_load_into(g, usereg(g, arg_reg), argval); |
|
| 1834 | + | } |
|
| 1835 | + | /* Return value is in A0, by convention, whether or not an address was |
|
| 1836 | + | * passed into A0 by the caller. */ |
|
| 1837 | + | reg_t return_reg = A0; |
|
| 1838 | + | /* Return stack offset if we store it on the stack. */ |
|
| 1839 | + | i32 return_off = 0; |
|
| 1840 | + | i32 return_stack_off = 0; |
|
| 1841 | + | bool return_is_on_stack = false; |
|
| 1842 | + | ||
| 1843 | + | /* For types that are passed by reference, allocate space in this |
|
| 1844 | + | * stack frame, and pass the address via A0, as a hidden first parameter. |
|
| 1845 | + | * Nb. The return record address is setup *after* the call arguments |
|
| 1846 | + | * are generated, to not clobber A0 in case one of the arguments is a |
|
| 1847 | + | * call, eg. `f(g())` where `f` is the current function call. */ |
|
| 1848 | + | if (return_type->cls == TYPE_VOID) { |
|
| 1849 | + | /* For void functions, no need to allocate space for return value */ |
|
| 1850 | + | } else if (sret) { |
|
| 1851 | + | return_off = reserve(g, return_type); |
|
| 1852 | + | /* Result-returning callees can legitimately skip rewriting the tag on |
|
| 1853 | + | * a fast-path success, so ensure the caller-visible slot starts zeroed. |
|
| 1854 | + | * Other pass-by-ref aggregates are always fully overwritten by the |
|
| 1855 | + | * callee, making a pre-emptive memset unnecessary work. */ |
|
| 1856 | + | if (return_type->cls == TYPE_RESULT) { |
|
| 1857 | + | emit_memzero(g, OFFSET(FP, return_off), return_type->size); |
|
| 1858 | + | } |
|
| 1859 | + | /* Store return address in return address register. */ |
|
| 1860 | + | usereg(g, return_reg); |
|
| 1861 | + | emit_addr_offset(g, return_reg, FP, return_off); |
|
| 1862 | + | } |
|
| 1863 | + | ||
| 1864 | + | /* Call the function. */ |
|
| 1865 | + | if (sym->kind == SYM_VARIABLE) { |
|
| 1866 | + | /* Function pointer call: load address into S2 and call via JALR */ |
|
| 1867 | + | value_t fn_ptr_val = sym->e.var.val; |
|
| 1868 | + | ||
| 1869 | + | if (fn_ptr_val.loc == LOC_REG && saved_regs[fn_ptr_val.as.reg]) { |
|
| 1870 | + | value_t spill = value_stack( |
|
| 1871 | + | OFFSET(FP, saved_regs[fn_ptr_val.as.reg]), fn_ptr_val.type |
|
| 1872 | + | ); |
|
| 1873 | + | emit_load_into(g, S2, spill); |
|
| 1874 | + | } else if (fn_ptr_val.loc == LOC_REG) { |
|
| 1875 | + | emit_mv(g, S2, fn_ptr_val.as.reg); |
|
| 1876 | + | } else { |
|
| 1877 | + | emit_load_into(g, S2, fn_ptr_val); |
|
| 1878 | + | } |
|
| 1879 | + | emit(g, JALR(RA, S2, 0)); |
|
| 1880 | + | } else if (sym->e.fn.attribs & ATTRIB_EXTERN) { |
|
| 1881 | + | /* External function. */ |
|
| 1882 | + | } else if (sym->e.fn.addr) { |
|
| 1883 | + | /* Direct call, address is already known. */ |
|
| 1884 | + | emit_call(g, sym->e.fn.addr); |
|
| 1885 | + | } else { |
|
| 1886 | + | if (g->nfnpatches >= MAX_FN_PATCHES) |
|
| 1887 | + | bail("too many function call patches"); |
|
| 1888 | + | ||
| 1889 | + | /* Indirect call with patch later, address is not yet known. */ |
|
| 1890 | + | ||
| 1891 | + | reg_t scratch = nextreg(g); |
|
| 1892 | + | usize pc = emit(g, NOP); |
|
| 1893 | + | usize tramp = emit(g, NOP); |
|
| 1894 | + | ||
| 1895 | + | g->fnpatches[g->nfnpatches++] = (fnpatch_t){ |
|
| 1896 | + | .fn_name = sym->qualified, |
|
| 1897 | + | .pc = pc, |
|
| 1898 | + | .tramp_pc = tramp, |
|
| 1899 | + | .patch_type = PATCH_CALL, |
|
| 1900 | + | .target_reg = 0, |
|
| 1901 | + | .scratch_reg = scratch, |
|
| 1902 | + | }; |
|
| 1903 | + | freereg(g, scratch); |
|
| 1904 | + | } |
|
| 1905 | + | /* If the return register (A0) was in use before the function call, move the |
|
| 1906 | + | * return value to a fresh register so restored caller values do not wipe it |
|
| 1907 | + | * out. */ |
|
| 1908 | + | bool is_reg_return = |
|
| 1909 | + | (return_type->cls != TYPE_VOID) && !type_is_passed_by_ref(return_type); |
|
| 1910 | + | bool is_return_reg_saved = saved_regs[return_reg] != 0; |
|
| 1911 | + | ||
| 1912 | + | if (is_reg_return && is_return_reg_saved) { |
|
| 1913 | + | return_stack_off = emit_regpush(g, return_reg, return_type); |
|
| 1914 | + | return_is_on_stack = true; |
|
| 1915 | + | } |
|
| 1916 | + | ||
| 1917 | + | /* Restore all saved registers. */ |
|
| 1918 | + | for (usize i = 0; i < RALLOC_NREGS; i++) { |
|
| 1919 | + | reg_t dst = ralloc_regs[i]; |
|
| 1920 | + | i32 offset = saved_regs[dst]; |
|
| 1921 | + | ||
| 1922 | + | if (!offset) |
|
| 1923 | + | continue; |
|
| 1924 | + | ||
| 1925 | + | static type_t dword = { .cls = TYPE_PTR, |
|
| 1926 | + | .size = WORD_SIZE, |
|
| 1927 | + | .align = WORD_SIZE }; |
|
| 1928 | + | emit_regload(g, dst, FP, offset, &dword); |
|
| 1929 | + | usereg(g, dst); |
|
| 1930 | + | ||
| 1931 | + | /* Undo the temporary rebinding so the parameter once again refers to |
|
| 1932 | + | * its original register value now that the spill has been reloaded. */ |
|
| 1933 | + | if (saved_syms[dst]) { |
|
| 1934 | + | saved_syms[dst]->e.var.val = saved_vals[dst]; |
|
| 1935 | + | saved_syms[dst]->e.var.val.temp = false; |
|
| 1936 | + | } |
|
| 1937 | + | } |
|
| 1938 | + | ||
| 1939 | + | /* Restore argument registers that weren't in use before the call. */ |
|
| 1940 | + | for (usize i = 0; i < n->val.call.args.len; i++) { |
|
| 1941 | + | reg_t arg = arg0 + (reg_t)i; |
|
| 1942 | + | if (!saved_regs[arg]) |
|
| 1943 | + | freereg(g, arg); |
|
| 1944 | + | } |
|
| 1945 | + | ||
| 1946 | + | /* For records, the return value is stored on the stack, and the return |
|
| 1947 | + | * register holds the address. For everything else, it's in a register. */ |
|
| 1948 | + | if (return_type->cls == TYPE_VOID) { |
|
| 1949 | + | /* For void functions, we don't return a value */ |
|
| 1950 | + | if (!is_return_reg_saved) |
|
| 1951 | + | freereg(g, return_reg); |
|
| 1952 | + | return (value_t){ .type = return_type, .loc = LOC_NONE }; |
|
| 1953 | + | } else if (type_is_passed_by_ref(return_type)) { |
|
| 1954 | + | return value_stack(OFFSET(FP, return_off), return_type); |
|
| 1955 | + | } else { |
|
| 1956 | + | if (return_is_on_stack) { |
|
| 1957 | + | if (!is_return_reg_saved) |
|
| 1958 | + | freereg(g, return_reg); |
|
| 1959 | + | return value_stack(OFFSET(FP, return_stack_off), return_type); |
|
| 1960 | + | } |
|
| 1961 | + | /* The return value is marked as temp, so the caller is responsible |
|
| 1962 | + | * for freeing the register when done with the value. Mark the register |
|
| 1963 | + | * as in use to prevent reallocation before the value is consumed. */ |
|
| 1964 | + | usereg(g, return_reg); |
|
| 1965 | + | return value_reg(return_reg, return_type); |
|
| 1966 | + | } |
|
| 1967 | + | } |
|
| 1968 | + | ||
| 1969 | + | /* Generate code to access a slice field (len or ptr) given the slice value. */ |
|
| 1970 | + | static value_t gen_slice_field( |
|
| 1971 | + | gen_t *g, value_t slice_val, node_t *field, type_t *result_type |
|
| 1972 | + | ) { |
|
| 1973 | + | if (memcmp(field->val.ident.name, LEN_FIELD, LEN_FIELD_LEN) == 0) { |
|
| 1974 | + | reg_t len = emit_load_offset(g, slice_val, SLICE_FIELD_LEN_OFFSET); |
|
| 1975 | + | /* Slice lengths are stored as full dwords but typed as u32. |
|
| 1976 | + | * Zero-extend to clear any upper 32 bits so that 64-bit |
|
| 1977 | + | * comparisons (SLTU etc.) produce correct results on RV64. */ |
|
| 1978 | + | if (WORD_SIZE == 8) { |
|
| 1979 | + | emit(g, SLLI(len, len, 32)); |
|
| 1980 | + | emit(g, SRLI(len, len, 32)); |
|
| 1981 | + | } |
|
| 1982 | + | return value_reg(len, result_type); |
|
| 1983 | + | } |
|
| 1984 | + | if (memcmp(field->val.ident.name, PTR_FIELD, PTR_FIELD_LEN) == 0) { |
|
| 1985 | + | reg_t ptr = emit_load_offset(g, slice_val, SLICE_FIELD_PTR_OFFSET); |
|
| 1986 | + | return value_reg(ptr, result_type); |
|
| 1987 | + | } |
|
| 1988 | + | bail("unknown slice field"); |
|
| 1989 | + | } |
|
| 1990 | + | ||
| 1991 | + | static value_t gen_access_ref(gen_t *g, node_t *n) { |
|
| 1992 | + | node_t *expr = n->val.access.lval; |
|
| 1993 | + | type_t *expr_typ = expr->type; |
|
| 1994 | + | ||
| 1995 | + | type_t *target_type = deref_type(expr_typ); |
|
| 1996 | + | ||
| 1997 | + | switch (target_type->cls) { |
|
| 1998 | + | case TYPE_RECORD: { |
|
| 1999 | + | value_t ptr_val = gen_expr(g, expr, true); |
|
| 2000 | + | symbol_t *field = n->sym; |
|
| 2001 | + | useval(g, ptr_val); |
|
| 2002 | + | ||
| 2003 | + | /* For pointer access like ptr.field, we need to dereference first */ |
|
| 2004 | + | /* Create a temporary node for dereferencing */ |
|
| 2005 | + | node_t deref_node = { |
|
| 2006 | + | .cls = NODE_UNOP, |
|
| 2007 | + | .type = target_type, |
|
| 2008 | + | .val.unop.op = OP_DEREF, |
|
| 2009 | + | .val.unop.expr = expr, |
|
| 2010 | + | }; |
|
| 2011 | + | /* For pointer-to-record field access, keep the pointed-to record as an |
|
| 2012 | + | * lvalue so the field setter sees the original storage address. */ |
|
| 2013 | + | value_t record_val = gen_deref(g, &deref_node, ptr_val, true); |
|
| 2014 | + | freeval(g, ptr_val); |
|
| 2015 | + | ||
| 2016 | + | return emit_record_field_get(record_val, field); |
|
| 2017 | + | } |
|
| 2018 | + | case TYPE_SLICE: { |
|
| 2019 | + | node_t *field = n->val.access.rval; |
|
| 2020 | + | ||
| 2021 | + | /* Dereference to get the slice value, then access the field */ |
|
| 2022 | + | value_t ptr_val = gen_expr(g, expr, true); |
|
| 2023 | + | useval(g, ptr_val); |
|
| 2024 | + | ||
| 2025 | + | node_t deref_node = { |
|
| 2026 | + | .cls = NODE_UNOP, |
|
| 2027 | + | .type = target_type, |
|
| 2028 | + | .val.unop.op = OP_DEREF, |
|
| 2029 | + | .val.unop.expr = expr, |
|
| 2030 | + | }; |
|
| 2031 | + | value_t slice_val = gen_deref(g, &deref_node, ptr_val, true); |
|
| 2032 | + | freeval(g, ptr_val); |
|
| 2033 | + | ||
| 2034 | + | return gen_slice_field(g, slice_val, field, n->type); |
|
| 2035 | + | } |
|
| 2036 | + | case TYPE_ARRAY: { |
|
| 2037 | + | /* For pointer access like ptr[index], create a temporary array index |
|
| 2038 | + | * node */ |
|
| 2039 | + | /* and let gen_array_index handle the pointer dereferencing */ |
|
| 2040 | + | node_t array_index_node = { .cls = NODE_ARRAY_INDEX, |
|
| 2041 | + | .type = n->type, |
|
| 2042 | + | .val.access.lval = expr, |
|
| 2043 | + | .val.access.rval = n->val.access.rval }; |
|
| 2044 | + | ||
| 2045 | + | return gen_array_index(g, &array_index_node, true); |
|
| 2046 | + | } |
|
| 2047 | + | default: |
|
| 2048 | + | bail( |
|
| 2049 | + | "cannot access field of reference to %s", |
|
| 2050 | + | type_names[target_type->cls] |
|
| 2051 | + | ); |
|
| 2052 | + | } |
|
| 2053 | + | } |
|
| 2054 | + | ||
| 2055 | + | static value_t gen_access(gen_t *g, node_t *n, bool lval) { |
|
| 2056 | + | node_t *expr = n->val.access.lval; |
|
| 2057 | + | type_t *expr_typ = expr->type; |
|
| 2058 | + | node_t *field = n->val.access.rval; |
|
| 2059 | + | ||
| 2060 | + | /* Handle non-reference types. */ |
|
| 2061 | + | switch (expr_typ->cls) { |
|
| 2062 | + | case TYPE_PTR: |
|
| 2063 | + | return gen_access_ref(g, n); |
|
| 2064 | + | case TYPE_RECORD: { |
|
| 2065 | + | /* Struct value and type. */ |
|
| 2066 | + | value_t sval = gen_expr(g, expr, lval); |
|
| 2067 | + | symbol_t *field = n->sym; |
|
| 2068 | + | ||
| 2069 | + | return emit_record_field_get(sval, field); |
|
| 2070 | + | } |
|
| 2071 | + | case TYPE_SLICE: { |
|
| 2072 | + | value_t slice_val = gen_expr(g, expr, lval); |
|
| 2073 | + | return gen_slice_field(g, slice_val, field, n->type); |
|
| 2074 | + | } |
|
| 2075 | + | /* Fall through */ |
|
| 2076 | + | default: |
|
| 2077 | + | abort(); |
|
| 2078 | + | } |
|
| 2079 | + | } |
|
| 2080 | + | ||
| 2081 | + | /* Generate code to obtain a function pointer for the given symbol */ |
|
| 2082 | + | static value_t gen_fn_ptr(gen_t *g, symbol_t *sym, type_t *type) { |
|
| 2083 | + | reg_t reg = nextreg(g); |
|
| 2084 | + | ||
| 2085 | + | if (sym->e.fn.addr) { |
|
| 2086 | + | /* Direct function address is known - use AUIPC+ADDI for PC-relative |
|
| 2087 | + | * addressing since the program may be loaded at a non-zero base. */ |
|
| 2088 | + | emit_pc_rel_addr(g, reg, sym->e.fn.addr); |
|
| 2089 | + | return value_reg(reg, type); |
|
| 2090 | + | } |
|
| 2091 | + | ||
| 2092 | + | /* Function address will be patched later - generate AUIPC+ADDI sequence */ |
|
| 2093 | + | usize pc = emit(g, NOP); /* Placeholder - will be patched with AUIPC */ |
|
| 2094 | + | emit(g, NOP); /* Second placeholder - will be patched with ADDI */ |
|
| 2095 | + | ||
| 2096 | + | if (g->nfnpatches >= MAX_FN_PATCHES) |
|
| 2097 | + | bail("too many function address patches"); |
|
| 2098 | + | ||
| 2099 | + | g->fnpatches[g->nfnpatches++] = (fnpatch_t){ |
|
| 2100 | + | .fn_name = sym->qualified, |
|
| 2101 | + | .pc = pc, |
|
| 2102 | + | .tramp_pc = pc + 1, |
|
| 2103 | + | .patch_type = PATCH_ADDRESS, |
|
| 2104 | + | .target_reg = reg, |
|
| 2105 | + | .scratch_reg = ZERO, |
|
| 2106 | + | }; |
|
| 2107 | + | return value_reg(reg, type); |
|
| 2108 | + | } |
|
| 2109 | + | ||
| 2110 | + | static value_t gen_scope(gen_t *g, node_t *n) { |
|
| 2111 | + | symbol_t *sym = n->sym; |
|
| 2112 | + | ||
| 2113 | + | /* Generate code based on the symbol type, not the lval */ |
|
| 2114 | + | switch (sym->kind) { |
|
| 2115 | + | case SYM_VARIABLE: |
|
| 2116 | + | break; |
|
| 2117 | + | case SYM_CONSTANT: |
|
| 2118 | + | if (sym->e.var.val.loc == LOC_NONE) { |
|
| 2119 | + | gen_const(g, sym->node); |
|
| 2120 | + | } |
|
| 2121 | + | return sym->e.var.val; |
|
| 2122 | + | case SYM_VARIANT: |
|
| 2123 | + | if (n->type->cls == TYPE_UNION) { |
|
| 2124 | + | if (type_is_union_with_payload(n->type)) { |
|
| 2125 | + | return gen_union_store(g, n->type, sym, value_none()); |
|
| 2126 | + | } |
|
| 2127 | + | return value_imm( |
|
| 2128 | + | (imm_t){ .i = sym->node->val.union_variant.value }, n->type |
|
| 2129 | + | ); |
|
| 2130 | + | } else { |
|
| 2131 | + | bail("variant of type %s is invalid", type_names[n->type->cls]); |
|
| 2132 | + | } |
|
| 2133 | + | break; |
|
| 2134 | + | case SYM_FUNCTION: |
|
| 2135 | + | return gen_fn_ptr(g, sym, n->type); |
|
| 2136 | + | default: |
|
| 2137 | + | break; |
|
| 2138 | + | } |
|
| 2139 | + | bail( |
|
| 2140 | + | "unhandled scope case for symbol kind %d, node kind %s", |
|
| 2141 | + | sym->kind, |
|
| 2142 | + | node_names[n->cls] |
|
| 2143 | + | ); |
|
| 2144 | + | } |
|
| 2145 | + | ||
| 2146 | + | static value_t gen_ref(gen_t *g, node_t *n) { |
|
| 2147 | + | /* Slice literal */ |
|
| 2148 | + | if (n->val.ref.target->cls == NODE_ARRAY_LIT) { |
|
| 2149 | + | value_t ary = gen_array_literal(g, n->val.ref.target); |
|
| 2150 | + | return gen_array_slice(g, ary, NULL); |
|
| 2151 | + | } |
|
| 2152 | + | ||
| 2153 | + | /* Ask for an lvalue so we get back the actual storage location. */ |
|
| 2154 | + | value_t target_val = gen_expr(g, n->val.ref.target, true); |
|
| 2155 | + | ||
| 2156 | + | /* If the value is in a register, we need its address. |
|
| 2157 | + | * This requires the value to be moved to the stack first. */ |
|
| 2158 | + | if (target_val.loc == LOC_REG) { |
|
| 2159 | + | target_val = emit_push(g, target_val); |
|
| 2160 | + | } |
|
| 2161 | + | if (target_val.loc == LOC_STACK) { |
|
| 2162 | + | /* Turn the stack location into an address held in a register. */ |
|
| 2163 | + | reg_t addr = nextreg(g); |
|
| 2164 | + | emit_addr_offset( |
|
| 2165 | + | g, addr, target_val.as.off.base, target_val.as.off.offset |
|
| 2166 | + | ); |
|
| 2167 | + | ||
| 2168 | + | return value_reg(addr, n->type); |
|
| 2169 | + | } |
|
| 2170 | + | if (target_val.loc == LOC_ADDR) { |
|
| 2171 | + | reg_t addr = nextreg(g); |
|
| 2172 | + | emit_li(g, addr, target_val.as.adr.base + target_val.as.adr.offset); |
|
| 2173 | + | return value_reg(addr, n->type); |
|
| 2174 | + | } |
|
| 2175 | + | /* For immediates and other types, we can't take a reference. */ |
|
| 2176 | + | bail("cannot take a reference to the target expression"); |
|
| 2177 | + | } |
|
| 2178 | + | ||
| 2179 | + | static value_t gen_deref(gen_t *g, node_t *n, value_t ref_val, bool lval) { |
|
| 2180 | + | reg_t addr = ZERO; |
|
| 2181 | + | bool addr_from_load = false; |
|
| 2182 | + | ||
| 2183 | + | /* Resolve the pointer value into a register. */ |
|
| 2184 | + | if (ref_val.loc == LOC_REG) { |
|
| 2185 | + | addr = ref_val.as.reg; |
|
| 2186 | + | } else if (ref_val.loc == LOC_STACK || ref_val.loc == LOC_ADDR) { |
|
| 2187 | + | addr = emit_load(g, ref_val); |
|
| 2188 | + | addr_from_load = true; |
|
| 2189 | + | } else { |
|
| 2190 | + | bail("cannot dereference expression at this location"); |
|
| 2191 | + | } |
|
| 2192 | + | value_t location = value_stack(OFFSET(addr, 0), n->type); |
|
| 2193 | + | ||
| 2194 | + | if (lval || type_is_passed_by_ref(n->type)) { |
|
| 2195 | + | return location; |
|
| 2196 | + | } |
|
| 2197 | + | reg_t val_reg = emit_load(g, location); |
|
| 2198 | + | ||
| 2199 | + | if (addr_from_load) |
|
| 2200 | + | freereg(g, addr); |
|
| 2201 | + | ||
| 2202 | + | return value_reg(val_reg, n->type); |
|
| 2203 | + | } |
|
| 2204 | + | ||
| 2205 | + | /* Generate an array literal. |
|
| 2206 | + | * |
|
| 2207 | + | * This function handles array literals like `[1, 2, 3]`. It allocates |
|
| 2208 | + | * space for the array on the stack, evaluates each element, and initializes |
|
| 2209 | + | * the array elements in memory. */ |
|
| 2210 | + | static value_t gen_array_literal(gen_t *g, node_t *n) { |
|
| 2211 | + | type_t *array_type = n->type; |
|
| 2212 | + | type_t *elem_type = array_type->info.ary.elem; |
|
| 2213 | + | usize length = array_type->info.ary.length; |
|
| 2214 | + | ||
| 2215 | + | /* Reserve stack space for the array in the current frame. */ |
|
| 2216 | + | int array_off = reserve(g, array_type); |
|
| 2217 | + | ||
| 2218 | + | /* Evaluate and store each element of the array. */ |
|
| 2219 | + | node_t **elems = nodespan_ptrs(&g->mod->parser, n->val.array_lit.elems); |
|
| 2220 | + | for (usize i = 0; i < length; i++) { |
|
| 2221 | + | node_t *elem = elems[i]; |
|
| 2222 | + | frame_t *frame = &g->fn.current->e.fn.frame; |
|
| 2223 | + | i32 saved_sp = frame->sp; |
|
| 2224 | + | value_t elem_val = gen_expr(g, elem, false); |
|
| 2225 | + | ||
| 2226 | + | /* Calculate the offset for this element in the array. */ |
|
| 2227 | + | i32 elem_off = array_off + (i32)(i * elem_type->size); |
|
| 2228 | + | ||
| 2229 | + | /* Store the element value at the calculated offset. */ |
|
| 2230 | + | emit_store(g, elem_val, FP, elem_off); |
|
| 2231 | + | freeval(g, elem_val); |
|
| 2232 | + | ||
| 2233 | + | /* Only reclaim stack space if the element type doesn't contain |
|
| 2234 | + | * pointers. Slices and pointers may reference stack-allocated |
|
| 2235 | + | * temporaries that must remain live. */ |
|
| 2236 | + | if (!type_is_address(elem_type->cls)) { |
|
| 2237 | + | frame->sp = saved_sp; |
|
| 2238 | + | } |
|
| 2239 | + | } |
|
| 2240 | + | /* The initialized array is on the stack at the computed offset. */ |
|
| 2241 | + | return value_stack(OFFSET(FP, array_off), array_type); |
|
| 2242 | + | } |
|
| 2243 | + | ||
| 2244 | + | /* Generate code for an array repeat literal (e.g. [0; 24]). */ |
|
| 2245 | + | static value_t gen_array_repeat(gen_t *g, node_t *n) { |
|
| 2246 | + | type_t *array_type = n->type; |
|
| 2247 | + | type_t *elem_type = array_type->info.ary.elem; |
|
| 2248 | + | usize length = array_type->info.ary.length; |
|
| 2249 | + | usize array_off = reserve(g, array_type); |
|
| 2250 | + | value_t elem_val = gen_expr(g, n->val.array_repeat_lit.value, false); |
|
| 2251 | + | ||
| 2252 | + | /* Store the same value at each array position */ |
|
| 2253 | + | for (usize i = 0; i < length; i++) { |
|
| 2254 | + | i32 elem_off = array_off + (i32)(i * elem_type->size); |
|
| 2255 | + | emit_store(g, elem_val, FP, elem_off); |
|
| 2256 | + | } |
|
| 2257 | + | if (elem_val.loc == LOC_REG) |
|
| 2258 | + | freereg(g, elem_val.as.reg); |
|
| 2259 | + | ||
| 2260 | + | return value_stack(OFFSET(FP, array_off), array_type); |
|
| 2261 | + | } |
|
| 2262 | + | ||
| 2263 | + | /* Generate code for a slice with a range expression. */ |
|
| 2264 | + | static value_t gen_array_slice(gen_t *g, value_t array_val, node_t *range) { |
|
| 2265 | + | static type_t dword_type = { .cls = TYPE_PTR }; |
|
| 2266 | + | ||
| 2267 | + | type_t *slice_type, *elem_type; |
|
| 2268 | + | if (array_val.type->cls == TYPE_ARRAY) { |
|
| 2269 | + | slice_type = array_val.type->slice; |
|
| 2270 | + | elem_type = slice_type->info.slc.elem; |
|
| 2271 | + | } else { /* TYPE_SLICE */ |
|
| 2272 | + | slice_type = array_val.type; |
|
| 2273 | + | elem_type = array_val.type->info.slc.elem; |
|
| 2274 | + | } |
|
| 2275 | + | ||
| 2276 | + | /* Reserve stack space for the slice (pointer + length) */ |
|
| 2277 | + | i32 slice_off = reserve(g, slice_type); |
|
| 2278 | + | value_t slice_val = value_stack(OFFSET(FP, slice_off), slice_type); |
|
| 2279 | + | reg_t slice_start = ZERO; /* Start index */ |
|
| 2280 | + | ||
| 2281 | + | /* 1. Store array pointer at slice offset `0`. |
|
| 2282 | + | * 2. Update slice offset `0` with slice start range. |
|
| 2283 | + | * 3. Compute slice length, based on range. |
|
| 2284 | + | * 4. Store slice length at slice offset `4`. */ |
|
| 2285 | + | ||
| 2286 | + | /* Emit slice address information */ |
|
| 2287 | + | if (range && range->val.range.start) { |
|
| 2288 | + | /* Generate start expression and bounds check */ |
|
| 2289 | + | reg_t r = nextreg(g); |
|
| 2290 | + | value_t start_val = gen_expr(g, range->val.range.start, false); |
|
| 2291 | + | reg_t start_reg = emit_load(g, start_val); |
|
| 2292 | + | reg_t slice_adr = ZERO; |
|
| 2293 | + | ||
| 2294 | + | if (array_val.type->cls == TYPE_ARRAY) { |
|
| 2295 | + | slice_adr = emit_load(g, array_val); |
|
| 2296 | + | } else { |
|
| 2297 | + | /* Load data pointer from slice (first word) */ |
|
| 2298 | + | slice_adr = emit_load_dword(g, array_val); |
|
| 2299 | + | } |
|
| 2300 | + | offset_t slice_off = slice_val.as.off; |
|
| 2301 | + | ||
| 2302 | + | emit_li(g, r, elem_type->size); |
|
| 2303 | + | emit(g, MUL(r, r, start_reg)); /* Offset from array address */ |
|
| 2304 | + | emit(g, ADD(r, r, slice_adr)); /* Full address */ |
|
| 2305 | + | emit_regstore( |
|
| 2306 | + | g, r, slice_off.base, slice_off.offset, &dword_type |
|
| 2307 | + | ); /* Save */ |
|
| 2308 | + | ||
| 2309 | + | slice_start = start_reg; |
|
| 2310 | + | ||
| 2311 | + | /* Don't free start_reg yet - still needed as slice_start */ |
|
| 2312 | + | if (array_val.type->cls == TYPE_SLICE) { |
|
| 2313 | + | freereg(g, slice_adr); |
|
| 2314 | + | } |
|
| 2315 | + | freereg(g, r); |
|
| 2316 | + | } else { |
|
| 2317 | + | if (array_val.type->cls == TYPE_ARRAY) { |
|
| 2318 | + | /* For arrays, copy the array address */ |
|
| 2319 | + | emit_copy_by_ref(g, array_val, slice_val); |
|
| 2320 | + | } else { /* TYPE_SLICE */ |
|
| 2321 | + | /* For slices, copy the slice fat pointer */ |
|
| 2322 | + | emit_memcopy(g, array_val.as.off, slice_val.as.off, array_val.type); |
|
| 2323 | + | } |
|
| 2324 | + | } |
|
| 2325 | + | ||
| 2326 | + | /* Emit slice length information */ |
|
| 2327 | + | if (range && range->val.range.end) { |
|
| 2328 | + | /* Generate end value */ |
|
| 2329 | + | value_t end_val = gen_expr(g, range->val.range.end, false); |
|
| 2330 | + | reg_t end_reg = emit_load(g, end_val); |
|
| 2331 | + | ||
| 2332 | + | offset_t slice_off = slice_val.as.off; |
|
| 2333 | + | if (slice_start != ZERO) { |
|
| 2334 | + | /* Use SUBW on RV64 so the result is properly sign-extended |
|
| 2335 | + | * to 64 bits, keeping the upper 32 bits clean. */ |
|
| 2336 | + | emit(g, SUBW(end_reg, end_reg, slice_start)); |
|
| 2337 | + | } |
|
| 2338 | + | emit_regstore( |
|
| 2339 | + | g, |
|
| 2340 | + | end_reg, |
|
| 2341 | + | slice_off.base, |
|
| 2342 | + | slice_off.offset + WORD_SIZE, |
|
| 2343 | + | &dword_type |
|
| 2344 | + | ); |
|
| 2345 | + | ||
| 2346 | + | freereg(g, end_reg); |
|
| 2347 | + | } else { |
|
| 2348 | + | reg_t r = nextreg(g); |
|
| 2349 | + | if (array_val.type->cls == TYPE_ARRAY) { |
|
| 2350 | + | emit_li(g, r, array_val.type->info.ary.length); |
|
| 2351 | + | } else { /* Slice */ |
|
| 2352 | + | /* Load length from slice (second word) */ |
|
| 2353 | + | r = emit_load_offset(g, array_val, SLICE_FIELD_LEN_OFFSET); |
|
| 2354 | + | } |
|
| 2355 | + | /* Slice length = array length - slice start */ |
|
| 2356 | + | offset_t slice_off = slice_val.as.off; |
|
| 2357 | + | if (slice_start != ZERO) { |
|
| 2358 | + | /* Use SUBW on RV64 so the result is properly sign-extended |
|
| 2359 | + | * to 64 bits, keeping the upper 32 bits clean. */ |
|
| 2360 | + | emit(g, SUBW(r, r, slice_start)); |
|
| 2361 | + | } |
|
| 2362 | + | emit_regstore( |
|
| 2363 | + | g, r, slice_off.base, slice_off.offset + WORD_SIZE, &dword_type |
|
| 2364 | + | ); |
|
| 2365 | + | ||
| 2366 | + | freereg(g, r); |
|
| 2367 | + | } |
|
| 2368 | + | freereg(g, slice_start); |
|
| 2369 | + | ||
| 2370 | + | return slice_val; |
|
| 2371 | + | } |
|
| 2372 | + | ||
| 2373 | + | /* Generate array indexing. |
|
| 2374 | + | * |
|
| 2375 | + | * This function handles array indexing operations like `arr[i]` or `slice[i]`, |
|
| 2376 | + | * as well as slicing operations using ranges like `arr[..]` or `arr[0..5]`. */ |
|
| 2377 | + | static value_t gen_array_index(gen_t *g, node_t *n, bool lval) { |
|
| 2378 | + | /* Generate code for the array/slice expression. */ |
|
| 2379 | + | value_t array_val = gen_expr(g, n->val.access.lval, lval); |
|
| 2380 | + | type_t *array_type = array_val.type; |
|
| 2381 | + | ||
| 2382 | + | if (array_type->cls == TYPE_PTR) { |
|
| 2383 | + | array_type = deref_type(array_type); |
|
| 2384 | + | } |
|
| 2385 | + | ||
| 2386 | + | /* Check if this is a range expression (for slicing) */ |
|
| 2387 | + | node_t *idx_node = n->val.access.rval; |
|
| 2388 | + | if (idx_node->cls == NODE_RANGE) { |
|
| 2389 | + | return gen_array_slice(g, array_val, idx_node); |
|
| 2390 | + | } else { |
|
| 2391 | + | return emit_array_index( |
|
| 2392 | + | g, array_val, gen_expr(g, idx_node, false), lval |
|
| 2393 | + | ); |
|
| 2394 | + | } |
|
| 2395 | + | } |
|
| 2396 | + | ||
| 2397 | + | static value_t gen_unop(gen_t *g, node_t *n, bool lval) { |
|
| 2398 | + | value_t expr_val = gen_expr(g, n->val.unop.expr, lval); |
|
| 2399 | + | ||
| 2400 | + | switch (n->val.unop.op) { |
|
| 2401 | + | case OP_NOT: { |
|
| 2402 | + | /* Logical NOT; invert the boolean value. */ |
|
| 2403 | + | reg_t expr_reg = emit_load(g, expr_val); |
|
| 2404 | + | emit(g, NOT(expr_reg, expr_reg)); |
|
| 2405 | + | return value_reg(expr_reg, expr_val.type); |
|
| 2406 | + | } |
|
| 2407 | + | case OP_NEG: { |
|
| 2408 | + | /* Numerical negation. */ |
|
| 2409 | + | reg_t expr_reg = emit_load(g, expr_val); |
|
| 2410 | + | emit(g, NEG(expr_reg, expr_reg)); |
|
| 2411 | + | return value_reg(expr_reg, expr_val.type); |
|
| 2412 | + | } |
|
| 2413 | + | case OP_BNOT: { |
|
| 2414 | + | /* Bitwise NOT; invert all bits. */ |
|
| 2415 | + | reg_t expr_reg = emit_load(g, expr_val); |
|
| 2416 | + | emit(g, XORI(expr_reg, expr_reg, -1)); |
|
| 2417 | + | return value_reg(expr_reg, expr_val.type); |
|
| 2418 | + | } |
|
| 2419 | + | case OP_DEREF: |
|
| 2420 | + | return gen_deref(g, n, expr_val, lval); |
|
| 2421 | + | default: |
|
| 2422 | + | abort(); |
|
| 2423 | + | } |
|
| 2424 | + | } |
|
| 2425 | + | ||
| 2426 | + | static value_t gen_string(gen_t *g, node_t *n) { |
|
| 2427 | + | /* Add the string to the data section and get its offset */ |
|
| 2428 | + | usize str_len = n->val.string_lit.length; |
|
| 2429 | + | usize str_off = data_string(&g->data, n->val.string_lit.data, str_len); |
|
| 2430 | + | ||
| 2431 | + | /* Create a stack space for the string slice */ |
|
| 2432 | + | i32 slice_off = reserve(g, n->type); |
|
| 2433 | + | ||
| 2434 | + | return emit_slice_lit(g, slice_off, str_off, str_len, n->type); |
|
| 2435 | + | } |
|
| 2436 | + | ||
| 2437 | + | static value_t gen_expr(gen_t *g, node_t *n, bool lvalue) { |
|
| 2438 | + | assert(n->type); |
|
| 2439 | + | ||
| 2440 | + | value_t val = (value_t){ .type = n->type }; |
|
| 2441 | + | ||
| 2442 | + | switch (n->cls) { |
|
| 2443 | + | case NODE_UNOP: |
|
| 2444 | + | return gen_unop(g, n, lvalue); |
|
| 2445 | + | case NODE_BINOP: |
|
| 2446 | + | return gen_binop(g, n); |
|
| 2447 | + | case NODE_BOOL: |
|
| 2448 | + | if (n->type->cls == TYPE_OPT) { |
|
| 2449 | + | value_t inner_val = (value_t){ |
|
| 2450 | + | .type = n->type->info.opt.elem, |
|
| 2451 | + | .loc = LOC_IMM, |
|
| 2452 | + | .as.imm.b = n->val.bool_lit, |
|
| 2453 | + | }; |
|
| 2454 | + | return optval_from_prim(g, n->type, inner_val); |
|
| 2455 | + | } else { |
|
| 2456 | + | val.loc = LOC_IMM; |
|
| 2457 | + | val.as.imm.b = n->val.bool_lit; |
|
| 2458 | + | } |
|
| 2459 | + | break; |
|
| 2460 | + | case NODE_STRING: |
|
| 2461 | + | return gen_string(g, n); |
|
| 2462 | + | case NODE_CHAR: |
|
| 2463 | + | if (n->type->cls == TYPE_OPT) { |
|
| 2464 | + | value_t inner_val = (value_t){ |
|
| 2465 | + | .type = n->type->info.opt.elem, |
|
| 2466 | + | .loc = LOC_IMM, |
|
| 2467 | + | .as.imm.u = (u8)n->val.char_lit, |
|
| 2468 | + | }; |
|
| 2469 | + | return optval_from_prim(g, n->type, inner_val); |
|
| 2470 | + | } else { |
|
| 2471 | + | val.loc = LOC_IMM; |
|
| 2472 | + | val.as.imm.u = (u8)n->val.char_lit; |
|
| 2473 | + | } |
|
| 2474 | + | break; |
|
| 2475 | + | case NODE_NUMBER: |
|
| 2476 | + | val.loc = LOC_IMM; |
|
| 2477 | + | ||
| 2478 | + | switch (n->type->cls) { |
|
| 2479 | + | case TYPE_I8: |
|
| 2480 | + | case TYPE_I16: |
|
| 2481 | + | case TYPE_I32: |
|
| 2482 | + | val.as.imm.i = n->val.number.value.i; |
|
| 2483 | + | break; |
|
| 2484 | + | case TYPE_U8: |
|
| 2485 | + | case TYPE_U16: |
|
| 2486 | + | case TYPE_U32: |
|
| 2487 | + | val.as.imm.u = n->val.number.value.u; |
|
| 2488 | + | break; |
|
| 2489 | + | case TYPE_OPT: { |
|
| 2490 | + | /* Number coerced to optional - create some(number) on stack */ |
|
| 2491 | + | type_t *elem_type = n->type->info.opt.elem; |
|
| 2492 | + | value_t inner_val = (value_t){ .type = elem_type, .loc = LOC_IMM }; |
|
| 2493 | + | ||
| 2494 | + | switch (elem_type->cls) { |
|
| 2495 | + | case TYPE_I8: |
|
| 2496 | + | case TYPE_I16: |
|
| 2497 | + | case TYPE_I32: |
|
| 2498 | + | inner_val.as.imm.i = n->val.number.value.i; |
|
| 2499 | + | break; |
|
| 2500 | + | case TYPE_U8: |
|
| 2501 | + | case TYPE_U16: |
|
| 2502 | + | case TYPE_U32: |
|
| 2503 | + | inner_val.as.imm.u = n->val.number.value.u; |
|
| 2504 | + | break; |
|
| 2505 | + | default: |
|
| 2506 | + | break; |
|
| 2507 | + | } |
|
| 2508 | + | return optval_from_prim(g, n->type, inner_val); |
|
| 2509 | + | } |
|
| 2510 | + | default: |
|
| 2511 | + | break; |
|
| 2512 | + | } |
|
| 2513 | + | break; |
|
| 2514 | + | case NODE_ACCESS: |
|
| 2515 | + | return gen_access(g, n, lvalue); |
|
| 2516 | + | case NODE_SCOPE: |
|
| 2517 | + | return gen_scope(g, n); |
|
| 2518 | + | case NODE_TRY: |
|
| 2519 | + | return gen_try(g, n); |
|
| 2520 | + | case NODE_IDENT: |
|
| 2521 | + | ||
| 2522 | + | if (n->sym->kind == SYM_FUNCTION) { |
|
| 2523 | + | /* Function identifier used as a value (function pointer) */ |
|
| 2524 | + | return gen_fn_ptr(g, n->sym, n->type); |
|
| 2525 | + | } |
|
| 2526 | + | ||
| 2527 | + | /* For types that are passed by reference and held in registers |
|
| 2528 | + | * (function parameters), dereference the pointer to get the data */ |
|
| 2529 | + | if ((type_is_passed_by_ref(n->type)) && |
|
| 2530 | + | n->sym->e.var.val.loc == LOC_REG) { |
|
| 2531 | + | return value_stack(OFFSET(n->sym->e.var.val.as.reg, 0), n->type); |
|
| 2532 | + | } |
|
| 2533 | + | return n->sym->e.var.val; |
|
| 2534 | + | case NODE_CALL: { |
|
| 2535 | + | /* Check if this is a tuple record constructor call */ |
|
| 2536 | + | if (!n->sym && n->type && n->type->cls == TYPE_RECORD && |
|
| 2537 | + | n->type->info.srt.tuple) { |
|
| 2538 | + | return gen_record_lit(g, n); |
|
| 2539 | + | } |
|
| 2540 | + | assert(n->sym); |
|
| 2541 | + | /* Check if this is a union constructor call */ |
|
| 2542 | + | if (n->sym->kind == SYM_VARIANT && |
|
| 2543 | + | type_is_union_with_payload(n->type)) { |
|
| 2544 | + | return gen_union_constructor(g, n); |
|
| 2545 | + | } |
|
| 2546 | + | /* Function pointer call */ |
|
| 2547 | + | if (n->sym->kind == SYM_VARIABLE) { |
|
| 2548 | + | return gen_call(g, n); |
|
| 2549 | + | } |
|
| 2550 | + | /* Regular function call */ |
|
| 2551 | + | ||
| 2552 | + | if (n->sym->e.fn.attribs & ATTRIB_EXTERN) { |
|
| 2553 | + | /* Check if it's a built-in function. */ |
|
| 2554 | + | for (usize i = 0; BUILTINS[i].name; i++) { |
|
| 2555 | + | if (strcmp(n->sym->qualified, BUILTINS[i].name) == 0) { |
|
| 2556 | + | return gen_call_intrinsic(g, n, BUILTINS[i].gen); |
|
| 2557 | + | } |
|
| 2558 | + | } |
|
| 2559 | + | } |
|
| 2560 | + | return gen_call(g, n); |
|
| 2561 | + | } |
|
| 2562 | + | case NODE_CALL_ARG: |
|
| 2563 | + | /* Unreachable. This is handled inside `NODE_CALL`. */ |
|
| 2564 | + | case NODE_RECORD_LIT: |
|
| 2565 | + | if (type_is_union_with_payload(n->type)) { |
|
| 2566 | + | type_t *payload_type = n->sym->node->type; |
|
| 2567 | + | ||
| 2568 | + | node_t payload_lit = *n; |
|
| 2569 | + | payload_lit.type = payload_type; |
|
| 2570 | + | payload_lit.sym = NULL; |
|
| 2571 | + | ||
| 2572 | + | value_t payload = gen_record_lit(g, &payload_lit); |
|
| 2573 | + | ||
| 2574 | + | return gen_union_store(g, n->type, n->sym, payload); |
|
| 2575 | + | } |
|
| 2576 | + | return gen_record_lit(g, n); |
|
| 2577 | + | case NODE_ARRAY_LIT: |
|
| 2578 | + | return gen_array_literal(g, n); |
|
| 2579 | + | case NODE_ARRAY_REPEAT_LIT: |
|
| 2580 | + | return gen_array_repeat(g, n); |
|
| 2581 | + | case NODE_ARRAY_INDEX: |
|
| 2582 | + | return gen_array_index(g, n, lvalue); |
|
| 2583 | + | case NODE_REF: |
|
| 2584 | + | return gen_ref(g, n); |
|
| 2585 | + | case NODE_NIL: { |
|
| 2586 | + | /* Allocate space for the optional value and initialize as nil */ |
|
| 2587 | + | i32 off = reserve(g, n->type); |
|
| 2588 | + | val = value_stack(OFFSET(FP, off), n->type); |
|
| 2589 | + | tval_store(g, val, (value_t){ 0 }, 0); |
|
| 2590 | + | ||
| 2591 | + | return val; |
|
| 2592 | + | } |
|
| 2593 | + | case NODE_UNDEF: { |
|
| 2594 | + | i32 off = reserve(g, n->type); |
|
| 2595 | + | val = value_stack(OFFSET(FP, off), n->type); |
|
| 2596 | + | ||
| 2597 | + | return val; |
|
| 2598 | + | } |
|
| 2599 | + | case NODE_AS: |
|
| 2600 | + | return gen_as_cast(g, n); |
|
| 2601 | + | case NODE_IF: |
|
| 2602 | + | if (n->type->cls != TYPE_VOID) { |
|
| 2603 | + | return gen_if_expr(g, n); |
|
| 2604 | + | } else { |
|
| 2605 | + | gen_if(g, n); |
|
| 2606 | + | return value_none(); |
|
| 2607 | + | } |
|
| 2608 | + | case NODE_BUILTIN: { |
|
| 2609 | + | builtin_kind_t kind = n->val.builtin.kind; |
|
| 2610 | + | node_t **args = nodespan_ptrs(&g->mod->parser, n->val.builtin.args); |
|
| 2611 | + | ||
| 2612 | + | switch (kind) { |
|
| 2613 | + | case BUILTIN_SLICE_OF: { |
|
| 2614 | + | /* @sliceOf(ptr, len) - construct a slice from a pointer and length. |
|
| 2615 | + | * Slices are fat pointers: 4 bytes for ptr, 4 bytes for len. */ |
|
| 2616 | + | node_t *ptr_expr = args[0]; |
|
| 2617 | + | node_t *len_expr = args[1]; |
|
| 2618 | + | ||
| 2619 | + | /* Generate code for pointer and length expressions */ |
|
| 2620 | + | value_t ptr_val = gen_expr(g, ptr_expr, false); |
|
| 2621 | + | value_t len_val = gen_expr(g, len_expr, false); |
|
| 2622 | + | ||
| 2623 | + | /* Reserve stack space for the slice */ |
|
| 2624 | + | i32 off = reserve(g, n->type); |
|
| 2625 | + | val = value_stack(OFFSET(FP, off), n->type); |
|
| 2626 | + | ||
| 2627 | + | /* Store pointer at offset+0, length at offset+WORD_SIZE */ |
|
| 2628 | + | emit_store(g, ptr_val, FP, off + SLICE_FIELD_PTR_OFFSET); |
|
| 2629 | + | /* Force len to be stored as a dword (WORD_SIZE bytes) */ |
|
| 2630 | + | static type_t dword = { .cls = TYPE_PTR }; |
|
| 2631 | + | len_val.type = &dword; |
|
| 2632 | + | emit_store(g, len_val, FP, off + SLICE_FIELD_LEN_OFFSET); |
|
| 2633 | + | ||
| 2634 | + | return val; |
|
| 2635 | + | } |
|
| 2636 | + | case BUILTIN_SIZE_OF: |
|
| 2637 | + | case BUILTIN_ALIGN_OF: |
|
| 2638 | + | /* These are compile-time constants and should have been |
|
| 2639 | + | * folded during type checking. */ |
|
| 2640 | + | bail("@sizeOf/@alignOf should be folded at compile time"); |
|
| 2641 | + | } |
|
| 2642 | + | break; |
|
| 2643 | + | } |
|
| 2644 | + | default: |
|
| 2645 | + | bail("unsupported expression node type %s", node_names[n->cls]); |
|
| 2646 | + | } |
|
| 2647 | + | return val; |
|
| 2648 | + | } |
|
| 2649 | + | ||
| 2650 | + | static void gen_fn_param(gen_t *g, node_t *param, usize ix) { |
|
| 2651 | + | node_t *fn = g->fn.current->node; |
|
| 2652 | + | ||
| 2653 | + | type_t *ret = fn->type->info.fun.ret; |
|
| 2654 | + | bool sret = type_is_passed_by_ref(ret); |
|
| 2655 | + | reg_t base = sret ? A1 : A0; |
|
| 2656 | + | reg_t a = base + (reg_t)ix; |
|
| 2657 | + | ||
| 2658 | + | /* We're going to simply track the register in which our parameter is |
|
| 2659 | + | * held, and mark it as in use. */ |
|
| 2660 | + | param->sym->e.var.val = value_reg(a, param->type); |
|
| 2661 | + | param->sym->e.var.val.temp = false; |
|
| 2662 | + | usereg(g, a); |
|
| 2663 | + | ||
| 2664 | + | /* If the type was passed by reference, we need to copy it to avoid |
|
| 2665 | + | * modifying the original copy. */ |
|
| 2666 | + | if (type_is_passed_by_ref(param->type)) { |
|
| 2667 | + | param->sym->e.var.val = emit_push(g, param->sym->e.var.val); |
|
| 2668 | + | freereg(g, a); |
|
| 2669 | + | } |
|
| 2670 | + | /* Nb. If code takes the address of a parameter (`¶m`), that parameter |
|
| 2671 | + | * typically must be spilled to memory since registers don't have |
|
| 2672 | + | * addresses. */ |
|
| 2673 | + | } |
|
| 2674 | + | ||
| 2675 | + | /* Detect literal initializers that reside in a dedicated temporary and |
|
| 2676 | + | * therefore can be bound directly without creating a defensive copy. */ |
|
| 2677 | + | static bool is_unaliased(node_t *init) { |
|
| 2678 | + | switch (init->cls) { |
|
| 2679 | + | case NODE_ARRAY_LIT: |
|
| 2680 | + | case NODE_ARRAY_REPEAT_LIT: |
|
| 2681 | + | case NODE_RECORD_LIT: |
|
| 2682 | + | case NODE_STRING: |
|
| 2683 | + | case NODE_NIL: |
|
| 2684 | + | case NODE_CALL: |
|
| 2685 | + | return true; |
|
| 2686 | + | default: |
|
| 2687 | + | /* Nb. all immediates return `false`, because they do not occupy a |
|
| 2688 | + | * stack location and therefore are not considered aliasable. */ |
|
| 2689 | + | return false; |
|
| 2690 | + | } |
|
| 2691 | + | } |
|
| 2692 | + | ||
| 2693 | + | static void gen_var(gen_t *g, node_t *n) { |
|
| 2694 | + | node_t *lval = n->val.var.ident; |
|
| 2695 | + | node_t *rval = n->val.var.value; |
|
| 2696 | + | ||
| 2697 | + | /* For placeholders, just evaluate the rvalue for side effects */ |
|
| 2698 | + | if (lval->cls == NODE_PLACEHOLDER) { |
|
| 2699 | + | if (rval->cls != NODE_UNDEF) { |
|
| 2700 | + | gen_expr(g, rval, false); |
|
| 2701 | + | } |
|
| 2702 | + | return; |
|
| 2703 | + | } |
|
| 2704 | + | ||
| 2705 | + | i32 align = n->sym->e.var.align; |
|
| 2706 | + | ||
| 2707 | + | if (rval->cls == NODE_UNDEF) { |
|
| 2708 | + | i32 offset = reserve_aligned(g, n->type, align); |
|
| 2709 | + | n->sym->e.var.val = value_stack(OFFSET(FP, offset), n->type); |
|
| 2710 | + | return; |
|
| 2711 | + | } |
|
| 2712 | + | ||
| 2713 | + | value_t val = gen_expr(g, rval, false); |
|
| 2714 | + | bool reuse = |
|
| 2715 | + | align <= n->type->align && val.loc == LOC_STACK && is_unaliased(rval); |
|
| 2716 | + | ||
| 2717 | + | if (reuse) { |
|
| 2718 | + | n->sym->e.var.val = val; |
|
| 2719 | + | return; |
|
| 2720 | + | } |
|
| 2721 | + | i32 offset = reserve_aligned(g, n->type, align); |
|
| 2722 | + | value_t dest = value_stack(OFFSET(FP, offset), n->type); |
|
| 2723 | + | n->sym->e.var.val = dest; |
|
| 2724 | + | ||
| 2725 | + | emit_replace(g, dest, val); |
|
| 2726 | + | } |
|
| 2727 | + | ||
| 2728 | + | static void gen_const(gen_t *g, node_t *n) { |
|
| 2729 | + | /* Don't re-generate if it already has a location. */ |
|
| 2730 | + | if (n->sym->e.var.val.loc != LOC_NONE) |
|
| 2731 | + | return; |
|
| 2732 | + | ||
| 2733 | + | node_t *value = n->val.constant.value; |
|
| 2734 | + | const char *name = n->sym->qualified; |
|
| 2735 | + | usize name_len = strlen(name); |
|
| 2736 | + | usize addr = data_node(&g->data, &g->mod->parser, value, name, name_len); |
|
| 2737 | + | ||
| 2738 | + | /* Store the constant address in the symbol table */ |
|
| 2739 | + | n->sym->e.var.val = value_addr(addr, 0, n->type); |
|
| 2740 | + | n->sym->e.var.align = n->type->align; |
|
| 2741 | + | } |
|
| 2742 | + | ||
| 2743 | + | static void gen_static(gen_t *g, node_t *n) { |
|
| 2744 | + | /* Don't re-generate if it already has a location. */ |
|
| 2745 | + | if (n->sym->e.var.val.loc != LOC_NONE) |
|
| 2746 | + | return; |
|
| 2747 | + | ||
| 2748 | + | node_t *value = n->val.static_decl.value; |
|
| 2749 | + | const char *name = n->sym->qualified; |
|
| 2750 | + | usize name_len = strlen(n->sym->qualified); |
|
| 2751 | + | usize addr = data_node(&g->data, &g->mod->parser, value, name, name_len); |
|
| 2752 | + | ||
| 2753 | + | n->sym->e.var.val = value_addr(addr, 0, n->type); |
|
| 2754 | + | n->sym->e.var.align = n->type->align; |
|
| 2755 | + | } |
|
| 2756 | + | ||
| 2757 | + | /* Generate code for a block of code. */ |
|
| 2758 | + | static void gen_block(gen_t *g, node_t *n) { |
|
| 2759 | + | frame_t *frame = &g->fn.current->e.fn.frame; |
|
| 2760 | + | ||
| 2761 | + | /* Record the stack pointer before entering the block |
|
| 2762 | + | * to restore it when exiting. */ |
|
| 2763 | + | i32 sp = frame->sp; |
|
| 2764 | + | ||
| 2765 | + | /* Generate code for each statement in the block. */ |
|
| 2766 | + | node_t **stmts = nodespan_ptrs(&g->mod->parser, n->val.block.stmts); |
|
| 2767 | + | for (usize i = 0; i < n->val.block.stmts.len; i++) { |
|
| 2768 | + | gen_node(g, stmts[i]); |
|
| 2769 | + | } |
|
| 2770 | + | if (-frame->sp > frame->size) { |
|
| 2771 | + | /* Keep track of the maximum stack space used. */ |
|
| 2772 | + | frame->size = -frame->sp; |
|
| 2773 | + | } |
|
| 2774 | + | /* De-allocate stack space. */ |
|
| 2775 | + | frame->sp = sp; |
|
| 2776 | + | } |
|
| 2777 | + | ||
| 2778 | + | /* Generate code for a function. */ |
|
| 2779 | + | static void gen_fn(gen_t *g, node_t *n) { |
|
| 2780 | + | /* Skip unused functions (dead code elimination) */ |
|
| 2781 | + | if (!n->sym->e.fn.used) { |
|
| 2782 | + | return; |
|
| 2783 | + | } |
|
| 2784 | + | ||
| 2785 | + | /* Check if this is an extern function */ |
|
| 2786 | + | if (n->sym->e.fn.attribs & ATTRIB_EXTERN) { |
|
| 2787 | + | /* For extern functions, we don't generate any code since they are |
|
| 2788 | + | * implemented externally or are built-ins. */ |
|
| 2789 | + | return; |
|
| 2790 | + | } |
|
| 2791 | + | /* Check if it's a test function, and skip if not in test mode. */ |
|
| 2792 | + | if (n->sym->e.fn.attribs & ATTRIB_TEST && !(g->flags & FLAG_TEST)) { |
|
| 2793 | + | return; |
|
| 2794 | + | } |
|
| 2795 | + | ||
| 2796 | + | type_t *ret = n->type->info.fun.ret; |
|
| 2797 | + | bool sret = type_is_passed_by_ref(ret); |
|
| 2798 | + | ||
| 2799 | + | /* For types that are returned by reference, keep hidden return pointer |
|
| 2800 | + | * alive */ |
|
| 2801 | + | if (sret) { |
|
| 2802 | + | usereg(g, A0); |
|
| 2803 | + | } |
|
| 2804 | + | ||
| 2805 | + | /* Set current function. */ |
|
| 2806 | + | g->fn.current = n->sym; |
|
| 2807 | + | g->fn.nretpatches = 0; |
|
| 2808 | + | ||
| 2809 | + | symbol_t *sym = n->sym; |
|
| 2810 | + | ||
| 2811 | + | /* Store the current instruction address as the function's address. */ |
|
| 2812 | + | sym->e.fn.addr = g->ninstrs; |
|
| 2813 | + | node_t *body = n->val.fn_decl.body; |
|
| 2814 | + | ||
| 2815 | + | /* Functions should have non-zero address, unless it's the default */ |
|
| 2816 | + | ||
| 2817 | + | frame_t *f = &sym->e.fn.frame; |
|
| 2818 | + | ||
| 2819 | + | /* Offsets for RA and previous FP. */ |
|
| 2820 | + | const i32 fp_off = -WORD_SIZE - WORD_SIZE; |
|
| 2821 | + | ||
| 2822 | + | f->sp = fp_off; |
|
| 2823 | + | f->size = 0; /* Will be patched once we know the frame size. */ |
|
| 2824 | + | ||
| 2825 | + | /* Function prologue. Track prologue address for patching. */ |
|
| 2826 | + | usize prologue = sym->e.fn.addr; |
|
| 2827 | + | ||
| 2828 | + | /* Generate placeholder instructions that will be patched at the end. */ |
|
| 2829 | + | /* This is the maximum prologue size, if we need to create a big |
|
| 2830 | + | * stack frame. */ |
|
| 2831 | + | emit(g, NOP); |
|
| 2832 | + | emit(g, NOP); |
|
| 2833 | + | emit(g, NOP); |
|
| 2834 | + | emit(g, NOP); |
|
| 2835 | + | emit(g, NOP); |
|
| 2836 | + | emit(g, NOP); |
|
| 2837 | + | emit(g, NOP); |
|
| 2838 | + | ||
| 2839 | + | /* Reserve all argument registers up-front so they are not used as |
|
| 2840 | + | * temporaries while we spill each parameter. */ |
|
| 2841 | + | reg_t param_base = sret ? A1 : A0; |
|
| 2842 | + | ||
| 2843 | + | for (usize i = 0; i < n->val.fn_decl.params.len; i++) { |
|
| 2844 | + | reg_t a = param_base + (reg_t)i; |
|
| 2845 | + | ||
| 2846 | + | if (a > A7) { |
|
| 2847 | + | bail( |
|
| 2848 | + | "function '%s' parameter %zu exceeds available register " |
|
| 2849 | + | "arguments", |
|
| 2850 | + | g->fn.current->qualified, |
|
| 2851 | + | i + 1 |
|
| 2852 | + | ); |
|
| 2853 | + | } |
|
| 2854 | + | usereg(g, a); |
|
| 2855 | + | } |
|
| 2856 | + | ||
| 2857 | + | /* |
|
| 2858 | + | * Save parameters on the stack. |
|
| 2859 | + | */ |
|
| 2860 | + | ||
| 2861 | + | for (usize i = 0; i < n->val.fn_decl.params.len; i++) { |
|
| 2862 | + | gen_fn_param(g, SPAN(g, n->val.fn_decl.params)[i], i); |
|
| 2863 | + | } |
|
| 2864 | + | ||
| 2865 | + | /* |
|
| 2866 | + | * Generate body. |
|
| 2867 | + | */ |
|
| 2868 | + | gen_block(g, body); |
|
| 2869 | + | ||
| 2870 | + | /* Ensure fallible functions that reach the end |
|
| 2871 | + | * implicitly return success. */ |
|
| 2872 | + | if (ret->cls == TYPE_RESULT) { |
|
| 2873 | + | if (!ret->info.res.payload->size) { |
|
| 2874 | + | value_t dest; |
|
| 2875 | + | ||
| 2876 | + | if (type_is_passed_by_ref(ret)) { |
|
| 2877 | + | usereg(g, A0); |
|
| 2878 | + | dest = value_stack(OFFSET(A0, 0), ret); |
|
| 2879 | + | } else { |
|
| 2880 | + | dest = value_reg(A0, ret); |
|
| 2881 | + | } |
|
| 2882 | + | emit_result_store_success(g, dest, value_none()); |
|
| 2883 | + | ||
| 2884 | + | if (!type_is_passed_by_ref(ret)) { |
|
| 2885 | + | freereg(g, A0); |
|
| 2886 | + | } |
|
| 2887 | + | } |
|
| 2888 | + | } |
|
| 2889 | + | /* Align the frame size according to the RISCV ABI. */ |
|
| 2890 | + | f->size = align(f->size, STACK_ALIGNMENT); |
|
| 2891 | + | ||
| 2892 | + | instr_t *ins = &g->instrs[prologue]; |
|
| 2893 | + | usize slot = 0; |
|
| 2894 | + | const i32 locals = f->size - WORD_SIZE * 2; |
|
| 2895 | + | ||
| 2896 | + | ins[slot++] = ADDI(SP, SP, -WORD_SIZE * 2); |
|
| 2897 | + | ins[slot++] = SD(FP, SP, 0); |
|
| 2898 | + | ins[slot++] = SD(RA, SP, WORD_SIZE); |
|
| 2899 | + | ins[slot++] = ADDI(FP, SP, WORD_SIZE * 2); |
|
| 2900 | + | ||
| 2901 | + | if (locals != 0) { |
|
| 2902 | + | if (is_small(-locals)) { |
|
| 2903 | + | ins[slot++] = ADDI(SP, SP, -locals); |
|
| 2904 | + | } else { |
|
| 2905 | + | i32 hi = 0, lo = 0; |
|
| 2906 | + | split_imm(locals, &hi, &lo); |
|
| 2907 | + | ||
| 2908 | + | ins[slot++] = LUI(T0, hi); |
|
| 2909 | + | ||
| 2910 | + | if (lo != 0) |
|
| 2911 | + | ins[slot++] = ADDI(T0, T0, lo); |
|
| 2912 | + | ||
| 2913 | + | ins[slot++] = SUB(SP, SP, T0); |
|
| 2914 | + | } |
|
| 2915 | + | } |
|
| 2916 | + | while (slot < 7) |
|
| 2917 | + | ins[slot++] = NOP; |
|
| 2918 | + | ||
| 2919 | + | /* Mark the epilogue position and patch all return statements |
|
| 2920 | + | * to jump to this epilogue. */ |
|
| 2921 | + | usize epilogue = g->ninstrs; |
|
| 2922 | + | ||
| 2923 | + | for (usize i = 0; i < g->fn.nretpatches; i++) { |
|
| 2924 | + | ctpatch_t *p = &g->fn.retpatches[i]; |
|
| 2925 | + | ||
| 2926 | + | if (!p->applied) { |
|
| 2927 | + | /* Calculate jump offset to the epilogue. */ |
|
| 2928 | + | i32 offset = jump_offset(p->pc, epilogue); |
|
| 2929 | + | ||
| 2930 | + | /* A word-size offset basically means jumping to the next |
|
| 2931 | + | * instruction, which is a redundant. We leave it as a NOP in |
|
| 2932 | + | * that case. */ |
|
| 2933 | + | if (offset != INSTR_SIZE) { |
|
| 2934 | + | /* Update the jump instruction with the correct offset. */ |
|
| 2935 | + | g->instrs[p->pc] = JMP(offset); |
|
| 2936 | + | } |
|
| 2937 | + | p->applied = true; |
|
| 2938 | + | } |
|
| 2939 | + | } |
|
| 2940 | + | /* |
|
| 2941 | + | * Function epilogue. |
|
| 2942 | + | */ |
|
| 2943 | + | if (locals != 0) { |
|
| 2944 | + | if (is_small(locals)) { |
|
| 2945 | + | emit(g, ADDI(SP, SP, locals)); |
|
| 2946 | + | } else { |
|
| 2947 | + | emit_li(g, T0, locals); |
|
| 2948 | + | emit(g, ADD(SP, SP, T0)); |
|
| 2949 | + | } |
|
| 2950 | + | } |
|
| 2951 | + | emit(g, LD(FP, SP, 0)); |
|
| 2952 | + | emit(g, LD(RA, SP, WORD_SIZE)); |
|
| 2953 | + | emit(g, ADDI(SP, SP, WORD_SIZE * 2)); |
|
| 2954 | + | emit(g, RET); |
|
| 2955 | + | ||
| 2956 | + | /* Release parameter and temporary registers */ |
|
| 2957 | + | for (reg_t r = A0; r <= A7; r++) |
|
| 2958 | + | freereg(g, r); |
|
| 2959 | + | ||
| 2960 | + | for (usize i = 0; i < sizeof(temp_registers) / sizeof(reg_t); i++) |
|
| 2961 | + | freereg(g, temp_registers[i]); |
|
| 2962 | + | ||
| 2963 | + | /* Patch function call locations. */ |
|
| 2964 | + | for (usize i = 0; i < g->nfnpatches; i++) { |
|
| 2965 | + | fnpatch_t *p = &g->fnpatches[i]; |
|
| 2966 | + | ||
| 2967 | + | if (!p->applied && strcmp(p->fn_name, sym->qualified) == 0) { |
|
| 2968 | + | if (p->patch_type == PATCH_CALL) { |
|
| 2969 | + | i32 offset = jump_offset(p->pc, sym->e.fn.addr); |
|
| 2970 | + | ||
| 2971 | + | if (is_jump_imm(offset)) { |
|
| 2972 | + | g->instrs[p->pc] = JAL(RA, offset); |
|
| 2973 | + | if (p->tramp_pc != (usize)-1) |
|
| 2974 | + | g->instrs[p->tramp_pc] = NOP; |
|
| 2975 | + | } else { |
|
| 2976 | + | i32 target_addr = (i32)(sym->e.fn.addr * INSTR_SIZE); |
|
| 2977 | + | i32 current_addr = (i32)(p->pc * INSTR_SIZE); |
|
| 2978 | + | i32 rel = target_addr - current_addr; |
|
| 2979 | + | ||
| 2980 | + | i32 hi, lo; |
|
| 2981 | + | split_imm(rel, &hi, &lo); |
|
| 2982 | + | ||
| 2983 | + | reg_t scratch = p->scratch_reg ? p->scratch_reg : T0; |
|
| 2984 | + | g->instrs[p->pc] = AUIPC(scratch, hi); |
|
| 2985 | + | g->instrs[p->tramp_pc] = JALR(RA, scratch, lo); |
|
| 2986 | + | } |
|
| 2987 | + | } else if (p->patch_type == PATCH_ADDRESS) { |
|
| 2988 | + | /* For function address patches, replace the NOPs with AUIPC + |
|
| 2989 | + | * ADDI for PC-relative addressing. Calculate target - |
|
| 2990 | + | * current_pc. */ |
|
| 2991 | + | i32 target_addr = sym->e.fn.addr * INSTR_SIZE; |
|
| 2992 | + | i32 current_addr = p->pc * INSTR_SIZE; |
|
| 2993 | + | i32 offset = target_addr - current_addr; |
|
| 2994 | + | ||
| 2995 | + | /* Split offset into upper 20 bits and lower 12 bits */ |
|
| 2996 | + | i32 hi, lo; |
|
| 2997 | + | split_imm(offset, &hi, &lo); |
|
| 2998 | + | ||
| 2999 | + | /* Emit AUIPC + ADDI sequence */ |
|
| 3000 | + | g->instrs[p->pc] = AUIPC(p->target_reg, hi); |
|
| 3001 | + | g->instrs[p->tramp_pc] = ADDI(p->target_reg, p->target_reg, lo); |
|
| 3002 | + | } |
|
| 3003 | + | /* Mark as applied so we don't patch it again. */ |
|
| 3004 | + | p->applied = true; |
|
| 3005 | + | } |
|
| 3006 | + | } |
|
| 3007 | + | } |
|
| 3008 | + | ||
| 3009 | + | /* Generate code for a module. */ |
|
| 3010 | + | static void gen_module(gen_t *g, module_t *m) { |
|
| 3011 | + | node_t *n = m->ast; |
|
| 3012 | + | ||
| 3013 | + | if (m->compiled) |
|
| 3014 | + | return; |
|
| 3015 | + | ||
| 3016 | + | /* Set the current module for span access */ |
|
| 3017 | + | module_t *prev_mod = g->mod; |
|
| 3018 | + | g->mod = m; |
|
| 3019 | + | ||
| 3020 | + | /* Don't compile test modules unless we are in test mode. */ |
|
| 3021 | + | if (m->attribs & ATTRIB_TEST && !(g->flags & FLAG_TEST)) { |
|
| 3022 | + | g->mod = prev_mod; |
|
| 3023 | + | return; |
|
| 3024 | + | } |
|
| 3025 | + | /* Generate all constants to ensure they're available */ |
|
| 3026 | + | node_t **stmts_const = nodespan_ptrs(&m->parser, n->val.block.stmts); |
|
| 3027 | + | for (usize i = 0; i < n->val.block.stmts.len; i++) { |
|
| 3028 | + | node_t *stmt = stmts_const[i]; |
|
| 3029 | + | if (stmt->cls == NODE_CONST) { |
|
| 3030 | + | gen_const(g, stmt); |
|
| 3031 | + | } else if (stmt->cls == NODE_STATIC) { |
|
| 3032 | + | gen_static(g, stmt); |
|
| 3033 | + | } |
|
| 3034 | + | } |
|
| 3035 | + | /* Generate code for module entry point. */ |
|
| 3036 | + | /* Must be at address _zero_ of the module. */ |
|
| 3037 | + | if (n->sym->e.mod->default_fn) { |
|
| 3038 | + | gen_fn(g, n->sym->e.mod->default_fn->node); |
|
| 3039 | + | } |
|
| 3040 | + | ||
| 3041 | + | /* Generate all declared modules */ |
|
| 3042 | + | node_t **stmts_sub = nodespan_ptrs(&m->parser, n->val.block.stmts); |
|
| 3043 | + | for (usize i = 0; i < n->val.block.stmts.len; i++) { |
|
| 3044 | + | node_t *stmt = stmts_sub[i]; |
|
| 3045 | + | if (stmt->cls == NODE_MOD) { |
|
| 3046 | + | gen_mod(g, stmt); |
|
| 3047 | + | } |
|
| 3048 | + | if (stmt->cls == NODE_USE) { |
|
| 3049 | + | gen_use(g, stmt); |
|
| 3050 | + | } |
|
| 3051 | + | } |
|
| 3052 | + | /* Generate code for everything else. */ |
|
| 3053 | + | node_t **stmts = nodespan_ptrs(&m->parser, n->val.block.stmts); |
|
| 3054 | + | for (usize i = 0; i < n->val.block.stmts.len; i++) { |
|
| 3055 | + | node_t *stmt = stmts[i]; |
|
| 3056 | + | if (stmt->cls == NODE_CONST) |
|
| 3057 | + | continue; |
|
| 3058 | + | if (stmt->cls == NODE_FN && stmt->sym->e.fn.attribs & ATTRIB_DEFAULT) |
|
| 3059 | + | continue; |
|
| 3060 | + | gen_node(g, stmt); |
|
| 3061 | + | } |
|
| 3062 | + | m->compiled = true; |
|
| 3063 | + | g->mod = prev_mod; |
|
| 3064 | + | } |
|
| 3065 | + | ||
| 3066 | + | /* Generate code for a module declaration. */ |
|
| 3067 | + | static void gen_mod(gen_t *g, node_t *n) { |
|
| 3068 | + | if (!n->sym) { /* Skip modules that aren't loaded like test modules. */ |
|
| 3069 | + | return; |
|
| 3070 | + | } |
|
| 3071 | + | module_t *mod = n->sym->e.mod; |
|
| 3072 | + | ||
| 3073 | + | gen_module(g, mod); |
|
| 3074 | + | } |
|
| 3075 | + | ||
| 3076 | + | /* Generate code for a use declaration. */ |
|
| 3077 | + | /* For function/variable imports, this generates the parent module. */ |
|
| 3078 | + | static void gen_use(gen_t *g, node_t *n) { |
|
| 3079 | + | /* For wildcard re-exports, n->sym is NULL since we're not binding |
|
| 3080 | + | * the module itself, just re-exporting its symbols. */ |
|
| 3081 | + | if (!n->sym) |
|
| 3082 | + | return; |
|
| 3083 | + | ||
| 3084 | + | module_t *mod = n->sym->scope->mod; |
|
| 3085 | + | ||
| 3086 | + | gen_module(g, mod); |
|
| 3087 | + | } |
|
| 3088 | + | ||
| 3089 | + | /* Generating nothing. This is used eg. for type declaration nodes |
|
| 3090 | + | * which don't have any associated code. */ |
|
| 3091 | + | static void gen_nop(gen_t *g, node_t *n) { |
|
| 3092 | + | (void)g; |
|
| 3093 | + | (void)n; |
|
| 3094 | + | } |
|
| 3095 | + | ||
| 3096 | + | /* Generate code from AST. */ |
|
| 3097 | + | /* Pre-size the initialized data region by summing the aligned sizes of all |
|
| 3098 | + | * initialized (non-BSS) constants and statics across every module. */ |
|
| 3099 | + | static void data_presize(gen_t *g) { |
|
| 3100 | + | usize total = 0; |
|
| 3101 | + | ||
| 3102 | + | for (usize m = 0; m < g->mm->nmodules; m++) { |
|
| 3103 | + | module_t *mod = &g->mm->modules[m]; |
|
| 3104 | + | ||
| 3105 | + | if (!mod->ast) |
|
| 3106 | + | continue; |
|
| 3107 | + | if (mod->attribs & ATTRIB_TEST && !(g->flags & FLAG_TEST)) |
|
| 3108 | + | continue; |
|
| 3109 | + | ||
| 3110 | + | node_t *n = mod->ast; |
|
| 3111 | + | node_t **stmts = nodespan_ptrs(&mod->parser, n->val.block.stmts); |
|
| 3112 | + | ||
| 3113 | + | for (usize i = 0; i < n->val.block.stmts.len; i++) { |
|
| 3114 | + | node_t *stmt = stmts[i]; |
|
| 3115 | + | node_t *value = NULL; |
|
| 3116 | + | ||
| 3117 | + | if (stmt->cls == NODE_CONST) |
|
| 3118 | + | value = stmt->val.constant.value; |
|
| 3119 | + | else if (stmt->cls == NODE_STATIC) |
|
| 3120 | + | value = stmt->val.static_decl.value; |
|
| 3121 | + | else |
|
| 3122 | + | continue; |
|
| 3123 | + | ||
| 3124 | + | if (value->cls == NODE_UNDEF) |
|
| 3125 | + | continue; |
|
| 3126 | + | ||
| 3127 | + | total = align(total, WORD_SIZE); |
|
| 3128 | + | total += stmt->type->size; |
|
| 3129 | + | } |
|
| 3130 | + | } |
|
| 3131 | + | g->data.rw_init_total = align(total, WORD_SIZE); |
|
| 3132 | + | } |
|
| 3133 | + | ||
| 3134 | + | int gen_emit(gen_t *g, module_t *root) { |
|
| 3135 | + | /* Pre-size the initialized data region so that BSS items are placed |
|
| 3136 | + | * after all initialized data in the rw section. */ |
|
| 3137 | + | data_presize(g); |
|
| 3138 | + | ||
| 3139 | + | /* Generate root module. This has to have address zero, as it has |
|
| 3140 | + | * the entry point. */ |
|
| 3141 | + | gen_module(g, root); |
|
| 3142 | + | ||
| 3143 | + | /* Generate `std` module if available. */ |
|
| 3144 | + | module_t *std = module_manager_lookup_by_qualified_name(g->mm, "std"); |
|
| 3145 | + | if (std) { |
|
| 3146 | + | gen_module(g, std); |
|
| 3147 | + | } |
|
| 3148 | + | /* Check that all patches have been applied. */ |
|
| 3149 | + | for (usize i = 0; i < g->nfnpatches; i++) { |
|
| 3150 | + | if (!g->fnpatches[i].applied) |
|
| 3151 | + | bail( |
|
| 3152 | + | "jump for function '%s' was not patched", |
|
| 3153 | + | g->fnpatches[i].fn_name |
|
| 3154 | + | ); |
|
| 3155 | + | } |
|
| 3156 | + | /* Check that all return patches have been applied. */ |
|
| 3157 | + | for (usize i = 0; i < g->fn.nretpatches; i++) { |
|
| 3158 | + | if (!g->fn.retpatches[i].applied) |
|
| 3159 | + | bail("return statement was not properly patched"); |
|
| 3160 | + | } |
|
| 3161 | + | /* Check that all break patches have been applied. */ |
|
| 3162 | + | for (usize i = 0; i < g->fn.nbrkpatches; i++) { |
|
| 3163 | + | if (!g->fn.brkpatches[i].applied) |
|
| 3164 | + | bail("break statement was not properly patched"); |
|
| 3165 | + | } |
|
| 3166 | + | /* Keep root module reference for data emission. */ |
|
| 3167 | + | g->mod = root; |
|
| 3168 | + | ||
| 3169 | + | return 0; |
|
| 3170 | + | } |
|
| 3171 | + | ||
| 3172 | + | static value_t gen_as_cast(gen_t *g, node_t *n) { |
|
| 3173 | + | node_t *expr = n->val.as_expr.expr; |
|
| 3174 | + | value_t val = gen_expr(g, expr, false); |
|
| 3175 | + | ||
| 3176 | + | /* If casting to the same type, no conversion needed */ |
|
| 3177 | + | if (val.type == n->type) |
|
| 3178 | + | return val; |
|
| 3179 | + | ||
| 3180 | + | /* For casts between different primitive types, we need to handle |
|
| 3181 | + | * size changes properly (e.g., u8 -> i32 requires zero extension) */ |
|
| 3182 | + | /* If the types are the same size, just change the type metadata */ |
|
| 3183 | + | if (val.type->size == n->type->size) { |
|
| 3184 | + | val.type = n->type; |
|
| 3185 | + | return val; |
|
| 3186 | + | } |
|
| 3187 | + | /* For size changes, we need to properly load and re-store the value |
|
| 3188 | + | * to ensure correct zero/sign extension */ |
|
| 3189 | + | if (val.loc == LOC_STACK) { |
|
| 3190 | + | /* Load the value using the source type (proper sized load) */ |
|
| 3191 | + | reg_t rd = emit_load(g, val); |
|
| 3192 | + | /* Push to stack using the target type (proper sized store) */ |
|
| 3193 | + | i32 offset = emit_regpush(g, rd, n->type); |
|
| 3194 | + | freereg(g, rd); |
|
| 3195 | + | ||
| 3196 | + | return value_stack(OFFSET(FP, offset), n->type); |
|
| 3197 | + | } |
|
| 3198 | + | /* For non-stack values (registers, immediates), just change the |
|
| 3199 | + | * type */ |
|
| 3200 | + | val.type = n->type; |
|
| 3201 | + | ||
| 3202 | + | return val; |
|
| 3203 | + | } |
|
| 3204 | + | ||
| 3205 | + | void gen_dump_bin(gen_t *g, FILE *text, FILE *data_ro, FILE *data_rw) { |
|
| 3206 | + | /* Write instructions */ |
|
| 3207 | + | fwrite(g->instrs, sizeof(u32), g->ninstrs, text); |
|
| 3208 | + | /* Write data */ |
|
| 3209 | + | data_emit_rw(&g->data, data_rw); |
|
| 3210 | + | data_emit_ro(&g->data, data_ro); |
|
| 3211 | + | ||
| 3212 | + | fflush(text); |
|
| 3213 | + | fflush(data_ro); |
|
| 3214 | + | fflush(data_rw); |
|
| 3215 | + | } |
|
| 3216 | + | ||
| 3217 | + | /* Initialize a `gen` object. */ |
|
| 3218 | + | void gen_init(gen_t *g, types_t *t, module_manager_t *mm, u32 flags) { |
|
| 3219 | + | g->ninstrs = 0; |
|
| 3220 | + | g->nfnpatches = 0; |
|
| 3221 | + | g->fn.current = NULL; |
|
| 3222 | + | g->fn.nretpatches = 0; |
|
| 3223 | + | g->fn.nbrkpatches = 0; |
|
| 3224 | + | g->regs = ralloc(); |
|
| 3225 | + | g->types = t; |
|
| 3226 | + | g->loop.current = NULL; |
|
| 3227 | + | g->loop.end = 0; |
|
| 3228 | + | g->mm = mm; |
|
| 3229 | + | g->flags = flags; |
|
| 3230 | + | ||
| 3231 | + | /* Initialize data section */ |
|
| 3232 | + | data_init(&g->data); |
|
| 3233 | + | } |
gen.h
added
+153 -0
| 1 | + | #ifndef GEN_H |
|
| 2 | + | #define GEN_H |
|
| 3 | + | ||
| 4 | + | #include "ast.h" |
|
| 5 | + | #include "limits.h" |
|
| 6 | + | #include "module.h" |
|
| 7 | + | #include "ralloc.h" |
|
| 8 | + | #include "resolver.h" |
|
| 9 | + | #include "riscv.h" |
|
| 10 | + | #include "scanner.h" |
|
| 11 | + | #include "symtab.h" |
|
| 12 | + | #include "types.h" |
|
| 13 | + | ||
| 14 | + | #include "gen/data.h" |
|
| 15 | + | ||
/* Build an `offset_t` value from a base and an offset. */
#define OFFSET(base, off) ((offset_t){ (base), (off) })
|
| 18 | + | ||
/* Kind of deferred function patch. A patch records an instruction location
 * that can only be filled in once the target function's address is
 * known. */
typedef enum {
    PATCH_CALL,    /* Function call (JAL, or AUIPC + JALR when out of
                    * range) */
    PATCH_ADDRESS, /* Function address (AUIPC + ADDI pair) */
} patch_type_t;
|
| 25 | + | ||
/* Metadata for deferred branch patching that may expand to a trampoline.
 * `pc` and `tramp_pc` are instruction indices, not byte addresses. */
typedef struct {
    usize   pc;       /* Branch instruction index */
    usize   tramp_pc; /* Trampoline instruction index */
    iname_t op;       /* Branch operation */
    reg_t   rs1;      /* First operand register */
    reg_t   rs2;      /* Second operand register */
    bool    valid;    /* Whether this patch slot is in use */
} branch_patch_t;
|
| 35 | + | ||
/* Control-flow bookkeeping for a single match case: where to go when the
 * case does not apply. */
typedef struct {
    usize          skip_body;    /* Jump location when pattern (or guard) fails */
    branch_patch_t guard_branch; /* Guard failure branch placeholder */
} match_case_ctrl_t;
|
| 40 | + | ||
/* Function patch: an instruction location that refers to a function whose
 * address was not known when the instruction was emitted. The patch is
 * resolved when the function named `fn_name` is generated. */
typedef struct {
    const char  *fn_name;     /* Qualified name of the target function. */
    usize        pc;          /* Instruction index. */
    usize        tramp_pc;    /* Optional secondary slot for long jumps;
                               * `(usize)-1` when there is none. */
    bool         applied;     /* Whether the patch was applied. */
    patch_type_t patch_type;  /* Type of patch to apply. */
    reg_t        target_reg;  /* Target register for address patches. */
    reg_t        scratch_reg; /* Scratch register for far calls; zero
                               * selects T0. */
} fnpatch_t;
|
| 50 | + | ||
/* Return patch, used to jump to a function's epilogue from return statements.
 * NOTE(review): `fn_t` stores its return patches as `ctpatch_t`; confirm
 * whether this type is still used anywhere. */
typedef struct {
    usize pc;      /* Instruction index of the jump placeholder */
    bool  applied; /* Whether the patch was applied */
} retpatch_t;
|
| 56 | + | ||
/* Control flow patch: a jump placeholder that is resolved later, either to
 * a function's epilogue (for return statements) or to a loop's end (for
 * break statements). */
typedef struct {
    usize   pc;      /* Instruction index of the jump placeholder */
    bool    applied; /* Whether the patch was applied */
    node_t *loop;    /* Parent loop of this patch, for breaks. */
} ctpatch_t;
|
| 64 | + | ||
/* Loop context for handling control flow statements (`continue` and
 * `break`) inside the loop currently being generated. */
typedef struct {
    usize   start;   /* Start address of loop (for `continue`). */
    usize   end;     /* End address of loop (for `break`). */
    node_t *current; /* Current loop we're in. */
} loop_t;
|
| 71 | + | ||
/* Function context: the function currently being generated, together with
 * its pending return and break patches. */
typedef struct {
    /* Return and break patches for current function. */
    ctpatch_t retpatches[MAX_RET_PATCHES];
    usize     nretpatches; /* Number of entries used in `retpatches`. */
    ctpatch_t brkpatches[MAX_BRK_PATCHES];
    usize     nbrkpatches; /* Number of entries used in `brkpatches`. */
    symbol_t *current;     /* Current function. */
} fn_t;
|
| 81 | + | ||
/* Code generator context. Holds the emitted instructions, the pending
 * patches, and the state of whatever is currently being generated. */
typedef struct gen_t {
    instr_t   instrs[MAX_INSTRS]; /* Emitted instructions. */
    usize     ninstrs;            /* Number of instructions emitted. */
    loop_t    loop;               /* Current loop. */
    fn_t      fn;                 /* Current function. */
    fnpatch_t fnpatches[MAX_FN_PATCHES]; /* Pending function patches. */
    usize     nfnpatches;         /* Number of entries in `fnpatches`. */

    types_t          *types; /* Type table. */
    ralloc_t          regs;  /* Register allocator state. */
    module_manager_t *mm;    /* Module manager. */
    struct module_t  *mod;   /* Current module being compiled */
    data_section_t    data;  /* Static data section */
    u32               flags; /* Generation flags, e.g. `FLAG_TEST`. */
} gen_t;
|
| 98 | + | ||
/* Represents a tagged value (eg. an optional or enum with payload), split
 * into the location of its tag and the location of its payload. */
typedef struct {
    value_t tag; /* Location of the tag */
    value_t val; /* Location of the value */
    type_t *typ; /* The tagged type (eg. `?T` or `enum`) */
} tval_t;
|
| 105 | + | ||
/* Constructors for `value_t`, each taking the type of the value as its
 * last argument. NOTE(review): presumably one constructor per kind of
 * value location (stack slot, address, immediate, register) — confirm
 * against the definitions. */
value_t value_stack(offset_t off, type_t *ty);
value_t value_addr(usize addr, i32 off, type_t *ty);
value_t value_imm(imm_t imm, type_t *ty);
value_t value_reg(reg_t r, type_t *ty);
|
| 110 | + | ||
| 111 | + | static inline reg_t nextreg(gen_t *g) { |
|
| 112 | + | return ralloc_next(&g->regs); |
|
| 113 | + | } |
|
| 114 | + | ||
| 115 | + | static inline reg_t nextreg_except(gen_t *g, reg_t r) { |
|
| 116 | + | return ralloc_next_except(&g->regs, r); |
|
| 117 | + | } |
|
| 118 | + | ||
| 119 | + | static inline bool isreserved(gen_t *g, reg_t r) { |
|
| 120 | + | if (r == FP) |
|
| 121 | + | return true; |
|
| 122 | + | return !ralloc_is_free(&g->regs, r); |
|
| 123 | + | } |
|
| 124 | + | ||
| 125 | + | static inline void freereg(gen_t *g, reg_t r) { |
|
| 126 | + | ralloc_free(&g->regs, r); |
|
| 127 | + | } |
|
| 128 | + | ||
| 129 | + | static inline reg_t usereg(gen_t *g, reg_t r) { |
|
| 130 | + | ralloc_reserve(&g->regs, r); |
|
| 131 | + | return r; |
|
| 132 | + | } |
|
| 133 | + | ||
/* Calculate a jump offset between two instruction indices.
 * NOTE(review): callers compare the result against `INSTR_SIZE`, so it
 * appears to be a byte offset — confirm against the definition. */
i32 jump_offset(usize from, usize to);
/* Reserve stack space for the given type. */
i32 reserve(gen_t *g, type_t *ty);
/* Like `align`, but rounds down. */
i32 align_stack(i32 addr, i32 alignment);
|
| 140 | + | ||
/* Optional and enum value helper functions, operating on tagged values
 * (see `tval_t`). */
tval_t tval_from_val(gen_t *g, value_t val);
i32    tval_payload_zero_size(type_t *container);
void   tval_store(gen_t *g, value_t dest, value_t value, i32 tag);
|
| 145 | + | ||
/* Write the generated program in binary format: instructions to `text`,
 * data to `data_ro` and `data_rw`. */
void gen_dump_bin(gen_t *g, FILE *text, FILE *data_ro, FILE *data_rw);
/* Generate code for the given root module. */
int gen_emit(gen_t *g, module_t *root);
/* Initialize a `gen_t` object. */
void gen_init(gen_t *g, types_t *t, module_manager_t *mm, u32 flags);
|
| 152 | + | ||
| 153 | + | #endif |
gen/data.c
added
+364 -0
| 1 | + | #include <assert.h> |
|
| 2 | + | #include <stdio.h> |
|
| 3 | + | #include <string.h> |
|
| 4 | + | ||
| 5 | + | #include "../ast.h" |
|
| 6 | + | #include "../gen.h" |
|
| 7 | + | #include "../io.h" |
|
| 8 | + | #include "../limits.h" |
|
| 9 | + | #include "../parser.h" |
|
| 10 | + | #include "../riscv.h" |
|
| 11 | + | #include "../strings.h" |
|
| 12 | + | ||
| 13 | + | #include "data.h" |
|
| 14 | + | ||
/* Forward declarations for the data emitters. `emit_node_data` calls itself
 * recursively for nested values and is declared here ahead of its
 * definition further down. */
static void emit_node_data(
    node_t *n, parser_t *p, FILE *out, data_section_t *d
);
static void emit_array_data(
    node_t *n, parser_t *p, FILE *out, data_section_t *d
);
|
| 21 | + | ||
| 22 | + | /* Write a little-endian word */ |
|
| 23 | + | static void write_le32(FILE *out, u32 v) { |
|
| 24 | + | u8 bytes[4]; |
|
| 25 | + | ||
| 26 | + | bytes[0] = (v & 0xFF); |
|
| 27 | + | bytes[1] = ((v >> 8) & 0xFF); |
|
| 28 | + | bytes[2] = ((v >> 16) & 0xFF); |
|
| 29 | + | bytes[3] = ((v >> 24) & 0xFF); |
|
| 30 | + | ||
| 31 | + | fwrite(bytes, sizeof(bytes), 1, out); |
|
| 32 | + | } |
|
| 33 | + | ||
| 34 | + | static void write_le64(FILE *out, u64 v) { |
|
| 35 | + | u8 bytes[8]; |
|
| 36 | + | ||
| 37 | + | bytes[0] = (v & 0xFF); |
|
| 38 | + | bytes[1] = ((v >> 8) & 0xFF); |
|
| 39 | + | bytes[2] = ((v >> 16) & 0xFF); |
|
| 40 | + | bytes[3] = ((v >> 24) & 0xFF); |
|
| 41 | + | bytes[4] = ((v >> 32) & 0xFF); |
|
| 42 | + | bytes[5] = ((v >> 40) & 0xFF); |
|
| 43 | + | bytes[6] = ((v >> 48) & 0xFF); |
|
| 44 | + | bytes[7] = ((v >> 56) & 0xFF); |
|
| 45 | + | ||
| 46 | + | fwrite(bytes, sizeof(bytes), 1, out); |
|
| 47 | + | } |
|
| 48 | + | ||
| 49 | + | /* Add a string literal to the data section. |
|
| 50 | + | * Returns the offset in the data section. */ |
|
| 51 | + | usize data_string(data_section_t *d, const char *str, usize len) { |
|
| 52 | + | /* Check if this string already exists in the data section */ |
|
| 53 | + | for (usize i = 0; i < d->nstrings; i++) { |
|
| 54 | + | string_data_t *existing = &d->strings[i]; |
|
| 55 | + | ||
| 56 | + | if (existing->length == len && memcmp(existing->data, str, len) == 0) { |
|
| 57 | + | return d->ro_offset + existing->offset; |
|
| 58 | + | } |
|
| 59 | + | } |
|
| 60 | + | d->ro_size = align(d->ro_size, WORD_SIZE); |
|
| 61 | + | ||
| 62 | + | /* Store the string information */ |
|
| 63 | + | string_data_t *s = &d->strings[d->nstrings++]; |
|
| 64 | + | s->data = str; |
|
| 65 | + | s->length = len; |
|
| 66 | + | s->offset = d->ro_size; |
|
| 67 | + | ||
| 68 | + | usize data = len + 1; /* Account for `NULL` terminator */ |
|
| 69 | + | usize padded = align(data, WORD_SIZE); /* Align to word boundary */ |
|
| 70 | + | d->ro_size += padded; |
|
| 71 | + | ||
| 72 | + | return d->ro_offset + s->offset; |
|
| 73 | + | } |
|
| 74 | + | ||
| 75 | + | /* Add a node value to the data section. |
|
| 76 | + | * Initialized items are placed in [0, rw_init_total), BSS items in |
|
| 77 | + | * [rw_init_total, ...). Returns the offset in the data section. */ |
|
| 78 | + | usize data_node( |
|
| 79 | + | data_section_t *d, |
|
| 80 | + | parser_t *p, |
|
| 81 | + | node_t *node, |
|
| 82 | + | const char *name, |
|
| 83 | + | usize name_len |
|
| 84 | + | ) { |
|
| 85 | + | bool bss = node->cls == NODE_UNDEF; |
|
| 86 | + | ||
| 87 | + | /* Store the constant information */ |
|
| 88 | + | data_item_t *item = &d->items[d->nitems++]; |
|
| 89 | + | item->kind = DATA_CONST; |
|
| 90 | + | item->node = node; |
|
| 91 | + | item->parser = p; |
|
| 92 | + | item->as.constant.name = name; |
|
| 93 | + | item->as.constant.name_len = name_len; |
|
| 94 | + | ||
| 95 | + | if (bss) { |
|
| 96 | + | d->rw_bss_size = align(d->rw_bss_size, WORD_SIZE); |
|
| 97 | + | item->offset = d->rw_init_total + d->rw_bss_size; |
|
| 98 | + | d->rw_bss_size += node->type->size; |
|
| 99 | + | } else { |
|
| 100 | + | d->rw_init_size = align(d->rw_init_size, WORD_SIZE); |
|
| 101 | + | item->offset = d->rw_init_size; |
|
| 102 | + | d->rw_init_size += node->type->size; |
|
| 103 | + | } |
|
| 104 | + | return d->rw_offset + item->offset; |
|
| 105 | + | } |
|
| 106 | + | ||
| 107 | + | /* Add array data for a slice to the data section. |
|
| 108 | + | * Returns the offset in the data section. */ |
|
| 109 | + | usize data_array(data_section_t *d, parser_t *p, node_t *n) { |
|
| 110 | + | /* Check if this array already exists in the data section */ |
|
| 111 | + | for (usize i = 0; i < d->nitems; i++) { |
|
| 112 | + | data_item_t *existing = &d->items[i]; |
|
| 113 | + | ||
| 114 | + | if (existing->kind == DATA_ARRAY && existing->node == n) { |
|
| 115 | + | return d->rw_offset + existing->offset; |
|
| 116 | + | } |
|
| 117 | + | } |
|
| 118 | + | d->rw_init_size = align(d->rw_init_size, WORD_SIZE); |
|
| 119 | + | ||
| 120 | + | /* Store the array information */ |
|
| 121 | + | data_item_t *item = &d->items[d->nitems++]; |
|
| 122 | + | item->kind = DATA_ARRAY; |
|
| 123 | + | item->offset = d->rw_init_size; |
|
| 124 | + | item->node = n; |
|
| 125 | + | item->parser = p; |
|
| 126 | + | item->as.array.length = n->val.array_lit.elems.len; |
|
| 127 | + | item->as.array.elem = n->type->info.slc.elem; |
|
| 128 | + | ||
| 129 | + | d->rw_init_size += n->type->size; |
|
| 130 | + | ||
| 131 | + | return d->rw_offset + item->offset; |
|
| 132 | + | } |
|
| 133 | + | ||
| 134 | + | /* Initialize data section */ |
|
| 135 | + | void data_init(data_section_t *d) { |
|
| 136 | + | d->nstrings = 0; |
|
| 137 | + | d->nitems = 0; |
|
| 138 | + | d->ro_size = 0; |
|
| 139 | + | d->rw_init_total = 0; |
|
| 140 | + | d->rw_init_size = 0; |
|
| 141 | + | d->rw_bss_size = 0; |
|
| 142 | + | d->rw_offset = DATA_RW_OFFSET; |
|
| 143 | + | d->ro_offset = DATA_RO_OFFSET; |
|
| 144 | + | } |
|
| 145 | + | ||
| 146 | + | /* For slices, we need to emit: |
|
| 147 | + | * 1. The data pointer; points to array data |
|
| 148 | + | * 2. The length */ |
|
| 149 | + | static void emit_slice_data(node_t *n, FILE *out, data_section_t *d) { |
|
| 150 | + | if (n->cls == NODE_STRING) { |
|
| 151 | + | /* For string literals, look up or register the string offset */ |
|
| 152 | + | usize addr = 0; |
|
| 153 | + | ||
| 154 | + | for (usize i = 0; i < d->nstrings; i++) { |
|
| 155 | + | string_data_t *s = &d->strings[i]; |
|
| 156 | + | if (s->length == n->val.string_lit.length && |
|
| 157 | + | memcmp(s->data, n->val.string_lit.data, s->length) == 0) { |
|
| 158 | + | addr = s->offset + d->ro_offset; |
|
| 159 | + | break; |
|
| 160 | + | } |
|
| 161 | + | } |
|
| 162 | + | /* If string not found, register it now */ |
|
| 163 | + | if (!addr) { |
|
| 164 | + | addr = data_string( |
|
| 165 | + | d, n->val.string_lit.data, n->val.string_lit.length |
|
| 166 | + | ); |
|
| 167 | + | } |
|
| 168 | + | write_le64(out, addr); |
|
| 169 | + | write_le64(out, n->val.string_lit.length); |
|
| 170 | + | } else { |
|
| 171 | + | bail("unsupported slice node %s", node_names[n->cls]); |
|
| 172 | + | } |
|
| 173 | + | } |
|
| 174 | + | ||
| 175 | + | static void emit_node_data( |
|
| 176 | + | node_t *n, parser_t *p, FILE *out, data_section_t *d |
|
| 177 | + | ) { |
|
| 178 | + | /* Handle undefined nodes by emitting zeros for their type size */ |
|
| 179 | + | if (n->cls == NODE_UNDEF) { |
|
| 180 | + | for (i32 i = 0; i < n->type->size; i++) { |
|
| 181 | + | fputc(0, out); |
|
| 182 | + | } |
|
| 183 | + | return; |
|
| 184 | + | } |
|
| 185 | + | /* Resolve identifiers that reference constants */ |
|
| 186 | + | if (n->cls == NODE_IDENT && n->sym && n->sym->kind == SYM_CONSTANT) { |
|
| 187 | + | emit_node_data(n->sym->node->val.constant.value, p, out, d); |
|
| 188 | + | return; |
|
| 189 | + | } |
|
| 190 | + | switch (n->type->cls) { |
|
| 191 | + | case TYPE_ARRAY: { |
|
| 192 | + | if (n->cls == NODE_ARRAY_LIT) { |
|
| 193 | + | node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems); |
|
| 194 | + | for (usize j = 0; j < n->val.array_lit.elems.len; j++) { |
|
| 195 | + | emit_node_data(elems[j], p, out, d); |
|
| 196 | + | } |
|
| 197 | + | } else if (n->cls == NODE_ARRAY_REPEAT_LIT) { |
|
| 198 | + | /* Array repeat literal: emit the same value N times */ |
|
| 199 | + | usize count = n->val.array_repeat_lit.count->val.number.value.u; |
|
| 200 | + | for (usize j = 0; j < count; j++) { |
|
| 201 | + | emit_node_data(n->val.array_repeat_lit.value, p, out, d); |
|
| 202 | + | } |
|
| 203 | + | } else { |
|
| 204 | + | bail("unsupported array node %s", node_names[n->cls]); |
|
| 205 | + | } |
|
| 206 | + | break; |
|
| 207 | + | } |
|
| 208 | + | case TYPE_RECORD: { |
|
| 209 | + | /* Emit record fields in order, with proper inter-field padding. */ |
|
| 210 | + | node_t **fields = nodespan_ptrs(p, n->val.record_lit.fields); |
|
| 211 | + | i32 pos = 0; |
|
| 212 | + | ||
| 213 | + | for (usize i = 0; i < n->val.record_lit.fields.len; i++) { |
|
| 214 | + | node_t *field = fields[i]; |
|
| 215 | + | node_t *field_val = field->val.record_lit_field.value; |
|
| 216 | + | symbol_t *field_sym = field->sym; |
|
| 217 | + | i32 field_off = field_sym->e.field.offset; |
|
| 218 | + | ||
| 219 | + | /* Emit padding bytes to reach the field's offset. */ |
|
| 220 | + | while (pos < field_off) { |
|
| 221 | + | fputc(0, out); |
|
| 222 | + | pos++; |
|
| 223 | + | } |
|
| 224 | + | emit_node_data(field_val, p, out, d); |
|
| 225 | + | pos += field_val->type->size; |
|
| 226 | + | } |
|
| 227 | + | /* Emit trailing padding to reach the full record size. */ |
|
| 228 | + | while (pos < n->type->size) { |
|
| 229 | + | fputc(0, out); |
|
| 230 | + | pos++; |
|
| 231 | + | } |
|
| 232 | + | break; |
|
| 233 | + | } |
|
| 234 | + | case TYPE_SLICE: |
|
| 235 | + | emit_slice_data(n, out, d); |
|
| 236 | + | break; |
|
| 237 | + | case TYPE_UNION: { |
|
| 238 | + | assert(n->sym); |
|
| 239 | + | assert(n->sym->node); |
|
| 240 | + | /* For union types, write the tag byte and pad to the type size. */ |
|
| 241 | + | fputc((u8)n->sym->node->val.union_variant.value, out); |
|
| 242 | + | for (i32 i = 1; i < n->type->size; i++) { |
|
| 243 | + | fputc(0, out); |
|
| 244 | + | } |
|
| 245 | + | break; |
|
| 246 | + | } |
|
| 247 | + | case TYPE_BOOL: |
|
| 248 | + | fputc(n->val.bool_lit ? 1 : 0, out); |
|
| 249 | + | break; |
|
| 250 | + | case TYPE_U8: { |
|
| 251 | + | u8 value = (u8)n->val.number.value.u; |
|
| 252 | + | if (n->cls == NODE_CHAR) |
|
| 253 | + | value = (u8)n->val.char_lit; |
|
| 254 | + | fputc(value, out); |
|
| 255 | + | break; |
|
| 256 | + | } |
|
| 257 | + | case TYPE_U16: { |
|
| 258 | + | u16 value = (u16)n->val.number.value.u; |
|
| 259 | + | fputc(value & 0xFF, out); |
|
| 260 | + | fputc((value >> 8) & 0xFF, out); |
|
| 261 | + | break; |
|
| 262 | + | } |
|
| 263 | + | case TYPE_U32: |
|
| 264 | + | write_le32(out, n->val.number.value.u); |
|
| 265 | + | break; |
|
| 266 | + | case TYPE_I8: |
|
| 267 | + | fputc((u8)n->val.number.value.i, out); |
|
| 268 | + | break; |
|
| 269 | + | case TYPE_I16: { |
|
| 270 | + | i16 value = (i16)n->val.number.value.i; |
|
| 271 | + | fputc(value & 0xFF, out); |
|
| 272 | + | fputc((value >> 8) & 0xFF, out); |
|
| 273 | + | break; |
|
| 274 | + | } |
|
| 275 | + | case TYPE_I32: |
|
| 276 | + | write_le32(out, n->val.number.value.i); |
|
| 277 | + | break; |
|
| 278 | + | default: |
|
| 279 | + | break; |
|
| 280 | + | } |
|
| 281 | + | ||
| 282 | + | /* Add padding to ensure alignment */ |
|
| 283 | + | usize size = n->type->size; |
|
| 284 | + | usize aligned = align(size, n->type->align); |
|
| 285 | + | usize padding = aligned - size; |
|
| 286 | + | ||
| 287 | + | for (usize i = 0; i < padding; i++) { |
|
| 288 | + | fputc(0, out); |
|
| 289 | + | } |
|
| 290 | + | } |
|
| 291 | + | ||
| 292 | + | /* Helper function to emit array data */ |
|
| 293 | + | static void emit_array_data( |
|
| 294 | + | node_t *n, parser_t *p, FILE *out, data_section_t *d |
|
| 295 | + | ) { |
|
| 296 | + | /* Emit each array element */ |
|
| 297 | + | node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems); |
|
| 298 | + | for (usize i = 0; i < n->val.array_lit.elems.len; i++) { |
|
| 299 | + | emit_node_data(elems[i], p, out, d); |
|
| 300 | + | } |
|
| 301 | + | /* Padding */ |
|
| 302 | + | usize aligned = align(n->type->size, n->type->align); |
|
| 303 | + | usize padding = aligned - n->type->size; |
|
| 304 | + | ||
| 305 | + | for (usize i = 0; i < padding; i++) { |
|
| 306 | + | fputc(0, out); |
|
| 307 | + | } |
|
| 308 | + | } |
|
| 309 | + | ||
| 310 | + | /* Emit the data section to the output file */ |
|
| 311 | + | void data_emit_ro(data_section_t *d, FILE *out) { |
|
| 312 | + | if (!out || d->ro_size == 0) { |
|
| 313 | + | return; /* No data to emit */ |
|
| 314 | + | } |
|
| 315 | + | for (usize i = 0; i < d->nstrings; i++) { |
|
| 316 | + | string_data_t *s = &d->strings[i]; |
|
| 317 | + | ||
| 318 | + | /* Write string data */ |
|
| 319 | + | fwrite(s->data, 1, s->length, out); |
|
| 320 | + | fputc(0, out); /* NULL terminator */ |
|
| 321 | + | ||
| 322 | + | /* Write padding */ |
|
| 323 | + | u32 padding = (WORD_SIZE - ((s->length + 1) % WORD_SIZE)) % WORD_SIZE; |
|
| 324 | + | ||
| 325 | + | for (usize j = 0; j < padding; j++) { |
|
| 326 | + | fputc(0, out); |
|
| 327 | + | } |
|
| 328 | + | } |
|
| 329 | + | } |
|
| 330 | + | ||
| 331 | + | void data_emit_rw(data_section_t *d, FILE *out) { |
|
| 332 | + | if (!out || d->rw_init_total == 0) { |
|
| 333 | + | return; /* No initialized data to emit */ |
|
| 334 | + | } |
|
| 335 | + | /* Emit only initialized items (offsets < rw_init_total). |
|
| 336 | + | * BSS items (offsets >= rw_init_total) are zero-initialized by the |
|
| 337 | + | * runtime and are not written to the file. */ |
|
| 338 | + | usize current = 0; |
|
| 339 | + | ||
| 340 | + | for (usize i = 0; i < d->nitems; i++) { |
|
| 341 | + | data_item_t *item = &d->items[i]; |
|
| 342 | + | ||
| 343 | + | /* Skip BSS items */ |
|
| 344 | + | if (item->offset >= d->rw_init_total) |
|
| 345 | + | continue; |
|
| 346 | + | ||
| 347 | + | /* Pad to reach the item's offset */ |
|
| 348 | + | while (current < item->offset) { |
|
| 349 | + | fputc(0, out); |
|
| 350 | + | current++; |
|
| 351 | + | } |
|
| 352 | + | if (item->kind == DATA_ARRAY) { |
|
| 353 | + | emit_array_data(item->node, item->parser, out, d); |
|
| 354 | + | } else { |
|
| 355 | + | emit_node_data(item->node, item->parser, out, d); |
|
| 356 | + | } |
|
| 357 | + | current = ftell(out); |
|
| 358 | + | } |
|
| 359 | + | /* Pad to the full initialized size */ |
|
| 360 | + | while (current < d->rw_init_total) { |
|
| 361 | + | fputc(0, out); |
|
| 362 | + | current++; |
|
| 363 | + | } |
|
| 364 | + | } |
gen/data.h
added
+79 -0
| 1 | + | #ifndef DATA_H |
|
| 2 | + | #define DATA_H |
|
| 3 | + | ||
| 4 | + | #include <stdio.h> |
|
| 5 | + | ||
| 6 | + | #include "../limits.h" |
|
| 7 | + | #include "../resolver.h" |
|
| 8 | + | #include "../types.h" |
|
| 9 | + | ||
| 10 | + | /* Data section offsets in memory */ |
|
| 11 | + | #define DATA_RO_OFFSET 0x10000 |
|
| 12 | + | #define DATA_RW_OFFSET 0xFFFFF0 |
|
| 13 | + | ||
/* String literal data in the data section.
 * `data_emit_ro` writes `length` bytes from `data`, followed by a NUL
 * terminator and zero padding up to a multiple of WORD_SIZE. */
typedef struct {
    const char *data; /* String content */
    usize length; /* String length in bytes (terminator not included) */
    usize offset; /* Offset in data section; `ro_offset` is added to form an address */
} string_data_t;
|
| 20 | + | ||
/* Kind of data item in the rw data section.
 * `data_emit_rw` emits DATA_ARRAY items element by element as an array
 * literal; every other kind is emitted as a single node. */
typedef enum {
    DATA_ARRAY, /* Array backing data for slices */
    DATA_CONST, /* Named constant */
} data_kind_t;
|
| 26 | + | ||
/* Unified data item for the rw data section */
typedef struct {
    data_kind_t kind; /* Selects how the item is emitted */
    usize offset; /* Offset in data section; items at or past `rw_init_total` are treated as BSS */
    node_t *node; /* Initializer node that is serialized on emission */
    struct parser_t *parser; /* Parser that owns this node's spans */
    union {
        struct {
            usize length; /* Number of elements */
            type_t *elem; /* Element type */
        } array;
        struct {
            const char *name; /* Constant name */
            usize name_len; /* Length of name */
        } constant;
    } as;
} data_item_t;
|
| 44 | + | ||
/* Data section for static data (strings, constants, etc.).
 * `data_init` resets the counters and sizes to zero and sets the two base
 * offsets to DATA_RO_OFFSET and DATA_RW_OFFSET. */
typedef struct {
    string_data_t strings[MAX_STRING_LITERALS]; /* Registered string literals */
    usize nstrings; /* Number of entries used in `strings` */
    data_item_t items[MAX_CONSTANTS * 2]; /* Arrays and constants */
    usize nitems; /* Number of entries used in `items` */
    usize ro_size; /* Total size of read-only data section */
    usize ro_offset; /* Base of the read-only data section */
    usize rw_offset; /* Base of the read-write data section */
    usize rw_init_total; /* Pre-computed total size of initialized rw data; offsets at or past it are BSS */
    usize rw_init_size; /* Current init cursor */
    usize rw_bss_size; /* Current BSS cursor */
} data_section_t;
|
| 58 | + | ||
/* Initialize the data section management: clears all counters and sizes and
 * sets the section bases to DATA_RO_OFFSET / DATA_RW_OFFSET. */
void data_init(data_section_t *d);
/* Add a string literal to the data section.
 * Returns the data section offset where the string is stored. */
usize data_string(data_section_t *d, const char *str, usize len);
/* Add array data for a slice to the data section.
 * Returns the offset in the data section. */
usize data_array(data_section_t *d, struct parser_t *p, node_t *array_node);
/* Add a node to the data section. */
usize data_node(
    data_section_t *d,
    struct parser_t *p,
    node_t *node,
    const char *name,
    usize name_len
);
/* Emit the read-only data (NUL-terminated, word-padded strings) to the
 * output. Does nothing if `out` is NULL or the section is empty. */
void data_emit_ro(data_section_t *d, FILE *out);
/* Emit the initialized read-write data to the output; BSS items are not
 * written. Does nothing if `out` is NULL or there is no initialized data. */
void data_emit_rw(data_section_t *d, FILE *out);
|
| 78 | + | ||
| 79 | + | #endif |
gen/emit.c
added
+1176 -0
| 1 | + | #include <stdio.h> |
|
| 2 | + | #include <stdlib.h> |
|
| 3 | + | ||
| 4 | + | #include "emit.h" |
|
| 5 | + | ||
| 6 | + | void split_imm(i32 imm, i32 *hi, i32 *lo) { |
|
| 7 | + | /* Split immediate into upper 20 bits and lower 12 bits */ |
|
| 8 | + | *hi = ((imm + 0x800) >> 12) & 0xFFFFF; /* Add 0x800 for proper rounding */ |
|
| 9 | + | *lo = imm & 0xFFF; |
|
| 10 | + | if (*lo & 0x800) { |
|
| 11 | + | /* If the highest bit of the lower 12 bits is set, |
|
| 12 | + | it will be sign-extended, so adjust upper part */ |
|
| 13 | + | *lo = *lo | ~0xFFF; /* Sign extend lower */ |
|
| 14 | + | } |
|
| 15 | + | } |
|
| 16 | + | ||
| 17 | + | void emit_li(gen_t *g, reg_t rd, i32 imm) { |
|
| 18 | + | if (is_small(imm)) { |
|
| 19 | + | emit(g, instr(I_ADDI, rd, 0, 0, imm)); |
|
| 20 | + | return; |
|
| 21 | + | } |
|
| 22 | + | i32 hi, lo; |
|
| 23 | + | split_imm(imm, &hi, &lo); |
|
| 24 | + | ||
| 25 | + | emit(g, instr(I_LUI, rd, 0, 0, hi)); |
|
| 26 | + | /* Use ADDIW to sign-extend the 32-bit result on RV64, otherwise |
|
| 27 | + | * LUI's upper-bit sign-extension leaves garbage in bits 63:32. */ |
|
| 28 | + | if (lo != 0) { |
|
| 29 | + | emit(g, instr(I_ADDIW, rd, rd, 0, lo)); |
|
| 30 | + | } else { |
|
| 31 | + | /* Even with lo == 0, LUI sign-extends bit 31 into 63:32. |
|
| 32 | + | * Use SEXT.W (ADDIW rd, rd, 0) to canonicalize. */ |
|
| 33 | + | emit(g, instr(I_ADDIW, rd, rd, 0, 0)); |
|
| 34 | + | } |
|
| 35 | + | } |
|
| 36 | + | ||
| 37 | + | void emit_mv(gen_t *g, reg_t dst, reg_t src) { |
|
| 38 | + | if (dst != src) { |
|
| 39 | + | emit(g, instr(I_MV, dst, src, 0, 0)); |
|
| 40 | + | } |
|
| 41 | + | } |
|
| 42 | + | ||
| 43 | + | usize emit_jump(gen_t *g, usize offset) { |
|
| 44 | + | return emit(g, JMP(jump_offset(g->ninstrs, offset))); |
|
| 45 | + | } |
|
| 46 | + | ||
| 47 | + | /* Compute hi/lo split for PC-relative offset to target address. */ |
|
| 48 | + | static void pc_rel_offset(gen_t *g, usize addr, i32 *hi, i32 *lo) { |
|
| 49 | + | i32 target_addr = (i32)(addr * INSTR_SIZE); |
|
| 50 | + | i32 current_addr = (i32)(g->ninstrs * INSTR_SIZE); |
|
| 51 | + | i32 offset = target_addr - current_addr; |
|
| 52 | + | split_imm(offset, hi, lo); |
|
| 53 | + | } |
|
| 54 | + | ||
| 55 | + | void emit_pc_rel_addr(gen_t *g, reg_t rd, usize addr) { |
|
| 56 | + | i32 hi, lo; |
|
| 57 | + | pc_rel_offset(g, addr, &hi, &lo); |
|
| 58 | + | emit(g, AUIPC(rd, hi)); |
|
| 59 | + | emit(g, ADDI(rd, rd, lo)); |
|
| 60 | + | } |
|
| 61 | + | ||
| 62 | + | static usize emit_call_far(gen_t *g, usize addr, reg_t scratch) { |
|
| 63 | + | i32 hi, lo; |
|
| 64 | + | pc_rel_offset(g, addr, &hi, &lo); |
|
| 65 | + | usize pc = emit(g, AUIPC(scratch, hi)); |
|
| 66 | + | emit(g, JALR(RA, scratch, lo)); |
|
| 67 | + | return pc; |
|
| 68 | + | } |
|
| 69 | + | ||
| 70 | + | usize emit_call(gen_t *g, usize addr) { |
|
| 71 | + | i32 offset = jump_offset(g->ninstrs, addr); |
|
| 72 | + | if (is_jump_imm(offset)) |
|
| 73 | + | return emit(g, JAL(RA, offset)); |
|
| 74 | + | ||
| 75 | + | reg_t scratch = nextreg(g); |
|
| 76 | + | usize pc = emit_call_far(g, addr, scratch); |
|
| 77 | + | freereg(g, scratch); |
|
| 78 | + | return pc; |
|
| 79 | + | } |
|
| 80 | + | ||
| 81 | + | void emit_record_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) { |
|
| 82 | + | for (usize i = 0; i < ty->info.srt.nfields; i++) { |
|
| 83 | + | symbol_t *field = ty->info.srt.fields[i]; |
|
| 84 | + | type_t *field_typ = field->e.field.typ; |
|
| 85 | + | i32 field_off = field->e.field.offset; |
|
| 86 | + | offset_t field_src = OFFSET(src.base, src.offset + field_off); |
|
| 87 | + | offset_t field_dst = OFFSET(dst.base, dst.offset + field_off); |
|
| 88 | + | ||
| 89 | + | emit_memcopy(g, field_src, field_dst, field_typ); |
|
| 90 | + | } |
|
| 91 | + | } |
|
| 92 | + | ||
| 93 | + | static value_t emit_field_get(value_t sval, i32 off, type_t *typ) { |
|
| 94 | + | switch (sval.loc) { |
|
| 95 | + | case LOC_REG: |
|
| 96 | + | return value_stack(OFFSET(sval.as.reg, off), typ); |
|
| 97 | + | case LOC_STACK: |
|
| 98 | + | return value_stack( |
|
| 99 | + | OFFSET(sval.as.off.base, sval.as.off.offset + off), typ |
|
| 100 | + | ); |
|
| 101 | + | case LOC_ADDR: |
|
| 102 | + | return value_addr(sval.as.adr.base, sval.as.adr.offset + off, typ); |
|
| 103 | + | case LOC_NONE: |
|
| 104 | + | case LOC_IMM: |
|
| 105 | + | break; |
|
| 106 | + | } |
|
| 107 | + | abort(); |
|
| 108 | + | } |
|
| 109 | + | ||
| 110 | + | /* RISC-V load/store immediates are limited to signed 12 bits. This helper folds |
|
| 111 | + | * large displacements into a temporary register so the generated instruction |
|
| 112 | + | * still uses the small-immediate forms, keeping the addressing logic in callers |
|
| 113 | + | * simple. */ |
|
| 114 | + | static addr_adj_t adjust_addr_avoid( |
|
| 115 | + | gen_t *g, reg_t base, i32 *offset, reg_t avoid |
|
| 116 | + | ) { |
|
| 117 | + | if (is_small(*offset)) |
|
| 118 | + | return (addr_adj_t){ base, false }; |
|
| 119 | + | ||
| 120 | + | reg_t tmp = avoid ? nextreg_except(g, avoid) : nextreg(g); |
|
| 121 | + | ||
| 122 | + | emit_li(g, tmp, *offset); |
|
| 123 | + | emit(g, ADD(tmp, base, tmp)); |
|
| 124 | + | *offset = 0; |
|
| 125 | + | ||
| 126 | + | return (addr_adj_t){ tmp, true }; |
|
| 127 | + | } |
|
| 128 | + | ||
| 129 | + | static addr_adj_t adjust_addr(gen_t *g, reg_t base, i32 *offset) { |
|
| 130 | + | return adjust_addr_avoid(g, base, offset, 0); |
|
| 131 | + | } |
|
| 132 | + | ||
| 133 | + | /* Release any temporary register created by `adjust_addr`. */ |
|
| 134 | + | static void release_addr(gen_t *g, addr_adj_t adj) { |
|
| 135 | + | if (adj.temp) |
|
| 136 | + | freereg(g, adj.base); |
|
| 137 | + | } |
|
| 138 | + | ||
| 139 | + | void emit_addr_offset(gen_t *g, reg_t dst, reg_t base, i32 offset) { |
|
| 140 | + | if (is_small(offset)) { |
|
| 141 | + | emit(g, ADDI(dst, base, offset)); |
|
| 142 | + | return; |
|
| 143 | + | } |
|
| 144 | + | reg_t tmp = nextreg(g); |
|
| 145 | + | emit_li(g, tmp, offset); |
|
| 146 | + | emit(g, ADD(dst, base, tmp)); |
|
| 147 | + | freereg(g, tmp); |
|
| 148 | + | } |
|
| 149 | + | ||
| 150 | + | value_t emit_slice_lit( |
|
| 151 | + | gen_t *g, i32 offset, usize ptr, usize len, type_t *typ |
|
| 152 | + | ) { |
|
| 153 | + | static type_t ptr_type = { .cls = TYPE_PTR }; |
|
| 154 | + | imm_t imm_ptr = (imm_t){ .u = ptr }; /* Slice pointer */ |
|
| 155 | + | imm_t imm_len = (imm_t){ .u = len }; /* Slice length */ |
|
| 156 | + | ||
| 157 | + | emit_store( |
|
| 158 | + | g, value_imm(imm_ptr, &ptr_type), FP, offset + SLICE_FIELD_PTR_OFFSET |
|
| 159 | + | ); |
|
| 160 | + | emit_store( |
|
| 161 | + | g, value_imm(imm_len, &ptr_type), FP, offset + SLICE_FIELD_LEN_OFFSET |
|
| 162 | + | ); |
|
| 163 | + | return value_stack(OFFSET(FP, offset), typ); |
|
| 164 | + | } |
|
| 165 | + | ||
| 166 | + | value_t emit_record_field_get(value_t sval, symbol_t *field) { |
|
| 167 | + | i32 foff = field->e.field.offset; |
|
| 168 | + | type_t *ftype = field->node->type; |
|
| 169 | + | ||
| 170 | + | return emit_field_get(sval, foff, ftype); |
|
| 171 | + | } |
|
| 172 | + | ||
| 173 | + | void emit_record_field_set( |
|
| 174 | + | gen_t *g, value_t val, reg_t base, i32 record_offset, symbol_t *field |
|
| 175 | + | ) { |
|
| 176 | + | i32 field_offset = field->e.field.offset; |
|
| 177 | + | i32 target_offset = record_offset + field_offset; |
|
| 178 | + | ||
| 179 | + | value_t dest = value_stack(OFFSET(base, target_offset), field->e.field.typ); |
|
| 180 | + | ||
| 181 | + | emit_replace(g, dest, val); |
|
| 182 | + | } |
|
| 183 | + | ||
| 184 | + | void emit_memzero(gen_t *g, offset_t dst, i32 size) { |
|
| 185 | + | if (size == 0) /* Nothing to do for zero-sized regions */ |
|
| 186 | + | return; |
|
| 187 | + | ||
| 188 | + | reg_t cursor = nextreg(g); |
|
| 189 | + | emit_addr_offset(g, cursor, dst.base, dst.offset); |
|
| 190 | + | ||
| 191 | + | /* Calculate word-aligned size and remainder */ |
|
| 192 | + | i32 aligned_size = align_stack(size, WORD_SIZE); |
|
| 193 | + | i32 remainder = size - aligned_size; |
|
| 194 | + | ||
| 195 | + | reg_t end = ZERO; |
|
| 196 | + | ||
| 197 | + | /* Only use the word-based loop if we have at least one complete word */ |
|
| 198 | + | if (aligned_size > 0) { |
|
| 199 | + | end = nextreg(g); |
|
| 200 | + | emit_addr_offset(g, end, cursor, aligned_size); |
|
| 201 | + | ||
| 202 | + | usize loop_start = g->ninstrs; |
|
| 203 | + | usize branch_end = emit(g, NOP); |
|
| 204 | + | ||
| 205 | + | /* Store zero to current address and increment by word size */ |
|
| 206 | + | emit(g, SD(ZERO, cursor, 0)); |
|
| 207 | + | emit(g, ADDI(cursor, cursor, WORD_SIZE)); |
|
| 208 | + | emit(g, JMP(jump_offset(g->ninstrs, loop_start))); /* Loop back */ |
|
| 209 | + | ||
| 210 | + | g->instrs[branch_end] = |
|
| 211 | + | BGE(cursor, end, jump_offset(branch_end, g->ninstrs)); |
|
| 212 | + | } |
|
| 213 | + | ||
| 214 | + | /* At least four bytes left */ |
|
| 215 | + | if (remainder >= 4) { /* Store a word (4 bytes) */ |
|
| 216 | + | emit(g, SW(ZERO, cursor, 0)); |
|
| 217 | + | emit(g, ADDI(cursor, cursor, 4)); |
|
| 218 | + | remainder -= 4; |
|
| 219 | + | } |
|
| 220 | + | /* At least two bytes left */ |
|
| 221 | + | if (remainder >= 2) { /* Store a halfword (2 bytes) */ |
|
| 222 | + | emit(g, SH(ZERO, cursor, 0)); |
|
| 223 | + | emit(g, ADDI(cursor, cursor, 2)); |
|
| 224 | + | remainder -= 2; |
|
| 225 | + | } |
|
| 226 | + | /* One byte left */ |
|
| 227 | + | if (remainder == 1) { |
|
| 228 | + | emit(g, SB(ZERO, cursor, 0)); |
|
| 229 | + | } |
|
| 230 | + | freereg(g, cursor); |
|
| 231 | + | if (aligned_size > 0) |
|
| 232 | + | freereg(g, end); |
|
| 233 | + | } |
|
| 234 | + | ||
| 235 | + | void emit_replace(gen_t *g, value_t old, value_t new) { |
|
| 236 | + | if (old.type->cls == TYPE_OPT) { |
|
| 237 | + | if (new.type->cls == TYPE_OPT) { |
|
| 238 | + | switch (old.loc) { |
|
| 239 | + | case LOC_STACK: |
|
| 240 | + | emit_memcopy(g, new.as.off, old.as.off, old.type); |
|
| 241 | + | break; |
|
| 242 | + | case LOC_ADDR: { |
|
| 243 | + | /* Handle assignment to LOC_ADDR optional */ |
|
| 244 | + | reg_t base = nextreg(g); |
|
| 245 | + | emit_li(g, base, old.as.adr.base); |
|
| 246 | + | emit_store(g, new, base, old.as.adr.offset); |
|
| 247 | + | freereg(g, base); |
|
| 248 | + | break; |
|
| 249 | + | } |
|
| 250 | + | default: |
|
| 251 | + | bail( |
|
| 252 | + | "can't replace tagged value with storage location %d", |
|
| 253 | + | old.loc |
|
| 254 | + | ); |
|
| 255 | + | } |
|
| 256 | + | } else if (new.type->cls == old.type->info.opt.elem->cls) { |
|
| 257 | + | /* T -> ?T coercion: create some value */ |
|
| 258 | + | tval_store(g, old, new, 1); |
|
| 259 | + | } else { |
|
| 260 | + | bail( |
|
| 261 | + | "cannot assign %s to %s; type mismatch", |
|
| 262 | + | type_names[new.type->cls], |
|
| 263 | + | type_names[old.type->cls] |
|
| 264 | + | ); |
|
| 265 | + | } |
|
| 266 | + | } else if (old.type->cls == TYPE_RESULT) { |
|
| 267 | + | type_t *payload = old.type->info.res.payload; |
|
| 268 | + | type_t *err = old.type->info.res.err; |
|
| 269 | + | ||
| 270 | + | if (new.type->cls == TYPE_RESULT) { |
|
| 271 | + | switch (old.loc) { |
|
| 272 | + | case LOC_STACK: |
|
| 273 | + | emit_memcopy(g, new.as.off, old.as.off, old.type); |
|
| 274 | + | break; |
|
| 275 | + | case LOC_ADDR: { |
|
| 276 | + | /* Handle assignment to LOC_ADDR result */ |
|
| 277 | + | reg_t base = nextreg(g); |
|
| 278 | + | emit_li(g, base, old.as.adr.base); |
|
| 279 | + | emit_store(g, new, base, old.as.adr.offset); |
|
| 280 | + | freereg(g, base); |
|
| 281 | + | break; |
|
| 282 | + | } |
|
| 283 | + | default: |
|
| 284 | + | bail( |
|
| 285 | + | "can't replace tagged value with storage location %d", |
|
| 286 | + | old.loc |
|
| 287 | + | ); |
|
| 288 | + | } |
|
| 289 | + | } else if (new.type == payload) { |
|
| 290 | + | emit_result_store_success(g, old, new); |
|
| 291 | + | } else if (new.type == err) { |
|
| 292 | + | emit_result_store_error(g, old, new); |
|
| 293 | + | } else { |
|
| 294 | + | bail( |
|
| 295 | + | "cannot assign %s to %s; type mismatch", |
|
| 296 | + | type_names[new.type->cls], |
|
| 297 | + | type_names[old.type->cls] |
|
| 298 | + | ); |
|
| 299 | + | } |
|
| 300 | + | } else { |
|
| 301 | + | /* Non-optional assignments (original logic) */ |
|
| 302 | + | switch (old.loc) { |
|
| 303 | + | case LOC_REG: |
|
| 304 | + | /* Load the new value directly into the register of |
|
| 305 | + | * the old value. */ |
|
| 306 | + | emit_load_into(g, old.as.reg, new); |
|
| 307 | + | break; |
|
| 308 | + | case LOC_STACK: |
|
| 309 | + | emit_store(g, new, old.as.off.base, old.as.off.offset); |
|
| 310 | + | break; |
|
| 311 | + | case LOC_ADDR: { |
|
| 312 | + | reg_t base = usereg(g, nextreg(g)); |
|
| 313 | + | emit_li(g, base, old.as.adr.base); |
|
| 314 | + | emit_store(g, new, base, old.as.adr.offset); |
|
| 315 | + | freereg(g, base); |
|
| 316 | + | break; |
|
| 317 | + | } |
|
| 318 | + | default: |
|
| 319 | + | bail("can't replace variable with storage location %d", old.loc); |
|
| 320 | + | } |
|
| 321 | + | } |
|
| 322 | + | ||
| 323 | + | /* Free the new location and update the value, since we don't |
|
| 324 | + | * need two copies of the value. Only free temporaries so we don't |
|
| 325 | + | * invalidate live values that are intentionally kept in registers |
|
| 326 | + | * (eg. function parameters). */ |
|
| 327 | + | if (new.loc == LOC_REG && new.temp) { |
|
| 328 | + | freereg(g, new.as.reg); |
|
| 329 | + | } |
|
| 330 | + | } |
|
| 331 | + | ||
| 332 | + | void emit_array_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) { |
|
| 333 | + | type_t *elem_type = ty->info.ary.elem; |
|
| 334 | + | usize length = ty->info.ary.length; |
|
| 335 | + | ||
| 336 | + | for (usize i = 0; i < length; i++) { |
|
| 337 | + | i32 elem_off = (i32)(i * elem_type->size); |
|
| 338 | + | offset_t elem_src = OFFSET(src.base, src.offset + elem_off); |
|
| 339 | + | offset_t elem_dst = OFFSET(dst.base, dst.offset + elem_off); |
|
| 340 | + | ||
| 341 | + | emit_memcopy(g, elem_src, elem_dst, elem_type); |
|
| 342 | + | } |
|
| 343 | + | } |
|
| 344 | + | ||
| 345 | + | /* Copy single value between offsets, via register */ |
|
| 346 | + | static void emit_offset_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) { |
|
| 347 | + | reg_t rs = emit_load(g, value_stack(src, ty)); |
|
| 348 | + | emit_regstore(g, rs, dst.base, dst.offset, ty); |
|
| 349 | + | freereg(g, rs); |
|
| 350 | + | } |
|
| 351 | + | ||
| 352 | + | /* Copy a full machine word (WORD_SIZE bytes) using LD/SD. */ |
|
| 353 | + | static void emit_dword_copy(gen_t *g, offset_t src, offset_t dst) { |
|
| 354 | + | reg_t tmp = nextreg(g); |
|
| 355 | + | i32 src_off = src.offset; |
|
| 356 | + | i32 dst_off = dst.offset; |
|
| 357 | + | addr_adj_t src_adj = adjust_addr(g, src.base, &src_off); |
|
| 358 | + | emit(g, LD(tmp, src_adj.base, src_off)); |
|
| 359 | + | release_addr(g, src_adj); |
|
| 360 | + | addr_adj_t dst_adj = adjust_addr(g, dst.base, &dst_off); |
|
| 361 | + | emit(g, SD(tmp, dst_adj.base, dst_off)); |
|
| 362 | + | release_addr(g, dst_adj); |
|
| 363 | + | freereg(g, tmp); |
|
| 364 | + | } |
|
| 365 | + | ||
| 366 | + | /* Copy tagged values (optional and payload unions) */ |
|
| 367 | + | static void emit_tval_copy( |
|
| 368 | + | gen_t *g, |
|
| 369 | + | offset_t src, |
|
| 370 | + | offset_t dst, |
|
| 371 | + | usize size, |
|
| 372 | + | i32 val_offset, |
|
| 373 | + | type_t *value_type |
|
| 374 | + | ) { |
|
| 375 | + | /* Copy tag byte */ |
|
| 376 | + | emit_offset_copy(g, src, dst, g->types->type_u8); |
|
| 377 | + | ||
| 378 | + | /* Zero padding between tag (1 byte) and payload start, so that |
|
| 379 | + | * byte-level equality comparisons of tagged values work correctly |
|
| 380 | + | * even when the destination was previously uninitialized. */ |
|
| 381 | + | if (val_offset > TAG_SIZE) { |
|
| 382 | + | emit_memzero( |
|
| 383 | + | g, OFFSET(dst.base, dst.offset + TAG_SIZE), val_offset - TAG_SIZE |
|
| 384 | + | ); |
|
| 385 | + | } |
|
| 386 | + | ||
| 387 | + | if (size == 0) |
|
| 388 | + | return; |
|
| 389 | + | ||
| 390 | + | offset_t val_src = OFFSET(src.base, src.offset + val_offset); |
|
| 391 | + | offset_t val_dst = OFFSET(dst.base, dst.offset + val_offset); |
|
| 392 | + | ||
| 393 | + | if (value_type) { |
|
| 394 | + | /* Use recursive memcopy for typed data (optionals) */ |
|
| 395 | + | emit_memcopy(g, val_src, val_dst, value_type); |
|
| 396 | + | return; |
|
| 397 | + | } |
|
| 398 | + | /* Copy raw bytes for untyped data (payload unions) */ |
|
| 399 | + | usize copied = 0; |
|
| 400 | + | ||
| 401 | + | /* Copy whole dwords (8 bytes) */ |
|
| 402 | + | while (copied + WORD_SIZE <= size) { |
|
| 403 | + | emit_dword_copy( |
|
| 404 | + | g, |
|
| 405 | + | OFFSET(val_src.base, val_src.offset + (i32)copied), |
|
| 406 | + | OFFSET(val_dst.base, val_dst.offset + (i32)copied) |
|
| 407 | + | ); |
|
| 408 | + | copied += WORD_SIZE; |
|
| 409 | + | } |
|
| 410 | + | /* Copy remaining word (4 bytes) if present */ |
|
| 411 | + | if (size - copied >= 4) { |
|
| 412 | + | emit_offset_copy( |
|
| 413 | + | g, |
|
| 414 | + | OFFSET(val_src.base, val_src.offset + (i32)copied), |
|
| 415 | + | OFFSET(val_dst.base, val_dst.offset + (i32)copied), |
|
| 416 | + | g->types->type_i32 |
|
| 417 | + | ); |
|
| 418 | + | copied += 4; |
|
| 419 | + | } |
|
| 420 | + | /* Copy remaining halfword if present */ |
|
| 421 | + | if (size - copied >= 2) { |
|
| 422 | + | emit_offset_copy( |
|
| 423 | + | g, |
|
| 424 | + | OFFSET(val_src.base, val_src.offset + (i32)copied), |
|
| 425 | + | OFFSET(val_dst.base, val_dst.offset + (i32)copied), |
|
| 426 | + | g->types->type_u16 |
|
| 427 | + | ); |
|
| 428 | + | copied += 2; |
|
| 429 | + | } |
|
| 430 | + | /* Copy remaining byte if present */ |
|
| 431 | + | if (size - copied == 1) { |
|
| 432 | + | emit_offset_copy( |
|
| 433 | + | g, |
|
| 434 | + | OFFSET(val_src.base, val_src.offset + (i32)copied), |
|
| 435 | + | OFFSET(val_dst.base, val_dst.offset + (i32)copied), |
|
| 436 | + | g->types->type_u8 |
|
| 437 | + | ); |
|
| 438 | + | } |
|
| 439 | + | } |
|
| 440 | + | ||
| 441 | + | void emit_memcopy(gen_t *g, offset_t src, offset_t dst, type_t *ty) { |
|
| 442 | + | if (src.base == dst.base && src.offset == dst.offset) |
|
| 443 | + | return; /* Nothing to do. */ |
|
| 444 | + | ||
| 445 | + | switch (ty->cls) { |
|
| 446 | + | case TYPE_RECORD: |
|
| 447 | + | emit_record_copy(g, src, dst, ty); |
|
| 448 | + | return; |
|
| 449 | + | case TYPE_ARRAY: |
|
| 450 | + | emit_array_copy(g, src, dst, ty); |
|
| 451 | + | return; |
|
| 452 | + | case TYPE_OPT: { |
|
| 453 | + | /* For optional types, copy tag and typed value */ |
|
| 454 | + | i32 val_off = align(TAG_SIZE, ty->info.opt.elem->align); |
|
| 455 | + | emit_tval_copy( |
|
| 456 | + | g, src, dst, ty->info.opt.elem->size, val_off, ty->info.opt.elem |
|
| 457 | + | ); |
|
| 458 | + | return; |
|
| 459 | + | } |
|
| 460 | + | case TYPE_UNION: |
|
| 461 | + | if (ty->info.uni.has_payload) { |
|
| 462 | + | /* Copy the full payload area including alignment padding, |
|
| 463 | + | * so that byte-level equality comparisons work correctly. */ |
|
| 464 | + | i32 val_off = align(TAG_SIZE, ty->align); |
|
| 465 | + | usize payload_size = ty->size - val_off; |
|
| 466 | + | emit_tval_copy(g, src, dst, payload_size, val_off, NULL); |
|
| 467 | + | return; |
|
| 468 | + | } |
|
| 469 | + | break; |
|
| 470 | + | case TYPE_SLICE: { |
|
| 471 | + | /* For slice types, copy both pointer (8 bytes) and length (8 bytes) */ |
|
| 472 | + | emit_dword_copy(g, src, dst); |
|
| 473 | + | emit_dword_copy( |
|
| 474 | + | g, |
|
| 475 | + | OFFSET(src.base, src.offset + WORD_SIZE), |
|
| 476 | + | OFFSET(dst.base, dst.offset + WORD_SIZE) |
|
| 477 | + | ); |
|
| 478 | + | return; |
|
| 479 | + | } |
|
| 480 | + | case TYPE_RESULT: { |
|
| 481 | + | bail("result types are never materialized"); |
|
| 482 | + | } |
|
| 483 | + | default: |
|
| 484 | + | break; |
|
| 485 | + | } |
|
| 486 | + | /* For primitive types, just copy via a register. */ |
|
| 487 | + | emit_offset_copy(g, src, dst, ty); |
|
| 488 | + | } |
|
| 489 | + | ||
| 490 | + | value_t emit_store(gen_t *g, value_t v, reg_t base, int offset) { |
|
| 491 | + | switch (v.loc) { |
|
| 492 | + | case LOC_IMM: { |
|
| 493 | + | /* Load, store, free. */ |
|
| 494 | + | reg_t rd = nextreg(g); |
|
| 495 | + | emit_load_into(g, rd, v); |
|
| 496 | + | emit_regstore(g, rd, base, offset, v.type); |
|
| 497 | + | freereg(g, rd); |
|
| 498 | + | ||
| 499 | + | break; |
|
| 500 | + | } |
|
| 501 | + | case LOC_REG: |
|
| 502 | + | if (type_is_passed_by_ref(v.type)) { |
|
| 503 | + | emit_memcopy(g, OFFSET(v.as.reg, 0), OFFSET(base, offset), v.type); |
|
| 504 | + | } else { |
|
| 505 | + | emit_regstore(g, v.as.reg, base, offset, v.type); |
|
| 506 | + | } |
|
| 507 | + | break; |
|
| 508 | + | case LOC_STACK: |
|
| 509 | + | emit_memcopy(g, v.as.off, OFFSET(base, offset), v.type); |
|
| 510 | + | break; |
|
| 511 | + | case LOC_ADDR: { |
|
| 512 | + | /* Copy from data section into stack */ |
|
| 513 | + | reg_t addr = nextreg(g); |
|
| 514 | + | emit_li(g, addr, v.as.adr.base); |
|
| 515 | + | emit_memcopy( |
|
| 516 | + | g, OFFSET(addr, v.as.adr.offset), OFFSET(base, offset), v.type |
|
| 517 | + | ); |
|
| 518 | + | freereg(g, addr); |
|
| 519 | + | ||
| 520 | + | break; |
|
| 521 | + | } |
|
| 522 | + | case LOC_NONE: |
|
| 523 | + | break; |
|
| 524 | + | } |
|
| 525 | + | return value_stack(OFFSET(base, offset), v.type); |
|
| 526 | + | } |
|
| 527 | + | ||
| 528 | + | reg_t emit_load(gen_t *g, value_t v) { |
|
| 529 | + | if (v.loc == LOC_REG && v.temp) { |
|
| 530 | + | return v.as.reg; |
|
| 531 | + | } else { |
|
| 532 | + | return emit_load_into(g, nextreg(g), v); |
|
| 533 | + | } |
|
| 534 | + | } |
|
| 535 | + | ||
| 536 | + | /* Load a full machine dword (WORD_SIZE = 8 bytes) from a value. */ |
|
| 537 | + | reg_t emit_load_dword(gen_t *g, value_t v) { |
|
| 538 | + | /* Use TYPE_PTR to trigger LD (8-byte load). */ |
|
| 539 | + | type_t ptr_type = { .cls = TYPE_PTR }; |
|
| 540 | + | return emit_load( |
|
| 541 | + | g, (value_t){ .loc = v.loc, .as = v.as, .type = &ptr_type } |
|
| 542 | + | ); |
|
| 543 | + | } |
|
| 544 | + | ||
| 545 | + | reg_t emit_load_offset(gen_t *g, value_t v, i32 offset) { |
|
| 546 | + | reg_t rd = nextreg(g); |
|
| 547 | + | switch (v.loc) { |
|
| 548 | + | case LOC_REG: |
|
| 549 | + | emit(g, LD(rd, v.as.reg, offset)); |
|
| 550 | + | break; |
|
| 551 | + | case LOC_STACK: { |
|
| 552 | + | i32 combined_offset = v.as.off.offset + offset; |
|
| 553 | + | addr_adj_t adj = adjust_addr(g, v.as.off.base, &combined_offset); |
|
| 554 | + | emit(g, LD(rd, adj.base, combined_offset)); |
|
| 555 | + | release_addr(g, adj); |
|
| 556 | + | break; |
|
| 557 | + | } |
|
| 558 | + | case LOC_ADDR: { |
|
| 559 | + | reg_t base = nextreg(g); |
|
| 560 | + | emit_li(g, base, v.as.adr.base); |
|
| 561 | + | i32 combined_offset = v.as.adr.offset + offset; |
|
| 562 | + | addr_adj_t adj = adjust_addr(g, base, &combined_offset); |
|
| 563 | + | emit(g, LD(rd, adj.base, combined_offset)); |
|
| 564 | + | release_addr(g, adj); |
|
| 565 | + | freereg(g, base); |
|
| 566 | + | break; |
|
| 567 | + | } |
|
| 568 | + | case LOC_IMM: |
|
| 569 | + | case LOC_NONE: |
|
| 570 | + | abort(); |
|
| 571 | + | } |
|
| 572 | + | return rd; |
|
| 573 | + | } |
|
| 574 | + | ||
| 575 | + | value_t emit_push(gen_t *g, value_t v) { |
|
| 576 | + | /* Always allocate new stack space - each variable should have its own |
|
| 577 | + | * location */ |
|
| 578 | + | int offset = reserve(g, v.type); |
|
| 579 | + | return emit_store(g, v, FP, offset); |
|
| 580 | + | } |
|
| 581 | + | ||
| 582 | + | value_t emit_array_index(gen_t *g, value_t array_val, value_t index, bool ref) { |
|
| 583 | + | reg_t elem_siz = nextreg(g); |
|
| 584 | + | reg_t data_adr = ZERO; |
|
| 585 | + | reg_t base_reg = ZERO; |
|
| 586 | + | reg_t base_alloc = ZERO; |
|
| 587 | + | i32 base_offset = 0; |
|
| 588 | + | type_t *elem_type; |
|
| 589 | + | type_t *arr_type = array_val.type; |
|
| 590 | + | if (arr_type->cls == TYPE_PTR) { |
|
| 591 | + | arr_type = arr_type->info.ptr.target; |
|
| 592 | + | } |
|
| 593 | + | ||
| 594 | + | /* Handle different storage locations */ |
|
| 595 | + | if (array_val.type->cls == TYPE_PTR) { |
|
| 596 | + | /* Dereference pointers up front to get the actual base address. */ |
|
| 597 | + | base_reg = emit_load_dword(g, array_val); |
|
| 598 | + | base_offset = 0; |
|
| 599 | + | } else if (array_val.loc == LOC_REG) { |
|
| 600 | + | base_reg = array_val.as.reg; |
|
| 601 | + | base_offset = 0; |
|
| 602 | + | } else if (array_val.loc == LOC_STACK) { |
|
| 603 | + | base_reg = array_val.as.off.base; |
|
| 604 | + | base_offset = array_val.as.off.offset; |
|
| 605 | + | } else if (array_val.loc == LOC_ADDR) { |
|
| 606 | + | /* For constants in the data section, load the address but don't |
|
| 607 | + | * dereference it. This way we get the actual array base address for |
|
| 608 | + | * indexing. */ |
|
| 609 | + | base_reg = nextreg(g); |
|
| 610 | + | emit_li(g, base_reg, array_val.as.adr.base); |
|
| 611 | + | base_offset = array_val.as.adr.offset; |
|
| 612 | + | base_alloc = base_reg; |
|
| 613 | + | } else { |
|
| 614 | + | bail("cannot index array/slice at this location"); |
|
| 615 | + | } |
|
| 616 | + | /* Load index into a register. Will hold final output */ |
|
| 617 | + | reg_t rd = emit_load(g, index); |
|
| 618 | + | ||
| 619 | + | if (arr_type->cls == TYPE_SLICE) { |
|
| 620 | + | /* Adjust base_offset for large offsets before loading slice fields */ |
|
| 621 | + | i32 ptr_offset = base_offset; |
|
| 622 | + | addr_adj_t adj = adjust_addr(g, base_reg, &ptr_offset); |
|
| 623 | + | ||
| 624 | + | /* Load data pointer (first dword of slice) */ |
|
| 625 | + | /* and use it as our new base. */ |
|
| 626 | + | data_adr = nextreg(g); |
|
| 627 | + | emit(g, LD(data_adr, adj.base, ptr_offset)); |
|
| 628 | + | ||
| 629 | + | /* Load slice length (second dword of slice) for bounds checking */ |
|
| 630 | + | reg_t len = nextreg(g); |
|
| 631 | + | emit(g, LD(len, adj.base, ptr_offset + WORD_SIZE)); |
|
| 632 | + | ||
| 633 | + | release_addr(g, adj); |
|
| 634 | + | ||
| 635 | + | /* Bounds check: if index >= length, emit EBREAK */ |
|
| 636 | + | /* Skip EBREAK if index < length (jump 2 instructions) */ |
|
| 637 | + | emit(g, BLTU(rd, len, INSTR_SIZE * 2)); |
|
| 638 | + | emit(g, EBREAK); |
|
| 639 | + | ||
| 640 | + | freereg(g, len); |
|
| 641 | + | ||
| 642 | + | base_reg = data_adr; |
|
| 643 | + | base_offset = 0; |
|
| 644 | + | elem_type = arr_type->info.slc.elem; |
|
| 645 | + | } else { |
|
| 646 | + | elem_type = arr_type->info.ary.elem; |
|
| 647 | + | } |
|
| 648 | + | ||
| 649 | + | /* Get element size */ |
|
| 650 | + | emit_li(g, elem_siz, elem_type->size); |
|
| 651 | + | emit(g, MUL(rd, rd, elem_siz)); /* Relative offset. */ |
|
| 652 | + | emit(g, ADD(rd, rd, base_reg)); |
|
| 653 | + | ||
| 654 | + | freereg(g, elem_siz); |
|
| 655 | + | freereg(g, data_adr); |
|
| 656 | + | if (base_alloc) |
|
| 657 | + | freereg(g, base_alloc); |
|
| 658 | + | ||
| 659 | + | if (base_offset != 0 && !is_small(base_offset)) { |
|
| 660 | + | emit_addr_offset(g, rd, rd, base_offset); |
|
| 661 | + | base_offset = 0; |
|
| 662 | + | } |
|
| 663 | + | ||
| 664 | + | if (ref) { |
|
| 665 | + | return value_stack(OFFSET(rd, base_offset), elem_type); |
|
| 666 | + | } else { |
|
| 667 | + | /* Reserve space on stack for the element */ |
|
| 668 | + | i32 stack_offset = reserve(g, elem_type); |
|
| 669 | + | ||
| 670 | + | /* Copy element from array to stack using memcopy */ |
|
| 671 | + | offset_t src = OFFSET(rd, base_offset); /* Source: element in array */ |
|
| 672 | + | offset_t dst = OFFSET(FP, stack_offset); /* Destination: stack */ |
|
| 673 | + | emit_memcopy(g, src, dst, elem_type); |
|
| 674 | + | ||
| 675 | + | freereg(g, rd); |
|
| 676 | + | ||
| 677 | + | /* Return a stack-based value pointing to the array element. */ |
|
| 678 | + | return value_stack(dst, elem_type); |
|
| 679 | + | } |
|
| 680 | + | } |
|
| 681 | + | ||
| 682 | + | usize emit_regstore(gen_t *g, reg_t src, reg_t base, i32 offset, type_t *ty) { |
|
| 683 | + | reg_t orig_base = base; |
|
| 684 | + | i32 orig_offset = offset; |
|
| 685 | + | addr_adj_t adj = adjust_addr_avoid(g, base, &offset, src); |
|
| 686 | + | reg_t addr = adj.base; |
|
| 687 | + | usize idx = 0; |
|
| 688 | + | ||
| 689 | + | switch (ty->cls) { |
|
| 690 | + | case TYPE_BOOL: |
|
| 691 | + | case TYPE_I8: |
|
| 692 | + | case TYPE_U8: |
|
| 693 | + | idx = emit(g, SB(src, addr, offset)); |
|
| 694 | + | break; |
|
| 695 | + | case TYPE_I16: |
|
| 696 | + | case TYPE_U16: |
|
| 697 | + | idx = emit(g, SH(src, addr, offset)); |
|
| 698 | + | break; |
|
| 699 | + | case TYPE_I32: |
|
| 700 | + | case TYPE_U32: |
|
| 701 | + | idx = emit(g, SW(src, addr, offset)); |
|
| 702 | + | break; |
|
| 703 | + | case TYPE_PTR: /* References are pointers, so store as a dword. */ |
|
| 704 | + | case TYPE_FN: /* Function pointers are addresses, so store as a dword. */ |
|
| 705 | + | idx = emit(g, SD(src, addr, offset)); |
|
| 706 | + | break; |
|
| 707 | + | case TYPE_UNION: |
|
| 708 | + | if (ty->info.uni.has_payload) { |
|
| 709 | + | /* Tag is 1 byte. */ |
|
| 710 | + | idx = emit(g, SB(src, addr, offset)); |
|
| 711 | + | break; |
|
| 712 | + | } |
|
| 713 | + | release_addr(g, adj); |
|
| 714 | + | return emit_regstore(g, src, orig_base, orig_offset, ty->info.uni.base); |
|
| 715 | + | case TYPE_ARRAY: |
|
| 716 | + | case TYPE_RECORD: |
|
| 717 | + | case TYPE_OPT: |
|
| 718 | + | /* Structs, arrays, optional types are stored by reference, so |
|
| 719 | + | * just store the address (pointer). */ |
|
| 720 | + | idx = emit(g, SD(src, addr, offset)); |
|
| 721 | + | break; |
|
| 722 | + | case TYPE_SLICE: |
|
| 723 | + | release_addr(g, adj); |
|
| 724 | + | bail("storing slices via register store is unsupported"); |
|
| 725 | + | default: |
|
| 726 | + | bail("storing unsupported type `%s`", type_names[ty->cls]); |
|
| 727 | + | } |
|
| 728 | + | release_addr(g, adj); |
|
| 729 | + | ||
| 730 | + | return idx; |
|
| 731 | + | } |
|
| 732 | + | ||
| 733 | + | void emit_store_tag(gen_t *g, tval_t tv, reg_t tag_reg) { |
|
| 734 | + | i32 off = tv.tag.as.off.offset; |
|
| 735 | + | addr_adj_t adj = adjust_addr(g, tv.tag.as.off.base, &off); |
|
| 736 | + | ||
| 737 | + | emit(g, SB(tag_reg, adj.base, off)); |
|
| 738 | + | release_addr(g, adj); |
|
| 739 | + | } |
|
| 740 | + | ||
| 741 | + | usize emit_regload(gen_t *g, reg_t dst, reg_t base, i32 offset, type_t *ty) { |
|
| 742 | + | reg_t orig_base = base; |
|
| 743 | + | i32 orig_offset = offset; |
|
| 744 | + | addr_adj_t adj = adjust_addr(g, base, &offset); |
|
| 745 | + | reg_t addr = adj.base; |
|
| 746 | + | usize idx = 0; |
|
| 747 | + | ||
| 748 | + | switch (ty->cls) { |
|
| 749 | + | case TYPE_BOOL: |
|
| 750 | + | case TYPE_U8: |
|
| 751 | + | idx = emit(g, LBU(dst, addr, offset)); |
|
| 752 | + | break; |
|
| 753 | + | case TYPE_I8: |
|
| 754 | + | idx = emit(g, LB(dst, addr, offset)); |
|
| 755 | + | break; |
|
| 756 | + | case TYPE_U16: |
|
| 757 | + | idx = emit(g, LHU(dst, addr, offset)); |
|
| 758 | + | break; |
|
| 759 | + | case TYPE_I16: |
|
| 760 | + | idx = emit(g, LH(dst, addr, offset)); |
|
| 761 | + | break; |
|
| 762 | + | case TYPE_I32: |
|
| 763 | + | idx = emit(g, LW(dst, addr, offset)); |
|
| 764 | + | break; |
|
| 765 | + | case TYPE_U32: |
|
| 766 | + | idx = emit(g, LWU(dst, addr, offset)); |
|
| 767 | + | break; |
|
| 768 | + | case TYPE_PTR: /* Raw pointer values occupy one 64-bit dword. */ |
|
| 769 | + | case TYPE_FN: /* Function pointers are addresses, so load as a dword. */ |
|
| 770 | + | idx = emit(g, LD(dst, addr, offset)); |
|
| 771 | + | break; |
|
| 772 | + | case TYPE_UNION: |
|
| 773 | + | if (ty->info.uni.has_payload) { |
|
| 774 | + | idx = emit(g, ADDI(dst, addr, offset)); |
|
| 775 | + | break; |
|
| 776 | + | } |
|
| 777 | + | release_addr(g, adj); |
|
| 778 | + | return emit_regload(g, dst, orig_base, orig_offset, ty->info.uni.base); |
|
| 779 | + | case TYPE_ARRAY: |
|
| 780 | + | case TYPE_RECORD: |
|
| 781 | + | case TYPE_SLICE: |
|
| 782 | + | case TYPE_OPT: |
|
| 783 | + | /* For records, arrays, optional types, we load the address in the |
|
| 784 | + | * register. */ |
|
| 785 | + | idx = emit(g, ADDI(dst, addr, offset)); |
|
| 786 | + | break; |
|
| 787 | + | default: |
|
| 788 | + | release_addr(g, adj); |
|
| 789 | + | bail("loading unsupported type `%s`", type_names[ty->cls]); |
|
| 790 | + | } |
|
| 791 | + | release_addr(g, adj); |
|
| 792 | + | ||
| 793 | + | return idx; |
|
| 794 | + | } |
|
| 795 | + | ||
| 796 | + | int emit_regpush(gen_t *g, reg_t src, type_t *ty) { |
|
| 797 | + | /* Store the register to the stack. */ |
|
| 798 | + | int offset = reserve(g, ty); |
|
| 799 | + | emit_regstore(g, src, FP, offset, ty); |
|
| 800 | + | ||
| 801 | + | return offset; |
|
| 802 | + | } |
|
| 803 | + | ||
| 804 | + | i32 reserve_aligned(gen_t *g, type_t *ty, i32 align) { |
|
| 805 | + | frame_t *frame = &g->fn.current->e.fn.frame; |
|
| 806 | + | ||
| 807 | + | /* Zero-sized types (e.g. empty arrays) don't need stack space. */ |
|
| 808 | + | if (ty->size == 0) { |
|
| 809 | + | return frame->sp; |
|
| 810 | + | } |
|
| 811 | + | frame->sp = align_stack(frame->sp - ty->size, align); |
|
| 812 | + | ||
| 813 | + | if (-frame->sp >= MAX_FRAME_SIZE) |
|
| 814 | + | bail("stack frame overflow"); |
|
| 815 | + | if (-frame->sp < 0) |
|
| 816 | + | bail("stack frame underflow"); |
|
| 817 | + | ||
| 818 | + | if (-frame->sp > frame->size) |
|
| 819 | + | frame->size = -frame->sp; |
|
| 820 | + | ||
| 821 | + | /* Zero memory for non-packed types to ensure clean initialization. |
|
| 822 | + | * Packed types are skipped as they are densely packed without padding. */ |
|
| 823 | + | if (!type_is_packed(ty)) { |
|
| 824 | + | emit_memzero(g, OFFSET(FP, frame->sp), ty->size); |
|
| 825 | + | } |
|
| 826 | + | ||
| 827 | + | return frame->sp; |
|
| 828 | + | } |
|
| 829 | + | ||
| 830 | + | reg_t emit_load_into(gen_t *g, reg_t dst, value_t src) { |
|
| 831 | + | switch (src.loc) { |
|
| 832 | + | case LOC_IMM: |
|
| 833 | + | switch (src.type->cls) { |
|
| 834 | + | case TYPE_UNION: /* Unions default to i32 base type. */ |
|
| 835 | + | case TYPE_I8: |
|
| 836 | + | case TYPE_I16: |
|
| 837 | + | case TYPE_I32: |
|
| 838 | + | emit_li(g, dst, src.as.imm.i); |
|
| 839 | + | break; |
|
| 840 | + | case TYPE_U8: |
|
| 841 | + | case TYPE_U16: |
|
| 842 | + | case TYPE_U32: |
|
| 843 | + | case TYPE_PTR: |
|
| 844 | + | case TYPE_FN: |
|
| 845 | + | emit_li(g, dst, src.as.imm.u); |
|
| 846 | + | break; |
|
| 847 | + | case TYPE_BOOL: |
|
| 848 | + | emit_li(g, dst, src.as.imm.b); |
|
| 849 | + | break; |
|
| 850 | + | default: |
|
| 851 | + | bail("unsupported type `%s`", type_names[src.type->cls]); |
|
| 852 | + | } |
|
| 853 | + | break; |
|
| 854 | + | case LOC_STACK: |
|
| 855 | + | /* For types passed by reference, load the address |
|
| 856 | + | * instead of the value. */ |
|
| 857 | + | if (type_is_passed_by_ref(src.type)) { |
|
| 858 | + | i32 off = src.as.off.offset; |
|
| 859 | + | addr_adj_t adj = adjust_addr(g, src.as.off.base, &off); |
|
| 860 | + | emit(g, ADDI(dst, adj.base, off)); |
|
| 861 | + | release_addr(g, adj); |
|
| 862 | + | } else { |
|
| 863 | + | emit_regload(g, dst, src.as.off.base, src.as.off.offset, src.type); |
|
| 864 | + | } |
|
| 865 | + | break; |
|
| 866 | + | case LOC_REG: { |
|
| 867 | + | reg_t rs = src.as.reg; |
|
| 868 | + | if (rs == dst) { |
|
| 869 | + | break; |
|
| 870 | + | } |
|
| 871 | + | if (src.temp) |
|
| 872 | + | freereg(g, rs); |
|
| 873 | + | ||
| 874 | + | emit(g, MV(dst, rs)); |
|
| 875 | + | break; |
|
| 876 | + | } |
|
| 877 | + | case LOC_ADDR: { |
|
| 878 | + | /* Start by loading the address into the register */ |
|
| 879 | + | emit_li(g, dst, src.as.adr.base); |
|
| 880 | + | ||
| 881 | + | /* For non-compound types, we need to load the value from the address. |
|
| 882 | + | * For compound types, we keep the address itself. */ |
|
| 883 | + | if (!type_is_passed_by_ref(src.type)) { |
|
| 884 | + | emit_regload(g, dst, dst, src.as.adr.offset, src.type); |
|
| 885 | + | } else { |
|
| 886 | + | /* For compound types passed by reference, add the offset to get |
|
| 887 | + | * the actual address. */ |
|
| 888 | + | if (src.as.adr.offset != 0) { |
|
| 889 | + | emit(g, ADDI(dst, dst, src.as.adr.offset)); |
|
| 890 | + | } |
|
| 891 | + | } |
|
| 892 | + | break; |
|
| 893 | + | } |
|
| 894 | + | case LOC_NONE: |
|
| 895 | + | break; |
|
| 896 | + | } |
|
| 897 | + | return dst; |
|
| 898 | + | } |
|
| 899 | + | ||
| 900 | + | /* Compare values at two memory addresses and accumulate result. |
|
| 901 | + | * Loads values from memory, compares them, and ANDs the comparison result |
|
| 902 | + | * with the accumulating result register. */ |
|
| 903 | + | static void emit_cmp_step( |
|
| 904 | + | gen_t *g, |
|
| 905 | + | reg_t left_val, /* Register to hold left value during comparison */ |
|
| 906 | + | reg_t right_val, /* Register to hold right value during comparison */ |
|
| 907 | + | reg_t left_addr, /* Base address register for left operand */ |
|
| 908 | + | reg_t right_addr, /* Base address register for right operand */ |
|
| 909 | + | usize offset, /* Byte offset from base addresses to load from */ |
|
| 910 | + | reg_t result, /* Register that accumulates comparison results */ |
|
| 911 | + | type_t *val_typ /* Type information for loading value */ |
|
| 912 | + | ) { |
|
| 913 | + | /* Load values from both memory addresses at the given offset */ |
|
| 914 | + | emit_regload(g, left_val, left_addr, offset, val_typ); |
|
| 915 | + | emit_regload(g, right_val, right_addr, offset, val_typ); |
|
| 916 | + | ||
| 917 | + | /* XOR the two loaded values: left_val = left_val ^ right_val |
|
| 918 | + | * If values are equal, result will be 0 (equal values XOR to 0) |
|
| 919 | + | * If values differ, result will be non-zero */ |
|
| 920 | + | emit(g, XOR(left_val, left_val, right_val)); |
|
| 921 | + | /* Convert XOR result to 1 (equal) or 0 (not equal) */ |
|
| 922 | + | emit(g, SLTIU(left_val, left_val, 1)); |
|
| 923 | + | /* Accumulate the result with a previous result. |
|
| 924 | + | * If any comparison fails, the final result becomes 0 */ |
|
| 925 | + | emit(g, AND(result, result, left_val)); |
|
| 926 | + | } |
|
| 927 | + | ||
| 928 | + | /* Compare raw bytes at two memory addresses. |
|
| 929 | + | * Sets result = 1 if all bytes match. */ |
|
| 930 | + | void emit_bytes_equal( |
|
| 931 | + | gen_t *g, reg_t left, reg_t right, usize size, reg_t result |
|
| 932 | + | ) { |
|
| 933 | + | /* Start assuming they're equal */ |
|
| 934 | + | emit_li(g, result, 1); |
|
| 935 | + | ||
| 936 | + | if (size == 0) |
|
| 937 | + | return; /* Zero bytes are always equal */ |
|
| 938 | + | ||
| 939 | + | reg_t left_val = nextreg(g); |
|
| 940 | + | reg_t right_val = nextreg(g); |
|
| 941 | + | ||
| 942 | + | /* Compare dword by dword (8 bytes) */ |
|
| 943 | + | usize i, remaining = size; |
|
| 944 | + | ||
| 945 | + | for (i = 0; i + WORD_SIZE <= size; i += WORD_SIZE) { |
|
| 946 | + | /* Load 8-byte dwords directly with LD */ |
|
| 947 | + | i32 off_l = (i32)i, off_r = (i32)i; |
|
| 948 | + | addr_adj_t adj_l = adjust_addr(g, left, &off_l); |
|
| 949 | + | emit(g, LD(left_val, adj_l.base, off_l)); |
|
| 950 | + | release_addr(g, adj_l); |
|
| 951 | + | addr_adj_t adj_r = adjust_addr(g, right, &off_r); |
|
| 952 | + | emit(g, LD(right_val, adj_r.base, off_r)); |
|
| 953 | + | release_addr(g, adj_r); |
|
| 954 | + | emit(g, XOR(left_val, left_val, right_val)); |
|
| 955 | + | emit(g, SLTIU(left_val, left_val, 1)); |
|
| 956 | + | emit(g, AND(result, result, left_val)); |
|
| 957 | + | } |
|
| 958 | + | remaining -= i; |
|
| 959 | + | ||
| 960 | + | if (remaining >= 4) { |
|
| 961 | + | emit_cmp_step( |
|
| 962 | + | g, left_val, right_val, left, right, i, result, g->types->type_u32 |
|
| 963 | + | ); |
|
| 964 | + | i += 4; |
|
| 965 | + | remaining -= 4; |
|
| 966 | + | } |
|
| 967 | + | ||
| 968 | + | if (remaining >= 2) { |
|
| 969 | + | emit_cmp_step( |
|
| 970 | + | g, left_val, right_val, left, right, i, result, g->types->type_u16 |
|
| 971 | + | ); |
|
| 972 | + | i += 2; |
|
| 973 | + | remaining -= 2; |
|
| 974 | + | } |
|
| 975 | + | if (remaining == 1) { |
|
| 976 | + | emit_cmp_step( |
|
| 977 | + | g, left_val, right_val, left, right, i, result, g->types->type_u8 |
|
| 978 | + | ); |
|
| 979 | + | } |
|
| 980 | + | freereg(g, left_val); |
|
| 981 | + | freereg(g, right_val); |
|
| 982 | + | } |
|
| 983 | + | ||
| 984 | + | void emit_memequal( |
|
| 985 | + | gen_t *g, reg_t left, reg_t right, type_t *ty, reg_t result |
|
| 986 | + | ) { |
|
| 987 | + | switch (ty->cls) { |
|
| 988 | + | case TYPE_OPT: { /* For optional types, compare tag and value */ |
|
| 989 | + | reg_t left_tag = nextreg(g); |
|
| 990 | + | reg_t right_tag = nextreg(g); |
|
| 991 | + | ||
| 992 | + | /* Load tags (first byte) */ |
|
| 993 | + | emit(g, LBU(left_tag, left, 0)); |
|
| 994 | + | emit(g, LBU(right_tag, right, 0)); |
|
| 995 | + | ||
| 996 | + | /* Compare tags directly - if different, optionals are not equal */ |
|
| 997 | + | emit_li(g, result, 0); /* Assume not equal */ |
|
| 998 | + | usize jump_to_end = emit(g, NOP); |
|
| 999 | + | ||
| 1000 | + | /* If both are nil (tag == 0), they're equal */ |
|
| 1001 | + | emit_li(g, result, 1); /* Set equal */ |
|
| 1002 | + | usize skip_value_check = emit(g, NOP); |
|
| 1003 | + | ||
| 1004 | + | /* Compare values (past tag) */ |
|
| 1005 | + | type_t *inner_type = ty->info.opt.elem; |
|
| 1006 | + | i32 val_off = align(TAG_SIZE, inner_type->align); |
|
| 1007 | + | reg_t left_val = nextreg(g); |
|
| 1008 | + | reg_t right_val = nextreg(g); |
|
| 1009 | + | ||
| 1010 | + | /* Calculate value addresses (skip tag) */ |
|
| 1011 | + | emit(g, ADDI(left_val, left, val_off)); |
|
| 1012 | + | emit(g, ADDI(right_val, right, val_off)); |
|
| 1013 | + | ||
| 1014 | + | /* Load values if primitive type */ |
|
| 1015 | + | if (type_is_primitive(inner_type)) { |
|
| 1016 | + | emit_regload(g, left_val, left_val, 0, inner_type); |
|
| 1017 | + | emit_regload(g, right_val, right_val, 0, inner_type); |
|
| 1018 | + | } |
|
| 1019 | + | ||
| 1020 | + | /* Compare the values recursively */ |
|
| 1021 | + | emit_memequal(g, left_val, right_val, inner_type, result); |
|
| 1022 | + | ||
| 1023 | + | /* Patch skip_value_check: jump here if both tags are 0 (nil) */ |
|
| 1024 | + | g->instrs[skip_value_check] = |
|
| 1025 | + | BEQ(left_tag, ZERO, jump_offset(skip_value_check, g->ninstrs)); |
|
| 1026 | + | ||
| 1027 | + | /* Patch jump_to_end: jump here if tags are different */ |
|
| 1028 | + | g->instrs[jump_to_end] = |
|
| 1029 | + | BNE(left_tag, right_tag, jump_offset(jump_to_end, g->ninstrs)); |
|
| 1030 | + | ||
| 1031 | + | freereg(g, left_tag); |
|
| 1032 | + | freereg(g, right_tag); |
|
| 1033 | + | freereg(g, left_val); |
|
| 1034 | + | freereg(g, right_val); |
|
| 1035 | + | ||
| 1036 | + | break; |
|
| 1037 | + | } |
|
| 1038 | + | case TYPE_I8: |
|
| 1039 | + | case TYPE_I16: |
|
| 1040 | + | case TYPE_I32: |
|
| 1041 | + | /* For primitive types, compare directly */ |
|
| 1042 | + | emit(g, SUB(result, left, right)); |
|
| 1043 | + | emit(g, SLTIU(result, result, 1)); |
|
| 1044 | + | break; |
|
| 1045 | + | case TYPE_U8: |
|
| 1046 | + | case TYPE_U16: |
|
| 1047 | + | case TYPE_U32: |
|
| 1048 | + | case TYPE_BOOL: |
|
| 1049 | + | case TYPE_PTR: |
|
| 1050 | + | /* For primitive types, compare directly */ |
|
| 1051 | + | emit(g, XOR(result, left, right)); |
|
| 1052 | + | emit(g, SLTIU(result, result, 1)); |
|
| 1053 | + | break; |
|
| 1054 | + | case TYPE_UNION: |
|
| 1055 | + | if (!ty->info.uni.has_payload) { |
|
| 1056 | + | type_t *base = |
|
| 1057 | + | ty->info.uni.base ? ty->info.uni.base : g->types->type_i32; |
|
| 1058 | + | emit_memequal(g, left, right, base, result); |
|
| 1059 | + | } else { |
|
| 1060 | + | emit_bytes_equal(g, left, right, ty->size, result); |
|
| 1061 | + | } |
|
| 1062 | + | break; |
|
| 1063 | + | case TYPE_ARRAY: |
|
| 1064 | + | case TYPE_RECORD: |
|
| 1065 | + | case TYPE_SLICE: |
|
| 1066 | + | emit_bytes_equal(g, left, right, ty->size, result); |
|
| 1067 | + | break; |
|
| 1068 | + | default: |
|
| 1069 | + | bail("equality is not supported for type `%s`", ty->name); |
|
| 1070 | + | } |
|
| 1071 | + | } |
|
| 1072 | + | ||
| 1073 | + | void emit_copy_by_ref(gen_t *g, value_t src, value_t dst) { |
|
| 1074 | + | static type_t ptr_type = { .cls = TYPE_PTR }; |
|
| 1075 | + | ||
| 1076 | + | if (src.loc == LOC_REG && dst.loc == LOC_REG) { |
|
| 1077 | + | emit_mv(g, dst.as.reg, src.as.reg); |
|
| 1078 | + | } else if (src.loc == LOC_REG && dst.loc == LOC_STACK) { |
|
| 1079 | + | i32 dst_off = dst.as.off.offset; |
|
| 1080 | + | type_t *store_ty = dst.type; |
|
| 1081 | + | ||
| 1082 | + | if (dst.type->cls == TYPE_SLICE) { |
|
| 1083 | + | /* Slice fat pointers live on the stack; only copy the address. */ |
|
| 1084 | + | dst_off += SLICE_FIELD_PTR_OFFSET; |
|
| 1085 | + | store_ty = &ptr_type; |
|
| 1086 | + | } |
|
| 1087 | + | emit_regstore(g, src.as.reg, dst.as.off.base, dst_off, store_ty); |
|
| 1088 | + | } else if (src.loc == LOC_STACK && dst.loc == LOC_REG) { |
|
| 1089 | + | type_t *load_ty = dst.type; |
|
| 1090 | + | i32 src_off = src.as.off.offset; |
|
| 1091 | + | ||
| 1092 | + | if (dst.type->cls == TYPE_SLICE) { |
|
| 1093 | + | load_ty = &ptr_type; |
|
| 1094 | + | src_off += SLICE_FIELD_PTR_OFFSET; |
|
| 1095 | + | } |
|
| 1096 | + | emit_regload(g, dst.as.reg, src.as.off.base, src_off, load_ty); |
|
| 1097 | + | } else if (src.loc == LOC_STACK && dst.loc == LOC_STACK) { |
|
| 1098 | + | i32 src_off = src.as.off.offset; |
|
| 1099 | + | addr_adj_t src_adj = adjust_addr(g, src.as.off.base, &src_off); |
|
| 1100 | + | reg_t adr = nextreg(g); |
|
| 1101 | + | ||
| 1102 | + | emit(g, ADDI(adr, src_adj.base, src_off)); |
|
| 1103 | + | ||
| 1104 | + | i32 dst_off = dst.as.off.offset; |
|
| 1105 | + | addr_adj_t dst_adj = adjust_addr(g, dst.as.off.base, &dst_off); |
|
| 1106 | + | ||
| 1107 | + | if (dst.type->cls == TYPE_SLICE) |
|
| 1108 | + | dst_off += SLICE_FIELD_PTR_OFFSET; |
|
| 1109 | + | ||
| 1110 | + | emit(g, SD(adr, dst_adj.base, dst_off)); |
|
| 1111 | + | ||
| 1112 | + | release_addr(g, dst_adj); |
|
| 1113 | + | release_addr(g, src_adj); |
|
| 1114 | + | freereg(g, adr); |
|
| 1115 | + | } else if (src.loc == LOC_ADDR && dst.loc == LOC_STACK) { |
|
| 1116 | + | reg_t adr = nextreg(g); |
|
| 1117 | + | /* Load the absolute address into a register. */ |
|
| 1118 | + | emit_li(g, adr, (i32)(src.as.adr.base + src.as.adr.offset)); |
|
| 1119 | + | i32 dst_off = dst.as.off.offset; |
|
| 1120 | + | type_t *store_ty = dst.type; |
|
| 1121 | + | ||
| 1122 | + | if (dst.type->cls == TYPE_SLICE) { |
|
| 1123 | + | dst_off += SLICE_FIELD_PTR_OFFSET; |
|
| 1124 | + | store_ty = &ptr_type; |
|
| 1125 | + | } |
|
| 1126 | + | emit_regstore(g, adr, dst.as.off.base, dst_off, store_ty); |
|
| 1127 | + | freereg(g, adr); |
|
| 1128 | + | } else { |
|
| 1129 | + | bail("don't know how to copy between these slots"); |
|
| 1130 | + | } |
|
| 1131 | + | } |
|
| 1132 | + | ||
| 1133 | + | /* Write a successful result tag (0) and copy the payload if present. */ |
|
| 1134 | + | void emit_result_store_success(gen_t *g, value_t dest, value_t value) { |
|
| 1135 | + | tval_t tv = tval_from_val(g, dest); |
|
| 1136 | + | reg_t tag = nextreg(g); |
|
| 1137 | + | ||
| 1138 | + | emit_li(g, tag, 0); |
|
| 1139 | + | emit_store_tag(g, tv, tag); |
|
| 1140 | + | freereg(g, tag); |
|
| 1141 | + | ||
| 1142 | + | type_t *payload = dest.type->info.res.payload; |
|
| 1143 | + | ||
| 1144 | + | /* Nb. We don't memzero, since result types are always unwrapped to |
|
| 1145 | + | * one of their payloads. */ |
|
| 1146 | + | ||
| 1147 | + | if (payload->size > 0) { |
|
| 1148 | + | /* Check if we need to wrap the value in an optional. */ |
|
| 1149 | + | if (payload->cls == TYPE_OPT && value.type->cls != TYPE_OPT) { |
|
| 1150 | + | /* Wrap non-optional value in an optional */ |
|
| 1151 | + | value_t payload_val = value_stack( |
|
| 1152 | + | OFFSET(tv.val.as.off.base, tv.val.as.off.offset), payload |
|
| 1153 | + | ); |
|
| 1154 | + | tval_store(g, payload_val, value, 1); |
|
| 1155 | + | } else { |
|
| 1156 | + | emit_store(g, value, tv.val.as.off.base, tv.val.as.off.offset); |
|
| 1157 | + | } |
|
| 1158 | + | } |
|
| 1159 | + | } |
|
| 1160 | + | ||
| 1161 | + | /* Write an error Result tag (1) and copy the error payload. */ |
|
| 1162 | + | void emit_result_store_error(gen_t *g, value_t dest, value_t err) { |
|
| 1163 | + | tval_t tv = tval_from_val(g, dest); |
|
| 1164 | + | reg_t tag = nextreg(g); |
|
| 1165 | + | ||
| 1166 | + | emit_li(g, tag, 1); |
|
| 1167 | + | emit_store_tag(g, tv, tag); |
|
| 1168 | + | freereg(g, tag); |
|
| 1169 | + | ||
| 1170 | + | /* Nb. We don't memzero, since result types are always unwrapped to |
|
| 1171 | + | * one of their payloads. */ |
|
| 1172 | + | ||
| 1173 | + | if (err.type->cls != TYPE_VOID) { |
|
| 1174 | + | emit_store(g, err, tv.val.as.off.base, tv.val.as.off.offset); |
|
| 1175 | + | } |
|
| 1176 | + | } |
gen/emit.h
added
+121 -0
| 1 | + | #ifndef EMIT_H |
|
| 2 | + | #define EMIT_H |
|
| 3 | + | ||
| 4 | + | #include <stdlib.h> |
|
| 5 | + | #include <string.h> |
|
| 6 | + | ||
| 7 | + | #include "../ast.h" |
|
| 8 | + | #include "../gen.h" |
|
| 9 | + | #include "../io.h" |
|
| 10 | + | #include "../scanner.h" |
|
| 11 | + | #include "../types.h" |
|
| 12 | + | ||
| 13 | + | /* Code emission. */ |
|
| 14 | + | #define emit(g, ins) __emit(g, ins) |
|
| 15 | + | ||
| 16 | + | /* Slice field offsets */ |
|
| 17 | + | enum { |
|
| 18 | + | SLICE_FIELD_PTR_OFFSET = 0, |
|
| 19 | + | SLICE_FIELD_LEN_OFFSET = WORD_SIZE, |
|
| 20 | + | }; |
|
| 21 | + | ||
| 22 | + | /* Helper describing the possibly-adjusted base register for stack accesses. */ |
|
| 23 | + | typedef struct { |
|
| 24 | + | reg_t base; /* Register that should be used for the access. */ |
|
| 25 | + | bool temp; /* Whether `base` was synthesized and must be freed. */ |
|
| 26 | + | } addr_adj_t; |
|
| 27 | + | ||
| 28 | + | /* Emit the given instruction. */ |
|
| 29 | + | static inline usize __emit(gen_t *g, instr_t ins) { |
|
| 30 | + | if (g->ninstrs >= MAX_INSTRS) { |
|
| 31 | + | abort(); |
|
| 32 | + | } |
|
| 33 | + | g->instrs[g->ninstrs] = ins; |
|
| 34 | + | g->ninstrs++; |
|
| 35 | + | ||
| 36 | + | return g->ninstrs - 1; |
|
| 37 | + | } |
|
| 38 | + | ||
| 39 | + | /* Split a 32-bit immediate into upper 20 bits and lower 12 bits for RISC-V. */ |
|
| 40 | + | void split_imm(i32 imm, i32 *hi, i32 *lo); |
|
| 41 | + | /* Emit a load immediate (LI) instruction sequence. */ |
|
| 42 | + | void emit_li(gen_t *g, reg_t rd, i32 imm); |
|
| 43 | + | /* Helper function to copy register values if needed. */ |
|
| 44 | + | void emit_mv(gen_t *g, reg_t dst, reg_t src); |
|
| 45 | + | /* Emit relative jump to offset. */ |
|
| 46 | + | usize emit_jump(gen_t *g, usize offset); |
|
| 47 | + | /* Emit a function call. */ |
|
| 48 | + | usize emit_call(gen_t *g, usize addr); |
|
| 49 | + | /* Load a PC-relative address into a register. */ |
|
| 50 | + | void emit_pc_rel_addr(gen_t *g, reg_t rd, usize addr); |
|
| 51 | + | /* Emit code to index into an array */ |
|
| 52 | + | value_t emit_array_index(gen_t *g, value_t array_val, value_t index, bool); |
|
| 53 | + | /* Load a value into a specific register. */ |
|
| 54 | + | reg_t emit_load_into(gen_t *g, reg_t dst, value_t src); |
|
| 55 | + | /* Load a value into a register, reusing its register if it is a temporary. */ |
|
| 56 | + | reg_t emit_load(gen_t *g, value_t v); |
|
| 57 | + | /* Load a full machine dword (8 bytes) from a value. */ |
|
| 58 | + | reg_t emit_load_dword(gen_t *g, value_t v); |
|
| 59 | + | /* Load a value with an offset. */ |
|
| 60 | + | reg_t emit_load_offset(gen_t *g, value_t v, i32 offset); |
|
| 61 | + | /* Store a value on the stack. Returns a new value located on the stack. */ |
|
| 62 | + | value_t emit_store(gen_t *g, value_t v, reg_t base, int offset); |
|
| 63 | + | /* Push a value to the stack. */ |
|
| 64 | + | value_t emit_push(gen_t *g, value_t v); |
|
| 65 | + | /* Replace a value, eg. for assigning */ |
|
| 66 | + | void emit_replace(gen_t *g, value_t old, value_t new); |
|
| 67 | + | /* Compute dst = base + offset while respecting SIMM12 limits. */ |
|
| 68 | + | void emit_addr_offset(gen_t *g, reg_t dst, reg_t base, i32 offset); |
|
| 69 | + | /* Load a value at a stack offset into a register. */ |
|
| 70 | + | usize emit_regload(gen_t *g, reg_t dst, reg_t base, i32 offset, type_t *ty); |
|
| 71 | + | /* Push a register to the stack. */ |
|
| 72 | + | int emit_regpush(gen_t *g, reg_t src, type_t *ty); |
|
| 73 | + | /* Store a register value on the stack. */ |
|
| 74 | + | usize emit_regstore(gen_t *g, reg_t src, reg_t base, i32 offset, type_t *ty); |
|
| 75 | + | /* Store a tvalue tag. */ |
|
| 76 | + | void emit_store_tag(gen_t *g, tval_t tv, reg_t tag_reg); |
|
| 77 | + | /* Reserve stack space with explicit alignment. */ |
|
| 78 | + | i32 reserve_aligned(gen_t *g, type_t *ty, i32 align); |
|
| 79 | + | ||
| 80 | + | /* Copy memory between two locations */ |
|
| 81 | + | void emit_memcopy(gen_t *g, offset_t src, offset_t dst, type_t *ty); |
|
| 82 | + | /* Zero out a memory region */ |
|
| 83 | + | void emit_memzero(gen_t *g, offset_t dst, i32 size); |
|
| 84 | + | /* Compare two registers based on their type: |
|
| 85 | + | * |
|
| 86 | + | * - If references, compares the addresses (pointers) |
|
| 87 | + | * - If values, compares the content |
|
| 88 | + | * - For records and arrays, compares each element |
|
| 89 | + | * - For slices, compares the pointer addresses |
|
| 90 | + | * |
|
| 91 | + | * Puts the result (0 or 1) in the result register. |
|
| 92 | + | */ |
|
| 93 | + | void emit_memequal(gen_t *g, reg_t left, reg_t right, type_t *ty, reg_t result); |
|
| 94 | + | /* Compare raw bytes between two memory locations */ |
|
| 95 | + | void emit_bytes_equal( |
|
| 96 | + | gen_t *g, reg_t left, reg_t right, usize size, reg_t result |
|
| 97 | + | ); |
|
| 98 | + | /* Copy a record from one memory offset to another. */ |
|
| 99 | + | void emit_record_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty); |
|
| 100 | + | /* Store a value directly to a record field. */ |
|
| 101 | + | void emit_record_field_set( |
|
| 102 | + | gen_t *g, value_t val, reg_t base, i32 record_offset, symbol_t *field |
|
| 103 | + | ); |
|
| 104 | + | /* Calculate record field value from a record value and field symbol. |
|
| 105 | + | * Creates a stack-based value pointing to the field at the correct offset. */ |
|
| 106 | + | value_t emit_record_field_get(value_t sval, symbol_t *field); |
|
| 107 | + | /* Copy an array from one memory location to another. */ |
|
| 108 | + | void emit_array_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty); |
|
| 109 | + | /* Copy a word between two locations. Copies addresses of |
|
| 110 | + | * pass-by-reference types. */ |
|
| 111 | + | void emit_copy_by_ref(gen_t *g, value_t src, value_t dst); |
|
| 112 | + | /* Emit a slice literal */ |
|
| 113 | + | value_t emit_slice_lit(gen_t *g, i32 offset, usize ptr, usize len, type_t *typ); |
|
| 114 | + | /* Store a successful result value by clearing the tag and |
|
| 115 | + | * writing the payload. */ |
|
| 116 | + | void emit_result_store_success(gen_t *g, value_t dest, value_t value); |
|
| 117 | + | /* Store an error result by writing the raw error tag and |
|
| 118 | + | * zeroing the payload. */ |
|
| 119 | + | void emit_result_store_error(gen_t *g, value_t dest, value_t err); |
|
| 120 | + | ||
| 121 | + | #endif |
io.c
added
+58 -0
| 1 | + | #include <stdarg.h> |
|
| 2 | + | #include <stdio.h> |
|
| 3 | + | #include <stdlib.h> |
|
| 4 | + | #include <string.h> |
|
| 5 | + | ||
| 6 | + | #include "io.h" |
|
| 7 | + | #include "types.h" |
|
| 8 | + | ||
| 9 | + | i32 readfile(const char *path, char **data) { |
|
| 10 | + | FILE *fp = NULL; |
|
| 11 | + | i32 size = -1; |
|
| 12 | + | ||
| 13 | + | *data = NULL; |
|
| 14 | + | ||
| 15 | + | if (!(fp = fopen(path, "r"))) { |
|
| 16 | + | goto cleanup; |
|
| 17 | + | } |
|
| 18 | + | if (fseek(fp, 0L, SEEK_END) != 0) { |
|
| 19 | + | goto cleanup; |
|
| 20 | + | } |
|
| 21 | + | if ((size = ftell(fp)) < 0) { |
|
| 22 | + | goto cleanup; |
|
| 23 | + | } |
|
| 24 | + | if (fseek(fp, 0L, SEEK_SET) != 0) { |
|
| 25 | + | goto cleanup; |
|
| 26 | + | } |
|
| 27 | + | if ((*data = malloc((size_t)size + 1)) == NULL) { |
|
| 28 | + | goto cleanup; |
|
| 29 | + | } |
|
| 30 | + | if (fread(*data, 1, (size_t)size, fp) != (size_t)size) { |
|
| 31 | + | size = -1; |
|
| 32 | + | goto cleanup; |
|
| 33 | + | } |
|
| 34 | + | (*data)[size] = '\0'; |
|
| 35 | + | ||
| 36 | + | cleanup: |
|
| 37 | + | if (fp) { |
|
| 38 | + | fclose(fp); |
|
| 39 | + | } |
|
| 40 | + | if (size < 0 && *data) { |
|
| 41 | + | free(*data); |
|
| 42 | + | *data = NULL; |
|
| 43 | + | } |
|
| 44 | + | return size; |
|
| 45 | + | } |
|
| 46 | + | ||
| 47 | + | void _bail(const char *file, i32 line, const char *restrict fmt, ...) { |
|
| 48 | + | va_list ap; |
|
| 49 | + | va_start(ap, fmt); |
|
| 50 | + | ||
| 51 | + | fflush(stdout); |
|
| 52 | + | fprintf(stderr, "%s:%d: fatal: ", file, line); |
|
| 53 | + | vfprintf(stderr, fmt, ap); |
|
| 54 | + | fprintf(stderr, "\n"); |
|
| 55 | + | va_end(ap); |
|
| 56 | + | ||
| 57 | + | exit(1); |
|
| 58 | + | } |
io.h
added
+19 -0
#ifndef IO_H
#define IO_H

#include "types.h"

/* Abort execution and exit with an error code. Forwards to `_bail`,
 * inserting the call site's `__FILE__` and `__LINE__` ahead of the
 * printf-style arguments. */
#define bail(...) _bail(__FILE__, __LINE__, __VA_ARGS__)

/* Debug output - disabled for bootstrap compiler. Expands to a no-op
 * expression, so the arguments are discarded and never evaluated. */
#define debug(...) ((void)0)

/* Backend of `bail`: takes the reporting source location followed by a
 * printf-style format string and its arguments. Never returns. */
__attribute__((noreturn)) void _bail(
    const char *file, int line, const char *restrict fmt, ...
);

/* Read a file in its entirety into `data`. Returns a negative value when
 * the file cannot be opened or read. */
i32 readfile(const char *path, char **data);

#endif
limits.h
added
+44 -0
#ifndef LIMITS_H
#define LIMITS_H

#include "riscv.h"

/* Fixed capacity limits used throughout the compiler. */
#define MAX_NODES            32768
#define MAX_UNION_VARIANTS   128
#define MAX_RECORD_FIELDS    32
#define MAX_ARRAY_ELEMS      1024
#define MAX_BLOCK_STATEMENTS 512
#define MAX_SWITCH_CASES     128
#define MAX_CASE_PATTERNS    64
#define MAX_QUALIFIED_NAME   128
/* One parameter per register in the range `A0`..`A7`; those names are
 * presumably the argument registers declared in "riscv.h". */
#define MAX_FN_PARAMS        ((A7 - A0) + 1)
#define MAX_FN_THROWS        8
#define MAX_FN_LOCALS        32
#define MAX_SYMBOLS          16384
#define MAX_SCOPES           8192
#define MAX_SCOPE_SYMBOLS    512
#define MAX_INSTRS           (1 << 20)
#define MAX_FN_PATCHES       4096
#define MAX_RET_PATCHES      256
#define MAX_BRK_PATCHES      512
#define MAX_TYPES            4096
#define MAX_FRAME_SIZE       (512 * 1024)
#define MAX_STRING_LITERALS  1024
#define MAX_CONSTANTS        256
#define MAX_TRY_CATCHES      8

/* Pool size for variable-length node pointer arrays.
 * Replaces per-node embedded arrays to shrink node_t. */
#define MAX_NODEPTR_POOL (MAX_NODES * 4)

/* Pool sizes for type_t sub-arrays (variants, fields, params, throws). */
#define MAX_SYMPTR_POOL  16384
#define MAX_TYPEPTR_POOL 16384

/* Maximum values for module system */
#define MAX_MODULES     48
#define MAX_MODULE_DEPS 24
#define MAX_PATH_LEN    1024
#define MAX_IMPORTS     32

#endif
module.c
added
+341 -0
| 1 | + | #include <libgen.h> |
|
| 2 | + | #include <stdio.h> |
|
| 3 | + | #include <stdlib.h> |
|
| 4 | + | #include <string.h> |
|
| 5 | + | ||
| 6 | + | #include "ast.h" |
|
| 7 | + | #include "io.h" |
|
| 8 | + | #include "limits.h" |
|
| 9 | + | #include "module.h" |
|
| 10 | + | #include "parser.h" |
|
| 11 | + | #include "scanner.h" |
|
| 12 | + | #include "symtab.h" |
|
| 13 | + | #include "util.h" |
|
| 14 | + | ||
/* Report a printf-style diagnostic on `stderr`, prefixed with "module: "
 * and terminated with a newline. */
#define error(...)                                                             \
    do {                                                                       \
        fputs("module: ", stderr);                                             \
        fprintf(stderr, __VA_ARGS__);                                          \
        fputc('\n', stderr);                                                   \
    } while (0)
|
| 22 | + | ||
| 23 | + | /* Extract the directory part of a path */ |
|
| 24 | + | static void getdirname(const char *path, char *result, size_t maxlen) { |
|
| 25 | + | char buf[MAX_PATH_LEN]; |
|
| 26 | + | strndup(buf, path, MAX_PATH_LEN); |
|
| 27 | + | ||
| 28 | + | char *dir = dirname(buf); |
|
| 29 | + | strndup(result, dir, maxlen); |
|
| 30 | + | } |
|
| 31 | + | ||
| 32 | + | /* Check if a file has .rad extension */ |
|
| 33 | + | static bool is_source(const char *path) { |
|
| 34 | + | const char *dot = strrchr(path, '.'); |
|
| 35 | + | if (!dot) |
|
| 36 | + | return false; |
|
| 37 | + | return strcmp(dot, SOURCE_EXT) == 0; |
|
| 38 | + | } |
|
| 39 | + | ||
| 40 | + | /* Helper function to convert path with / to qualified name with :: */ |
|
| 41 | + | static void path_to_qualified(const char *path, char *qualified, usize len) { |
|
| 42 | + | usize j = 0; |
|
| 43 | + | ||
| 44 | + | for (usize i = 0; path[i] && j < len - 2; i++) { |
|
| 45 | + | if (path[i] == '/') { |
|
| 46 | + | qualified[j++] = ':'; |
|
| 47 | + | qualified[j++] = ':'; |
|
| 48 | + | } else { |
|
| 49 | + | qualified[j++] = path[i]; |
|
| 50 | + | } |
|
| 51 | + | } |
|
| 52 | + | qualified[j] = '\0'; |
|
| 53 | + | } |
|
| 54 | + | ||
| 55 | + | /* Extract the file name without directory and extension */ |
|
| 56 | + | static void getbasename(const char *path, char *result, size_t maxlen) { |
|
| 57 | + | char buf[MAX_PATH_LEN]; |
|
| 58 | + | strndup(buf, path, MAX_PATH_LEN); |
|
| 59 | + | ||
| 60 | + | /* Remove .rad extension if present */ |
|
| 61 | + | char *base = basename(buf); |
|
| 62 | + | if (is_source(base)) { |
|
| 63 | + | *strrchr(base, '.') = '\0'; |
|
| 64 | + | } |
|
| 65 | + | strndup(result, base, maxlen); |
|
| 66 | + | } |
|
| 67 | + | ||
| 68 | + | /* Initialize the module manager */ |
|
| 69 | + | void module_manager_init(module_manager_t *mm, const char *entryfile) { |
|
| 70 | + | mm->nmodules = 0; |
|
| 71 | + | getdirname(entryfile, mm->rootdir, MAX_PATH_LEN); |
|
| 72 | + | } |
|
| 73 | + | ||
| 74 | + | /* Initialize a module */ |
|
| 75 | + | static void module_init(module_t *module, const char *path) { |
|
| 76 | + | memset(module, 0, sizeof(*module)); |
|
| 77 | + | strndup(module->path, path, MAX_PATH_LEN); |
|
| 78 | + | getbasename(path, module->name, MAX_PATH_LEN); |
|
| 79 | + | strndup(module->qualified, module->name, MAX_PATH_LEN); |
|
| 80 | + | ||
| 81 | + | module->state = MODULE_STATE_UNVISITED; |
|
| 82 | + | module->declared = false; |
|
| 83 | + | module->checked = false; |
|
| 84 | + | module->compiled = false; |
|
| 85 | + | module->source = NULL; |
|
| 86 | + | module->ast = NULL; |
|
| 87 | + | module->scope = NULL; |
|
| 88 | + | module->default_fn = NULL; |
|
| 89 | + | module->parent = NULL; |
|
| 90 | + | module->nchildren = 0; |
|
| 91 | + | } |
|
| 92 | + | ||
| 93 | + | /* Helper function to create a module path from a string */ |
|
| 94 | + | void module_path(char dest[MAX_QUALIFIED_NAME], const char *name) { |
|
| 95 | + | strncpy(dest, name, MAX_QUALIFIED_NAME); |
|
| 96 | + | } |
|
| 97 | + | ||
| 98 | + | /* Helper function to append a path component to a module path */ |
|
| 99 | + | void module_qualify_str( |
|
| 100 | + | char dest[MAX_QUALIFIED_NAME], const char *child, u16 len |
|
| 101 | + | ) { |
|
| 102 | + | strlcat(dest, "::", MAX_QUALIFIED_NAME); |
|
| 103 | + | strncat(dest, child, len); |
|
| 104 | + | } |
|
| 105 | + | ||
| 106 | + | /* Helper function to append a path component to a module path */ |
|
| 107 | + | void module_qualify(char dest[MAX_QUALIFIED_NAME], node_t *ident) { |
|
| 108 | + | module_qualify_str(dest, ident->val.ident.name, ident->val.ident.length); |
|
| 109 | + | } |
|
| 110 | + | ||
| 111 | + | /* Add a module to the manager with custom qualified name */ |
|
| 112 | + | module_t *module_manager_register_qualified( |
|
| 113 | + | module_manager_t *mm, const char *path, const char *qualified |
|
| 114 | + | ) { |
|
| 115 | + | if (!mm || !path) |
|
| 116 | + | return NULL; |
|
| 117 | + | ||
| 118 | + | if (mm->nmodules >= MAX_MODULES) { |
|
| 119 | + | error("maximum number of modules (%d) exceeded", MAX_MODULES); |
|
| 120 | + | return NULL; |
|
| 121 | + | } |
|
| 122 | + | module_t *mod = NULL; |
|
| 123 | + | if ((mod = module_manager_lookup(mm, path))) { |
|
| 124 | + | return mod; |
|
| 125 | + | } |
|
| 126 | + | mod = &mm->modules[mm->nmodules++]; |
|
| 127 | + | module_init(mod, path); |
|
| 128 | + | ||
| 129 | + | /* Build hierarchy if qualified name provided */ |
|
| 130 | + | if (qualified) { |
|
| 131 | + | char path_copy[MAX_PATH_LEN]; |
|
| 132 | + | strncpy(path_copy, qualified, MAX_PATH_LEN - 1); |
|
| 133 | + | path_copy[MAX_PATH_LEN - 1] = '\0'; |
|
| 134 | + | ||
| 135 | + | /* Remove .rad extension */ |
|
| 136 | + | char *ext = strrchr(path_copy, '.'); |
|
| 137 | + | if (ext) |
|
| 138 | + | *ext = '\0'; |
|
| 139 | + | ||
| 140 | + | /* Check if this is a submodule (contains /) */ |
|
| 141 | + | char *slash = strrchr(path_copy, '/'); // Find LAST slash |
|
| 142 | + | if (slash) { |
|
| 143 | + | *slash = '\0'; |
|
| 144 | + | char *parent_path = path_copy; // Everything before last slash |
|
| 145 | + | char *child_name = slash + 1; // Everything after last slash |
|
| 146 | + | ||
| 147 | + | /* Convert parent path to qualified name for lookup */ |
|
| 148 | + | char parent_q[MAX_PATH_LEN]; |
|
| 149 | + | path_to_qualified(parent_path, parent_q, MAX_PATH_LEN); |
|
| 150 | + | ||
| 151 | + | /* Look up parent using :: notation */ |
|
| 152 | + | module_t *parent = |
|
| 153 | + | module_manager_lookup_by_qualified_name(mm, parent_q); |
|
| 154 | + | ||
| 155 | + | if (!parent) { |
|
| 156 | + | error( |
|
| 157 | + | "parent module '%s' not found for '%s'", parent_q, qualified |
|
| 158 | + | ); |
|
| 159 | + | mm->nmodules--; // Rollback the module we just added |
|
| 160 | + | return NULL; |
|
| 161 | + | } |
|
| 162 | + | mod->parent = parent; |
|
| 163 | + | parent->children[parent->nchildren++] = mod; |
|
| 164 | + | ||
| 165 | + | /* Write module qualified name */ |
|
| 166 | + | module_path(mod->qualified, parent_q); |
|
| 167 | + | module_qualify_str(mod->qualified, child_name, strlen(child_name)); |
|
| 168 | + | } else { |
|
| 169 | + | /* No parent (root-level module) */ |
|
| 170 | + | module_path(mod->qualified, path_copy); |
|
| 171 | + | } |
|
| 172 | + | } |
|
| 173 | + | ||
| 174 | + | return mod; |
|
| 175 | + | } |
|
| 176 | + | ||
| 177 | + | /* Add a module to the manager */ |
|
| 178 | + | module_t *module_manager_register(module_manager_t *mm, const char *path) { |
|
| 179 | + | return module_manager_register_qualified(mm, path, NULL); |
|
| 180 | + | } |
|
| 181 | + | ||
| 182 | + | /* Find a module in the manager by path */ |
|
| 183 | + | module_t *module_manager_lookup(module_manager_t *mm, const char *path) { |
|
| 184 | + | for (usize i = 0; i < mm->nmodules; i++) { |
|
| 185 | + | if (strcmp(mm->modules[i].path, path) == 0) { |
|
| 186 | + | return &mm->modules[i]; |
|
| 187 | + | } |
|
| 188 | + | } |
|
| 189 | + | return NULL; |
|
| 190 | + | } |
|
| 191 | + | ||
| 192 | + | /* Find a module by name in the module manager */ |
|
| 193 | + | module_t *module_manager_lookup_by_name( |
|
| 194 | + | module_manager_t *mm, const char *name, u16 length |
|
| 195 | + | ) { |
|
| 196 | + | for (usize j = 0; j < mm->nmodules; j++) { |
|
| 197 | + | if (strncmp(mm->modules[j].name, name, length) == 0 && |
|
| 198 | + | strlen(mm->modules[j].name) == length) { |
|
| 199 | + | return &mm->modules[j]; |
|
| 200 | + | } |
|
| 201 | + | } |
|
| 202 | + | return NULL; |
|
| 203 | + | } |
|
| 204 | + | ||
| 205 | + | /* Find a module by qualified name in the module manager */ |
|
| 206 | + | module_t *module_manager_lookup_by_qualified_name( |
|
| 207 | + | module_manager_t *mm, const char *name |
|
| 208 | + | ) { |
|
| 209 | + | for (usize j = 0; j < mm->nmodules; j++) { |
|
| 210 | + | if (strcmp(mm->modules[j].qualified, name) == 0) { |
|
| 211 | + | return &mm->modules[j]; |
|
| 212 | + | } |
|
| 213 | + | } |
|
| 214 | + | return NULL; |
|
| 215 | + | } |
|
| 216 | + | ||
| 217 | + | /* Parse a single module, attaching diagnostics on failure. */ |
|
| 218 | + | bool module_parse(module_t *module, i32 *err) { |
|
| 219 | + | i32 size = readfile(module->path, &module->source); |
|
| 220 | + | if (size < 0) { |
|
| 221 | + | *err = MODULE_NOT_FOUND; |
|
| 222 | + | error("unable to read module file at '%s'", module->path); |
|
| 223 | + | return false; |
|
| 224 | + | } |
|
| 225 | + | scanner_init(&module->parser.scanner, module->path, module->source); |
|
| 226 | + | parser_init(&module->parser); |
|
| 227 | + | ||
| 228 | + | node_t *ast = parser_parse(&module->parser); |
|
| 229 | + | if (module->parser.errors) { |
|
| 230 | + | *err = MODULE_PARSE_ERROR; |
|
| 231 | + | return false; |
|
| 232 | + | } |
|
| 233 | + | if (!ast) { |
|
| 234 | + | *err = MODULE_PARSE_ERROR; |
|
| 235 | + | error("failed to parse module '%s'", module->path); |
|
| 236 | + | return false; |
|
| 237 | + | } |
|
| 238 | + | if (ast->cls != NODE_MOD_BODY) { |
|
| 239 | + | *err = MODULE_PARSE_ERROR; |
|
| 240 | + | error( |
|
| 241 | + | "module '%s' parsed with unexpected root node (%d)", |
|
| 242 | + | module->path, |
|
| 243 | + | ast->cls |
|
| 244 | + | ); |
|
| 245 | + | return false; |
|
| 246 | + | } |
|
| 247 | + | ||
| 248 | + | symbol_t sym = (symbol_t){ |
|
| 249 | + | .name = module->name, |
|
| 250 | + | .length = strlen(module->name), |
|
| 251 | + | .node = ast, |
|
| 252 | + | .kind = SYM_MODULE, |
|
| 253 | + | .e.mod = module, |
|
| 254 | + | }; |
|
| 255 | + | module->ast = ast; |
|
| 256 | + | module->ast->sym = alloc_symbol(sym); |
|
| 257 | + | ||
| 258 | + | return true; |
|
| 259 | + | } |
|
| 260 | + | ||
| 261 | + | bool module_manager_parse(module_manager_t *mm, i32 *err) { |
|
| 262 | + | *err = MODULE_OK; |
|
| 263 | + | ||
| 264 | + | for (usize i = 0; i < mm->nmodules; i++) { |
|
| 265 | + | if (!(module_parse(&mm->modules[i], err))) { |
|
| 266 | + | return false; |
|
| 267 | + | } |
|
| 268 | + | } |
|
| 269 | + | return true; |
|
| 270 | + | } |
|
| 271 | + | ||
| 272 | + | /* Find module by relative import path */ |
|
| 273 | + | module_t *module_manager_find_relative( |
|
| 274 | + | module_manager_t *mm, const char *basepath, const char *import |
|
| 275 | + | ) { |
|
| 276 | + | /* Find the importing module */ |
|
| 277 | + | module_t *importer = module_manager_lookup(mm, basepath); |
|
| 278 | + | if (!importer) { |
|
| 279 | + | return NULL; |
|
| 280 | + | } |
|
| 281 | + | ||
| 282 | + | char import_copy[MAX_PATH_LEN]; |
|
| 283 | + | strndup(import_copy, import, MAX_PATH_LEN); |
|
| 284 | + | ||
| 285 | + | char *segments[MAX_MODULES]; |
|
| 286 | + | usize nsegments = 0; |
|
| 287 | + | ||
| 288 | + | for (char *token = strtok(import_copy, ":"); token; |
|
| 289 | + | token = strtok(NULL, ":")) { |
|
| 290 | + | if (*token == '\0') |
|
| 291 | + | continue; |
|
| 292 | + | if (nsegments >= MAX_MODULES) |
|
| 293 | + | break; |
|
| 294 | + | segments[nsegments++] = token; |
|
| 295 | + | } |
|
| 296 | + | if (nsegments == 0) |
|
| 297 | + | return NULL; |
|
| 298 | + | ||
| 299 | + | module_t *current = importer; |
|
| 300 | + | usize index = 0; |
|
| 301 | + | ||
| 302 | + | while (index < nsegments && strcmp(segments[index], "super") == 0) { |
|
| 303 | + | if (!current || !current->parent) |
|
| 304 | + | return NULL; |
|
| 305 | + | current = current->parent; |
|
| 306 | + | index++; |
|
| 307 | + | } |
|
| 308 | + | ||
| 309 | + | if (index == nsegments) |
|
| 310 | + | return current; |
|
| 311 | + | ||
| 312 | + | if (index == 0 && nsegments == 1) { |
|
| 313 | + | const char *segment = segments[0]; |
|
| 314 | + | ||
| 315 | + | for (usize i = 0; i < importer->nchildren; i++) { |
|
| 316 | + | if (strcmp(importer->children[i]->name, segment) == 0) { |
|
| 317 | + | return importer->children[i]; |
|
| 318 | + | } |
|
| 319 | + | } |
|
| 320 | + | return module_manager_lookup_by_name(mm, segment, strlen(segment)); |
|
| 321 | + | } |
|
| 322 | + | ||
| 323 | + | if (index == 0) { |
|
| 324 | + | current = module_manager_lookup_by_name( |
|
| 325 | + | mm, segments[index], strlen(segments[index]) |
|
| 326 | + | ); |
|
| 327 | + | index++; |
|
| 328 | + | } |
|
| 329 | + | ||
| 330 | + | for (; index < nsegments && current; index++) { |
|
| 331 | + | module_t *child = NULL; |
|
| 332 | + | for (usize j = 0; j < current->nchildren; j++) { |
|
| 333 | + | if (strcmp(current->children[j]->name, segments[index]) == 0) { |
|
| 334 | + | child = current->children[j]; |
|
| 335 | + | break; |
|
| 336 | + | } |
|
| 337 | + | } |
|
| 338 | + | current = child; |
|
| 339 | + | } |
|
| 340 | + | return current; |
|
| 341 | + | } |
module.h
added
+80 -0
#ifndef MODULE_H
#define MODULE_H

#include "ast.h"
#include "limits.h"
#include "parser.h"
#include "symtab.h"

/* Extension for source files */
#define SOURCE_EXT ".rad"

/* Error codes for module loading */
enum {
    MODULE_OK           = 0,
    MODULE_NOT_FOUND    = 1,
    MODULE_PARSE_ERROR  = 2,
    MODULE_TYPE_ERROR   = 3,
    MODULE_CIRCULAR_DEP = 4
};

/* State of a module in the dependency graph */
typedef enum {
    MODULE_STATE_UNVISITED = 0,
    MODULE_STATE_VISITING  = 1,
    MODULE_STATE_VISITED   = 2
} module_state_t;

typedef struct module_t module_t;

/* A single source module and everything known about it. */
struct module_t {
    char           path[MAX_PATH_LEN];      /* Source file path. */
    char           name[MAX_PATH_LEN];      /* Base name, no extension. */
    char           qualified[MAX_PATH_LEN]; /* `::`-separated full name. */
    attrib_t       attribs;
    node_t        *ast;    /* Root node, set once parsed. */
    char          *source; /* File contents, set when parsing. */
    parser_t       parser; /* Parser (and scanner) state for this module. */
    module_state_t state;
    scope_t       *scope;
    symbol_t      *default_fn;
    bool           declared;
    bool           checked;
    bool           compiled;
    module_t      *parent;                /* Enclosing module, or NULL. */
    module_t      *children[MAX_MODULES]; /* Direct submodules. */
    u8             nchildren;             /* Used entries in `children`. */
};

/* Registry of all modules taking part in a compilation. */
typedef struct {
    module_t *root;
    module_t  modules[MAX_MODULES]; /* Registered modules, in order. */
    u8        nmodules;             /* Used entries in `modules`. */
    char      rootdir[MAX_PATH_LEN]; /* Directory of the entry file. */
} module_manager_t;

/* Initialize the manager for the given entry file. */
void      module_manager_init(module_manager_t *mm, const char *entryfile);
/* Parse every registered module; false on the first failure, with the
 * error code stored in `*err`. */
bool      module_manager_parse(module_manager_t *mm, int *err);
/* Register the module at `path`, or return the one already registered. */
module_t *module_manager_register(module_manager_t *mm, const char *path);
/* Like `module_manager_register`, but place the module in the hierarchy
 * according to the `/`-separated `qualified` path. */
module_t *module_manager_register_qualified(
    module_manager_t *mm, const char *path, const char *qualified
);
/* Find a module by its exact source path. */
module_t *module_manager_lookup(module_manager_t *mm, const char *path);
/* Find a module by its short name of the given length. */
module_t *module_manager_lookup_by_name(
    module_manager_t *mm, const char *name, u16 length
);
/* Find a module by its exact `::`-separated qualified name. */
module_t *module_manager_lookup_by_qualified_name(
    module_manager_t *mm, const char *name
);
/* Resolve a `:`-separated import path relative to the module registered
 * under `base_path`. */
module_t *module_manager_find_relative(
    module_manager_t *mm, const char *base_path, const char *import_path
);
/* Read and parse a single module; false on failure, with the error code
 * stored in `*err`. */
bool      module_parse(module_t *module, int *err);
/* Append `::` and the identifier's name to the module path in `dest`. */
void      module_qualify(char dest[MAX_QUALIFIED_NAME], node_t *ident);
/* Append `::` and the first `len` characters of `child` to `dest`. */
void      module_qualify_str(
    char dest[MAX_QUALIFIED_NAME], const char *child, u16 len
);
/* Initialize the module path `dest` from `name`. */
void module_path(char dest[MAX_QUALIFIED_NAME], const char *name);
void module_register_test(module_t *mod, node_t *test);

#endif
options.c
added
+49 -0
| 1 | + | #include <stdio.h> |
|
| 2 | + | #include <stdlib.h> |
|
| 3 | + | #include <string.h> |
|
| 4 | + | ||
| 5 | + | #include "io.h" |
|
| 6 | + | #include "options.h" |
|
| 7 | + | #include "types.h" |
|
| 8 | + | ||
| 9 | + | /* Create a new options struct. */ |
|
| 10 | + | struct options options(int argc, char *argv[]) { |
|
| 11 | + | return (struct options){ |
|
| 12 | + | .inputs = { 0 }, |
|
| 13 | + | .ninputs = 0, |
|
| 14 | + | .modules = { 0 }, |
|
| 15 | + | .nmodules = 0, |
|
| 16 | + | .argv = argv, |
|
| 17 | + | .argc = argc, |
|
| 18 | + | .output = NULL, |
|
| 19 | + | }; |
|
| 20 | + | } |
|
| 21 | + | ||
| 22 | + | /* Parse the command line options. */ |
|
| 23 | + | int options_parse(struct options *o) { |
|
| 24 | + | for (int i = 1; i < o->argc; i++) { |
|
| 25 | + | if (o->argv[i][0] != '-') { |
|
| 26 | + | o->inputs[o->ninputs++] = o->argv[i]; |
|
| 27 | + | continue; |
|
| 28 | + | } |
|
| 29 | + | char *arg = &o->argv[i][1]; |
|
| 30 | + | ||
| 31 | + | if (!strcmp(arg, "o")) { |
|
| 32 | + | if (++i >= o->argc) |
|
| 33 | + | bail("`-o` requires an output path"); |
|
| 34 | + | o->output = o->argv[i]; |
|
| 35 | + | } else if (!strcmp(arg, "mod")) { |
|
| 36 | + | if (++i >= o->argc) |
|
| 37 | + | bail("`-mod` requires a module path"); |
|
| 38 | + | o->modules[o->nmodules++] = o->argv[i]; |
|
| 39 | + | } else if (!strcmp(arg, "pkg") || !strcmp(arg, "entry")) { |
|
| 40 | + | /* Ignored; consumed by the self-hosted compiler only. */ |
|
| 41 | + | i++; |
|
| 42 | + | } else if (!strcmp(arg, "test") || !strcmp(arg, "dump")) { |
|
| 43 | + | /* Ignored; consumed by the self-hosted compiler only. */ |
|
| 44 | + | } else { |
|
| 45 | + | bail("unknown option `-%s`", arg); |
|
| 46 | + | } |
|
| 47 | + | } |
|
| 48 | + | return 0; |
|
| 49 | + | } |
options.h
added
+26 -0
#ifndef OPTIONS_H
#define OPTIONS_H

/* Command-line flags. */
enum flags {
    FLAG_TEST = 1 << 9,
};

/* Command-line options structure. */
struct options {
    char  *output;      /* Argument of `-o`, or NULL when not given. */
    char  *inputs[32];  /* Arguments that do not start with `-`. */
    int    ninputs;     /* Used entries in `inputs`. */
    char  *modules[64]; /* Arguments of `-mod`. */
    int    nmodules;    /* Used entries in `modules`. */
    int    argc;        /* Argument count as passed to `options`. */
    char **argv;        /* Argument vector as passed to `options`. */
};

/* Create a new options struct. */
struct options options(int argc, char *argv[]);

/* Parse the command line options. */
int options_parse(struct options *o);

#endif
parser.c
added
+2401 -0
| 1 | + | #include <stdarg.h> |
|
| 2 | + | #include <stdio.h> |
|
| 3 | + | #include <stdlib.h> |
|
| 4 | + | #include <string.h> |
|
| 5 | + | ||
| 6 | + | #include "ast.h" |
|
| 7 | + | #include "io.h" |
|
| 8 | + | #include "limits.h" |
|
| 9 | + | #include "parser.h" |
|
| 10 | + | #include "scanner.h" |
|
| 11 | + | #include "strings.h" |
|
| 12 | + | ||
| 13 | + | #define error(...) __error(__VA_ARGS__, NULL) |
|
| 14 | + | ||
| 15 | + | static node_t *parse_expr(parser_t *p); |
|
| 16 | + | static node_t *parse_stmt_or_block(parser_t *p); |
|
| 17 | + | static node_t *parse_cond(parser_t *p); |
|
| 18 | + | static node_t *parse_if(parser_t *p); |
|
| 19 | + | static node_t *parse_if_let(parser_t *p); |
|
| 20 | + | static node_t *parse_if_case(parser_t *p); |
|
| 21 | + | static node_t *parse_block(parser_t *p); |
|
| 22 | + | static node_t *parse_stmt(parser_t *p); |
|
| 23 | + | static node_t *parse_type(parser_t *p); |
|
| 24 | + | static node_t *parse_union(parser_t *p, node_t *attrs); |
|
| 25 | + | static node_t *parse_record(parser_t *p, node_t *attrs); |
|
| 26 | + | static node_t *parse_record_type(parser_t *p); |
|
| 27 | + | static node_t *parse_record_lit(parser_t *p, node_t *type_name); |
|
| 28 | + | static node_t *parse_postfix(parser_t *p, node_t *expr); |
|
| 29 | + | static node_t *parse_as_cast(parser_t *p, node_t *expr); |
|
| 30 | + | static node_t *parse_name_type_value(parser_t *p, nodeclass_t cls); |
|
| 31 | + | static node_t *parse_static(parser_t *p); |
|
| 32 | + | static node_t *parse_ident(parser_t *p, const char *error); |
|
| 33 | + | static node_t *parse_ident_or_placeholder(parser_t *p, const char *error); |
|
| 34 | + | static node_t *parse_scope_segment(parser_t *p, const char *error); |
|
| 35 | + | static node_t *parse_label(parser_t *p, const char *error); |
|
| 36 | + | static node_t *parse_assignment(parser_t *p, node_t *lval); |
|
| 37 | + | static node_t *parse_fn_call_arg(parser_t *p); |
|
| 38 | + | static node_t *parse_match(parser_t *p); |
|
| 39 | + | static node_t *parse_match_case(parser_t *p); |
|
| 40 | + | static node_t *parse_builtin(parser_t *p); |
|
| 41 | + | static node_t *parse_throw(parser_t *p); |
|
| 42 | + | static node_t *parse_try(parser_t *p, bool panic, bool optional); |
|
| 43 | + | static node_t *parse_panic(parser_t *p); |
|
| 44 | + | static bool token_is_stmt_terminator(tokenclass_t cls); |
|
| 45 | + | static bool stmt_requires_semicolon(const node_t *stmt); |
|
| 46 | + | static bool consume_statement_separator( |
|
| 47 | + | parser_t *p, node_t *stmt, bool require |
|
| 48 | + | ); |
|
| 49 | + | ||
| 50 | + | /* Initialize parser. */ |
|
| 51 | + | void parser_init(parser_t *p) { |
|
| 52 | + | p->root = NULL; |
|
| 53 | + | p->errors = 0; |
|
| 54 | + | p->nnodes = 0; |
|
| 55 | + | p->nptrs = 0; |
|
| 56 | + | p->context = PARSE_CTX_NORMAL; |
|
| 57 | + | } |
|
| 58 | + | ||
| 59 | + | /* Report an error with optional format string. */ |
|
| 60 | + | static void __error(parser_t *p, const char *fmt, ...) { |
|
| 61 | + | va_list ap; |
|
| 62 | + | va_start(ap, fmt); |
|
| 63 | + | ||
| 64 | + | location_t loc = scanner_get_location(&p->scanner, p->current.position); |
|
| 65 | + | fprintf(stderr, "%s:%u:%u: error: ", loc.file, loc.line, loc.col); |
|
| 66 | + | vfprintf(stderr, fmt, ap); |
|
| 67 | + | fprintf(stderr, "\n"); |
|
| 68 | + | va_end(ap); |
|
| 69 | + | ||
| 70 | + | p->errors++; |
|
| 71 | + | } |
|
| 72 | + | ||
| 73 | + | /* Check that the current token is equal to the given type. */ |
|
| 74 | + | static bool check(parser_t *p, tokenclass_t cls) { |
|
| 75 | + | return p->current.cls == cls; |
|
| 76 | + | } |
|
| 77 | + | ||
| 78 | + | /* Advance the parser by one token. */ |
|
| 79 | + | static void advance(parser_t *p) { |
|
| 80 | + | p->previous = p->current; |
|
| 81 | + | p->current = scanner_next(&p->scanner); |
|
| 82 | + | } |
|
| 83 | + | ||
| 84 | + | /* Like `check`, but also advances the parser if it matches. */ |
|
| 85 | + | static bool consume(parser_t *p, tokenclass_t cls) { |
|
| 86 | + | if (check(p, cls)) { |
|
| 87 | + | advance(p); |
|
| 88 | + | return true; |
|
| 89 | + | } |
|
| 90 | + | return false; |
|
| 91 | + | } |
|
| 92 | + | ||
| 93 | + | /* Like `consume`, but report an error if it doesn't match. */ |
|
| 94 | + | __nodiscard static bool expect( |
|
| 95 | + | parser_t *p, tokenclass_t cls, const char *message |
|
| 96 | + | ) { |
|
| 97 | + | if (consume(p, cls)) { |
|
| 98 | + | return true; |
|
| 99 | + | } |
|
| 100 | + | error(p, message); |
|
| 101 | + | ||
| 102 | + | return false; |
|
| 103 | + | } |
|
| 104 | + | ||
| 105 | + | /* Allocate a new AST node. */ |
|
| 106 | + | static node_t *node(parser_t *p, nodeclass_t cls) { |
|
| 107 | + | if (p->nnodes >= MAX_NODES) { |
|
| 108 | + | abort(); |
|
| 109 | + | } |
|
| 110 | + | node_t *n = &p->nodes[p->nnodes++]; |
|
| 111 | + | n->cls = cls; |
|
| 112 | + | n->type = NULL; |
|
| 113 | + | n->sym = NULL; |
|
| 114 | + | n->offset = p->current.position; |
|
| 115 | + | n->length = p->current.length; |
|
| 116 | + | n->file = p->scanner.file; |
|
| 117 | + | ||
| 118 | + | return n; |
|
| 119 | + | } |
|
| 120 | + | ||
| 121 | + | /* Parse a type annotation. |
|
| 122 | + | * Eg. `i32` or `[i32; 12]` */ |
|
| 123 | + | static node_t *parse_type(parser_t *p) { |
|
| 124 | + | /* Parse optional types. */ |
|
| 125 | + | if (p->current.cls == T_QUESTION) { |
|
| 126 | + | node_t *opt = node(p, NODE_TYPE); |
|
| 127 | + | advance(p); /* Consume `?`. */ |
|
| 128 | + | ||
| 129 | + | node_t *elem_type = parse_type(p); |
|
| 130 | + | if (!elem_type) |
|
| 131 | + | return NULL; |
|
| 132 | + | ||
| 133 | + | opt->val.type.tclass = TYPE_OPT; |
|
| 134 | + | opt->val.type.elem_type = elem_type; |
|
| 135 | + | ||
| 136 | + | return opt; |
|
| 137 | + | } |
|
| 138 | + | ||
| 139 | + | /* Parse pointer types and slice types. */ |
|
| 140 | + | if (p->current.cls == T_STAR) { |
|
| 141 | + | advance(p); /* Consume `*`. */ |
|
| 142 | + | ||
| 143 | + | /* Consume `mut` */ |
|
| 144 | + | bool mut = consume(p, T_MUT); |
|
| 145 | + | ||
| 146 | + | /* Parse slice types like `*[i32]` or `*mut [i32]` */ |
|
| 147 | + | if (p->current.cls == T_LBRACKET) { |
|
| 148 | + | node_t *slice = node(p, NODE_TYPE); |
|
| 149 | + | advance(p); /* Consume `[`. */ |
|
| 150 | + | ||
| 151 | + | node_t *elem_type = parse_type(p); |
|
| 152 | + | if (!elem_type) |
|
| 153 | + | return NULL; |
|
| 154 | + | ||
| 155 | + | if (!expect(p, T_RBRACKET, "expected `]` after slice element type")) |
|
| 156 | + | return NULL; |
|
| 157 | + | ||
| 158 | + | slice->val.type.tclass = TYPE_SLICE; |
|
| 159 | + | slice->val.type.elem_type = elem_type; |
|
| 160 | + | slice->val.type.info.slice.mut = mut; |
|
| 161 | + | ||
| 162 | + | return slice; |
|
| 163 | + | } |
|
| 164 | + | ||
| 165 | + | /* Otherwise it's a pointer type like `*i32` or `*mut i32` */ |
|
| 166 | + | node_t *ptr = node(p, NODE_TYPE); |
|
| 167 | + | node_t *elem_type = parse_type(p); |
|
| 168 | + | if (!elem_type) |
|
| 169 | + | return NULL; |
|
| 170 | + | ||
| 171 | + | ptr->val.type.tclass = TYPE_PTR; |
|
| 172 | + | ptr->val.type.elem_type = elem_type; |
|
| 173 | + | ptr->val.type.info.ptr.mut = mut; |
|
| 174 | + | ||
| 175 | + | return ptr; |
|
| 176 | + | } |
|
| 177 | + | ||
| 178 | + | /* Parse array types. */ |
|
| 179 | + | if (p->current.cls == T_LBRACKET) { |
|
| 180 | + | advance(p); /* Consume `[`. */ |
|
| 181 | + | ||
| 182 | + | /* Get the element type. */ |
|
| 183 | + | node_t *elem_type = parse_type(p); |
|
| 184 | + | if (!elem_type) |
|
| 185 | + | return NULL; |
|
| 186 | + | ||
| 187 | + | /* Expect a semicolon separator. */ |
|
| 188 | + | if (!expect(p, T_SEMICOLON, "expected `;` in array type")) |
|
| 189 | + | return NULL; |
|
| 190 | + | ||
| 191 | + | /* Parse the array length. */ |
|
| 192 | + | node_t *length = parse_expr(p); |
|
| 193 | + | if (!length) { |
|
| 194 | + | error(p, "expected array size expression"); |
|
| 195 | + | return NULL; |
|
| 196 | + | } |
|
| 197 | + | /* Expect the closing bracket */ |
|
| 198 | + | if (!expect(p, T_RBRACKET, "expected `]` after array size")) |
|
| 199 | + | return NULL; |
|
| 200 | + | ||
| 201 | + | node_t *ary = node(p, NODE_TYPE); |
|
| 202 | + | ary->val.type.tclass = TYPE_ARRAY; |
|
| 203 | + | ary->val.type.elem_type = elem_type; |
|
| 204 | + | ary->val.type.info.array.length = length; |
|
| 205 | + | ||
| 206 | + | return ary; |
|
| 207 | + | } |
|
| 208 | + | ||
| 209 | + | /* Type identifiers are treated differently, as a concrete type cannot |
|
| 210 | + | * yet be assigned. */ |
|
| 211 | + | if (p->current.cls == T_IDENT || p->current.cls == T_SUPER) { |
|
| 212 | + | node_t *path = |
|
| 213 | + | parse_scope_segment(p, "expected type identifier or `super`"); |
|
| 214 | + | if (!path) |
|
| 215 | + | return NULL; |
|
| 216 | + | ||
| 217 | + | while (consume(p, T_COLON_COLON)) { |
|
| 218 | + | node_t *next = |
|
| 219 | + | parse_scope_segment(p, "expected identifier name after `::`"); |
|
| 220 | + | if (!next) |
|
| 221 | + | return NULL; |
|
| 222 | + | ||
| 223 | + | node_t *scope = node(p, NODE_SCOPE); |
|
| 224 | + | scope->val.access.lval = path; |
|
| 225 | + | scope->val.access.rval = next; |
|
| 226 | + | path = scope; |
|
| 227 | + | } |
|
| 228 | + | return path; |
|
| 229 | + | } |
|
| 230 | + | node_t *n = node(p, NODE_TYPE); |
|
| 231 | + | ||
| 232 | + | switch (p->current.cls) { |
|
| 233 | + | case T_I8: |
|
| 234 | + | advance(p); |
|
| 235 | + | n->val.type.tclass = TYPE_I8; |
|
| 236 | + | return n; |
|
| 237 | + | case T_I16: |
|
| 238 | + | advance(p); |
|
| 239 | + | n->val.type.tclass = TYPE_I16; |
|
| 240 | + | return n; |
|
| 241 | + | case T_I32: |
|
| 242 | + | advance(p); |
|
| 243 | + | n->val.type.tclass = TYPE_I32; |
|
| 244 | + | return n; |
|
| 245 | + | case T_U8: |
|
| 246 | + | advance(p); |
|
| 247 | + | n->val.type.tclass = TYPE_U8; |
|
| 248 | + | return n; |
|
| 249 | + | case T_U16: |
|
| 250 | + | advance(p); |
|
| 251 | + | n->val.type.tclass = TYPE_U16; |
|
| 252 | + | return n; |
|
| 253 | + | case T_U32: |
|
| 254 | + | advance(p); |
|
| 255 | + | n->val.type.tclass = TYPE_U32; |
|
| 256 | + | return n; |
|
| 257 | + | case T_BOOL: |
|
| 258 | + | advance(p); |
|
| 259 | + | n->val.type.tclass = TYPE_BOOL; |
|
| 260 | + | return n; |
|
| 261 | + | case T_VOID: |
|
| 262 | + | advance(p); |
|
| 263 | + | n->val.type.tclass = TYPE_VOID; |
|
| 264 | + | return n; |
|
| 265 | + | case T_OPAQUE: |
|
| 266 | + | advance(p); |
|
| 267 | + | n->val.type.tclass = TYPE_OPAQUE; |
|
| 268 | + | return n; |
|
| 269 | + | case T_FN: { |
|
| 270 | + | advance(p); /* consume `fn` */ |
|
| 271 | + | ||
| 272 | + | if (!expect(p, T_LPAREN, "expected `(` after `fn`")) |
|
| 273 | + | return NULL; |
|
| 274 | + | ||
| 275 | + | n->val.type.tclass = TYPE_FN; |
|
| 276 | + | n->val.type.info.fn.params = nodespan_alloc(p, MAX_FN_PARAMS); |
|
| 277 | + | n->val.type.info.fn.ret = NULL; |
|
| 278 | + | n->val.type.info.fn.throws = nodespan_alloc(p, MAX_FN_THROWS); |
|
| 279 | + | ||
| 280 | + | /* Parse parameter types */ |
|
| 281 | + | if (!check(p, T_RPAREN)) { |
|
| 282 | + | node_t *param = NULL; |
|
| 283 | + | ||
| 284 | + | do { |
|
| 285 | + | if (n->val.type.info.fn.params.len >= MAX_FN_PARAMS) { |
|
| 286 | + | error(p, "too many function pointer parameters"); |
|
| 287 | + | return NULL; |
|
| 288 | + | } |
|
| 289 | + | if (!(param = parse_type(p))) { |
|
| 290 | + | return NULL; |
|
| 291 | + | } |
|
| 292 | + | nodespan_push(p, &n->val.type.info.fn.params, param); |
|
| 293 | + | } while (consume(p, T_COMMA)); |
|
| 294 | + | } |
|
| 295 | + | if (!expect( |
|
| 296 | + | p, T_RPAREN, "expected `)` after function pointer parameters" |
|
| 297 | + | )) |
|
| 298 | + | return NULL; |
|
| 299 | + | ||
| 300 | + | /* Parse return type */ |
|
| 301 | + | if (consume(p, T_ARROW)) { |
|
| 302 | + | if (!(n->val.type.info.fn.ret = parse_type(p))) { |
|
| 303 | + | return NULL; |
|
| 304 | + | } |
|
| 305 | + | } |
|
| 306 | + | ||
| 307 | + | if (consume(p, T_THROWS)) { |
|
| 308 | + | if (!expect(p, T_LPAREN, "expected `(` after `throws`")) |
|
| 309 | + | return NULL; |
|
| 310 | + | ||
| 311 | + | if (!check(p, T_RPAREN)) { |
|
| 312 | + | do { |
|
| 313 | + | if (n->val.type.info.fn.throws.len >= MAX_FN_THROWS) { |
|
| 314 | + | error(p, "maximum number of thrown types exceeded"); |
|
| 315 | + | return NULL; |
|
| 316 | + | } |
|
| 317 | + | ||
| 318 | + | node_t *thrown = parse_type(p); |
|
| 319 | + | if (!thrown) |
|
| 320 | + | return NULL; |
|
| 321 | + | ||
| 322 | + | nodespan_push(p, &n->val.type.info.fn.throws, thrown); |
|
| 323 | + | } while (consume(p, T_COMMA)); |
|
| 324 | + | } |
|
| 325 | + | ||
| 326 | + | if (!expect(p, T_RPAREN, "expected `)` after throws clause")) |
|
| 327 | + | return NULL; |
|
| 328 | + | } |
|
| 329 | + | return n; |
|
| 330 | + | } |
|
| 331 | + | default: |
|
| 332 | + | error(p, "expected type annotation, eg. `i32`, `bool`, etc."); |
|
| 333 | + | return NULL; |
|
| 334 | + | } |
|
| 335 | + | } |
|
| 336 | + | ||
| 337 | + | /* Parse primary expressions. */ |
|
| 338 | + | static node_t *parse_array_literal(parser_t *p) { |
|
| 339 | + | node_t *n = NULL; |
|
| 340 | + | ||
| 341 | + | if (check(p, T_RBRACKET)) { /* Empty array `[]` */ |
|
| 342 | + | n = node(p, NODE_ARRAY_LIT); |
|
| 343 | + | n->val.array_lit.elems = (nodespan_t){ 0 }; |
|
| 344 | + | } else { |
|
| 345 | + | node_t *expr = parse_expr(p); |
|
| 346 | + | if (!expr) |
|
| 347 | + | return NULL; |
|
| 348 | + | ||
| 349 | + | /* Check if this is a repeat array [value; count] */ |
|
| 350 | + | if (consume(p, T_SEMICOLON)) { |
|
| 351 | + | n = node(p, NODE_ARRAY_REPEAT_LIT); |
|
| 352 | + | n->val.array_repeat_lit.value = expr; |
|
| 353 | + | n->val.array_repeat_lit.count = parse_expr(p); |
|
| 354 | + | ||
| 355 | + | if (!n->val.array_repeat_lit.count) |
|
| 356 | + | return NULL; |
|
| 357 | + | } else { |
|
| 358 | + | /* Regular array literal [a, b, ...] */ |
|
| 359 | + | n = node(p, NODE_ARRAY_LIT); |
|
| 360 | + | n->val.array_lit.elems = (nodespan_t){ 0 }; |
|
| 361 | + | nodespan_push(p, &n->val.array_lit.elems, expr); |
|
| 362 | + | ||
| 363 | + | /* Continue parsing remaining elements */ |
|
| 364 | + | while (consume(p, T_COMMA) && !check(p, T_RBRACKET)) { |
|
| 365 | + | node_t *elem = parse_expr(p); |
|
| 366 | + | if (!elem) |
|
| 367 | + | return NULL; |
|
| 368 | + | ||
| 369 | + | nodespan_push(p, &n->val.array_lit.elems, elem); |
|
| 370 | + | } |
|
| 371 | + | } |
|
| 372 | + | } |
|
| 373 | + | if (!expect(p, T_RBRACKET, "expected `]` after array elements")) |
|
| 374 | + | return NULL; |
|
| 375 | + | ||
| 376 | + | return n; |
|
| 377 | + | } |
|
| 378 | + | ||
| 379 | + | static node_t *parse_builtin(parser_t *p) { |
|
| 380 | + | node_t *n = node(p, NODE_BUILTIN); |
|
| 381 | + | ||
| 382 | + | /* Token is @identifier, skip the '@' to get the name. */ |
|
| 383 | + | const char *name = p->current.start + 1; |
|
| 384 | + | usize length = p->current.length - 1; |
|
| 385 | + | ||
| 386 | + | advance(p); /* consume `@identifier` */ |
|
| 387 | + | ||
| 388 | + | builtin_kind_t kind; |
|
| 389 | + | ||
| 390 | + | if (!strncmp(name, "sizeOf", 6)) { |
|
| 391 | + | kind = BUILTIN_SIZE_OF; |
|
| 392 | + | } else if (!strncmp(name, "alignOf", 7)) { |
|
| 393 | + | kind = BUILTIN_ALIGN_OF; |
|
| 394 | + | } else if (!strncmp(name, "sliceOf", 7)) { |
|
| 395 | + | kind = BUILTIN_SLICE_OF; |
|
| 396 | + | } else { |
|
| 397 | + | error(p, "unknown builtin `@%.*s`", (int)length, name); |
|
| 398 | + | return NULL; |
|
| 399 | + | } |
|
| 400 | + | if (!expect(p, T_LPAREN, "expected `(` after builtin name")) |
|
| 401 | + | return NULL; |
|
| 402 | + | ||
| 403 | + | n->val.builtin.kind = kind; |
|
| 404 | + | n->val.builtin.args = (nodespan_t){ 0 }; |
|
| 405 | + | ||
| 406 | + | /* @sliceOf takes two expression arguments: @sliceOf(ptr, len) */ |
|
| 407 | + | if (kind == BUILTIN_SLICE_OF) { |
|
| 408 | + | parse_ctx_t prev = p->context; |
|
| 409 | + | p->context = PARSE_CTX_NORMAL; |
|
| 410 | + | ||
| 411 | + | node_t *ptr_expr = parse_expr(p); |
|
| 412 | + | if (!ptr_expr) |
|
| 413 | + | return NULL; |
|
| 414 | + | nodespan_push(p, &n->val.builtin.args, ptr_expr); |
|
| 415 | + | ||
| 416 | + | if (!expect( |
|
| 417 | + | p, T_COMMA, "expected `,` after first argument to @sliceOf" |
|
| 418 | + | )) |
|
| 419 | + | return NULL; |
|
| 420 | + | ||
| 421 | + | node_t *len_expr = parse_expr(p); |
|
| 422 | + | if (!len_expr) |
|
| 423 | + | return NULL; |
|
| 424 | + | nodespan_push(p, &n->val.builtin.args, len_expr); |
|
| 425 | + | ||
| 426 | + | p->context = prev; |
|
| 427 | + | } else { |
|
| 428 | + | /* @sizeOf and @alignOf take type arguments only. */ |
|
| 429 | + | node_t *type_arg = parse_type(p); |
|
| 430 | + | if (!type_arg) |
|
| 431 | + | return NULL; |
|
| 432 | + | nodespan_push(p, &n->val.builtin.args, type_arg); |
|
| 433 | + | } |
|
| 434 | + | ||
| 435 | + | if (!expect(p, T_RPAREN, "expected `)` after builtin argument")) |
|
| 436 | + | return NULL; |
|
| 437 | + | ||
| 438 | + | return n; |
|
| 439 | + | } |
|
| 440 | + | ||
| 441 | + | static node_t *parse_primary(parser_t *p) { |
|
| 442 | + | node_t *n; |
|
| 443 | + | ||
| 444 | + | switch (p->current.cls) { |
|
| 445 | + | case T_LBRACKET: /* Array literal [a, b, c] */ |
|
| 446 | + | advance(p); |
|
| 447 | + | return parse_array_literal(p); |
|
| 448 | + | ||
| 449 | + | case T_NOT: /* Unary not operator */ |
|
| 450 | + | n = node(p, NODE_UNOP); |
|
| 451 | + | n->val.unop.op = OP_NOT; |
|
| 452 | + | advance(p); |
|
| 453 | + | ||
| 454 | + | if (!(n->val.unop.expr = parse_primary(p))) |
|
| 455 | + | return NULL; |
|
| 456 | + | ||
| 457 | + | return n; |
|
| 458 | + | ||
| 459 | + | case T_RECORD: { |
|
| 460 | + | advance(p); /* consume `record` */ |
|
| 461 | + | ||
| 462 | + | node_t *rtype = parse_record_type(p); |
|
| 463 | + | if (!rtype) |
|
| 464 | + | return NULL; |
|
| 465 | + | ||
| 466 | + | if (p->context == PARSE_CTX_NORMAL && consume(p, T_LBRACE)) { |
|
| 467 | + | return parse_record_lit(p, rtype); |
|
| 468 | + | } |
|
| 469 | + | if (p->context == PARSE_CTX_NORMAL) { |
|
| 470 | + | error(p, "expected `{` after anonymous record type"); |
|
| 471 | + | return NULL; |
|
| 472 | + | } |
|
| 473 | + | return rtype; |
|
| 474 | + | } |
|
| 475 | + | ||
| 476 | + | case T_LBRACE: |
|
| 477 | + | if (p->context == PARSE_CTX_CONDITION) { |
|
| 478 | + | error(p, "unexpected `{` in this context"); |
|
| 479 | + | return NULL; |
|
| 480 | + | } |
|
| 481 | + | advance(p); /* consume `{` */ |
|
| 482 | + | ||
| 483 | + | return parse_record_lit(p, NULL); |
|
| 484 | + | ||
| 485 | + | case T_MINUS: /* Unary negation operator */ |
|
| 486 | + | n = node(p, NODE_UNOP); |
|
| 487 | + | n->val.unop.op = OP_NEG; |
|
| 488 | + | advance(p); |
|
| 489 | + | ||
| 490 | + | if (!(n->val.unop.expr = parse_primary(p))) |
|
| 491 | + | return NULL; |
|
| 492 | + | ||
| 493 | + | return n; |
|
| 494 | + | ||
| 495 | + | case T_TILDE: /* Bitwise NOT operator */ |
|
| 496 | + | n = node(p, NODE_UNOP); |
|
| 497 | + | n->val.unop.op = OP_BNOT; |
|
| 498 | + | advance(p); |
|
| 499 | + | ||
| 500 | + | if (!(n->val.unop.expr = parse_primary(p))) |
|
| 501 | + | return NULL; |
|
| 502 | + | ||
| 503 | + | return n; |
|
| 504 | + | ||
| 505 | + | case T_AMP: |
|
| 506 | + | n = node(p, NODE_REF); |
|
| 507 | + | advance(p); |
|
| 508 | + | ||
| 509 | + | n->val.ref.mut = consume(p, T_MUT); |
|
| 510 | + | ||
| 511 | + | if (!(n->val.ref.target = parse_primary(p))) |
|
| 512 | + | return NULL; |
|
| 513 | + | ||
| 514 | + | return n; |
|
| 515 | + | ||
| 516 | + | case T_STAR: |
|
| 517 | + | n = node(p, NODE_UNOP); |
|
| 518 | + | advance(p); |
|
| 519 | + | ||
| 520 | + | n->val.unop.op = OP_DEREF; |
|
| 521 | + | if (!(n->val.unop.expr = parse_primary(p))) |
|
| 522 | + | return NULL; |
|
| 523 | + | ||
| 524 | + | return n; |
|
| 525 | + | ||
| 526 | + | case T_NUMBER: |
|
| 527 | + | n = node(p, NODE_NUMBER); |
|
| 528 | + | advance(p); |
|
| 529 | + | ||
| 530 | + | n->val.number.text = p->previous.start; |
|
| 531 | + | n->val.number.text_len = p->previous.length; |
|
| 532 | + | ||
| 533 | + | if (check(p, T_DOT_DOT)) { |
|
| 534 | + | return parse_postfix(p, n); |
|
| 535 | + | } |
|
| 536 | + | return n; |
|
| 537 | + | ||
| 538 | + | case T_CHAR: |
|
| 539 | + | n = node(p, NODE_CHAR); |
|
| 540 | + | advance(p); |
|
| 541 | + | ||
| 542 | + | if (p->previous.start[1] == '\\') { |
|
| 543 | + | switch (p->previous.start[2]) { |
|
| 544 | + | case 'n': |
|
| 545 | + | n->val.char_lit = '\n'; |
|
| 546 | + | break; |
|
| 547 | + | case 't': |
|
| 548 | + | n->val.char_lit = '\t'; |
|
| 549 | + | break; |
|
| 550 | + | case 'r': |
|
| 551 | + | n->val.char_lit = '\r'; |
|
| 552 | + | break; |
|
| 553 | + | case '\'': |
|
| 554 | + | n->val.char_lit = '\''; |
|
| 555 | + | break; |
|
| 556 | + | case '\\': |
|
| 557 | + | n->val.char_lit = '\\'; |
|
| 558 | + | break; |
|
| 559 | + | default: |
|
| 560 | + | abort(); |
|
| 561 | + | } |
|
| 562 | + | } else { |
|
| 563 | + | n->val.char_lit = p->previous.start[1]; |
|
| 564 | + | } |
|
| 565 | + | if (check(p, T_DOT_DOT)) { |
|
| 566 | + | return parse_postfix(p, n); |
|
| 567 | + | } |
|
| 568 | + | return n; |
|
| 569 | + | ||
| 570 | + | case T_STRING: { |
|
| 571 | + | n = node(p, NODE_STRING); |
|
| 572 | + | advance(p); |
|
| 573 | + | ||
| 574 | + | /* Account for quotes. */ |
|
| 575 | + | const char *data = p->previous.start + 1; |
|
| 576 | + | usize len = p->previous.length - 2; |
|
| 577 | + | ||
| 578 | + | /* Intern string. This escapes the string properly and |
|
| 579 | + | * NULL-terminates it. */ |
|
| 580 | + | n->val.string_lit.data = strings_alloc_len(data, len); |
|
| 581 | + | n->val.string_lit.length = strlen(n->val.string_lit.data); |
|
| 582 | + | ||
| 583 | + | return n; |
|
| 584 | + | } |
|
| 585 | + | ||
| 586 | + | case T_AT_IDENT: |
|
| 587 | + | return parse_builtin(p); |
|
| 588 | + | ||
| 589 | + | case T_SUPER: |
|
| 590 | + | n = node(p, NODE_SUPER); |
|
| 591 | + | advance(p); |
|
| 592 | + | ||
| 593 | + | if (check(p, T_COLON_COLON)) { |
|
| 594 | + | return parse_postfix(p, n); |
|
| 595 | + | } |
|
| 596 | + | return n; |
|
| 597 | + | ||
| 598 | + | case T_IDENT: |
|
| 599 | + | n = node(p, NODE_IDENT); |
|
| 600 | + | n->val.ident.name = p->current.start; |
|
| 601 | + | n->val.ident.length = p->current.length; |
|
| 602 | + | ||
| 603 | + | advance(p); |
|
| 604 | + | ||
| 605 | + | /* Check for record initializer, eg. `{ x: 1, y: 2 }` */ |
|
| 606 | + | if (p->context == PARSE_CTX_NORMAL && consume(p, T_LBRACE)) { |
|
| 607 | + | return parse_record_lit(p, n); |
|
| 608 | + | } |
|
| 609 | + | ||
| 610 | + | /* Check for field access or array indexing. */ |
|
| 611 | + | if (check(p, T_DOT) || check(p, T_LBRACKET) || |
|
| 612 | + | check(p, T_COLON_COLON) || check(p, T_LPAREN) || |
|
| 613 | + | check(p, T_DOT_DOT)) { |
|
| 614 | + | return parse_postfix(p, n); |
|
| 615 | + | } |
|
| 616 | + | return n; |
|
| 617 | + | ||
| 618 | + | case T_LPAREN: |
|
| 619 | + | advance(p); |
|
| 620 | + | ||
| 621 | + | /* Inside parentheses, we are in a normal parsing context */ |
|
| 622 | + | parse_ctx_t prev = p->context; |
|
| 623 | + | p->context = PARSE_CTX_NORMAL; |
|
| 624 | + | n = parse_expr(p); |
|
| 625 | + | p->context = prev; |
|
| 626 | + | ||
| 627 | + | if (!expect(p, T_RPAREN, "expected closing `)` after expression")) |
|
| 628 | + | return NULL; |
|
| 629 | + | ||
| 630 | + | /* Check for field access or array indexing. */ |
|
| 631 | + | if (check(p, T_DOT) || check(p, T_LBRACKET) || check(p, T_DOT_DOT)) { |
|
| 632 | + | return parse_postfix(p, n); |
|
| 633 | + | } |
|
| 634 | + | return n; |
|
| 635 | + | ||
| 636 | + | case T_TRUE: |
|
| 637 | + | n = node(p, NODE_BOOL); |
|
| 638 | + | n->val.bool_lit = true; |
|
| 639 | + | advance(p); |
|
| 640 | + | ||
| 641 | + | return n; |
|
| 642 | + | ||
| 643 | + | case T_FALSE: |
|
| 644 | + | n = node(p, NODE_BOOL); |
|
| 645 | + | n->val.bool_lit = false; |
|
| 646 | + | advance(p); |
|
| 647 | + | ||
| 648 | + | return n; |
|
| 649 | + | ||
| 650 | + | case T_NIL: |
|
| 651 | + | n = node(p, NODE_NIL); |
|
| 652 | + | advance(p); |
|
| 653 | + | ||
| 654 | + | return n; |
|
| 655 | + | ||
| 656 | + | case T_UNDEF: |
|
| 657 | + | n = node(p, NODE_UNDEF); |
|
| 658 | + | advance(p); |
|
| 659 | + | ||
| 660 | + | return n; |
|
| 661 | + | ||
| 662 | + | case T_UNDERSCORE: |
|
| 663 | + | n = node(p, NODE_PLACEHOLDER); |
|
| 664 | + | advance(p); |
|
| 665 | + | ||
| 666 | + | return n; |
|
| 667 | + | ||
| 668 | + | case T_TRY: { |
|
| 669 | + | advance(p); |
|
| 670 | + | ||
| 671 | + | bool panic = consume(p, T_BANG); |
|
| 672 | + | bool optional = consume(p, T_QUESTION); |
|
| 673 | + | ||
| 674 | + | node_t *expr = parse_try(p, panic, optional); |
|
| 675 | + | if (!expr) |
|
| 676 | + | return NULL; |
|
| 677 | + | ||
| 678 | + | if (check(p, T_DOT) || check(p, T_LBRACKET) || |
|
| 679 | + | check(p, T_COLON_COLON) || check(p, T_LPAREN) || |
|
| 680 | + | check(p, T_DOT_DOT)) { |
|
| 681 | + | return parse_postfix(p, expr); |
|
| 682 | + | } |
|
| 683 | + | return expr; |
|
| 684 | + | } |
|
| 685 | + | ||
| 686 | + | default: |
|
| 687 | + | error( |
|
| 688 | + | p, |
|
| 689 | + | "expected expression, got `%.*s`", |
|
| 690 | + | p->current.length, |
|
| 691 | + | p->current.start |
|
| 692 | + | ); |
|
| 693 | + | return NULL; |
|
| 694 | + | } |
|
| 695 | + | } |
|
| 696 | + | ||
| 697 | + | /* Parse binary expressions with precedence climbing. */ |
|
| 698 | + | static node_t *parse_binary(parser_t *p, node_t *left, int precedence) { |
|
| 699 | + | /* Operator precedence table. */ |
|
| 700 | + | static const struct { |
|
| 701 | + | tokenclass_t tok; |
|
| 702 | + | binop_t op; |
|
| 703 | + | int prec; |
|
| 704 | + | } ops[] = { |
|
| 705 | + | /* Arithmetic operators (higher precedence). */ |
|
| 706 | + | { T_PLUS, OP_ADD, 6 }, |
|
| 707 | + | { T_MINUS, OP_SUB, 6 }, |
|
| 708 | + | { T_STAR, OP_MUL, 7 }, |
|
| 709 | + | { T_SLASH, OP_DIV, 7 }, |
|
| 710 | + | { T_PERCENT, OP_MOD, 7 }, |
|
| 711 | + | /* Shift operators. */ |
|
| 712 | + | { T_LSHIFT, OP_SHL, 5 }, |
|
| 713 | + | { T_RSHIFT, OP_SHR, 5 }, |
|
| 714 | + | /* Bitwise operators. */ |
|
| 715 | + | { T_AMP, OP_BAND, 4 }, |
|
| 716 | + | { T_CARET, OP_XOR, 3 }, |
|
| 717 | + | { T_PIPE, OP_BOR, 2 }, |
|
| 718 | + | /* Comparison operators. */ |
|
| 719 | + | { T_EQ_EQ, OP_EQ, 1 }, |
|
| 720 | + | { T_BANG_EQ, OP_NE, 1 }, |
|
| 721 | + | { T_LT, OP_LT, 1 }, |
|
| 722 | + | { T_GT, OP_GT, 1 }, |
|
| 723 | + | { T_LT_EQ, OP_LE, 1 }, |
|
| 724 | + | { T_GT_EQ, OP_GE, 1 }, |
|
| 725 | + | /* Logical operators (lowest precedence). */ |
|
| 726 | + | { T_AND, OP_AND, 0 }, |
|
| 727 | + | { T_OR, OP_OR, 0 }, |
|
| 728 | + | }; |
|
| 729 | + | ||
| 730 | + | for (;;) { |
|
| 731 | + | int next = -1; |
|
| 732 | + | binop_t op; |
|
| 733 | + | ||
| 734 | + | /* Find matching operator and its precedence. */ |
|
| 735 | + | for (usize i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) { |
|
| 736 | + | if (check(p, ops[i].tok) && ops[i].prec > precedence) { |
|
| 737 | + | if (next == -1 || ops[i].prec < next) { |
|
| 738 | + | next = ops[i].prec; |
|
| 739 | + | op = ops[i].op; |
|
| 740 | + | } |
|
| 741 | + | } |
|
| 742 | + | } |
|
| 743 | + | if (next == -1) |
|
| 744 | + | break; |
|
| 745 | + | ||
| 746 | + | /* Consume the operator token. */ |
|
| 747 | + | advance(p); |
|
| 748 | + | ||
| 749 | + | /* Parse the right operand. */ |
|
| 750 | + | node_t *right = parse_primary(p); |
|
| 751 | + | ||
| 752 | + | if (!right) |
|
| 753 | + | return NULL; |
|
| 754 | + | ||
| 755 | + | /* Handle `as` casts on the right operand */ |
|
| 756 | + | while (check(p, T_AS)) { |
|
| 757 | + | right = parse_as_cast(p, right); |
|
| 758 | + | if (!right) |
|
| 759 | + | return NULL; |
|
| 760 | + | } |
|
| 761 | + | /* Look for higher precedence operators. */ |
|
| 762 | + | for (usize i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) { |
|
| 763 | + | if (check(p, ops[i].tok) && ops[i].prec > next) { |
|
| 764 | + | right = parse_binary(p, right, next); |
|
| 765 | + | break; |
|
| 766 | + | } |
|
| 767 | + | } |
|
| 768 | + | ||
| 769 | + | /* Build binary expression node. */ |
|
| 770 | + | node_t *binop = node(p, NODE_BINOP); |
|
| 771 | + | binop->offset = left->offset; |
|
| 772 | + | binop->length = right->offset + right->length - left->offset; |
|
| 773 | + | binop->val.binop.op = op; |
|
| 774 | + | binop->val.binop.left = left; |
|
| 775 | + | binop->val.binop.right = right; |
|
| 776 | + | left = binop; |
|
| 777 | + | } |
|
| 778 | + | return left; |
|
| 779 | + | } |
|
| 780 | + | ||
| 781 | + | /* Parse an `if let` statement. |
|
| 782 | + | * Syntax: if let x in (expr) { ... } else { ... } |
|
| 783 | + | */ |
|
| 784 | + | static node_t *parse_if_let(parser_t *p) { |
|
| 785 | + | /* Consume 'let' */ |
|
| 786 | + | if (!expect(p, T_LET, "expected 'let'")) |
|
| 787 | + | return NULL; |
|
| 788 | + | ||
| 789 | + | /* Check for `if let case` syntax. */ |
|
| 790 | + | if (check(p, T_CASE)) { |
|
| 791 | + | return parse_if_case(p); |
|
| 792 | + | } |
|
| 793 | + | node_t *n = node(p, NODE_IF_LET); |
|
| 794 | + | ||
| 795 | + | /* Parse identifier or placeholder */ |
|
| 796 | + | if (consume(p, T_UNDERSCORE)) { |
|
| 797 | + | n->val.if_let_stmt.var = node(p, NODE_PLACEHOLDER); |
|
| 798 | + | } else if (expect(p, T_IDENT, "expected identifier or '_' after 'let'")) { |
|
| 799 | + | n->val.if_let_stmt.var = node(p, NODE_IDENT); |
|
| 800 | + | n->val.if_let_stmt.var->val.ident.name = p->previous.start; |
|
| 801 | + | n->val.if_let_stmt.var->val.ident.length = p->previous.length; |
|
| 802 | + | } else { |
|
| 803 | + | return NULL; |
|
| 804 | + | } |
|
| 805 | + | n->val.if_let_stmt.guard = NULL; |
|
| 806 | + | ||
| 807 | + | /* Expect '=' */ |
|
| 808 | + | if (!expect(p, T_EQ, "expected `=` after identifier")) |
|
| 809 | + | return NULL; |
|
| 810 | + | ||
| 811 | + | /* Parse expression yielding an optional. */ |
|
| 812 | + | n->val.if_let_stmt.expr = parse_cond(p); |
|
| 813 | + | if (!n->val.if_let_stmt.expr) |
|
| 814 | + | return NULL; |
|
| 815 | + | ||
| 816 | + | /* Optional boolean guard. */ |
|
| 817 | + | if (consume(p, T_SEMICOLON)) { |
|
| 818 | + | n->val.if_let_stmt.guard = parse_cond(p); |
|
| 819 | + | } |
|
| 820 | + | /* Parse the 'then' branch */ |
|
| 821 | + | n->val.if_let_stmt.lbranch = parse_block(p); |
|
| 822 | + | if (!n->val.if_let_stmt.lbranch) |
|
| 823 | + | return NULL; |
|
| 824 | + | ||
| 825 | + | /* Parse optional 'else' branch */ |
|
| 826 | + | if (consume(p, T_ELSE)) { |
|
| 827 | + | /* Check for `else if` construct. */ |
|
| 828 | + | if (check(p, T_IF)) { |
|
| 829 | + | advance(p); /* Consume the 'if' token. */ |
|
| 830 | + | ||
| 831 | + | /* Create a block to hold the nested if statement. */ |
|
| 832 | + | node_t *block = node(p, NODE_BLOCK); |
|
| 833 | + | block->val.block.stmts = (nodespan_t){ 0 }; |
|
| 834 | + | ||
| 835 | + | node_t *nested_if = parse_if(p); |
|
| 836 | + | ||
| 837 | + | if (!nested_if) |
|
| 838 | + | return NULL; |
|
| 839 | + | ||
| 840 | + | /* Add the nested if as a statement in the block. */ |
|
| 841 | + | nodespan_push(p, &block->val.block.stmts, nested_if); |
|
| 842 | + | /* Set the block as the else branch. */ |
|
| 843 | + | n->val.if_let_stmt.rbranch = block; |
|
| 844 | + | } else { |
|
| 845 | + | /* Regular else clause. */ |
|
| 846 | + | n->val.if_let_stmt.rbranch = parse_block(p); |
|
| 847 | + | } |
|
| 848 | + | } else { |
|
| 849 | + | n->val.if_let_stmt.rbranch = NULL; |
|
| 850 | + | } |
|
| 851 | + | ||
| 852 | + | return n; |
|
| 853 | + | } |
|
| 854 | + | ||
| 855 | + | /* Parse an `if let case` statement. Called after 'let' has been consumed. */ |
|
| 856 | + | static node_t *parse_if_case(parser_t *p) { |
|
| 857 | + | node_t *n = node(p, NODE_IF_CASE); |
|
| 858 | + | ||
| 859 | + | if (!expect(p, T_CASE, "expected 'case'")) |
|
| 860 | + | return NULL; |
|
| 861 | + | ||
| 862 | + | parse_ctx_t pctx = p->context; |
|
| 863 | + | p->context = PARSE_CTX_NORMAL; |
|
| 864 | + | node_t *pattern = parse_primary(p); |
|
| 865 | + | p->context = pctx; |
|
| 866 | + | ||
| 867 | + | if (!pattern) |
|
| 868 | + | return NULL; |
|
| 869 | + | ||
| 870 | + | n->val.if_case_stmt.pattern = pattern; |
|
| 871 | + | ||
| 872 | + | if (!expect(p, T_EQ, "expected `=` after pattern")) |
|
| 873 | + | return NULL; |
|
| 874 | + | ||
| 875 | + | n->val.if_case_stmt.expr = parse_cond(p); |
|
| 876 | + | if (!n->val.if_case_stmt.expr) |
|
| 877 | + | return NULL; |
|
| 878 | + | ||
| 879 | + | n->val.if_case_stmt.guard = NULL; |
|
| 880 | + | ||
| 881 | + | if (consume(p, T_SEMICOLON)) { |
|
| 882 | + | n->val.if_case_stmt.guard = parse_cond(p); |
|
| 883 | + | if (!n->val.if_case_stmt.guard) |
|
| 884 | + | return NULL; |
|
| 885 | + | } |
|
| 886 | + | n->val.if_case_stmt.lbranch = parse_block(p); |
|
| 887 | + | if (!n->val.if_case_stmt.lbranch) |
|
| 888 | + | return NULL; |
|
| 889 | + | ||
| 890 | + | if (consume(p, T_ELSE)) { |
|
| 891 | + | if (check(p, T_IF)) { |
|
| 892 | + | advance(p); |
|
| 893 | + | ||
| 894 | + | node_t *block = node(p, NODE_BLOCK); |
|
| 895 | + | block->val.block.stmts = (nodespan_t){ 0 }; |
|
| 896 | + | ||
| 897 | + | node_t *nested_if = parse_if(p); |
|
| 898 | + | ||
| 899 | + | if (!nested_if) |
|
| 900 | + | return NULL; |
|
| 901 | + | ||
| 902 | + | nodespan_push(p, &block->val.block.stmts, nested_if); |
|
| 903 | + | n->val.if_case_stmt.rbranch = block; |
|
| 904 | + | } else { |
|
| 905 | + | n->val.if_case_stmt.rbranch = parse_block(p); |
|
| 906 | + | if (!n->val.if_case_stmt.rbranch) |
|
| 907 | + | return NULL; |
|
| 908 | + | } |
|
| 909 | + | } else { |
|
| 910 | + | n->val.if_case_stmt.rbranch = NULL; |
|
| 911 | + | } |
|
| 912 | + | return n; |
|
| 913 | + | } |
|
| 914 | + | ||
| 915 | + | /* Parse a `let case` statement: |
|
| 916 | + | * `let case PATTERN = EXPR [; GUARD] else { ... };` */ |
|
| 917 | + | static node_t *parse_let_case(parser_t *p) { |
|
| 918 | + | node_t *n = node(p, NODE_GUARD_CASE); |
|
| 919 | + | usize start = p->previous.position; |
|
| 920 | + | ||
| 921 | + | parse_ctx_t pctx = p->context; |
|
| 922 | + | p->context = PARSE_CTX_NORMAL; |
|
| 923 | + | node_t *pattern = parse_primary(p); |
|
| 924 | + | p->context = pctx; |
|
| 925 | + | ||
| 926 | + | if (!pattern) |
|
| 927 | + | return NULL; |
|
| 928 | + | ||
| 929 | + | n->val.guard_case_stmt.pattern = pattern; |
|
| 930 | + | ||
| 931 | + | if (!expect(p, T_EQ, "expected `=` after pattern")) |
|
| 932 | + | return NULL; |
|
| 933 | + | if (!(n->val.guard_case_stmt.expr = parse_cond(p))) |
|
| 934 | + | return NULL; |
|
| 935 | + | ||
| 936 | + | n->val.guard_case_stmt.guard = NULL; |
|
| 937 | + | ||
| 938 | + | if (consume(p, T_IF)) { |
|
| 939 | + | if (!(n->val.guard_case_stmt.guard = parse_cond(p))) |
|
| 940 | + | return NULL; |
|
| 941 | + | } |
|
| 942 | + | if (!expect(p, T_ELSE, "expected `else` after pattern")) |
|
| 943 | + | return NULL; |
|
| 944 | + | ||
| 945 | + | if (!(n->val.guard_case_stmt.rbranch = parse_stmt_or_block(p))) |
|
| 946 | + | return NULL; |
|
| 947 | + | ||
| 948 | + | n->offset = start; |
|
| 949 | + | n->length = p->previous.position + p->previous.length - start; |
|
| 950 | + | ||
| 951 | + | return n; |
|
| 952 | + | } |
|
| 953 | + | ||
| 954 | + | /* Parse an `if` expression, with optional `else` or `else if` clauses. |
|
| 955 | + | * `else if` is desugared into a nested if inside a block. */ |
|
| 956 | + | static node_t *parse_if(parser_t *p) { |
|
| 957 | + | /* Check for `if let` or `if let case` syntax */ |
|
| 958 | + | if (check(p, T_LET)) { |
|
| 959 | + | return parse_if_let(p); |
|
| 960 | + | } |
|
| 961 | + | /* Regular if statement */ |
|
| 962 | + | node_t *n = node(p, NODE_IF); |
|
| 963 | + | ||
| 964 | + | n->val.if_stmt.cond = parse_cond(p); |
|
| 965 | + | if (!n->val.if_stmt.cond) |
|
| 966 | + | return NULL; |
|
| 967 | + | ||
| 968 | + | n->val.if_stmt.lbranch = parse_block(p); |
|
| 969 | + | if (!n->val.if_stmt.lbranch) |
|
| 970 | + | return NULL; |
|
| 971 | + | ||
| 972 | + | if (consume(p, T_ELSE)) { |
|
| 973 | + | /* Check for `else if` construct. */ |
|
| 974 | + | if (check(p, T_IF)) { |
|
| 975 | + | advance(p); /* Consume the 'if' token. */ |
|
| 976 | + | ||
| 977 | + | /* Create a block to hold the nested if statement. */ |
|
| 978 | + | node_t *block = node(p, NODE_BLOCK); |
|
| 979 | + | block->val.block.stmts = (nodespan_t){ 0 }; |
|
| 980 | + | ||
| 981 | + | node_t *nested_if = parse_if(p); |
|
| 982 | + | ||
| 983 | + | if (!nested_if) |
|
| 984 | + | return NULL; |
|
| 985 | + | ||
| 986 | + | /* Add the nested if as a statement in the block. */ |
|
| 987 | + | nodespan_push(p, &block->val.block.stmts, nested_if); |
|
| 988 | + | /* Set the block as the else branch. */ |
|
| 989 | + | n->val.if_stmt.rbranch = block; |
|
| 990 | + | } else { |
|
| 991 | + | /* Regular else clause. */ |
|
| 992 | + | n->val.if_stmt.rbranch = parse_block(p); |
|
| 993 | + | } |
|
| 994 | + | } else { |
|
| 995 | + | n->val.if_stmt.rbranch = NULL; |
|
| 996 | + | } |
|
| 997 | + | return n; |
|
| 998 | + | } |
|
| 999 | + | ||
| 1000 | + | /* Parse a match statement. */ |
|
| 1001 | + | static node_t *parse_match(parser_t *p) { |
|
| 1002 | + | node_t *n = node(p, NODE_MATCH); |
|
| 1003 | + | n->val.match_stmt.cases = (nodespan_t){ 0 }; |
|
| 1004 | + | ||
| 1005 | + | /* Parse the expression to match on */ |
|
| 1006 | + | if (!(n->val.match_stmt.expr = parse_cond(p))) |
|
| 1007 | + | return NULL; |
|
| 1008 | + | if (!expect(p, T_LBRACE, "expected '{' before match cases")) |
|
| 1009 | + | return NULL; |
|
| 1010 | + | ||
| 1011 | + | /* Parse cases until we reach the end of the match block */ |
|
| 1012 | + | while (!check(p, T_RBRACE) && !check(p, T_EOF)) { |
|
| 1013 | + | node_t *case_node = parse_match_case(p); |
|
| 1014 | + | if (!case_node) |
|
| 1015 | + | return NULL; |
|
| 1016 | + | ||
| 1017 | + | if (!nodespan_push(p, &n->val.match_stmt.cases, case_node)) { |
|
| 1018 | + | error(p, "too many cases in match statement"); |
|
| 1019 | + | return NULL; |
|
| 1020 | + | } |
|
| 1021 | + | ||
| 1022 | + | /* Consume the comma separating cases if present */ |
|
| 1023 | + | bool consumed = consume(p, T_COMMA); |
|
| 1024 | + | (void)consumed; |
|
| 1025 | + | } |
|
| 1026 | + | if (!expect(p, T_RBRACE, "expected '}' after match cases")) |
|
| 1027 | + | return NULL; |
|
| 1028 | + | ||
| 1029 | + | return n; |
|
| 1030 | + | } |
|
| 1031 | + | ||
| 1032 | + | /* Parse a single match case. */ |
|
| 1033 | + | static node_t *parse_match_case(parser_t *p) { |
|
| 1034 | + | node_t *n = node(p, NODE_MATCH_CASE); |
|
| 1035 | + | n->val.match_case.patterns = (nodespan_t){ 0 }; |
|
| 1036 | + | n->val.match_case.guard = NULL; |
|
| 1037 | + | ||
| 1038 | + | if (check(p, T_ELSE)) { |
|
| 1039 | + | /* For the 'else' case, we use zero patterns |
|
| 1040 | + | * to indicate the else case */ |
|
| 1041 | + | advance(p); |
|
| 1042 | + | } else { |
|
| 1043 | + | if (!expect(p, T_CASE, "expected 'case' at start of match case")) |
|
| 1044 | + | return NULL; |
|
| 1045 | + | ||
| 1046 | + | /* Parse one or more comma-separated patterns */ |
|
| 1047 | + | do { |
|
| 1048 | + | parse_ctx_t pctx = p->context; |
|
| 1049 | + | p->context = PARSE_CTX_NORMAL; |
|
| 1050 | + | node_t *pattern = parse_primary(p); |
|
| 1051 | + | p->context = pctx; |
|
| 1052 | + | ||
| 1053 | + | if (!pattern) { |
|
| 1054 | + | return NULL; |
|
| 1055 | + | } |
|
| 1056 | + | /* Add pattern to the case */ |
|
| 1057 | + | if (!nodespan_push(p, &n->val.match_case.patterns, pattern)) { |
|
| 1058 | + | error(p, "too many patterns in case statement"); |
|
| 1059 | + | return NULL; |
|
| 1060 | + | } |
|
| 1061 | + | } while (consume(p, T_COMMA)); /* Continue if there's a comma */ |
|
| 1062 | + | ||
| 1063 | + | if (consume(p, T_IF)) { |
|
| 1064 | + | if (!(n->val.match_case.guard = parse_cond(p))) |
|
| 1065 | + | return NULL; |
|
| 1066 | + | } |
|
| 1067 | + | } |
|
| 1068 | + | if (!expect(p, T_FAT_ARROW, "expected `=>` after case pattern")) |
|
| 1069 | + | return NULL; |
|
| 1070 | + | ||
| 1071 | + | n->val.match_case.body = parse_stmt(p); |
|
| 1072 | + | if (!n->val.match_case.body) |
|
| 1073 | + | return NULL; |
|
| 1074 | + | ||
| 1075 | + | return n; |
|
| 1076 | + | } |
|
| 1077 | + | ||
| 1078 | + | /* Parse a `log` statement. */ |
|
| 1079 | + | /* Parse a record declaration. */ |
|
| 1080 | + | static node_t *parse_record(parser_t *p, node_t *attrs) { |
|
| 1081 | + | node_t *n = node(p, NODE_RECORD); |
|
| 1082 | + | n->val.record_decl.attribs = attrs; |
|
| 1083 | + | n->val.record_decl.fields = (nodespan_t){ 0 }; |
|
| 1084 | + | n->val.record_decl.tuple = false; |
|
| 1085 | + | n->val.record_decl.name = parse_ident(p, "expected record name"); |
|
| 1086 | + | ||
| 1087 | + | if (!n->val.record_decl.name) |
|
| 1088 | + | return NULL; |
|
| 1089 | + | ||
| 1090 | + | if (consume(p, T_LPAREN)) { |
|
| 1091 | + | n->val.record_decl.tuple = true; |
|
| 1092 | + | ||
| 1093 | + | if (!check(p, T_RPAREN)) { |
|
| 1094 | + | do { |
|
| 1095 | + | node_t *field = node(p, NODE_RECORD_FIELD); |
|
| 1096 | + | field->val.var.ident = NULL; /* No field name for tuples */ |
|
| 1097 | + | field->val.var.type = parse_type(p); |
|
| 1098 | + | field->val.var.value = NULL; |
|
| 1099 | + | field->val.var.align = NULL; |
|
| 1100 | + | ||
| 1101 | + | if (!field->val.var.type) |
|
| 1102 | + | return NULL; |
|
| 1103 | + | ||
| 1104 | + | if (!nodespan_push(p, &n->val.record_decl.fields, field)) { |
|
| 1105 | + | error(p, "too many record fields"); |
|
| 1106 | + | return NULL; |
|
| 1107 | + | } |
|
| 1108 | + | } while (consume(p, T_COMMA) && !check(p, T_RPAREN)); |
|
| 1109 | + | } |
|
| 1110 | + | if (!expect(p, T_RPAREN, "expected `)` after record fields")) |
|
| 1111 | + | return NULL; |
|
| 1112 | + | ||
| 1113 | + | /* Unlabeled records must end with semicolon */ |
|
| 1114 | + | if (!expect(p, T_SEMICOLON, "expected `;` after record declaration")) |
|
| 1115 | + | return NULL; |
|
| 1116 | + | } else { |
|
| 1117 | + | /* Record with named fields */ |
|
| 1118 | + | if (!expect(p, T_LBRACE, "expected `{` before record body")) |
|
| 1119 | + | return NULL; |
|
| 1120 | + | ||
| 1121 | + | node_t *field; |
|
| 1122 | + | do { |
|
| 1123 | + | if (!(field = parse_name_type_value(p, NODE_RECORD_FIELD))) |
|
| 1124 | + | return NULL; |
|
| 1125 | + | if (!nodespan_push(p, &n->val.record_decl.fields, field)) { |
|
| 1126 | + | error(p, "too many record fields"); |
|
| 1127 | + | return NULL; |
|
| 1128 | + | } |
|
| 1129 | + | } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); |
|
| 1130 | + | ||
| 1131 | + | if (!expect(p, T_RBRACE, "expected `}`")) |
|
| 1132 | + | return NULL; |
|
| 1133 | + | } |
|
| 1134 | + | return n; |
|
| 1135 | + | } |
|
| 1136 | + | ||
| 1137 | + | static node_t *parse_record_type(parser_t *p) { |
|
| 1138 | + | node_t *n = node(p, NODE_RECORD_TYPE); |
|
| 1139 | + | n->val.record_type.fields = (nodespan_t){ 0 }; |
|
| 1140 | + | ||
| 1141 | + | if (!expect(p, T_LBRACE, "expected `{` after `record`")) |
|
| 1142 | + | return NULL; |
|
| 1143 | + | ||
| 1144 | + | if (!check(p, T_RBRACE)) { |
|
| 1145 | + | do { |
|
| 1146 | + | node_t *field = parse_name_type_value(p, NODE_RECORD_FIELD); |
|
| 1147 | + | if (!field) |
|
| 1148 | + | return NULL; |
|
| 1149 | + | if (field->val.var.value) { |
|
| 1150 | + | error(p, "anonymous record fields cannot have initializers"); |
|
| 1151 | + | return NULL; |
|
| 1152 | + | } |
|
| 1153 | + | if (!nodespan_push(p, &n->val.record_type.fields, field)) { |
|
| 1154 | + | error(p, "too many record fields"); |
|
| 1155 | + | return NULL; |
|
| 1156 | + | } |
|
| 1157 | + | } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); |
|
| 1158 | + | } |
|
| 1159 | + | ||
| 1160 | + | if (!expect(p, T_RBRACE, "expected `}` after record fields")) |
|
| 1161 | + | return NULL; |
|
| 1162 | + | ||
| 1163 | + | return n; |
|
| 1164 | + | } |
|
| 1165 | + | ||
| 1166 | + | /* Parse a single record literal field (labeled or shorthand). */ |
|
| 1167 | + | static node_t *parse_record_lit_field(parser_t *p) { |
|
| 1168 | + | node_t *n = node(p, NODE_RECORD_LIT_FIELD); |
|
| 1169 | + | usize start = p->current.position; |
|
| 1170 | + | ||
| 1171 | + | record_lit_field_t *field = &n->val.record_lit_field; |
|
| 1172 | + | ||
| 1173 | + | /* Field must start with an identifier. */ |
|
| 1174 | + | node_t *name = parse_ident(p, "expected field name"); |
|
| 1175 | + | if (!name) |
|
| 1176 | + | return NULL; |
|
| 1177 | + | ||
| 1178 | + | if (consume(p, T_COLON)) { |
|
| 1179 | + | /* Labeled field: `name: value` */ |
|
| 1180 | + | field->name = name; |
|
| 1181 | + | field->value = parse_expr(p); |
|
| 1182 | + | if (!field->value) |
|
| 1183 | + | return NULL; |
|
| 1184 | + | } else { |
|
| 1185 | + | /* Shorthand syntax: `{ x }` is equivalent to `{ x: x }` */ |
|
| 1186 | + | field->name = name; |
|
| 1187 | + | field->value = name; |
|
| 1188 | + | } |
|
| 1189 | + | n->offset = start; |
|
| 1190 | + | n->length = p->previous.position + p->previous.length - start; |
|
| 1191 | + | ||
| 1192 | + | return n; |
|
| 1193 | + | } |
|
| 1194 | + | ||
| 1195 | + | /* Parse a record literal expression (e.g., Point { x: 1, y: 2 }) |
|
| 1196 | + | * Also handles pattern syntax: Variant { .. } to discard all fields */ |
|
| 1197 | + | static node_t *parse_record_lit(parser_t *p, node_t *type_name) { |
|
| 1198 | + | node_t *n = node(p, NODE_RECORD_LIT); |
|
| 1199 | + | n->val.record_lit.type = type_name; |
|
| 1200 | + | n->val.record_lit.fields = (nodespan_t){ 0 }; |
|
| 1201 | + | n->val.record_lit.etc = false; |
|
| 1202 | + | ||
| 1203 | + | do { |
|
| 1204 | + | /* Check for `..` to discard remaining fields. */ |
|
| 1205 | + | if (consume(p, T_DOT_DOT)) { |
|
| 1206 | + | n->val.record_lit.etc = true; |
|
| 1207 | + | break; |
|
| 1208 | + | } |
|
| 1209 | + | node_t *field = parse_record_lit_field(p); |
|
| 1210 | + | if (!field) |
|
| 1211 | + | return NULL; |
|
| 1212 | + | ||
| 1213 | + | if (!nodespan_push(p, &n->val.record_lit.fields, field)) { |
|
| 1214 | + | error(p, "too many record fields"); |
|
| 1215 | + | return NULL; |
|
| 1216 | + | } |
|
| 1217 | + | ||
| 1218 | + | } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); |
|
| 1219 | + | ||
| 1220 | + | if (!expect(p, T_RBRACE, "expected '}' to end record literal")) |
|
| 1221 | + | return NULL; |
|
| 1222 | + | ||
| 1223 | + | return n; |
|
| 1224 | + | } |
|
| 1225 | + | ||
| 1226 | + | /* Parse a union declaration. |
|
| 1227 | + | * Eg. `union Color { Red, Green, Blue = 5 }` */ |
|
| 1228 | + | static node_t *parse_union(parser_t *p, node_t *attrs) { |
|
| 1229 | + | node_t *n = node(p, NODE_UNION); |
|
| 1230 | + | n->val.union_decl.attribs = attrs; |
|
| 1231 | + | n->val.union_decl.variants = (nodespan_t){ 0 }; |
|
| 1232 | + | n->val.union_decl.name = parse_ident(p, "expected union name"); |
|
| 1233 | + | ||
| 1234 | + | if (!n->val.union_decl.name) |
|
| 1235 | + | return NULL; |
|
| 1236 | + | ||
| 1237 | + | /* Parse union body with { ... } */ |
|
| 1238 | + | if (!expect(p, T_LBRACE, "expected `{` before union body")) |
|
| 1239 | + | return NULL; |
|
| 1240 | + | ||
| 1241 | + | /* Parse union variants. */ |
|
| 1242 | + | if (!check(p, T_RBRACE)) { |
|
| 1243 | + | do { |
|
| 1244 | + | /* Allow optional `case` keyword before variant name. */ |
|
| 1245 | + | consume(p, T_CASE); |
|
| 1246 | + | ||
| 1247 | + | /* Parse variant name. */ |
|
| 1248 | + | node_t *variant_name = parse_ident(p, "expected variant name"); |
|
| 1249 | + | if (!variant_name) |
|
| 1250 | + | return NULL; |
|
| 1251 | + | ||
| 1252 | + | node_t *v = node(p, NODE_UNION_VARIANT); |
|
| 1253 | + | union_variant_t *variant = &v->val.union_variant; |
|
| 1254 | + | ||
| 1255 | + | variant->name = variant_name; |
|
| 1256 | + | variant->type = NULL; |
|
| 1257 | + | variant->value_expr = NULL; |
|
| 1258 | + | ||
| 1259 | + | if (consume(p, T_LPAREN)) { |
|
| 1260 | + | /* Tuple-like variant: Foo(Type) */ |
|
| 1261 | + | node_t *payload = parse_type(p); |
|
| 1262 | + | if (!payload) |
|
| 1263 | + | return NULL; |
|
| 1264 | + | variant->type = payload; |
|
| 1265 | + | if (!expect(p, T_RPAREN, "expected `)` after variant type")) |
|
| 1266 | + | return NULL; |
|
| 1267 | + | } else if (check(p, T_LBRACE)) { |
|
| 1268 | + | /* Struct-like variant: Bar { x: i32, y: i32 } */ |
|
| 1269 | + | node_t *payload = parse_record_type(p); |
|
| 1270 | + | if (!payload) |
|
| 1271 | + | return NULL; |
|
| 1272 | + | variant->type = payload; |
|
| 1273 | + | } else { |
|
| 1274 | + | /* Check for explicit value assignment. */ |
|
| 1275 | + | if (consume(p, T_EQ)) { |
|
| 1276 | + | if (!expect( |
|
| 1277 | + | p, T_NUMBER, "expected integer literal after `=`" |
|
| 1278 | + | )) |
|
| 1279 | + | return NULL; |
|
| 1280 | + | ||
| 1281 | + | token_t literal_tok = p->previous; |
|
| 1282 | + | node_t *literal = node(p, NODE_NUMBER); |
|
| 1283 | + | ||
| 1284 | + | literal->offset = literal_tok.position; |
|
| 1285 | + | literal->length = literal_tok.length; |
|
| 1286 | + | literal->val.number.text = literal_tok.start; |
|
| 1287 | + | literal->val.number.text_len = literal_tok.length; |
|
| 1288 | + | literal->val.number.value = (imm_t){ 0 }; |
|
| 1289 | + | ||
| 1290 | + | variant->value_expr = literal; |
|
| 1291 | + | } else { |
|
| 1292 | + | /* Auto-assign value. */ |
|
| 1293 | + | } |
|
| 1294 | + | } |
|
| 1295 | + | /* Add variant to declaration node. */ |
|
| 1296 | + | if (!nodespan_push(p, &n->val.union_decl.variants, v)) { |
|
| 1297 | + | error(p, "too many union variants"); |
|
| 1298 | + | return NULL; |
|
| 1299 | + | } |
|
| 1300 | + | /* Allow trailing comma. */ |
|
| 1301 | + | } while (consume(p, T_COMMA) && !check(p, T_RBRACE)); |
|
| 1302 | + | } |
|
| 1303 | + | if (!expect(p, T_RBRACE, "expected `}`")) |
|
| 1304 | + | return NULL; |
|
| 1305 | + | ||
| 1306 | + | return n; |
|
| 1307 | + | } |
|
| 1308 | + | ||
| 1309 | + | /* Parse a code block or an expression */ |
|
| 1310 | + | static node_t *parse_stmt_or_block(parser_t *p) { |
|
| 1311 | + | if (check(p, T_LBRACE)) { |
|
| 1312 | + | return parse_block(p); |
|
| 1313 | + | } |
|
| 1314 | + | node_t *stmt = parse_stmt(p); |
|
| 1315 | + | node_t *blk = node(p, NODE_BLOCK); |
|
| 1316 | + | blk->val.block.stmts = (nodespan_t){ 0 }; |
|
| 1317 | + | nodespan_push(p, &blk->val.block.stmts, stmt); |
|
| 1318 | + | ||
| 1319 | + | return blk; |
|
| 1320 | + | } |
|
| 1321 | + | ||
| 1322 | + | /* Parse a code block, enclosed by `{}`. */ |
|
| 1323 | + | static node_t *parse_block(parser_t *p) { |
|
| 1324 | + | if (!expect(p, T_LBRACE, "expected '{' before block")) { |
|
| 1325 | + | return NULL; |
|
| 1326 | + | } |
|
| 1327 | + | node_t *n = node(p, NODE_BLOCK); |
|
| 1328 | + | node_t *stmt; |
|
| 1329 | + | ||
| 1330 | + | /* Parse statements. */ |
|
| 1331 | + | n->val.block.stmts = (nodespan_t){ 0 }; |
|
| 1332 | + | while (!check(p, T_RBRACE) && !check(p, T_EOF)) { |
|
| 1333 | + | usize start = p->current.position; |
|
| 1334 | + | ||
| 1335 | + | if (!(stmt = parse_stmt(p))) |
|
| 1336 | + | return NULL; |
|
| 1337 | + | ||
| 1338 | + | if (!consume_statement_separator(p, stmt, true)) |
|
| 1339 | + | return NULL; |
|
| 1340 | + | ||
| 1341 | + | stmt->offset = start; |
|
| 1342 | + | stmt->length = p->current.position - start; |
|
| 1343 | + | ||
| 1344 | + | if (!nodespan_push(p, &n->val.block.stmts, stmt)) { |
|
| 1345 | + | error(p, "too many statements in block"); |
|
| 1346 | + | return NULL; |
|
| 1347 | + | } |
|
| 1348 | + | } |
|
| 1349 | + | ||
| 1350 | + | if (!expect(p, T_RBRACE, "expected matching '}' after block")) |
|
| 1351 | + | return NULL; |
|
| 1352 | + | ||
| 1353 | + | return n; |
|
| 1354 | + | } |
|
| 1355 | + | ||
| 1356 | + | /* Parse an expression. */ |
|
| 1357 | + | static node_t *parse_expr(parser_t *p) { |
|
| 1358 | + | node_t *lval; |
|
| 1359 | + | ||
| 1360 | + | if ((lval = parse_primary(p)) == NULL) |
|
| 1361 | + | return NULL; |
|
| 1362 | + | ||
| 1363 | + | /* Handle `as` casts before binary operators (higher precedence than |
|
| 1364 | + | * binary ops, lower than unary) */ |
|
| 1365 | + | while (check(p, T_AS)) { |
|
| 1366 | + | lval = parse_as_cast(p, lval); |
|
| 1367 | + | if (!lval) |
|
| 1368 | + | return NULL; |
|
| 1369 | + | } |
|
| 1370 | + | lval = parse_binary(p, lval, -1); |
|
| 1371 | + | ||
| 1372 | + | return lval; |
|
| 1373 | + | } |
|
| 1374 | + | ||
| 1375 | + | /* Parse an assignment statement. */ |
|
| 1376 | + | static node_t *parse_assignment(parser_t *p, node_t *lval) { |
|
| 1377 | + | /* We've already verified this is an assignment. */ |
|
| 1378 | + | if (lval->cls != NODE_IDENT && lval->cls != NODE_ACCESS && |
|
| 1379 | + | lval->cls != NODE_ARRAY_INDEX && |
|
| 1380 | + | !(lval->cls == NODE_UNOP && lval->val.unop.op == OP_DEREF)) { |
|
| 1381 | + | error( |
|
| 1382 | + | p, |
|
| 1383 | + | "can't assign to `%.*s`", |
|
| 1384 | + | lval->length, |
|
| 1385 | + | &p->scanner.source[lval->offset] |
|
| 1386 | + | ); |
|
| 1387 | + | return NULL; |
|
| 1388 | + | } |
|
| 1389 | + | node_t *rval; |
|
| 1390 | + | ||
| 1391 | + | if (!(rval = parse_expr(p))) |
|
| 1392 | + | return NULL; |
|
| 1393 | + | ||
| 1394 | + | node_t *assign = node(p, NODE_ASSIGN); |
|
| 1395 | + | assign->val.assign.lval = lval; |
|
| 1396 | + | assign->val.assign.rval = rval; |
|
| 1397 | + | ||
| 1398 | + | return assign; |
|
| 1399 | + | } |
|
| 1400 | + | ||
| 1401 | + | /* Parse a condition. */ |
|
| 1402 | + | static node_t *parse_cond(parser_t *p) { |
|
| 1403 | + | parse_ctx_t prev = p->context; |
|
| 1404 | + | p->context = PARSE_CTX_CONDITION; |
|
| 1405 | + | ||
| 1406 | + | node_t *cond = parse_expr(p); |
|
| 1407 | + | if (!cond) { |
|
| 1408 | + | p->context = prev; |
|
| 1409 | + | return NULL; |
|
| 1410 | + | } |
|
| 1411 | + | p->context = prev; |
|
| 1412 | + | ||
| 1413 | + | return cond; |
|
| 1414 | + | } |
|
| 1415 | + | ||
| 1416 | + | static bool token_is_stmt_terminator(tokenclass_t cls) { |
|
| 1417 | + | switch (cls) { |
|
| 1418 | + | case T_SEMICOLON: |
|
| 1419 | + | case T_RBRACE: |
|
| 1420 | + | case T_COMMA: |
|
| 1421 | + | case T_CASE: |
|
| 1422 | + | case T_ELSE: |
|
| 1423 | + | case T_EOF: |
|
| 1424 | + | return true; |
|
| 1425 | + | default: |
|
| 1426 | + | return false; |
|
| 1427 | + | } |
|
| 1428 | + | } |
|
| 1429 | + | ||
| 1430 | + | static bool stmt_requires_semicolon(const node_t *stmt) { |
|
| 1431 | + | switch (stmt->cls) { |
|
| 1432 | + | case NODE_IF: |
|
| 1433 | + | case NODE_IF_LET: |
|
| 1434 | + | case NODE_IF_CASE: |
|
| 1435 | + | case NODE_WHILE: |
|
| 1436 | + | case NODE_WHILE_LET: |
|
| 1437 | + | case NODE_LOOP: |
|
| 1438 | + | case NODE_FOR: |
|
| 1439 | + | case NODE_MATCH: |
|
| 1440 | + | case NODE_BLOCK: |
|
| 1441 | + | case NODE_FN: |
|
| 1442 | + | case NODE_RECORD: |
|
| 1443 | + | case NODE_UNION: |
|
| 1444 | + | return false; |
|
| 1445 | + | default: |
|
| 1446 | + | return true; |
|
| 1447 | + | } |
|
| 1448 | + | } |
|
| 1449 | + | ||
| 1450 | + | static bool consume_statement_separator( |
|
| 1451 | + | parser_t *p, node_t *stmt, bool require |
|
| 1452 | + | ) { |
|
| 1453 | + | if (stmt_requires_semicolon(stmt)) { |
|
| 1454 | + | return expect(p, T_SEMICOLON, "expected `;` after statement"); |
|
| 1455 | + | } |
|
| 1456 | + | if (require) |
|
| 1457 | + | consume(p, T_SEMICOLON); |
|
| 1458 | + | return true; |
|
| 1459 | + | } |
|
| 1460 | + | ||
| 1461 | + | /* Parse a `return` statement. */ |
|
| 1462 | + | static node_t *parse_return(parser_t *p) { |
|
| 1463 | + | node_t *n = node(p, NODE_RETURN); |
|
| 1464 | + | ||
| 1465 | + | if (!token_is_stmt_terminator(p->current.cls)) { |
|
| 1466 | + | n->val.return_stmt.value = parse_expr(p); |
|
| 1467 | + | if (!n->val.return_stmt.value) |
|
| 1468 | + | return NULL; |
|
| 1469 | + | } else { |
|
| 1470 | + | n->val.return_stmt.value = NULL; /* Return void. */ |
|
| 1471 | + | } |
|
| 1472 | + | ||
| 1473 | + | return n; |
|
| 1474 | + | } |
|
| 1475 | + | ||
| 1476 | + | static node_t *parse_throw(parser_t *p) { |
|
| 1477 | + | node_t *n = node(p, NODE_THROW); |
|
| 1478 | + | ||
| 1479 | + | if (!(n->val.throw_stmt.expr = parse_expr(p))) |
|
| 1480 | + | return NULL; |
|
| 1481 | + | ||
| 1482 | + | return n; |
|
| 1483 | + | } |
|
| 1484 | + | ||
| 1485 | + | /* Parse a `break` statement. */ |
|
| 1486 | + | static node_t *parse_break(parser_t *p) { |
|
| 1487 | + | node_t *n = node(p, NODE_BREAK); |
|
| 1488 | + | ||
| 1489 | + | return n; |
|
| 1490 | + | } |
|
| 1491 | + | ||
| 1492 | + | /* Parse a `for` statement. */ |
|
| 1493 | + | static node_t *parse_for(parser_t *p) { |
|
| 1494 | + | node_t *n = node(p, NODE_FOR); |
|
| 1495 | + | n->val.for_stmt.rbranch = NULL; |
|
| 1496 | + | n->val.for_stmt.idx = NULL; |
|
| 1497 | + | ||
| 1498 | + | /* Parse the loop variable name or placeholder */ |
|
| 1499 | + | if (!(n->val.for_stmt.var = |
|
| 1500 | + | parse_ident_or_placeholder(p, "expected identifier or '_'"))) |
|
| 1501 | + | return NULL; |
|
| 1502 | + | ||
| 1503 | + | /* Check for optional index variable: `for x, i in xs` */ |
|
| 1504 | + | if (consume(p, T_COMMA)) { |
|
| 1505 | + | /* Parse the index variable name or placeholder */ |
|
| 1506 | + | if (!(n->val.for_stmt.idx = parse_ident_or_placeholder( |
|
| 1507 | + | p, "expected index identifier or '_' after comma" |
|
| 1508 | + | ))) |
|
| 1509 | + | return NULL; |
|
| 1510 | + | } |
|
| 1511 | + | ||
| 1512 | + | if (!expect(p, T_IN, "expected `in`")) |
|
| 1513 | + | return NULL; |
|
| 1514 | + | ||
| 1515 | + | if (!(n->val.for_stmt.iter = parse_cond(p))) |
|
| 1516 | + | return NULL; |
|
| 1517 | + | ||
| 1518 | + | if (!(n->val.for_stmt.body = parse_block(p))) |
|
| 1519 | + | return NULL; |
|
| 1520 | + | ||
| 1521 | + | /* Parse optional `else` clause */ |
|
| 1522 | + | if (consume(p, T_ELSE)) { |
|
| 1523 | + | if (!(n->val.for_stmt.rbranch = parse_block(p))) |
|
| 1524 | + | return NULL; |
|
| 1525 | + | } |
|
| 1526 | + | return n; |
|
| 1527 | + | } |
|
| 1528 | + | ||
| 1529 | + | /* Parse a `while let` statement. */ |
|
| 1530 | + | static node_t *parse_while_let(parser_t *p) { |
|
| 1531 | + | if (!expect(p, T_LET, "expected `let`")) |
|
| 1532 | + | return NULL; |
|
| 1533 | + | ||
| 1534 | + | node_t *n = node(p, NODE_WHILE_LET); |
|
| 1535 | + | ||
| 1536 | + | /* Parse identifier or placeholder */ |
|
| 1537 | + | if (consume(p, T_UNDERSCORE)) { |
|
| 1538 | + | n->val.while_let_stmt.var = node(p, NODE_PLACEHOLDER); |
|
| 1539 | + | } else if (expect(p, T_IDENT, "expected identifier or '_' after `let`")) { |
|
| 1540 | + | n->val.while_let_stmt.var = node(p, NODE_IDENT); |
|
| 1541 | + | n->val.while_let_stmt.var->val.ident.name = p->previous.start; |
|
| 1542 | + | n->val.while_let_stmt.var->val.ident.length = p->previous.length; |
|
| 1543 | + | } else { |
|
| 1544 | + | return NULL; |
|
| 1545 | + | } |
|
| 1546 | + | n->val.while_let_stmt.guard = NULL; |
|
| 1547 | + | n->val.while_let_stmt.rbranch = NULL; |
|
| 1548 | + | ||
| 1549 | + | if (!expect(p, T_EQ, "expected `=` after identifier")) |
|
| 1550 | + | return NULL; |
|
| 1551 | + | ||
| 1552 | + | /* Parse expression yielding an optional. */ |
|
| 1553 | + | n->val.while_let_stmt.expr = parse_cond(p); |
|
| 1554 | + | if (!n->val.while_let_stmt.expr) |
|
| 1555 | + | return NULL; |
|
| 1556 | + | ||
| 1557 | + | /* Optional guard condition after semicolon. */ |
|
| 1558 | + | if (consume(p, T_SEMICOLON)) { |
|
| 1559 | + | if (!(n->val.while_let_stmt.guard = parse_cond(p))) |
|
| 1560 | + | return NULL; |
|
| 1561 | + | } |
|
| 1562 | + | ||
| 1563 | + | /* Parse the loop body and optional 'else' branch */ |
|
| 1564 | + | if (!(n->val.while_let_stmt.body = parse_block(p))) |
|
| 1565 | + | return NULL; |
|
| 1566 | + | if (consume(p, T_ELSE)) { |
|
| 1567 | + | if (!(n->val.while_let_stmt.rbranch = parse_block(p))) |
|
| 1568 | + | return NULL; |
|
| 1569 | + | } |
|
| 1570 | + | return n; |
|
| 1571 | + | } |
|
| 1572 | + | ||
| 1573 | + | /* Parse a `while` statement. */ |
|
| 1574 | + | static node_t *parse_while(parser_t *p) { |
|
| 1575 | + | /* Check for `while let` syntax */ |
|
| 1576 | + | if (check(p, T_LET)) { |
|
| 1577 | + | return parse_while_let(p); |
|
| 1578 | + | } |
|
| 1579 | + | node_t *n = node(p, NODE_WHILE); |
|
| 1580 | + | n->val.while_stmt.rbranch = NULL; |
|
| 1581 | + | ||
| 1582 | + | if (!(n->val.while_stmt.cond = parse_cond(p))) |
|
| 1583 | + | return NULL; |
|
| 1584 | + | if (!(n->val.while_stmt.body = parse_block(p))) |
|
| 1585 | + | return NULL; |
|
| 1586 | + | ||
| 1587 | + | /* Parse optional else clause */ |
|
| 1588 | + | if (consume(p, T_ELSE)) { |
|
| 1589 | + | if (!(n->val.while_stmt.rbranch = parse_block(p))) |
|
| 1590 | + | return NULL; |
|
| 1591 | + | } |
|
| 1592 | + | return n; |
|
| 1593 | + | } |
|
| 1594 | + | ||
| 1595 | + | /* Parse a `loop` statement. */ |
|
| 1596 | + | static node_t *parse_loop(parser_t *p) { |
|
| 1597 | + | node_t *n = node(p, NODE_LOOP); |
|
| 1598 | + | ||
| 1599 | + | if (!(n->val.loop_stmt.body = parse_block(p))) |
|
| 1600 | + | return NULL; |
|
| 1601 | + | ||
| 1602 | + | return n; |
|
| 1603 | + | } |
|
| 1604 | + | ||
| 1605 | + | static node_t *parse_try(parser_t *p, bool panic, bool optional) { |
|
| 1606 | + | node_t *n = node(p, NODE_TRY); |
|
| 1607 | + | ||
| 1608 | + | n->val.try_expr.expr = NULL; |
|
| 1609 | + | n->val.try_expr.catch_expr = NULL; |
|
| 1610 | + | n->val.try_expr.handlers = nodespan_alloc(p, MAX_TRY_CATCHES); |
|
| 1611 | + | n->val.try_expr.panic = panic; |
|
| 1612 | + | n->val.try_expr.optional = optional; |
|
| 1613 | + | ||
| 1614 | + | if (!(n->val.try_expr.expr = parse_primary(p))) |
|
| 1615 | + | return NULL; |
|
| 1616 | + | ||
| 1617 | + | /* Parse catch clause: `catch { ... }` or `catch e { ... }` */ |
|
| 1618 | + | if (consume(p, T_CATCH)) { |
|
| 1619 | + | node_t *catch_node = node(p, NODE_CATCH); |
|
| 1620 | + | catch_node->val.catch_clause.binding = NULL; |
|
| 1621 | + | catch_node->val.catch_clause.body = NULL; |
|
| 1622 | + | catch_node->val.catch_clause.scope = NULL; |
|
| 1623 | + | ||
| 1624 | + | /* Check for error binding: `catch e { ... }` */ |
|
| 1625 | + | if (check(p, T_IDENT)) { |
|
| 1626 | + | node_t *binding = node(p, NODE_IDENT); |
|
| 1627 | + | binding->val.ident.name = p->current.start; |
|
| 1628 | + | binding->val.ident.length = p->current.length; |
|
| 1629 | + | catch_node->val.catch_clause.binding = binding; |
|
| 1630 | + | advance(p); |
|
| 1631 | + | } |
|
| 1632 | + | ||
| 1633 | + | if (!check(p, T_LBRACE)) { |
|
| 1634 | + | error(p, "expected `{` after `catch`"); |
|
| 1635 | + | return NULL; |
|
| 1636 | + | } |
|
| 1637 | + | if (!(catch_node->val.catch_clause.body = parse_block(p))) |
|
| 1638 | + | return NULL; |
|
| 1639 | + | ||
| 1640 | + | n->val.try_expr.catch_expr = catch_node; |
|
| 1641 | + | } |
|
| 1642 | + | return n; |
|
| 1643 | + | } |
|
| 1644 | + | ||
| 1645 | + | static node_t *parse_panic(parser_t *p) { |
|
| 1646 | + | node_t *panic = node(p, NODE_PANIC); |
|
| 1647 | + | ||
| 1648 | + | /* `panic { "Something's wrong!" }` */ |
|
| 1649 | + | if (consume(p, T_LBRACE)) { |
|
| 1650 | + | node_t *expr = parse_expr(p); |
|
| 1651 | + | if (!(panic->val.panic_stmt.message = expr)) |
|
| 1652 | + | return NULL; |
|
| 1653 | + | if (!expect(p, T_RBRACE, "expected closing `}` after expression")) |
|
| 1654 | + | return NULL; |
|
| 1655 | + | ||
| 1656 | + | return panic; |
|
| 1657 | + | } |
|
| 1658 | + | ||
| 1659 | + | if (token_is_stmt_terminator(p->current.cls)) { |
|
| 1660 | + | panic->val.panic_stmt.message = NULL; |
|
| 1661 | + | return panic; |
|
| 1662 | + | } |
|
| 1663 | + | ||
| 1664 | + | node_t *expr = parse_expr(p); |
|
| 1665 | + | if (!(panic->val.panic_stmt.message = expr)) |
|
| 1666 | + | return NULL; |
|
| 1667 | + | ||
| 1668 | + | return panic; |
|
| 1669 | + | } |
|
| 1670 | + | ||
| 1671 | + | /* Parse a name, type, and optional value. |
|
| 1672 | + | * |
|
| 1673 | + | * Used for record field declarations, variable declarations, and record field |
|
| 1674 | + | * initializations. */ |
|
| 1675 | + | static node_t *parse_name_type_value(parser_t *p, nodeclass_t cls) { |
|
| 1676 | + | node_t *n = node(p, cls); |
|
| 1677 | + | usize start = p->current.position; |
|
| 1678 | + | node_t *type = NULL; |
|
| 1679 | + | bool is_typed = false; |
|
| 1680 | + | ||
| 1681 | + | n->val.var.ident = |
|
| 1682 | + | parse_ident_or_placeholder(p, "expected identifier or '_'"); |
|
| 1683 | + | if (!n->val.var.ident) |
|
| 1684 | + | return NULL; |
|
| 1685 | + | ||
| 1686 | + | if (cls == NODE_VAR) { |
|
| 1687 | + | /* Type annotation is optional for variable declarations. */ |
|
| 1688 | + | if (consume(p, T_COLON)) |
|
| 1689 | + | is_typed = true; |
|
| 1690 | + | } else { |
|
| 1691 | + | if (!expect(p, T_COLON, "expected `:` after identifier")) |
|
| 1692 | + | return NULL; |
|
| 1693 | + | is_typed = true; |
|
| 1694 | + | } |
|
| 1695 | + | ||
| 1696 | + | if (is_typed) { |
|
| 1697 | + | type = parse_type(p); |
|
| 1698 | + | if (!type) |
|
| 1699 | + | return NULL; |
|
| 1700 | + | ||
| 1701 | + | if (cls == NODE_VAR) { |
|
| 1702 | + | n->val.var.align = NULL; |
|
| 1703 | + | ||
| 1704 | + | if (consume(p, T_ALIGN)) { |
|
| 1705 | + | if (!expect(p, T_LPAREN, "expected `(` after `align`")) |
|
| 1706 | + | return NULL; |
|
| 1707 | + | ||
| 1708 | + | n->val.var.align = node(p, NODE_ALIGN); |
|
| 1709 | + | n->val.var.align->val.align = parse_expr(p); |
|
| 1710 | + | ||
| 1711 | + | if (!expect(p, T_RPAREN, "expected `)` after expression")) |
|
| 1712 | + | return NULL; |
|
| 1713 | + | } |
|
| 1714 | + | } |
|
| 1715 | + | } else if (cls == NODE_VAR) { |
|
| 1716 | + | n->val.var.align = NULL; |
|
| 1717 | + | } |
|
| 1718 | + | n->val.var.type = type; |
|
| 1719 | + | n->val.var.value = NULL; |
|
| 1720 | + | ||
| 1721 | + | /* Parse the optional value. */ |
|
| 1722 | + | if (consume(p, T_EQ)) { |
|
| 1723 | + | node_t *value = parse_expr(p); |
|
| 1724 | + | if (!value) |
|
| 1725 | + | return NULL; |
|
| 1726 | + | n->val.var.value = value; |
|
| 1727 | + | } |
|
| 1728 | + | /* Set the node location. */ |
|
| 1729 | + | n->offset = start; |
|
| 1730 | + | n->length = p->previous.position + p->previous.length - start; |
|
| 1731 | + | ||
| 1732 | + | return n; |
|
| 1733 | + | } |
|
| 1734 | + | ||
| 1735 | + | /* Parse a variable declaration. */ |
|
| 1736 | + | static node_t *parse_var(parser_t *p, bool mutable) { |
|
| 1737 | + | node_t *var = parse_name_type_value(p, NODE_VAR); |
|
| 1738 | + | ||
| 1739 | + | if (!var) |
|
| 1740 | + | return NULL; |
|
| 1741 | + | ||
| 1742 | + | var->val.var.mutable = mutable; |
|
| 1743 | + | ||
| 1744 | + | /* Parse optional `else` clause. */ |
|
| 1745 | + | if (consume(p, T_ELSE)) { |
|
| 1746 | + | if (mutable) { |
|
| 1747 | + | error(p, "let-else bindings cannot be mutable"); |
|
| 1748 | + | return NULL; |
|
| 1749 | + | } |
|
| 1750 | + | if (!var->val.var.value) { |
|
| 1751 | + | error(p, "let-else requires an initializer"); |
|
| 1752 | + | return NULL; |
|
| 1753 | + | } |
|
| 1754 | + | node_t *rbranch = parse_stmt_or_block(p); |
|
| 1755 | + | if (!rbranch) |
|
| 1756 | + | return NULL; |
|
| 1757 | + | ||
| 1758 | + | var->cls = NODE_GUARD_LET; |
|
| 1759 | + | var->val.guard_let_stmt.var = var->val.var.ident; |
|
| 1760 | + | var->val.guard_let_stmt.expr = var->val.var.value; |
|
| 1761 | + | var->val.guard_let_stmt.rbranch = rbranch; |
|
| 1762 | + | var->length = p->previous.position + p->previous.length - var->offset; |
|
| 1763 | + | ||
| 1764 | + | return var; |
|
| 1765 | + | } |
|
| 1766 | + | var->length = p->previous.position + p->previous.length - var->offset; |
|
| 1767 | + | ||
| 1768 | + | return var; |
|
| 1769 | + | } |
|
| 1770 | + | ||
| 1771 | + | /* Parse a static variable declaration. */ |
|
| 1772 | + | static node_t *parse_static(parser_t *p) { |
|
| 1773 | + | node_t *n = node(p, NODE_STATIC); |
|
| 1774 | + | usize start = p->previous.position; |
|
| 1775 | + | ||
| 1776 | + | node_t *ident = parse_label(p, "expected identifier in static declaration"); |
|
| 1777 | + | if (!ident) |
|
| 1778 | + | return NULL; |
|
| 1779 | + | ||
| 1780 | + | node_t *type = parse_type(p); |
|
| 1781 | + | if (!type) |
|
| 1782 | + | return NULL; |
|
| 1783 | + | ||
| 1784 | + | if (!expect(p, T_EQ, "expected `=` in static declaration")) |
|
| 1785 | + | return NULL; |
|
| 1786 | + | ||
| 1787 | + | node_t *value = parse_expr(p); |
|
| 1788 | + | if (!value) |
|
| 1789 | + | return NULL; |
|
| 1790 | + | ||
| 1791 | + | n->val.static_decl.ident = ident; |
|
| 1792 | + | n->val.static_decl.type = type; |
|
| 1793 | + | n->val.static_decl.value = value; |
|
| 1794 | + | n->offset = start; |
|
| 1795 | + | n->length = p->previous.position + p->previous.length - start; |
|
| 1796 | + | ||
| 1797 | + | return n; |
|
| 1798 | + | } |
|
| 1799 | + | ||
| 1800 | + | /* Parse a constant declaration. */ |
|
| 1801 | + | static node_t *parse_const(parser_t *p) { |
|
| 1802 | + | node_t *var = parse_name_type_value(p, NODE_CONST); |
|
| 1803 | + | ||
| 1804 | + | if (!var) |
|
| 1805 | + | return NULL; |
|
| 1806 | + | ||
| 1807 | + | return var; |
|
| 1808 | + | } |
|
| 1809 | + | ||
| 1810 | + | /* Parse a module use declaration. */ |
|
| 1811 | + | static node_t *parse_use(parser_t *p, node_t *attrs) { |
|
| 1812 | + | usize start = p->current.position; |
|
| 1813 | + | ||
| 1814 | + | /* Parse the first identifier in the path. */ |
|
| 1815 | + | node_t *path = parse_scope_segment(p, "expected module name after 'use'"); |
|
| 1816 | + | if (!path) |
|
| 1817 | + | return NULL; |
|
| 1818 | + | ||
| 1819 | + | /* Track if this is a wildcard import. */ |
|
| 1820 | + | bool wildcard = false; |
|
| 1821 | + | ||
| 1822 | + | /* Continue parsing the dotted path if present. */ |
|
| 1823 | + | while (consume(p, T_COLON_COLON)) { |
|
| 1824 | + | /* Check for wildcard import (e.g., `use foo::*`) */ |
|
| 1825 | + | if (consume(p, T_STAR)) { |
|
| 1826 | + | wildcard = true; |
|
| 1827 | + | break; |
|
| 1828 | + | } |
|
| 1829 | + | ||
| 1830 | + | node_t *n = node(p, NODE_SCOPE); |
|
| 1831 | + | n->val.access.lval = path; |
|
| 1832 | + | ||
| 1833 | + | /* Parse the sub-module name. */ |
|
| 1834 | + | node_t *mod = |
|
| 1835 | + | parse_scope_segment(p, "expected identifier or '*' after '::'"); |
|
| 1836 | + | if (!mod) |
|
| 1837 | + | return NULL; |
|
| 1838 | + | ||
| 1839 | + | n->val.access.rval = mod; |
|
| 1840 | + | path = n; |
|
| 1841 | + | } |
|
| 1842 | + | ||
| 1843 | + | /* Create a use node and wrap the path. */ |
|
| 1844 | + | node_t *use_node = node(p, NODE_USE); |
|
| 1845 | + | use_node->val.use_decl.path = path; |
|
| 1846 | + | use_node->val.use_decl.attribs = attrs; |
|
| 1847 | + | use_node->val.use_decl.wildcard = wildcard; |
|
| 1848 | + | ||
| 1849 | + | /* Set position information. */ |
|
| 1850 | + | use_node->offset = start; |
|
| 1851 | + | use_node->length = p->previous.position + p->previous.length - start; |
|
| 1852 | + | ||
| 1853 | + | return use_node; |
|
| 1854 | + | } |
|
| 1855 | + | ||
| 1856 | + | /* Parse a module declaration. */ |
|
| 1857 | + | static node_t *parse_mod(parser_t *p, node_t *attrs) { |
|
| 1858 | + | usize start = p->current.position; |
|
| 1859 | + | ||
| 1860 | + | node_t *ident = parse_ident(p, "expected module name after 'mod'"); |
|
| 1861 | + | if (!ident) |
|
| 1862 | + | return NULL; |
|
| 1863 | + | node_t *mod_node = node(p, NODE_MOD); |
|
| 1864 | + | mod_node->val.mod_decl.ident = ident; |
|
| 1865 | + | mod_node->val.mod_decl.attribs = attrs; |
|
| 1866 | + | ||
| 1867 | + | mod_node->offset = start; |
|
| 1868 | + | mod_node->length = p->previous.position + p->previous.length - start; |
|
| 1869 | + | ||
| 1870 | + | return mod_node; |
|
| 1871 | + | } |
|
| 1872 | + | ||
| 1873 | + | /* Parse a function parameter. */ |
|
| 1874 | + | static node_t *parse_fn_param(parser_t *p) { |
|
| 1875 | + | /* Create parameter node. */ |
|
| 1876 | + | node_t *param = node(p, NODE_PARAM); |
|
| 1877 | + | node_t *name = parse_label(p, "expected parameter name"); |
|
| 1878 | + | if (!name) |
|
| 1879 | + | return NULL; |
|
| 1880 | + | ||
| 1881 | + | param->val.param.ident = name; |
|
| 1882 | + | ||
| 1883 | + | /* Parse and store parameter type. */ |
|
| 1884 | + | if (!(param->val.param.type = parse_type(p))) |
|
| 1885 | + | return NULL; |
|
| 1886 | + | ||
| 1887 | + | return param; |
|
| 1888 | + | } |
|
| 1889 | + | ||
| 1890 | + | static node_t *parse_module_body(parser_t *p) { |
|
| 1891 | + | node_t *mod = node(p, NODE_MOD_BODY); |
|
| 1892 | + | mod->val.block.stmts = (nodespan_t){ 0 }; |
|
| 1893 | + | ||
| 1894 | + | while (!check(p, T_EOF)) { |
|
| 1895 | + | node_t *stmt; |
|
| 1896 | + | usize start = p->current.position; |
|
| 1897 | + | ||
| 1898 | + | if (!(stmt = parse_stmt(p))) |
|
| 1899 | + | return NULL; |
|
| 1900 | + | ||
| 1901 | + | if (!consume_statement_separator(p, stmt, true)) |
|
| 1902 | + | return NULL; |
|
| 1903 | + | ||
| 1904 | + | stmt->offset = start; |
|
| 1905 | + | stmt->length = p->current.position - start; |
|
| 1906 | + | ||
| 1907 | + | if (!nodespan_push(p, &mod->val.block.stmts, stmt)) { |
|
| 1908 | + | error(p, "too many statements in module"); |
|
| 1909 | + | return NULL; |
|
| 1910 | + | } |
|
| 1911 | + | } |
|
| 1912 | + | return mod; |
|
| 1913 | + | } |
|
| 1914 | + | ||
| 1915 | + | /* Parse a function definition. */ |
|
| 1916 | + | static node_t *parse_fn(parser_t *p, node_t *attrs) { |
|
| 1917 | + | node_t *n = node(p, NODE_FN); |
|
| 1918 | + | node_t *param = NULL; |
|
| 1919 | + | ||
| 1920 | + | /* Parse the function name. */ |
|
| 1921 | + | node_t *name = parse_ident(p, "expected function name"); |
|
| 1922 | + | if (!name) |
|
| 1923 | + | return NULL; |
|
| 1924 | + | ||
| 1925 | + | n->val.fn_decl.ident = name; |
|
| 1926 | + | n->val.fn_decl.params = nodespan_alloc(p, MAX_FN_PARAMS); |
|
| 1927 | + | n->val.fn_decl.throws = nodespan_alloc(p, MAX_FN_THROWS); |
|
| 1928 | + | n->val.fn_decl.attribs = attrs; |
|
| 1929 | + | n->val.fn_decl.body = NULL; |
|
| 1930 | + | ||
| 1931 | + | /* Check if it's an extern function */ |
|
| 1932 | + | bool is_extern = (attrs && attrs->val.attrib & ATTRIB_EXTERN); |
|
| 1933 | + | ||
| 1934 | + | if (!expect(p, T_LPAREN, "expected `(` after function name")) |
|
| 1935 | + | return NULL; |
|
| 1936 | + | ||
| 1937 | + | /* Parse parameters with types */ |
|
| 1938 | + | if (!check(p, T_RPAREN)) { |
|
| 1939 | + | do { |
|
| 1940 | + | if (n->val.fn_decl.params.len >= MAX_FN_PARAMS) { |
|
| 1941 | + | error( |
|
| 1942 | + | p, |
|
| 1943 | + | "maximum number of function parameters (%d) exceeded", |
|
| 1944 | + | MAX_FN_PARAMS |
|
| 1945 | + | ); |
|
| 1946 | + | return NULL; |
|
| 1947 | + | } |
|
| 1948 | + | if (!(param = parse_fn_param(p))) { |
|
| 1949 | + | return NULL; |
|
| 1950 | + | } |
|
| 1951 | + | node_fn_add_param(p, n, param); |
|
| 1952 | + | ||
| 1953 | + | } while (consume(p, T_COMMA)); |
|
| 1954 | + | } |
|
| 1955 | + | if (!expect(p, T_RPAREN, "expected matching `)` after parameters list")) |
|
| 1956 | + | return NULL; |
|
| 1957 | + | ||
| 1958 | + | if (consume(p, T_ARROW)) { |
|
| 1959 | + | if (!(n->val.fn_decl.return_type = parse_type(p))) { |
|
| 1960 | + | return NULL; |
|
| 1961 | + | } |
|
| 1962 | + | } else { |
|
| 1963 | + | n->val.fn_decl.return_type = NULL; |
|
| 1964 | + | } |
|
| 1965 | + | if (consume(p, T_THROWS)) { |
|
| 1966 | + | if (!expect(p, T_LPAREN, "expected `(` after `throws`")) |
|
| 1967 | + | return NULL; |
|
| 1968 | + | ||
| 1969 | + | if (!check(p, T_RPAREN)) { |
|
| 1970 | + | do { |
|
| 1971 | + | if (n->val.fn_decl.throws.len >= MAX_FN_THROWS) { |
|
| 1972 | + | error(p, "maximum number of thrown types exceeded"); |
|
| 1973 | + | return NULL; |
|
| 1974 | + | } |
|
| 1975 | + | node_t *thrown = parse_type(p); |
|
| 1976 | + | if (!thrown) |
|
| 1977 | + | return NULL; |
|
| 1978 | + | ||
| 1979 | + | nodespan_push(p, &n->val.fn_decl.throws, thrown); |
|
| 1980 | + | } while (consume(p, T_COMMA)); |
|
| 1981 | + | } |
|
| 1982 | + | if (!expect(p, T_RPAREN, "expected `)` after throws clause")) |
|
| 1983 | + | return NULL; |
|
| 1984 | + | } |
|
| 1985 | + | ||
| 1986 | + | /* For extern functions, expect semicolon instead of body */ |
|
| 1987 | + | if (is_extern) { |
|
| 1988 | + | if (!expect( |
|
| 1989 | + | p, T_SEMICOLON, "expected `;` after extern function declaration" |
|
| 1990 | + | )) |
|
| 1991 | + | return NULL; |
|
| 1992 | + | } else { |
|
| 1993 | + | if (!(n->val.fn_decl.body = parse_block(p))) |
|
| 1994 | + | return NULL; |
|
| 1995 | + | } |
|
| 1996 | + | return n; |
|
| 1997 | + | } |
|
| 1998 | + | ||
| 1999 | + | /* Try to parse an annotation like `@default`. |
|
| 2000 | + | * Returns true if a known annotation was found and consumed. |
|
| 2001 | + | * Returns false if not an annotation (e.g. @sizeOf) - tokens not consumed. */ |
|
| 2002 | + | static bool try_parse_annotation(parser_t *p, attrib_t *attrs) { |
|
| 2003 | + | if (!check(p, T_AT_IDENT)) |
|
| 2004 | + | return false; |
|
| 2005 | + | ||
| 2006 | + | /* Token is @identifier, skip the '@' to get the name. */ |
|
| 2007 | + | const char *name = p->current.start + 1; |
|
| 2008 | + | usize length = p->current.length - 1; |
|
| 2009 | + | ||
| 2010 | + | if (length == 7 && !strncmp(name, "default", 7)) { |
|
| 2011 | + | advance(p); /* Consume `@default`. */ |
|
| 2012 | + | *attrs |= ATTRIB_DEFAULT; |
|
| 2013 | + | return true; |
|
| 2014 | + | } |
|
| 2015 | + | if (length == 4 && !strncmp(name, "test", 4)) { |
|
| 2016 | + | advance(p); /* Consume `@test`. */ |
|
| 2017 | + | *attrs |= ATTRIB_TEST; |
|
| 2018 | + | return true; |
|
| 2019 | + | } |
|
| 2020 | + | if (length == 9 && !strncmp(name, "intrinsic", 9)) { |
|
| 2021 | + | advance(p); /* Consume `@intrinsic`. */ |
|
| 2022 | + | *attrs |= ATTRIB_INTRINSIC; |
|
| 2023 | + | return true; |
|
| 2024 | + | } |
|
| 2025 | + | /* Not a known annotation - leave for parse_builtin to handle. */ |
|
| 2026 | + | return false; |
|
| 2027 | + | } |
|
| 2028 | + | ||
| 2029 | + | /* Parse statement attributes. */ |
|
| 2030 | + | static node_t *parse_attribs(parser_t *p) { |
|
| 2031 | + | node_t *n = NULL; |
|
| 2032 | + | attrib_t attrs = ATTRIB_NONE; |
|
| 2033 | + | ||
| 2034 | + | for (;;) { |
|
| 2035 | + | if (consume(p, T_PUB)) { |
|
| 2036 | + | if (attrs & ATTRIB_PUB) { |
|
| 2037 | + | error(p, "duplicate `pub` attribute"); |
|
| 2038 | + | return NULL; |
|
| 2039 | + | } |
|
| 2040 | + | attrs |= ATTRIB_PUB; |
|
| 2041 | + | } else if (try_parse_annotation(p, &attrs)) { |
|
| 2042 | + | /* Annotation was consumed, continue. */ |
|
| 2043 | + | } else if (consume(p, T_EXTERN)) { |
|
| 2044 | + | if (attrs & ATTRIB_EXTERN) { |
|
| 2045 | + | error(p, "duplicate `extern` attribute"); |
|
| 2046 | + | return NULL; |
|
| 2047 | + | } |
|
| 2048 | + | attrs |= ATTRIB_EXTERN; |
|
| 2049 | + | } else { |
|
| 2050 | + | break; |
|
| 2051 | + | } |
|
| 2052 | + | } |
|
| 2053 | + | ||
| 2054 | + | if (attrs != ATTRIB_NONE) { |
|
| 2055 | + | n = node(p, NODE_ATTRIBUTE); |
|
| 2056 | + | n->val.attrib = attrs; |
|
| 2057 | + | } |
|
| 2058 | + | return n; |
|
| 2059 | + | } |
|
| 2060 | + | ||
| 2061 | + | /* Parse a statement. */ |
|
| 2062 | + | static node_t *parse_stmt(parser_t *p) { |
|
| 2063 | + | /* Parse any attributes that come before the statement. */ |
|
| 2064 | + | node_t *attrs = parse_attribs(p); |
|
| 2065 | + | ||
| 2066 | + | if (attrs) { |
|
| 2067 | + | switch (p->current.cls) { |
|
| 2068 | + | case T_FN: |
|
| 2069 | + | case T_UNION: |
|
| 2070 | + | case T_RECORD: |
|
| 2071 | + | case T_MOD: |
|
| 2072 | + | case T_CONST: |
|
| 2073 | + | case T_USE: |
|
| 2074 | + | break; |
|
| 2075 | + | default: |
|
| 2076 | + | error(p, "attributes are not allowed in this context"); |
|
| 2077 | + | return NULL; |
|
| 2078 | + | } |
|
| 2079 | + | ||
| 2080 | + | /* Verify extern is only used with functions */ |
|
| 2081 | + | if ((attrs->val.attrib & ATTRIB_EXTERN) && p->current.cls != T_FN) { |
|
| 2082 | + | error( |
|
| 2083 | + | p, "extern attribute is only allowed on function declarations" |
|
| 2084 | + | ); |
|
| 2085 | + | return NULL; |
|
| 2086 | + | } |
|
| 2087 | + | } |
|
| 2088 | + | ||
| 2089 | + | switch (p->current.cls) { |
|
| 2090 | + | case T_LBRACE: |
|
| 2091 | + | return parse_block(p); |
|
| 2092 | + | case T_LET: |
|
| 2093 | + | advance(p); |
|
| 2094 | + | if (consume(p, T_CASE)) { |
|
| 2095 | + | return parse_let_case(p); |
|
| 2096 | + | } |
|
| 2097 | + | if (consume(p, T_MUT)) { |
|
| 2098 | + | return parse_var(p, true); |
|
| 2099 | + | } |
|
| 2100 | + | return parse_var(p, false); |
|
| 2101 | + | case T_STATIC: |
|
| 2102 | + | advance(p); |
|
| 2103 | + | return parse_static(p); |
|
| 2104 | + | case T_CONST: |
|
| 2105 | + | advance(p); |
|
| 2106 | + | return parse_const(p); |
|
| 2107 | + | case T_USE: |
|
| 2108 | + | advance(p); |
|
| 2109 | + | return parse_use(p, attrs); |
|
| 2110 | + | case T_MOD: |
|
| 2111 | + | advance(p); |
|
| 2112 | + | return parse_mod(p, attrs); |
|
| 2113 | + | case T_RETURN: |
|
| 2114 | + | advance(p); |
|
| 2115 | + | return parse_return(p); |
|
| 2116 | + | case T_THROW: |
|
| 2117 | + | advance(p); |
|
| 2118 | + | return parse_throw(p); |
|
| 2119 | + | case T_BREAK: |
|
| 2120 | + | advance(p); |
|
| 2121 | + | return parse_break(p); |
|
| 2122 | + | case T_WHILE: |
|
| 2123 | + | advance(p); |
|
| 2124 | + | return parse_while(p); |
|
| 2125 | + | case T_FOR: |
|
| 2126 | + | advance(p); |
|
| 2127 | + | return parse_for(p); |
|
| 2128 | + | case T_LOOP: |
|
| 2129 | + | advance(p); |
|
| 2130 | + | return parse_loop(p); |
|
| 2131 | + | case T_IF: |
|
| 2132 | + | advance(p); |
|
| 2133 | + | return parse_if(p); |
|
| 2134 | + | case T_MATCH: |
|
| 2135 | + | advance(p); |
|
| 2136 | + | return parse_match(p); |
|
| 2137 | + | case T_FN: |
|
| 2138 | + | advance(p); |
|
| 2139 | + | return parse_fn(p, attrs); |
|
| 2140 | + | case T_UNION: |
|
| 2141 | + | advance(p); |
|
| 2142 | + | return parse_union(p, attrs); |
|
| 2143 | + | case T_RECORD: |
|
| 2144 | + | advance(p); |
|
| 2145 | + | return parse_record(p, attrs); |
|
| 2146 | + | case T_PANIC: |
|
| 2147 | + | advance(p); |
|
| 2148 | + | return parse_panic(p); |
|
| 2149 | + | default: |
|
| 2150 | + | break; |
|
| 2151 | + | } |
|
| 2152 | + | /* Parse an expression as a statement or an assignment statement. */ |
|
| 2153 | + | node_t *expr; |
|
| 2154 | + | ||
| 2155 | + | if ((expr = parse_expr(p)) == NULL) |
|
| 2156 | + | return NULL; |
|
| 2157 | + | ||
| 2158 | + | /* If we see an equals sign, this is an assignment statement */ |
|
| 2159 | + | if (consume(p, T_EQ)) { |
|
| 2160 | + | return parse_assignment(p, expr); |
|
| 2161 | + | } |
|
| 2162 | + | ||
| 2163 | + | /* Create an expression statement node. */ |
|
| 2164 | + | node_t *stmt = node(p, NODE_EXPR_STMT); |
|
| 2165 | + | stmt->val.expr_stmt = expr; |
|
| 2166 | + | ||
| 2167 | + | return stmt; |
|
| 2168 | + | } |
|
| 2169 | + | ||
| 2170 | + | /* Parse a function argument, which may have an optional label. */ |
|
| 2171 | + | static node_t *parse_fn_call_arg(parser_t *p) { |
|
| 2172 | + | usize start = p->current.position; |
|
| 2173 | + | node_t *arg = node(p, NODE_CALL_ARG); |
|
| 2174 | + | ||
| 2175 | + | /* Parse the expression first */ |
|
| 2176 | + | node_t *expr = parse_expr(p); |
|
| 2177 | + | if (!expr) |
|
| 2178 | + | return NULL; |
|
| 2179 | + | ||
| 2180 | + | /* Check if this was an identifier followed by a colon |
|
| 2181 | + | * (making it a label), or the complete expression. */ |
|
| 2182 | + | if (expr->cls == NODE_IDENT && consume(p, T_COLON)) { |
|
| 2183 | + | /* It's a label, parse the actual value expression */ |
|
| 2184 | + | arg->val.call_arg.label = expr; |
|
| 2185 | + | ||
| 2186 | + | if (!(arg->val.call_arg.expr = parse_expr(p))) { |
|
| 2187 | + | return NULL; |
|
| 2188 | + | } |
|
| 2189 | + | } else { |
|
| 2190 | + | arg->val.call_arg.label = NULL; |
|
| 2191 | + | arg->val.call_arg.expr = expr; |
|
| 2192 | + | } |
|
| 2193 | + | arg->offset = start; |
|
| 2194 | + | arg->length = p->previous.position + p->previous.length - start; |
|
| 2195 | + | ||
| 2196 | + | return arg; |
|
| 2197 | + | } |
|
| 2198 | + | ||
| 2199 | + | /* Parse an identifier. */ |
|
| 2200 | + | static node_t *parse_ident(parser_t *p, const char *error) { |
|
| 2201 | + | if (!expect(p, T_IDENT, error)) |
|
| 2202 | + | return NULL; |
|
| 2203 | + | ||
| 2204 | + | node_t *ident = node(p, NODE_IDENT); |
|
| 2205 | + | ||
| 2206 | + | ident->val.ident.name = p->previous.start; |
|
| 2207 | + | ident->val.ident.length = p->previous.length; |
|
| 2208 | + | ||
| 2209 | + | return ident; |
|
| 2210 | + | } |
|
| 2211 | + | ||
| 2212 | + | /* Parse either an identifier or a placeholder ('_'). */ |
|
| 2213 | + | static node_t *parse_ident_or_placeholder(parser_t *p, const char *error) { |
|
| 2214 | + | if (consume(p, T_UNDERSCORE)) { |
|
| 2215 | + | return node(p, NODE_PLACEHOLDER); |
|
| 2216 | + | } |
|
| 2217 | + | return parse_ident(p, error); |
|
| 2218 | + | } |
|
| 2219 | + | ||
| 2220 | + | /* Parse a label. |
|
| 2221 | + | * Returns an identifier node. Expects IDENT followed by COLON. */ |
|
| 2222 | + | static node_t *parse_label(parser_t *p, const char *error) { |
|
| 2223 | + | if (!expect(p, T_IDENT, error)) |
|
| 2224 | + | return NULL; |
|
| 2225 | + | ||
| 2226 | + | node_t *ident = node(p, NODE_IDENT); |
|
| 2227 | + | ||
| 2228 | + | ident->val.ident.name = p->previous.start; |
|
| 2229 | + | ident->val.ident.length = p->previous.length; |
|
| 2230 | + | ||
| 2231 | + | if (!expect(p, T_COLON, "expected ':' after identifier")) |
|
| 2232 | + | return NULL; |
|
| 2233 | + | ||
| 2234 | + | return ident; |
|
| 2235 | + | } |
|
| 2236 | + | ||
| 2237 | + | static node_t *parse_scope_segment(parser_t *p, const char *error) { |
|
| 2238 | + | if (check(p, T_SUPER)) { |
|
| 2239 | + | node_t *super_node = node(p, NODE_SUPER); |
|
| 2240 | + | advance(p); |
|
| 2241 | + | return super_node; |
|
| 2242 | + | } |
|
| 2243 | + | return parse_ident(p, error); |
|
| 2244 | + | } |
|
| 2245 | + | ||
| 2246 | + | static node_t *parse_as_cast(parser_t *p, node_t *expr) { |
|
| 2247 | + | if (!consume(p, T_AS)) |
|
| 2248 | + | return NULL; |
|
| 2249 | + | ||
| 2250 | + | node_t *as = node(p, NODE_AS); |
|
| 2251 | + | as->val.as_expr.expr = expr; |
|
| 2252 | + | ||
| 2253 | + | /* Parse the target type */ |
|
| 2254 | + | node_t *typ = parse_type(p); |
|
| 2255 | + | if (!typ) |
|
| 2256 | + | return NULL; |
|
| 2257 | + | ||
| 2258 | + | as->val.as_expr.type = typ; |
|
| 2259 | + | as->offset = expr->offset; |
|
| 2260 | + | as->length = p->current.position - as->offset; |
|
| 2261 | + | ||
| 2262 | + | return as; |
|
| 2263 | + | } |
|
| 2264 | + | ||
| 2265 | + | /* Parse postfix expressions (field access and array indexing). |
|
| 2266 | + | * |
|
| 2267 | + | * This function handles both field access (expr.field) and array indexing |
|
| 2268 | + | * (expr[index]) in a unified way, enabling arbitrarily complex nested |
|
| 2269 | + | * expressions like `x.y.z[1].w[2][3].q`. |
|
| 2270 | + | */ |
|
| 2271 | + | static node_t *parse_postfix(parser_t *p, node_t *expr) { |
|
| 2272 | + | node_t *result = expr; |
|
| 2273 | + | ||
| 2274 | + | for (;;) { |
|
| 2275 | + | if (consume(p, T_DOT)) { /* Field access. */ |
|
| 2276 | + | node_t *n = node(p, NODE_ACCESS); |
|
| 2277 | + | n->val.access.lval = result; |
|
| 2278 | + | ||
| 2279 | + | node_t *field = parse_ident(p, "expected field name after `.`"); |
|
| 2280 | + | if (!field) |
|
| 2281 | + | return NULL; |
|
| 2282 | + | ||
| 2283 | + | field->val.ident.name = p->previous.start; |
|
| 2284 | + | field->val.ident.length = p->previous.length; |
|
| 2285 | + | n->val.access.rval = field; |
|
| 2286 | + | ||
| 2287 | + | result = n; |
|
| 2288 | + | } else if (consume(p, T_DOT_DOT)) { |
|
| 2289 | + | node_t *range = node(p, NODE_RANGE); |
|
| 2290 | + | range->val.range.start = result; |
|
| 2291 | + | range->val.range.end = NULL; |
|
| 2292 | + | ||
| 2293 | + | /* Check if there's a right-hand side for the range. */ |
|
| 2294 | + | if (!check(p, T_RBRACKET) && !check(p, T_SEMICOLON) && |
|
| 2295 | + | !check(p, T_COMMA) && !check(p, T_RPAREN) && |
|
| 2296 | + | !check(p, T_LBRACE)) { |
|
| 2297 | + | if (!(range->val.range.end = parse_expr(p))) { |
|
| 2298 | + | return NULL; |
|
| 2299 | + | } |
|
| 2300 | + | } |
|
| 2301 | + | result = range; |
|
| 2302 | + | } else if (consume(p, T_COLON_COLON)) { /* Scope access */ |
|
| 2303 | + | node_t *ident = |
|
| 2304 | + | parse_scope_segment(p, "expected identifier name after `::`"); |
|
| 2305 | + | if (!ident) |
|
| 2306 | + | return NULL; |
|
| 2307 | + | ||
| 2308 | + | node_t *n = node(p, NODE_SCOPE); |
|
| 2309 | + | n->val.access.lval = result; |
|
| 2310 | + | n->val.access.rval = ident; |
|
| 2311 | + | ||
| 2312 | + | result = n; |
|
| 2313 | + | } else if (consume(p, T_LBRACKET)) { /* Array indexing or slicing. */ |
|
| 2314 | + | node_t *expr = NULL; |
|
| 2315 | + | ||
| 2316 | + | if (consume(p, T_DOT_DOT)) { |
|
| 2317 | + | /* Either `..` or `..n` */ |
|
| 2318 | + | /* Create range node with NULL start and end. */ |
|
| 2319 | + | expr = node(p, NODE_RANGE); |
|
| 2320 | + | expr->val.range.start = NULL; |
|
| 2321 | + | expr->val.range.end = NULL; |
|
| 2322 | + | ||
| 2323 | + | if (!check(p, T_RBRACKET)) { |
|
| 2324 | + | if (!(expr->val.range.end = parse_expr(p))) { |
|
| 2325 | + | return NULL; |
|
| 2326 | + | } |
|
| 2327 | + | } |
|
| 2328 | + | } else { |
|
| 2329 | + | /* Either `n`, `n..` or `n..m` */ |
|
| 2330 | + | node_t *index = parse_expr(p); |
|
| 2331 | + | if (!index) |
|
| 2332 | + | return NULL; |
|
| 2333 | + | ||
| 2334 | + | expr = index; |
|
| 2335 | + | } |
|
| 2336 | + | /* Create array index node with the index expression */ |
|
| 2337 | + | node_t *n = node(p, NODE_ARRAY_INDEX); |
|
| 2338 | + | n->val.access.lval = result; |
|
| 2339 | + | n->val.access.rval = expr; |
|
| 2340 | + | ||
| 2341 | + | n->offset = result->offset; |
|
| 2342 | + | n->length = result->length; |
|
| 2343 | + | ||
| 2344 | + | /* Expect closing bracket */ |
|
| 2345 | + | if (!expect(p, T_RBRACKET, "expected `]` after array index")) |
|
| 2346 | + | return NULL; |
|
| 2347 | + | ||
| 2348 | + | result = n; |
|
| 2349 | + | } else if (consume(p, T_LPAREN)) { /* Parse function call. */ |
|
| 2350 | + | node_t *call = node(p, NODE_CALL); |
|
| 2351 | + | call->val.call.callee = result; |
|
| 2352 | + | call->val.call.args = nodespan_alloc(p, MAX_FN_PARAMS); |
|
| 2353 | + | ||
| 2354 | + | node_t *arg = NULL; |
|
| 2355 | + | if (!check(p, T_RPAREN)) { |
|
| 2356 | + | do { |
|
| 2357 | + | if (!(arg = parse_fn_call_arg(p))) { |
|
| 2358 | + | return NULL; |
|
| 2359 | + | } |
|
| 2360 | + | nodespan_push(p, &call->val.call.args, arg); |
|
| 2361 | + | } while (consume(p, T_COMMA)); |
|
| 2362 | + | } |
|
| 2363 | + | if (!expect(p, T_RPAREN, "expected `)` after function arguments")) |
|
| 2364 | + | return NULL; |
|
| 2365 | + | ||
| 2366 | + | result = call; |
|
| 2367 | + | } else if (p->context == PARSE_CTX_NORMAL && |
|
| 2368 | + | result->cls == NODE_SCOPE && check(p, T_LBRACE)) { |
|
| 2369 | + | /* Record literal after scope access: `Union::Variant { ... }`. */ |
|
| 2370 | + | advance(p); /* consume `{` */ |
|
| 2371 | + | ||
| 2372 | + | node_t *literal = parse_record_lit(p, result); |
|
| 2373 | + | if (!literal) |
|
| 2374 | + | return NULL; |
|
| 2375 | + | ||
| 2376 | + | result = literal; |
|
| 2377 | + | } else { |
|
| 2378 | + | /* No postfix operators to try. */ |
|
| 2379 | + | break; |
|
| 2380 | + | } |
|
| 2381 | + | } |
|
| 2382 | + | return result; |
|
| 2383 | + | } |
|
| 2384 | + | ||
| 2385 | + | /* Parse a complete program, return the root of the AST, or `NULL` |
|
| 2386 | + | * if parsing failed. */ |
|
| 2387 | + | node_t *parser_parse(parser_t *p) { |
|
| 2388 | + | p->current = scanner_next(&p->scanner); |
|
| 2389 | + | ||
| 2390 | + | /* Create a top-level module. */ |
|
| 2391 | + | node_t *root = parse_module_body(p); |
|
| 2392 | + | if (!root) |
|
| 2393 | + | return NULL; |
|
| 2394 | + | ||
| 2395 | + | if (!expect(p, T_EOF, "expected end-of-file")) |
|
| 2396 | + | return NULL; |
|
| 2397 | + | ||
| 2398 | + | root->length = (usize)(p->scanner.cursor - p->scanner.source); |
|
| 2399 | + | ||
| 2400 | + | return (p->root = root); |
|
| 2401 | + | } |
parser.h
added
+38 -0
| 1 | + | #ifndef PARSER_H |
|
| 2 | + | #define PARSER_H |
|
| 3 | + | ||
| 4 | + | #include "ast.h" |
|
| 5 | + | #include "limits.h" |
|
| 6 | + | #include "scanner.h" |
|
| 7 | + | ||
/* Parsing context, used to resolve ambiguities around `{`. */
typedef enum {
    /* Normal expression context. */
    PARSE_CTX_NORMAL,
    /* Inside a condition, where `{` starts a block. */
    PARSE_CTX_CONDITION,
} parse_ctx_t;
|
| 13 | + | ||
| 14 | + | /* Parser state */ |
|
| 15 | + | typedef struct parser_t { |
|
| 16 | + | scanner_t scanner; |
|
| 17 | + | token_t current; |
|
| 18 | + | token_t previous; |
|
| 19 | + | node_t *root; |
|
| 20 | + | u32 errors; |
|
| 21 | + | node_t nodes[MAX_NODES]; |
|
| 22 | + | u32 nnodes; |
|
| 23 | + | parse_ctx_t context; |
|
| 24 | + | ||
| 25 | + | /* Pool for variable-length node pointer arrays. |
|
| 26 | + | * Nodes store an index + count into this pool instead of |
|
| 27 | + | * embedding large arrays, keeping node_t small. */ |
|
| 28 | + | struct node_t *ptrs[MAX_NODEPTR_POOL]; |
|
| 29 | + | u32 nptrs; |
|
| 30 | + | } parser_t; |
|
| 31 | + | ||
| 32 | + | /* Initialize parser with scanner */ |
|
| 33 | + | void parser_init(parser_t *p); |
|
| 34 | + | ||
| 35 | + | /* Parse a complete program */ |
|
| 36 | + | node_t *parser_parse(parser_t *p); |
|
| 37 | + | ||
| 38 | + | #endif |
radiance.c
added
+147 -0
| 1 | + | #include <errno.h> |
|
| 2 | + | #include <stdio.h> |
|
| 3 | + | #include <stdlib.h> |
|
| 4 | + | #include <string.h> |
|
| 5 | + | #include <unistd.h> |
|
| 6 | + | ||
| 7 | + | #include "ast.h" |
|
| 8 | + | #include "desugar.h" |
|
| 9 | + | #include "gen.h" |
|
| 10 | + | #include "io.h" |
|
| 11 | + | #include "module.h" |
|
| 12 | + | #include "options.h" |
|
| 13 | + | #include "parser.h" |
|
| 14 | + | #include "resolver.h" |
|
| 15 | + | #include "scanner.h" |
|
| 16 | + | #include "strings.h" |
|
| 17 | + | #include "symtab.h" |
|
| 18 | + | #include "types.h" |
|
| 19 | + | #include "util.h" |
|
| 20 | + | ||
| 21 | + | static int compile(struct options *o) { |
|
| 22 | + | if (o->ninputs > 1) { |
|
| 23 | + | bail("too many inputs (%d)", o->ninputs); |
|
| 24 | + | } |
|
| 25 | + | if (o->ninputs < 1) { |
|
| 26 | + | bail("no input files"); |
|
| 27 | + | } |
|
| 28 | + | if (!o->output) { |
|
| 29 | + | bail("an output file must be specified with `-o`"); |
|
| 30 | + | } |
|
| 31 | + | ||
| 32 | + | FILE *text = NULL; |
|
| 33 | + | FILE *data_ro = NULL; |
|
| 34 | + | FILE *data_rw = NULL; |
|
| 35 | + | const char *rootpath = o->inputs[0]; |
|
| 36 | + | int err = MODULE_OK; |
|
| 37 | + | ||
| 38 | + | static module_manager_t mm; |
|
| 39 | + | module_manager_init(&mm, rootpath); |
|
| 40 | + | ||
| 41 | + | /* Register the root module */ |
|
| 42 | + | if (!(mm.root = module_manager_register(&mm, rootpath))) { |
|
| 43 | + | bail("error registering root module '%s'", rootpath); |
|
| 44 | + | } |
|
| 45 | + | ||
| 46 | + | /* Register additional modules specified with -mod. |
|
| 47 | + | * |
|
| 48 | + | * Module paths are given as full relative paths (e.g. `lib/std/foo.rad`). |
|
| 49 | + | * The qualified name is derived by stripping the leading directory |
|
| 50 | + | * component (e.g. `std/foo.rad`). */ |
|
| 51 | + | for (int i = 0; i < o->nmodules; i++) { |
|
| 52 | + | const char *path = o->modules[i]; |
|
| 53 | + | const char *qualified = strchr(path, '/'); |
|
| 54 | + | qualified = qualified ? qualified + 1 : path; |
|
| 55 | + | ||
| 56 | + | if (!module_manager_register_qualified(&mm, path, qualified)) { |
|
| 57 | + | bail("error registering module '%s'", path); |
|
| 58 | + | } |
|
| 59 | + | } |
|
| 60 | + | ||
| 61 | + | /* Parse all modules */ |
|
| 62 | + | if (!module_manager_parse(&mm, &err)) { |
|
| 63 | + | bail("error parsing modules"); |
|
| 64 | + | } |
|
| 65 | + | ||
| 66 | + | /* Run desugaring pass on all modules */ |
|
| 67 | + | for (usize i = 0; i < mm.nmodules; i++) { |
|
| 68 | + | module_t *mod = &mm.modules[i]; |
|
| 69 | + | ||
| 70 | + | if (mod->ast) { |
|
| 71 | + | static desugar_t d; |
|
| 72 | + | ||
| 73 | + | mod->ast = desugar_run(&d, mod, mod->ast); |
|
| 74 | + | if (!mod->ast) { |
|
| 75 | + | bail("desugaring failed for module %s", mod->name); |
|
| 76 | + | } |
|
| 77 | + | } |
|
| 78 | + | } |
|
| 79 | + | ||
| 80 | + | static resolve_t t; |
|
| 81 | + | resolve_init(&t, &mm); |
|
| 82 | + | ||
| 83 | + | if (!resolve_run(&t, mm.root)) { |
|
| 84 | + | bail("type checking failed"); |
|
| 85 | + | } |
|
| 86 | + | ||
| 87 | + | /* Initialize code generator */ |
|
| 88 | + | static gen_t g; |
|
| 89 | + | gen_init(&g, &t.types, &mm, 0); |
|
| 90 | + | ||
| 91 | + | if (!(text = fopen(o->output, "w"))) { |
|
| 92 | + | bail("failed to open '%s' for writing: %s", o->output, strerror(errno)); |
|
| 93 | + | } |
|
| 94 | + | ||
| 95 | + | /* Generate code */ |
|
| 96 | + | if (gen_emit(&g, mm.root) != 0) { |
|
| 97 | + | bail("code generation failed"); |
|
| 98 | + | } |
|
| 99 | + | ||
| 100 | + | /* Write binary output */ |
|
| 101 | + | if (g.data.ro_size > 0) { |
|
| 102 | + | char datapath_ro[MAX_PATH_LEN] = { 0 }; |
|
| 103 | + | strncpy(datapath_ro, o->output, MAX_PATH_LEN); |
|
| 104 | + | strlcat(datapath_ro, ".ro.data", MAX_PATH_LEN); |
|
| 105 | + | ||
| 106 | + | if (!(data_ro = fopen(datapath_ro, "w"))) { |
|
| 107 | + | bail( |
|
| 108 | + | "failed to open '%s' for writing: %s", |
|
| 109 | + | datapath_ro, |
|
| 110 | + | strerror(errno) |
|
| 111 | + | ); |
|
| 112 | + | } |
|
| 113 | + | } |
|
| 114 | + | if (g.data.rw_init_total > 0) { |
|
| 115 | + | char datapath[MAX_PATH_LEN] = { 0 }; |
|
| 116 | + | strncpy(datapath, o->output, MAX_PATH_LEN); |
|
| 117 | + | strlcat(datapath, ".rw.data", MAX_PATH_LEN); |
|
| 118 | + | ||
| 119 | + | if (!(data_rw = fopen(datapath, "w"))) { |
|
| 120 | + | bail( |
|
| 121 | + | "failed to open '%s' for writing: %s", datapath, strerror(errno) |
|
| 122 | + | ); |
|
| 123 | + | } |
|
| 124 | + | } |
|
| 125 | + | gen_dump_bin(&g, text, data_ro, data_rw); |
|
| 126 | + | ||
| 127 | + | if (data_ro) |
|
| 128 | + | fclose(data_ro); |
|
| 129 | + | if (data_rw) |
|
| 130 | + | fclose(data_rw); |
|
| 131 | + | if (text) |
|
| 132 | + | fclose(text); |
|
| 133 | + | ||
| 134 | + | return 0; |
|
| 135 | + | } |
|
| 136 | + | ||
| 137 | + | int main(int argc, char *argv[]) { |
|
| 138 | + | strings_init(); |
|
| 139 | + | ||
| 140 | + | struct options o = options(argc, argv); |
|
| 141 | + | options_parse(&o); |
|
| 142 | + | ||
| 143 | + | if (compile(&o) != 0) { |
|
| 144 | + | bail("compilation failed"); |
|
| 145 | + | } |
|
| 146 | + | return 0; |
|
| 147 | + | } |
ralloc.c
added
+88 -0
| 1 | + | /** |
|
| 2 | + | * Register allocator. |
|
| 3 | + | * Uses a simple stack-based algorithm. |
|
| 4 | + | */ |
|
| 5 | + | #include <stdio.h> |
|
| 6 | + | #include <stdlib.h> |
|
| 7 | + | ||
| 8 | + | #include "io.h" |
|
| 9 | + | #include "ralloc.h" |
|
| 10 | + | #include "riscv.h" |
|
| 11 | + | #include "types.h" |
|
| 12 | + | ||
| 13 | + | /* Order of temporary registers to allocate. */ |
|
| 14 | + | const reg_t ralloc_regs[] = { A0, A1, A2, A3, A4, A5, A6, A7, |
|
| 15 | + | T0, T1, T2, T3, T4, T5, T6 }; |
|
| 16 | + | ||
| 17 | + | ralloc_t ralloc(void) { |
|
| 18 | + | return (ralloc_t){ .regs = { false } }; |
|
| 19 | + | } |
|
| 20 | + | ||
| 21 | + | reg_t ralloc_next(ralloc_t *ra) { |
|
| 22 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 23 | + | if (!ra->regs[i]) { |
|
| 24 | + | ra->regs[i] = true; |
|
| 25 | + | return ralloc_regs[i]; |
|
| 26 | + | } |
|
| 27 | + | } |
|
| 28 | + | bail("out of registers"); |
|
| 29 | + | } |
|
| 30 | + | ||
| 31 | + | reg_t ralloc_next_except(ralloc_t *ra, reg_t avoid) { |
|
| 32 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 33 | + | if (!ra->regs[i] && ralloc_regs[i] != avoid) { |
|
| 34 | + | ra->regs[i] = true; |
|
| 35 | + | return ralloc_regs[i]; |
|
| 36 | + | } |
|
| 37 | + | } |
|
| 38 | + | return ralloc_next(ra); |
|
| 39 | + | } |
|
| 40 | + | ||
| 41 | + | void ralloc_free(ralloc_t *ra, reg_t r) { |
|
| 42 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 43 | + | if (ralloc_regs[i] == r) { |
|
| 44 | + | ra->regs[i] = false; |
|
| 45 | + | break; |
|
| 46 | + | } |
|
| 47 | + | } |
|
| 48 | + | } |
|
| 49 | + | ||
| 50 | + | void ralloc_reserve(ralloc_t *ra, reg_t r) { |
|
| 51 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 52 | + | if (ralloc_regs[i] == r) { |
|
| 53 | + | ra->regs[i] = true; |
|
| 54 | + | break; |
|
| 55 | + | } |
|
| 56 | + | } |
|
| 57 | + | } |
|
| 58 | + | ||
| 59 | + | bool ralloc_is_free(ralloc_t *ra, reg_t r) { |
|
| 60 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 61 | + | if (ralloc_regs[i] == r) { |
|
| 62 | + | return !ra->regs[i]; |
|
| 63 | + | } |
|
| 64 | + | } |
|
| 65 | + | return false; |
|
| 66 | + | } |
|
| 67 | + | ||
| 68 | + | void ralloc_free_all(ralloc_t *ra) { |
|
| 69 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 70 | + | ra->regs[i] = false; |
|
| 71 | + | } |
|
| 72 | + | } |
|
| 73 | + | ||
| 74 | + | void ralloc_save(ralloc_t *ra, bool *reserved) { |
|
| 75 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 76 | + | if (ralloc_regs[i] != A0) { |
|
| 77 | + | reserved[i] = ra->regs[i]; |
|
| 78 | + | } |
|
| 79 | + | } |
|
| 80 | + | } |
|
| 81 | + | ||
| 82 | + | void ralloc_restore(ralloc_t *ra, bool *reserved) { |
|
| 83 | + | for (int i = 0; i < RALLOC_NREGS; i++) { |
|
| 84 | + | if (ralloc_regs[i] != A0) { |
|
| 85 | + | ra->regs[i] = reserved[i]; |
|
| 86 | + | } |
|
| 87 | + | } |
|
| 88 | + | } |
ralloc.h
added
+39 -0
#ifndef ralloc_H
#define ralloc_H

#include <stdio.h>

#include "riscv.h"
#include "types.h"

/* Number of available registers. Must match the number of entries in
 * `ralloc_regs`. */
#define RALLOC_NREGS 15

/* Order of registers to allocate. Index `i` here corresponds to slot `i`
 * of `ralloc_t.regs`. */
extern const reg_t ralloc_regs[RALLOC_NREGS];

/* Register allocator context. */
typedef struct {
    bool regs[RALLOC_NREGS]; /* Registers status: `true` means in use. */
} ralloc_t;

/* Return a new register allocator with every register free. */
ralloc_t ralloc(void);
/* Allocate and return a new register: the first free one in `ralloc_regs`
 * order. Bails when no register is free. */
reg_t ralloc_next(ralloc_t *ra);
/* Allocate and return a new register, avoiding `r` when possible. Falls
 * back to `ralloc_next` when no other register is free. */
reg_t ralloc_next_except(ralloc_t *ra, reg_t r);
/* Free the given register. Registers not in `ralloc_regs` are ignored. */
void ralloc_free(ralloc_t *ra, reg_t r);
/* Free all allocated registers. */
void ralloc_free_all(ralloc_t *ra);
/* Returns whether this register is free to reserve. Registers not in
 * `ralloc_regs` are reported as not free. */
bool ralloc_is_free(ralloc_t *ra, reg_t r);
/* Reserve a register.
 * NOTE(review): the implementation in ralloc.c marks the register as in use
 * unconditionally; it does not fail when the register is already reserved. */
void ralloc_reserve(ralloc_t *ra, reg_t r);
/* Save the register allocation state into `reserved`, which is indexed like
 * `ralloc_regs`. The entry for A0 is not written. */
void ralloc_save(ralloc_t *ra, bool *reserved);
/* Restore the register allocation state from `reserved`. The state of A0 is
 * left as it is. */
void ralloc_restore(ralloc_t *ra, bool *reserved);

#endif
resolver.c
added
+3600 -0
| 1 | + | #include <assert.h> |
|
| 2 | + | #include <limits.h> |
|
| 3 | + | #include <stdint.h> |
|
| 4 | + | #include <stdio.h> |
|
| 5 | + | #include <stdlib.h> |
|
| 6 | + | #include <string.h> |
|
| 7 | + | ||
| 8 | + | #include "ast.h" |
|
| 9 | + | #include "io.h" |
|
| 10 | + | #include "limits.h" |
|
| 11 | + | #include "module.h" |
|
| 12 | + | #include "resolver.h" |
|
| 13 | + | #include "riscv.h" |
|
| 14 | + | #include "strings.h" |
|
| 15 | + | #include "symtab.h" |
|
| 16 | + | #include "util.h" |
|
| 17 | + | ||
/* Larger of two values. Each argument is parenthesised so that operators
 * binding more loosely than `>` (e.g. `&`, `|`, `?:`) are grouped as the
 * caller wrote them. The selected argument is still evaluated twice, so
 * avoid arguments with side effects. */
#define max(a, b) ((a) > (b) ? (a) : (b))
|
| 19 | + | ||
| 20 | + | #define DEFAULT_SIZE 4 |
|
| 21 | + | #define DEFAULT_ALIGN 4 |
|
| 22 | + | ||
| 23 | + | static type_t *alloc_type( |
|
| 24 | + | resolve_t *t, |
|
| 25 | + | typeclass_t kind, |
|
| 26 | + | const char *name, |
|
| 27 | + | usize namel, |
|
| 28 | + | i32 size, |
|
| 29 | + | i32 align |
|
| 30 | + | ); |
|
| 31 | + | static type_t *alloc_array_type(resolve_t *t, type_t *elem, usize length); |
|
| 32 | + | static type_t *alloc_slice_type( |
|
| 33 | + | resolve_t *t, type_t *elem, type_t *base, bool mut |
|
| 34 | + | ); |
|
| 35 | + | static type_t *alloc_union_type(resolve_t *t, union_decl_t *uni); |
|
| 36 | + | static type_t *alloc_result_type(resolve_t *t, type_t *payload, type_t *err); |
|
| 37 | + | static type_t *alloc_record_type(resolve_t *t, record_decl_t *rec); |
|
| 38 | + | static type_t *alloc_anonymous_record_type(resolve_t *t); |
|
| 39 | + | static type_t *alloc_ptr_type(resolve_t *t, type_t *base, bool mut); |
|
| 40 | + | static type_t *alloc_opt_type(resolve_t *t, type_t *elem); |
|
| 41 | + | static type_t *resolve_node(resolve_t *t, node_t *n, type_t *expected_type); |
|
| 42 | + | static type_t *resolve_var(resolve_t *t, node_t *n); |
|
| 43 | + | static type_t *resolve_const(resolve_t *t, node_t *n); |
|
| 44 | + | static type_t *resolve_static(resolve_t *t, node_t *n); |
|
| 45 | + | static type_t *resolve_use(resolve_t *t, node_t *n); |
|
| 46 | + | static type_t *resolve_mod_decl(resolve_t *t, node_t *n); |
|
| 47 | + | static bool resolve_mod_def(resolve_t *t, module_t *module); |
|
| 48 | + | static type_t *resolve_scope(resolve_t *t, node_t *n); |
|
| 49 | + | static type_t *resolve_block(resolve_t *t, node_t *n); |
|
| 50 | + | static type_t *resolve_fn_def(resolve_t *t, node_t *n); |
|
| 51 | + | static type_t *resolve_fn_decl(resolve_t *t, node_t *n); |
|
| 52 | + | static type_t *resolve_number(resolve_t *t, node_t *n, type_t *expected); |
|
| 53 | + | static type_t *resolve_builtin(resolve_t *t, node_t *n, type_t *expected); |
|
| 54 | + | static bool resolve_decls(resolve_t *t, module_t *module); |
|
| 55 | + | static type_t *resolve_throw(resolve_t *t, node_t *n); |
|
| 56 | + | static type_t *resolve_try_expr(resolve_t *t, node_t *n, type_t *expected); |
|
| 57 | + | static bool declare_record(resolve_t *t, node_t *n); |
|
| 58 | + | static bool declare_enum(resolve_t *t, node_t *n); |
|
| 59 | + | static type_t *resolve_tuple_record_constructor( |
|
| 60 | + | resolve_t *t, node_t *call, type_t *record_type |
|
| 61 | + | ); |
|
| 62 | + | static type_t *type_unify( |
|
| 63 | + | resolve_t *t, type_t *a, type_t *b, node_t *n, bool co, const char *ctx |
|
| 64 | + | ); |
|
| 65 | + | static type_t *resolve_type(resolve_t *t, node_t *n); |
|
| 66 | + | static symbol_t *resolve_name(resolve_t *t, node_t *n, symkind_t kind); |
|
| 67 | + | static bool resolve_const_usize(resolve_t *t, node_t *expr, usize *value); |
|
| 68 | + | static bool symbol_add(resolve_t *t, node_t *ident, node_t *n); |
|
| 69 | + | static void finalize_type_layout(resolve_t *t); |
|
| 70 | + | static void module_scope_path(node_t *node, char *path_str); |
|
| 71 | + | static bool node_is_super(const node_t *n); |
|
| 72 | + | static module_t *module_super_ancestor(module_t *mod, usize depth); |
|
| 73 | + | static bool node_diverges(node_t *n); |
|
| 74 | + | ||
/* Initialize type checker.
 * Resets the resolver state, creates the global scope, and allocates the
 * built-in types. The built-ins are allocated in a fixed order; keep the
 * order stable, since each allocation goes through the resolver's type
 * storage. */
void resolve_init(resolve_t *t, module_manager_t *mm) {
    t->fn = NULL;
    t->global = symtab_scope(NULL, NULL);
    t->scope = t->global; /* Resolution starts in the global scope. */
    t->mm = mm;
    t->module = NULL;
    t->recordid = 0;
    t->ctx = TC_CTX_NORMAL;
    t->types.nsympool = 0;
    t->types.ntypepool = 0;
    t->types.type_bool = alloc_type(t, TYPE_BOOL, "bool", 4, 1, 1);
    /* NOTE(review): `type_char` is registered with class TYPE_I8 and the
     * name "i8", i.e. identical to `type_i8` below -- confirm this aliasing
     * is intended. */
    t->types.type_char =
        alloc_type(t, TYPE_I8, "i8", 2, sizeof(i8), sizeof(i8));
    t->types.type_i8 = alloc_type(t, TYPE_I8, "i8", 2, sizeof(i8), sizeof(i8));
    t->types.type_i16 =
        alloc_type(t, TYPE_I16, "i16", 3, sizeof(i16), sizeof(i16));
    t->types.type_i32 =
        alloc_type(t, TYPE_I32, "i32", 3, sizeof(i32), sizeof(i32));
    t->types.type_u8 = alloc_type(t, TYPE_U8, "u8", 2, sizeof(u8), sizeof(u8));
    t->types.type_u16 =
        alloc_type(t, TYPE_U16, "u16", 3, sizeof(u16), sizeof(u16));
    t->types.type_u32 =
        alloc_type(t, TYPE_U32, "u32", 3, sizeof(u32), sizeof(u32));
    /* Strings are immutable slices of `u8`. */
    t->types.type_str = alloc_slice_type(t, t->types.type_u8, NULL, false);
    /* The remaining built-ins have zero size and zero alignment. */
    t->types.type_void = alloc_type(t, TYPE_VOID, "void", 4, 0, 0);
    t->types.type_opaque = alloc_type(t, TYPE_OPAQUE, "opaque", 6, 0, 0);
    t->types.type_never = alloc_type(t, TYPE_NEVER, "never", 5, 0, 0);

    /* Add root module to global scope
     * so it can be accessed with `::module` */
    if (mm->root && mm->root->ast && mm->root->ast->sym) {
        /* Root module declarations are checked later, so just add the symbol */
        symtab_add_symbol(t->global, mm->root->ast->sym);
    }
}
|
| 111 | + | ||
| 112 | + | symbol_t **types_alloc_sympool(types_t *t, u8 n) { |
|
| 113 | + | assert(t->nsympool + n <= MAX_SYMPTR_POOL); |
|
| 114 | + | symbol_t **ptr = &t->sympool[t->nsympool]; |
|
| 115 | + | t->nsympool += n; |
|
| 116 | + | return ptr; |
|
| 117 | + | } |
|
| 118 | + | ||
| 119 | + | type_t **types_alloc_typepool(types_t *t, u8 n) { |
|
| 120 | + | assert(t->ntypepool + n <= MAX_TYPEPTR_POOL); |
|
| 121 | + | type_t **ptr = &t->typepool[t->ntypepool]; |
|
| 122 | + | t->ntypepool += n; |
|
| 123 | + | return ptr; |
|
| 124 | + | } |
|
| 125 | + | ||
| 126 | + | type_t *deref_type(type_t *ref) { |
|
| 127 | + | type_t *target = ref->info.ptr.target; |
|
| 128 | + | ||
| 129 | + | return target; |
|
| 130 | + | } |
|
| 131 | + | ||
| 132 | + | bool ident_eq(node_t *ident, const char *str, usize len) { |
|
| 133 | + | const char *ident_str = ident->val.ident.name; |
|
| 134 | + | usize ident_len = ident->val.ident.length; |
|
| 135 | + | ||
| 136 | + | return ident_len == len && (memcmp(ident_str, str, len) == 0); |
|
| 137 | + | } |
|
| 138 | + | ||
| 139 | + | static bool node_is_super(const node_t *n) { |
|
| 140 | + | return n && n->cls == NODE_SUPER; |
|
| 141 | + | } |
|
| 142 | + | ||
| 143 | + | static module_t *module_super_ancestor(module_t *mod, usize depth) { |
|
| 144 | + | module_t *current = mod; |
|
| 145 | + | ||
| 146 | + | for (usize i = 0; i < depth; i++) { |
|
| 147 | + | if (!current || !current->parent) |
|
| 148 | + | return NULL; |
|
| 149 | + | current = current->parent; |
|
| 150 | + | } |
|
| 151 | + | return current; |
|
| 152 | + | } |
|
| 153 | + | ||
| 154 | + | inline bool type_is_packed(type_t *t) { |
|
| 155 | + | switch (t->cls) { |
|
| 156 | + | case TYPE_RECORD: |
|
| 157 | + | if ((i32)t->info.srt.packedsize != t->size) { |
|
| 158 | + | return false; |
|
| 159 | + | } |
|
| 160 | + | break; |
|
| 161 | + | case TYPE_ARRAY: |
|
| 162 | + | case TYPE_SLICE: |
|
| 163 | + | default: |
|
| 164 | + | break; |
|
| 165 | + | } |
|
| 166 | + | return true; |
|
| 167 | + | } |
|
| 168 | + | ||
| 169 | + | inline bool type_is_numeric(typeclass_t t) { |
|
| 170 | + | return t >= TYPE_I8 && t <= TYPE_U32; |
|
| 171 | + | } |
|
| 172 | + | ||
| 173 | + | inline bool type_is_address(typeclass_t t) { |
|
| 174 | + | return t == TYPE_PTR || t == TYPE_SLICE || t == TYPE_FN; |
|
| 175 | + | } |
|
| 176 | + | ||
| 177 | + | inline bool type_is_compound(type_t *t) { |
|
| 178 | + | typeclass_t cls = t->cls; |
|
| 179 | + | ||
| 180 | + | return cls == TYPE_ARRAY || cls == TYPE_RECORD || cls == TYPE_SLICE || |
|
| 181 | + | cls == TYPE_PTR || cls == TYPE_OPT || cls == TYPE_RESULT || |
|
| 182 | + | cls == TYPE_FN || type_is_union_with_payload(t); |
|
| 183 | + | } |
|
| 184 | + | ||
| 185 | + | inline bool type_is_passed_by_ref(type_t *t) { |
|
| 186 | + | typeclass_t cls = t->cls; |
|
| 187 | + | ||
| 188 | + | return cls == TYPE_ARRAY || cls == TYPE_RECORD || cls == TYPE_SLICE || |
|
| 189 | + | cls == TYPE_OPT || cls == TYPE_RESULT || |
|
| 190 | + | type_is_union_with_payload(t); |
|
| 191 | + | } |
|
| 192 | + | ||
| 193 | + | inline bool type_is_union_with_payload(type_t *ty) { |
|
| 194 | + | return ty->cls == TYPE_UNION && ty->info.uni.has_payload; |
|
| 195 | + | } |
|
| 196 | + | ||
| 197 | + | inline bool type_is_tagged_value(type_t *ty) { |
|
| 198 | + | return ty->cls == TYPE_OPT || ty->cls == TYPE_RESULT || |
|
| 199 | + | type_is_union_with_payload(ty); |
|
| 200 | + | } |
|
| 201 | + | ||
| 202 | + | inline bool type_is_primitive(type_t *t) { |
|
| 203 | + | return !type_is_compound(t); |
|
| 204 | + | } |
|
| 205 | + | ||
| 206 | + | inline bool type_is_int(typeclass_t t) { |
|
| 207 | + | return t >= TYPE_I8 && t <= TYPE_U32; |
|
| 208 | + | } |
|
| 209 | + | ||
| 210 | + | inline bool type_is_unsigned(typeclass_t t) { |
|
| 211 | + | return t == TYPE_U8 || t == TYPE_U16 || t == TYPE_U32; |
|
| 212 | + | } |
|
| 213 | + | ||
| 214 | + | bool type_coercible(type_t *a, type_t *b) { |
|
| 215 | + | if (a == b) |
|
| 216 | + | return true; |
|
| 217 | + | ||
| 218 | + | /* Handle slice coercion: *mut [T] can coerce to *[T] */ |
|
| 219 | + | if (a->cls == TYPE_SLICE && b->cls == TYPE_SLICE) { |
|
| 220 | + | if (a->info.slc.elem != b->info.slc.elem) |
|
| 221 | + | return false; |
|
| 222 | + | /* Mutable can coerce to immutable, but not vice versa */ |
|
| 223 | + | if (!a->info.slc.mut && b->info.slc.mut) |
|
| 224 | + | return false; |
|
| 225 | + | return true; |
|
| 226 | + | } |
|
| 227 | + | /* Handle pointer coercion: *mut T can coerce to *T */ |
|
| 228 | + | if (a->cls == TYPE_PTR && b->cls == TYPE_PTR) { |
|
| 229 | + | if (a->info.ptr.target != b->info.ptr.target) |
|
| 230 | + | return false; |
|
| 231 | + | if (!a->info.ptr.mut && b->info.ptr.mut) |
|
| 232 | + | return false; |
|
| 233 | + | return true; |
|
| 234 | + | } |
|
| 235 | + | return false; |
|
| 236 | + | } |
|
| 237 | + | ||
| 238 | + | /* Unify two types, attempting to find the most general unifier. |
|
| 239 | + | * Returns the unified type on success, or `NULL` if types cannot be unified. |
|
| 240 | + | * If `n` and `context` are provided, reports an error on failure. */ |
|
| 241 | + | static type_t *type_unify( |
|
| 242 | + | resolve_t *t, |
|
| 243 | + | type_t *a, |
|
| 244 | + | type_t *b, |
|
| 245 | + | node_t *n, /* Node to report error on, or NULL for silent */ |
|
| 246 | + | bool coerce, /* Allow safe type coercion */ |
|
| 247 | + | const char *context /* Context string for error message */ |
|
| 248 | + | ) { |
|
| 249 | + | /* If the pointers are equal, they're already unified */ |
|
| 250 | + | if (a == b) |
|
| 251 | + | return a; |
|
| 252 | + | /* If they are both `NULL`, there's nothing we can do */ |
|
| 253 | + | if (!a && !b) |
|
| 254 | + | return NULL; |
|
| 255 | + | ||
| 256 | + | /* Treat `never` as compatible with any type. */ |
|
| 257 | + | if (a && a->cls == TYPE_NEVER) |
|
| 258 | + | return b ? b : a; |
|
| 259 | + | if (b && b->cls == TYPE_NEVER) |
|
| 260 | + | return a ? a : b; |
|
| 261 | + | ||
| 262 | + | /* If one type is NULL, create optional of the other type */ |
|
| 263 | + | if (!a && b && (b->cls == TYPE_OPT)) |
|
| 264 | + | return alloc_opt_type(t, b); |
|
| 265 | + | if (!b && a && (a->cls == TYPE_OPT)) |
|
| 266 | + | return alloc_opt_type(t, a); |
|
| 267 | + | ||
| 268 | + | /* If either type is `NULL` and the other is not an optional, bail, |
|
| 269 | + | * because we have an error. */ |
|
| 270 | + | if (!a || !b) { |
|
| 271 | + | return NULL; |
|
| 272 | + | } |
|
| 273 | + | /* Handle coercion of T to ?T */ |
|
| 274 | + | if (coerce) { |
|
| 275 | + | if (b->cls == TYPE_OPT && a->cls != TYPE_OPT) { |
|
| 276 | + | if (type_unify(t, a, b->info.opt.elem, n, coerce, context)) { |
|
| 277 | + | return b; /* a unifies with ?T's element, result is ?T */ |
|
| 278 | + | } |
|
| 279 | + | /* Try to unify a with the optional's element type */ |
|
| 280 | + | type_t *unified = |
|
| 281 | + | type_unify(t, a, b->info.opt.elem, NULL, coerce, NULL); |
|
| 282 | + | if (unified) { |
|
| 283 | + | return alloc_opt_type(t, unified); |
|
| 284 | + | } |
|
| 285 | + | } |
|
| 286 | + | } |
|
| 287 | + | /* Handle pointer types */ |
|
| 288 | + | if (a->cls == TYPE_PTR && b->cls == TYPE_PTR) { |
|
| 289 | + | /* Allow coercion from *T to *opaque and *mut T to *mut opaque */ |
|
| 290 | + | if (coerce && (a->info.ptr.target->cls == TYPE_OPAQUE || |
|
| 291 | + | b->info.ptr.target->cls == TYPE_OPAQUE)) { |
|
| 292 | + | return a->info.ptr.target->cls == TYPE_OPAQUE ? a : b; |
|
| 293 | + | } |
|
| 294 | + | ||
| 295 | + | type_t *unified = type_unify( |
|
| 296 | + | t, a->info.ptr.target, b->info.ptr.target, NULL, coerce, NULL |
|
| 297 | + | ); |
|
| 298 | + | if (unified) { |
|
| 299 | + | /* When coercing *mut T to *T, prefer immutable target */ |
|
| 300 | + | if (coerce && a->info.ptr.mut && !b->info.ptr.mut) { |
|
| 301 | + | return b; |
|
| 302 | + | } |
|
| 303 | + | if (unified == a->info.ptr.target) { |
|
| 304 | + | return a; |
|
| 305 | + | } else if (unified == b->info.ptr.target) { |
|
| 306 | + | return b; |
|
| 307 | + | } else { |
|
| 308 | + | return alloc_ptr_type(t, unified, a->info.ptr.mut); |
|
| 309 | + | } |
|
| 310 | + | } |
|
| 311 | + | goto error; |
|
| 312 | + | } |
|
| 313 | + | /* Handle numeric type unification - promote to wider type */ |
|
| 314 | + | if (type_is_numeric(a->cls) && type_is_numeric(b->cls)) { |
|
| 315 | + | /* Return the "wider" type based on size and signedness */ |
|
| 316 | + | if (a->size > b->size) { |
|
| 317 | + | return a; |
|
| 318 | + | } else if (b->size > a->size) { |
|
| 319 | + | return b; |
|
| 320 | + | } else { |
|
| 321 | + | /* Same size - prefer unsigned over signed */ |
|
| 322 | + | if ((a->cls >= TYPE_U8 && a->cls <= TYPE_U32) && |
|
| 323 | + | (b->cls >= TYPE_I8 && b->cls <= TYPE_I32)) { |
|
| 324 | + | return a; /* a is unsigned, b is signed */ |
|
| 325 | + | } else if ((b->cls >= TYPE_U8 && b->cls <= TYPE_U32) && |
|
| 326 | + | (a->cls >= TYPE_I8 && a->cls <= TYPE_I32)) { |
|
| 327 | + | return b; /* b is unsigned, a is signed */ |
|
| 328 | + | } else { |
|
| 329 | + | return a; /* Default to first type if same category */ |
|
| 330 | + | } |
|
| 331 | + | } |
|
| 332 | + | } |
|
| 333 | + | /* Handle array types */ |
|
| 334 | + | if (a->cls == TYPE_ARRAY && b->cls == TYPE_ARRAY) { |
|
| 335 | + | /* Arrays must have same length to unify */ |
|
| 336 | + | if (a->info.ary.length != b->info.ary.length) { |
|
| 337 | + | goto error; |
|
| 338 | + | } |
|
| 339 | + | /* Unify element types */ |
|
| 340 | + | type_t *unified = type_unify( |
|
| 341 | + | t, a->info.ary.elem, b->info.ary.elem, NULL, false, NULL |
|
| 342 | + | ); |
|
| 343 | + | if (unified) { |
|
| 344 | + | /* If element types are already the same, return existing array */ |
|
| 345 | + | if (unified == a->info.ary.elem) { |
|
| 346 | + | return a; |
|
| 347 | + | } else if (unified == b->info.ary.elem) { |
|
| 348 | + | return b; |
|
| 349 | + | } else { |
|
| 350 | + | return alloc_array_type(t, unified, a->info.ary.length); |
|
| 351 | + | } |
|
| 352 | + | } |
|
| 353 | + | goto error; |
|
| 354 | + | } |
|
| 355 | + | /* Handle slice types */ |
|
| 356 | + | if (a->cls == TYPE_SLICE && b->cls == TYPE_SLICE) { |
|
| 357 | + | /* Allow coercion from *[T] to *[opaque] */ |
|
| 358 | + | if (coerce && (a->info.slc.elem->cls == TYPE_OPAQUE || |
|
| 359 | + | b->info.slc.elem->cls == TYPE_OPAQUE)) { |
|
| 360 | + | return a->info.slc.elem->cls == TYPE_OPAQUE ? a : b; |
|
| 361 | + | } |
|
| 362 | + | type_t *unified = type_unify( |
|
| 363 | + | t, a->info.slc.elem, b->info.slc.elem, NULL, false, NULL |
|
| 364 | + | ); |
|
| 365 | + | if (unified) { |
|
| 366 | + | /* When coercing *mut [T] to *[T], prefer immutable target */ |
|
| 367 | + | if (coerce && a->info.slc.mut && !b->info.slc.mut) { |
|
| 368 | + | return b; |
|
| 369 | + | } |
|
| 370 | + | if (unified == a->info.slc.elem) { |
|
| 371 | + | return a; |
|
| 372 | + | } else if (unified == b->info.slc.elem) { |
|
| 373 | + | return b; |
|
| 374 | + | } else { |
|
| 375 | + | return alloc_slice_type(t, unified, NULL, a->info.slc.mut); |
|
| 376 | + | } |
|
| 377 | + | } |
|
| 378 | + | goto error; |
|
| 379 | + | } |
|
| 380 | + | /* Handle optional types */ |
|
| 381 | + | if (a->cls == TYPE_OPT && b->cls == TYPE_OPT) { |
|
| 382 | + | type_t *unified = type_unify( |
|
| 383 | + | t, a->info.opt.elem, b->info.opt.elem, NULL, coerce, NULL |
|
| 384 | + | ); |
|
| 385 | + | if (unified) { |
|
| 386 | + | if (unified == a->info.opt.elem) { |
|
| 387 | + | return a; |
|
| 388 | + | } else if (unified == b->info.opt.elem) { |
|
| 389 | + | return b; |
|
| 390 | + | } else { |
|
| 391 | + | return alloc_opt_type(t, unified); |
|
| 392 | + | } |
|
| 393 | + | } |
|
| 394 | + | goto error; |
|
| 395 | + | } |
|
| 396 | + | if (a->cls == TYPE_RESULT && b->cls == TYPE_RESULT) { |
|
| 397 | + | if (a->info.res.err != b->info.res.err) |
|
| 398 | + | goto error; |
|
| 399 | + | ||
| 400 | + | type_t *payload = type_unify( |
|
| 401 | + | t, a->info.res.payload, b->info.res.payload, NULL, coerce, NULL |
|
| 402 | + | ); |
|
| 403 | + | ||
| 404 | + | if (payload) { |
|
| 405 | + | if (payload == a->info.res.payload) { |
|
| 406 | + | return a; |
|
| 407 | + | } else if (payload == b->info.res.payload) { |
|
| 408 | + | return b; |
|
| 409 | + | } |
|
| 410 | + | } |
|
| 411 | + | goto error; |
|
| 412 | + | } |
|
| 413 | + | /* Handle array to slice conversion */ |
|
| 414 | + | if (a->cls == TYPE_ARRAY && b->cls == TYPE_SLICE) { |
|
| 415 | + | if (b->info.slc.mut) { |
|
| 416 | + | goto error; |
|
| 417 | + | } |
|
| 418 | + | type_t *unified = type_unify( |
|
| 419 | + | t, a->info.ary.elem, b->info.slc.elem, NULL, coerce, NULL |
|
| 420 | + | ); |
|
| 421 | + | if (unified && unified == a->info.ary.elem) { |
|
| 422 | + | return a->slice; /* Convert array to its slice type */ |
|
| 423 | + | } |
|
| 424 | + | goto error; |
|
| 425 | + | } |
|
| 426 | + | if (b->cls == TYPE_ARRAY && a->cls == TYPE_SLICE) { |
|
| 427 | + | if (a->info.slc.mut) { |
|
| 428 | + | goto error; |
|
| 429 | + | } |
|
| 430 | + | type_t *unified = type_unify( |
|
| 431 | + | t, a->info.slc.elem, b->info.ary.elem, NULL, coerce, NULL |
|
| 432 | + | ); |
|
| 433 | + | if (unified && unified == b->info.ary.elem) { |
|
| 434 | + | return b->slice; /* Convert array to its slice type */ |
|
| 435 | + | } |
|
| 436 | + | goto error; |
|
| 437 | + | } |
|
| 438 | + | if (a->cls == TYPE_FN && b->cls == TYPE_FN) { |
|
| 439 | + | usize nparams = a->info.fun.nparams; |
|
| 440 | + | if (b->info.fun.nparams != nparams) { |
|
| 441 | + | goto error; |
|
| 442 | + | } |
|
| 443 | + | for (usize i = 0; i < nparams; i++) { |
|
| 444 | + | type_t *pa = a->info.fun.params[i]; |
|
| 445 | + | type_t *pb = b->info.fun.params[i]; |
|
| 446 | + | ||
| 447 | + | if (pa != pb) |
|
| 448 | + | goto error; |
|
| 449 | + | } |
|
| 450 | + | if (a->info.fun.ret != b->info.fun.ret) |
|
| 451 | + | goto error; |
|
| 452 | + | ||
| 453 | + | return a; |
|
| 454 | + | } |
|
| 455 | + | ||
| 456 | + | error: |
|
| 457 | + | return NULL; |
|
| 458 | + | } |
|
| 459 | + | ||
| 460 | + | static type_t *resolve_throw(resolve_t *t, node_t *n) { |
|
| 461 | + | type_t *fn_ret = t->fn->node->type->info.fun.ret; |
|
| 462 | + | type_t *err_type = fn_ret->info.res.err; |
|
| 463 | + | ||
| 464 | + | if (!resolve_node(t, n->val.throw_stmt.expr, err_type)) |
|
| 465 | + | return NULL; |
|
| 466 | + | ||
| 467 | + | return (n->type = fn_ret); |
|
| 468 | + | } |
|
| 469 | + | ||
| 470 | + | static type_t *resolve_try_expr(resolve_t *t, node_t *n, type_t *expected) { |
|
| 471 | + | bool optional = n->val.try_expr.optional; |
|
| 472 | + | node_t *expr = n->val.try_expr.expr; |
|
| 473 | + | node_t *catch_expr = n->val.try_expr.catch_expr; |
|
| 474 | + | ||
| 475 | + | resolve_ctx_t pctx = t->ctx; |
|
| 476 | + | t->ctx = TC_CTX_TRY; |
|
| 477 | + | type_t *expr_type = resolve_node(t, expr, NULL); |
|
| 478 | + | t->ctx = pctx; |
|
| 479 | + | ||
| 480 | + | if (!expr_type) |
|
| 481 | + | return NULL; |
|
| 482 | + | type_t *payload = expr_type->info.res.payload; |
|
| 483 | + | ||
| 484 | + | /* `try?` converts errors to nil and returns an optional type. */ |
|
| 485 | + | if (optional) { |
|
| 486 | + | if (payload->cls != TYPE_OPT) { |
|
| 487 | + | payload = alloc_opt_type(t, payload); |
|
| 488 | + | } |
|
| 489 | + | return (n->type = payload); |
|
| 490 | + | } |
|
| 491 | + | ||
| 492 | + | if (catch_expr) { |
|
| 493 | + | node_t *catch_binding = catch_expr->val.catch_clause.binding; |
|
| 494 | + | node_t *catch_body = catch_expr->val.catch_clause.body; |
|
| 495 | + | type_t *err_type = expr_type->info.res.err; |
|
| 496 | + | ||
| 497 | + | /* If there's a binding, create a scope and add the error variable. */ |
|
| 498 | + | if (catch_binding) { |
|
| 499 | + | catch_expr->val.catch_clause.scope = symtab_scope(t->scope, NULL); |
|
| 500 | + | t->scope = catch_expr->val.catch_clause.scope; |
|
| 501 | + | ||
| 502 | + | catch_binding->type = err_type; |
|
| 503 | + | if (!symbol_add(t, catch_binding, catch_binding)) |
|
| 504 | + | return NULL; |
|
| 505 | + | ||
| 506 | + | catch_binding->sym->e.var.typ = err_type; |
|
| 507 | + | catch_binding->sym->e.var.align = err_type->align; |
|
| 508 | + | catch_binding->sym->scope = t->scope; |
|
| 509 | + | } |
|
| 510 | + | type_t *catch_type = resolve_node(t, catch_body, NULL); |
|
| 511 | + | ||
| 512 | + | if (catch_binding) { |
|
| 513 | + | t->scope = t->scope->parent; |
|
| 514 | + | } |
|
| 515 | + | if (!catch_type) |
|
| 516 | + | return NULL; |
|
| 517 | + | if (catch_type->cls != TYPE_NEVER) |
|
| 518 | + | return (n->type = t->types.type_void); |
|
| 519 | + | ||
| 520 | + | /* Divergent catch block: fall through and keep payload type. */ |
|
| 521 | + | } |
|
| 522 | + | ||
| 523 | + | if (expected) { |
|
| 524 | + | type_t *target = expected; |
|
| 525 | + | ||
| 526 | + | if (expected->cls == TYPE_RESULT) |
|
| 527 | + | target = expected->info.res.payload; |
|
| 528 | + | ||
| 529 | + | type_t *unified = type_unify(t, payload, target, n, true, NULL); |
|
| 530 | + | if (unified) |
|
| 531 | + | payload = unified; |
|
| 532 | + | } |
|
| 533 | + | return (n->type = payload); |
|
| 534 | + | } |
|
| 535 | + | ||
| 536 | + | /* Process a submodule declaration */ |
|
| 537 | + | static type_t *resolve_mod_decl(resolve_t *t, node_t *n) { |
|
| 538 | + | node_t *name = n->val.mod_decl.ident; |
|
| 539 | + | ||
| 540 | + | char rel[MAX_PATH_LEN] = { 0 }; |
|
| 541 | + | strncpy(rel, name->val.ident.name, name->val.ident.length); |
|
| 542 | + | ||
| 543 | + | /* Convert to path relative to current module and find it */ |
|
| 544 | + | module_t *submod = |
|
| 545 | + | module_manager_find_relative(t->mm, t->module->path, rel); |
|
| 546 | + | if (!submod) |
|
| 547 | + | return NULL; |
|
| 548 | + | symbol_t *sym = symtab_scope_lookup( |
|
| 549 | + | t->scope, name->val.ident.name, name->val.ident.length, SYM_MODULE |
|
| 550 | + | ); |
|
| 551 | + | if (sym) { |
|
| 552 | + | n->sym = sym; |
|
| 553 | + | } else { |
|
| 554 | + | if (!symbol_add(t, name, n)) { /* Add module to current scope */ |
|
| 555 | + | return NULL; |
|
| 556 | + | } |
|
| 557 | + | } |
|
| 558 | + | if (!resolve_decls(t, submod)) { |
|
| 559 | + | return NULL; |
|
| 560 | + | } |
|
| 561 | + | /* For mod declarations, also do full type checking */ |
|
| 562 | + | if (!resolve_mod_def(t, submod)) { |
|
| 563 | + | return NULL; |
|
| 564 | + | } |
|
| 565 | + | n->sym->e.mod = submod; |
|
| 566 | + | n->sym->e.mod->attribs = n->val.mod_decl.attribs |
|
| 567 | + | ? n->val.mod_decl.attribs->val.attrib |
|
| 568 | + | : ATTRIB_NONE; |
|
| 569 | + | module_path(submod->qualified, t->module->qualified); |
|
| 570 | + | module_qualify(submod->qualified, name); |
|
| 571 | + | ||
| 572 | + | return (n->type = t->types.type_void); |
|
| 573 | + | } |
|
| 574 | + | ||
| 575 | + | /* Helper function to look up a symbol in a module's scope */ |
|
| 576 | + | static type_t *module_lookup( |
|
| 577 | + | resolve_t *t, node_t *n, node_t *child, module_t *module |
|
| 578 | + | ) { |
|
| 579 | + | /* If the module hasn't been checked yet, check it on-demand. |
|
| 580 | + | * This allows parent modules to reference submodule types. */ |
|
| 581 | + | if (!module->scope && !module->declared && |
|
| 582 | + | module->state != MODULE_STATE_VISITING) { |
|
| 583 | + | if (!resolve_decls(t, module)) { |
|
| 584 | + | return NULL; |
|
| 585 | + | } |
|
| 586 | + | } |
|
| 587 | + | if (!module->scope) |
|
| 588 | + | return NULL; |
|
| 589 | + | ||
| 590 | + | symbol_t *sym = symtab_scope_lookup( |
|
| 591 | + | module->scope, child->val.ident.name, child->val.ident.length, SYM_ANY |
|
| 592 | + | ); |
|
| 593 | + | if (!sym) |
|
| 594 | + | return NULL; |
|
| 595 | + | n->sym = sym; |
|
| 596 | + | n->type = sym->node->type; |
|
| 597 | + | ||
| 598 | + | return n->type; |
|
| 599 | + | } |
|
| 600 | + | ||
| 601 | + | static symbol_t *union_variant_lookup(type_t *typ, node_t *n) { |
|
| 602 | + | for (usize i = 0; i < typ->info.uni.nvariants; i++) { |
|
| 603 | + | symbol_t *v = typ->info.uni.variants[i]; |
|
| 604 | + | if (ident_eq(n, v->name, v->length)) { |
|
| 605 | + | return v; |
|
| 606 | + | } |
|
| 607 | + | } |
|
| 608 | + | return NULL; |
|
| 609 | + | } |
|
| 610 | + | ||
| 611 | + | /* Look up a record field by name. */ |
|
| 612 | + | static symbol_t *record_field_lookup(type_t *typ, node_t *n) { |
|
| 613 | + | for (usize i = 0; i < typ->info.srt.nfields; i++) { |
|
| 614 | + | symbol_t *f = typ->info.srt.fields[i]; |
|
| 615 | + | if (ident_eq(n, f->name, f->length)) { |
|
| 616 | + | return f; |
|
| 617 | + | } |
|
| 618 | + | } |
|
| 619 | + | return NULL; |
|
| 620 | + | } |
|
| 621 | + | ||
| 622 | + | /* Add a field to a record type. */ |
|
| 623 | + | static bool record_field_add( |
|
| 624 | + | resolve_t *t, |
|
| 625 | + | type_t *rec_typ, |
|
| 626 | + | node_t *field, |
|
| 627 | + | node_t *field_ident, |
|
| 628 | + | type_t *field_typ |
|
| 629 | + | ) { |
|
| 630 | + | (void)t; |
|
| 631 | + | const char *field_name; |
|
| 632 | + | usize field_len; |
|
| 633 | + | char tuple_name[16]; |
|
| 634 | + | ||
| 635 | + | if (field_ident) { |
|
| 636 | + | field_name = field_ident->val.ident.name; |
|
| 637 | + | field_len = field_ident->val.ident.length; |
|
| 638 | + | } else { |
|
| 639 | + | /* Tuple field: generate synthetic name based on index */ |
|
| 640 | + | snprintf( |
|
| 641 | + | tuple_name, |
|
| 642 | + | sizeof(tuple_name), |
|
| 643 | + | "%u", |
|
| 644 | + | (unsigned)rec_typ->info.srt.nfields |
|
| 645 | + | ); |
|
| 646 | + | field_name = strings_alloc(tuple_name); |
|
| 647 | + | field_len = strlen(field_name); |
|
| 648 | + | } |
|
| 649 | + | field->type = field_typ; |
|
| 650 | + | ||
| 651 | + | /* Nb. Since we're modifying the record size as we add fields, we always |
|
| 652 | + | * add new fields at the end of the record. */ |
|
| 653 | + | i32 field_align = field_typ->align; |
|
| 654 | + | i32 aligned_offset = align(rec_typ->size, field_align); |
|
| 655 | + | ||
| 656 | + | /* Keep track of packed size */ |
|
| 657 | + | rec_typ->info.srt.packedsize += field_typ->size; |
|
| 658 | + | ||
| 659 | + | field->sym = alloc_symbol((symbol_t){ |
|
| 660 | + | .name = field_name, |
|
| 661 | + | .length = field_len, |
|
| 662 | + | .node = field, |
|
| 663 | + | .kind = SYM_FIELD, |
|
| 664 | + | .e.field = { |
|
| 665 | + | .typ = field_typ, |
|
| 666 | + | .offset = (i32)aligned_offset, |
|
| 667 | + | }, |
|
| 668 | + | }); |
|
| 669 | + | /* Update record size to include this new field */ |
|
| 670 | + | rec_typ->size = aligned_offset + field_typ->size; |
|
| 671 | + | ||
| 672 | + | /* Update record alignment to be the maximum of its current alignment |
|
| 673 | + | * and the new field's alignment */ |
|
| 674 | + | rec_typ->align = |
|
| 675 | + | (rec_typ->align > field_typ->align) ? rec_typ->align : field_typ->align; |
|
| 676 | + | /* Add field to record type. */ |
|
| 677 | + | rec_typ->info.srt.fields[rec_typ->info.srt.nfields++] = field->sym; |
|
| 678 | + | ||
| 679 | + | return true; |
|
| 680 | + | } |
|
| 681 | + | ||
| 682 | + | static bool update_i32(i32 *dst, i32 value) { |
|
| 683 | + | if (*dst == value) |
|
| 684 | + | return false; |
|
| 685 | + | *dst = value; |
|
| 686 | + | return true; |
|
| 687 | + | } |
|
| 688 | + | ||
/* Store `value` into `*dst`. Returns `true` when the stored flag differed
 * from `value`, `false` when nothing had to be written. */
static bool update_bool(bool *dst, bool value) {
    bool differs = (*dst != value);

    if (differs)
        *dst = value;

    return differs;
}
|
| 695 | + | ||
| 696 | + | static bool update_record_layout(type_t *strct_typ) { |
|
| 697 | + | i32 size = 0; |
|
| 698 | + | i32 record_align = 1; |
|
| 699 | + | u32 packedsize = 0; |
|
| 700 | + | bool changed = false; |
|
| 701 | + | ||
| 702 | + | for (usize i = 0; i < strct_typ->info.srt.nfields; i++) { |
|
| 703 | + | symbol_t *field_sym = strct_typ->info.srt.fields[i]; |
|
| 704 | + | type_t *field_type = field_sym->e.field.typ; |
|
| 705 | + | ||
| 706 | + | i32 field_align = field_type->align ? field_type->align : DEFAULT_ALIGN; |
|
| 707 | + | i32 field_size = field_type->size; |
|
| 708 | + | i32 offset = align(size, field_align); |
|
| 709 | + | ||
| 710 | + | if (field_sym->e.field.offset != offset) { |
|
| 711 | + | field_sym->e.field.offset = offset; |
|
| 712 | + | changed = true; |
|
| 713 | + | } |
|
| 714 | + | ||
| 715 | + | size = offset + field_size; |
|
| 716 | + | if (field_align > record_align) |
|
| 717 | + | record_align = field_align; |
|
| 718 | + | ||
| 719 | + | packedsize += (u32)field_size; |
|
| 720 | + | } |
|
| 721 | + | /* Round overall size up to record alignment to match C layout. */ |
|
| 722 | + | size = align(size, record_align); |
|
| 723 | + | ||
| 724 | + | changed |= update_i32(&strct_typ->size, size); |
|
| 725 | + | changed |= update_i32(&strct_typ->align, record_align); |
|
| 726 | + | if (strct_typ->info.srt.packedsize != packedsize) { |
|
| 727 | + | strct_typ->info.srt.packedsize = packedsize; |
|
| 728 | + | changed = true; |
|
| 729 | + | } |
|
| 730 | + | ||
| 731 | + | return changed; |
|
| 732 | + | } |
|
| 733 | + | ||
| 734 | + | static bool update_array_layout(type_t *typ) { |
|
| 735 | + | type_t *elem = typ->info.ary.elem; |
|
| 736 | + | if (!elem) |
|
| 737 | + | return false; |
|
| 738 | + | ||
| 739 | + | i32 elem_align = elem->align ? elem->align : DEFAULT_ALIGN; |
|
| 740 | + | i32 size = elem->size * (i32)typ->info.ary.length; |
|
| 741 | + | bool changed = false; |
|
| 742 | + | ||
| 743 | + | changed |= update_i32(&typ->size, size); |
|
| 744 | + | changed |= update_i32(&typ->align, elem_align); |
|
| 745 | + | ||
| 746 | + | return changed; |
|
| 747 | + | } |
|
| 748 | + | ||
| 749 | + | static bool update_opt_layout(type_t *typ) { |
|
| 750 | + | type_t *elem = typ->info.opt.elem; |
|
| 751 | + | if (!elem) |
|
| 752 | + | return false; |
|
| 753 | + | ||
| 754 | + | i32 elem_align = elem->align ? elem->align : DEFAULT_ALIGN; |
|
| 755 | + | i32 alignment = max(elem_align, TAG_SIZE); |
|
| 756 | + | i32 val_offset = align(TAG_SIZE, elem_align); |
|
| 757 | + | i32 size = align(val_offset + elem->size, alignment); |
|
| 758 | + | bool changed = false; |
|
| 759 | + | ||
| 760 | + | changed |= update_i32(&typ->size, size); |
|
| 761 | + | changed |= update_i32(&typ->align, alignment); |
|
| 762 | + | ||
| 763 | + | return changed; |
|
| 764 | + | } |
|
| 765 | + | ||
| 766 | + | static bool update_result_layout(resolve_t *t, type_t *typ) { |
|
| 767 | + | type_t *payload = typ->info.res.payload; |
|
| 768 | + | type_t *err = typ->info.res.err; |
|
| 769 | + | ||
| 770 | + | i32 payload_align = |
|
| 771 | + | payload == t->types.type_void ? TAG_SIZE : payload->align; |
|
| 772 | + | i32 err_align = err == t->types.type_void ? TAG_SIZE : err->align; |
|
| 773 | + | i32 alignment = max(max(payload_align, err_align), TAG_SIZE); |
|
| 774 | + | ||
| 775 | + | i32 payload_size = payload == t->types.type_void ? 0 : payload->size; |
|
| 776 | + | i32 err_size = err == t->types.type_void ? 0 : err->size; |
|
| 777 | + | i32 val_offset = align(TAG_SIZE, alignment); |
|
| 778 | + | i32 value_size = align(max(payload_size, err_size), alignment); |
|
| 779 | + | i32 size = val_offset + value_size; |
|
| 780 | + | bool changed = false; |
|
| 781 | + | ||
| 782 | + | changed |= update_i32(&typ->size, size); |
|
| 783 | + | changed |= update_i32(&typ->align, alignment); |
|
| 784 | + | ||
| 785 | + | return changed; |
|
| 786 | + | } |
|
| 787 | + | ||
| 788 | + | static bool update_enum_layout(type_t *typ) { |
|
| 789 | + | i32 new_align = typ->info.uni.base ? typ->info.uni.base->align : 0; |
|
| 790 | + | bool has_payload = false; |
|
| 791 | + | i32 variantsize = 0; |
|
| 792 | + | bool changed = false; |
|
| 793 | + | ||
| 794 | + | if (new_align <= 0) |
|
| 795 | + | new_align = TAG_SIZE; |
|
| 796 | + | ||
| 797 | + | for (usize i = 0; i < typ->info.uni.nvariants; i++) { |
|
| 798 | + | symbol_t *variant_sym = typ->info.uni.variants[i]; |
|
| 799 | + | if (!variant_sym || !variant_sym->node) |
|
| 800 | + | continue; |
|
| 801 | + | ||
| 802 | + | node_t *variant_node = variant_sym->node; |
|
| 803 | + | type_t *payload = variant_node->type; |
|
| 804 | + | ||
| 805 | + | if (!payload || payload->cls == TYPE_VOID) |
|
| 806 | + | continue; |
|
| 807 | + | ||
| 808 | + | has_payload = true; |
|
| 809 | + | if (payload->size > variantsize) |
|
| 810 | + | variantsize = payload->size; |
|
| 811 | + | if (payload->align > new_align) |
|
| 812 | + | new_align = payload->align; |
|
| 813 | + | } |
|
| 814 | + | ||
| 815 | + | if (new_align <= 0) |
|
| 816 | + | new_align = TAG_SIZE; |
|
| 817 | + | ||
| 818 | + | i32 size = typ->info.uni.base ? typ->info.uni.base->size : TAG_SIZE; |
|
| 819 | + | if (has_payload) { |
|
| 820 | + | i32 val_offset = align(TAG_SIZE, new_align); |
|
| 821 | + | i32 aligned_payload = |
|
| 822 | + | variantsize > 0 ? align(variantsize, new_align) : 0; |
|
| 823 | + | size = val_offset + aligned_payload; |
|
| 824 | + | } |
|
| 825 | + | changed |= update_bool(&typ->info.uni.has_payload, has_payload); |
|
| 826 | + | changed |= update_i32(&typ->info.uni.variantsize, variantsize); |
|
| 827 | + | changed |= update_i32(&typ->align, new_align); |
|
| 828 | + | changed |= update_i32(&typ->size, size); |
|
| 829 | + | ||
| 830 | + | return changed; |
|
| 831 | + | } |
|
| 832 | + | ||
| 833 | + | static bool update_type_layout(resolve_t *t, type_t *typ) { |
|
| 834 | + | switch (typ->cls) { |
|
| 835 | + | case TYPE_ARRAY: |
|
| 836 | + | return update_array_layout(typ); |
|
| 837 | + | case TYPE_UNION: |
|
| 838 | + | return update_enum_layout(typ); |
|
| 839 | + | case TYPE_RECORD: |
|
| 840 | + | return update_record_layout(typ); |
|
| 841 | + | case TYPE_OPT: |
|
| 842 | + | return update_opt_layout(typ); |
|
| 843 | + | case TYPE_RESULT: |
|
| 844 | + | return update_result_layout(t, typ); |
|
| 845 | + | default: |
|
| 846 | + | return false; |
|
| 847 | + | } |
|
| 848 | + | } |
|
| 849 | + | ||
| 850 | + | static void finalize_type_layout(resolve_t *t) { |
|
| 851 | + | usize max_passes = t->types.nobjects ? t->types.nobjects : 1; |
|
| 852 | + | for (usize pass = 0; pass < max_passes; pass++) { |
|
| 853 | + | bool changed = false; |
|
| 854 | + | ||
| 855 | + | for (usize i = 0; i < t->types.nobjects; i++) { |
|
| 856 | + | type_t *typ = &t->types.objects[i]; |
|
| 857 | + | if (update_type_layout(t, typ)) |
|
| 858 | + | changed = true; |
|
| 859 | + | } |
|
| 860 | + | if (!changed) |
|
| 861 | + | return; |
|
| 862 | + | } |
|
| 863 | + | bail("type layout failed to stabilize"); |
|
| 864 | + | } |
|
| 865 | + | ||
| 866 | + | static bool declare_enum(resolve_t *t, node_t *n) { |
|
| 867 | + | union_decl_t *decl = &n->val.union_decl; |
|
| 868 | + | ||
| 869 | + | if (!n->sym) { |
|
| 870 | + | if (!symbol_add(t, decl->name, n)) |
|
| 871 | + | return false; |
|
| 872 | + | if (!n->sym) |
|
| 873 | + | return false; |
|
| 874 | + | } |
|
| 875 | + | if (!n->type) { |
|
| 876 | + | type_t *typ = alloc_union_type(t, decl); |
|
| 877 | + | n->sym->e.typ.info = n->type = typ; |
|
| 878 | + | } else if (!n->sym->e.typ.info) { |
|
| 879 | + | n->sym->e.typ.info = n->type; |
|
| 880 | + | } |
|
| 881 | + | return true; |
|
| 882 | + | } |
|
| 883 | + | ||
| 884 | + | static bool declare_record(resolve_t *t, node_t *n) { |
|
| 885 | + | record_decl_t *decl = &n->val.record_decl; |
|
| 886 | + | ||
| 887 | + | if (!n->sym) { |
|
| 888 | + | if (!symbol_add(t, decl->name, n)) |
|
| 889 | + | return false; |
|
| 890 | + | if (!n->sym) |
|
| 891 | + | return false; |
|
| 892 | + | } |
|
| 893 | + | if (!n->type) { |
|
| 894 | + | type_t *strct_typ = alloc_record_type(t, decl); |
|
| 895 | + | n->sym->e.typ.info = n->type = strct_typ; |
|
| 896 | + | } else if (!n->sym->e.typ.info) { |
|
| 897 | + | n->sym->e.typ.info = n->type; |
|
| 898 | + | } |
|
| 899 | + | return true; |
|
| 900 | + | } |
|
| 901 | + | ||
| 902 | + | static bool resolve_const_usize(resolve_t *t, node_t *expr, usize *value) { |
|
| 903 | + | if (expr->cls == NODE_NUMBER) { |
|
| 904 | + | *value = expr->val.number.value.u; |
|
| 905 | + | return true; |
|
| 906 | + | } |
|
| 907 | + | symbol_t *sym = expr->sym; |
|
| 908 | + | ||
| 909 | + | if (!sym && (expr->cls == NODE_IDENT || expr->cls == NODE_SCOPE)) { |
|
| 910 | + | sym = resolve_name(t, expr, SYM_CONSTANT); |
|
| 911 | + | ||
| 912 | + | if (!sym) |
|
| 913 | + | return false; |
|
| 914 | + | } |
|
| 915 | + | ||
| 916 | + | if (!sym || sym->kind != SYM_CONSTANT || !sym->node || |
|
| 917 | + | sym->node->cls != NODE_CONST) |
|
| 918 | + | return false; |
|
| 919 | + | ||
| 920 | + | node_t *value_node = sym->node->val.constant.value; |
|
| 921 | + | if (!value_node || value_node->cls != NODE_NUMBER) |
|
| 922 | + | return false; |
|
| 923 | + | *value = value_node->val.number.value.u; |
|
| 924 | + | ||
| 925 | + | return true; |
|
| 926 | + | } |
|
| 927 | + | ||
| 928 | + | static bool resolve_record_literal_fields( |
|
| 929 | + | resolve_t *t, node_t *lit, type_t *record_type |
|
| 930 | + | ) { |
|
| 931 | + | node_t **lit_fields = |
|
| 932 | + | nodespan_ptrs(&t->module->parser, lit->val.record_lit.fields); |
|
| 933 | + | for (usize i = 0; i < lit->val.record_lit.fields.len; i++) { |
|
| 934 | + | node_t *field_init = lit_fields[i]; |
|
| 935 | + | record_lit_field_t *init = &field_init->val.record_lit_field; |
|
| 936 | + | ||
| 937 | + | symbol_t *field_sym = record_field_lookup(record_type, init->name); |
|
| 938 | + | if (!field_sym) |
|
| 939 | + | return false; |
|
| 940 | + | ||
| 941 | + | type_t *field_typ = field_sym->e.field.typ; |
|
| 942 | + | ||
| 943 | + | if (!resolve_node(t, init->value, field_typ)) |
|
| 944 | + | return false; |
|
| 945 | + | ||
| 946 | + | field_init->sym = field_sym; |
|
| 947 | + | } |
|
| 948 | + | return true; |
|
| 949 | + | } |
|
| 950 | + | ||
| 951 | + | static bool resolve_record_literal_types( |
|
| 952 | + | resolve_t *t, |
|
| 953 | + | node_t *type_node, |
|
| 954 | + | type_t *expected, |
|
| 955 | + | type_t **out_record, |
|
| 956 | + | type_t **out_result, |
|
| 957 | + | symbol_t **out_variant |
|
| 958 | + | ) { |
|
| 959 | + | type_t *record_type = NULL; |
|
| 960 | + | type_t *result_type = NULL; |
|
| 961 | + | symbol_t *variant_sym = NULL; |
|
| 962 | + | ||
| 963 | + | /* Explicit type annotation: either |
|
| 964 | + | * `Type { ... }` or |
|
| 965 | + | * `module::Type { ... }`, or |
|
| 966 | + | * `Enum::Variant { ... }` */ |
|
| 967 | + | if (type_node) { |
|
| 968 | + | switch (type_node->cls) { |
|
| 969 | + | case NODE_SCOPE: |
|
| 970 | + | case NODE_IDENT: { |
|
| 971 | + | symbol_t *sym = resolve_name(t, type_node, SYM_ANY); |
|
| 972 | + | if (!sym) |
|
| 973 | + | return false; |
|
| 974 | + | ||
| 975 | + | type_t *resolved = type_node->type; |
|
| 976 | + | if (!resolved && sym->node) |
|
| 977 | + | resolved = sym->node->type; |
|
| 978 | + | ||
| 979 | + | if (type_node->cls == NODE_SCOPE && sym->kind == SYM_VARIANT && |
|
| 980 | + | sym->node->cls == NODE_UNION_VARIANT) { |
|
| 981 | + | if (!resolved || resolved->cls != TYPE_UNION) |
|
| 982 | + | return false; |
|
| 983 | + | ||
| 984 | + | type_t *variant_type = sym->node->type; |
|
| 985 | + | if (!variant_type || variant_type->cls != TYPE_RECORD) |
|
| 986 | + | return false; |
|
| 987 | + | ||
| 988 | + | record_type = variant_type; |
|
| 989 | + | result_type = resolved; |
|
| 990 | + | variant_sym = sym; |
|
| 991 | + | ||
| 992 | + | break; |
|
| 993 | + | } |
|
| 994 | + | ||
| 995 | + | if (!resolved) { |
|
| 996 | + | resolved = resolve_type(t, type_node); |
|
| 997 | + | if (!resolved) |
|
| 998 | + | return false; |
|
| 999 | + | } |
|
| 1000 | + | ||
| 1001 | + | if (resolved->cls != TYPE_RECORD) |
|
| 1002 | + | return false; |
|
| 1003 | + | ||
| 1004 | + | record_type = resolved; |
|
| 1005 | + | result_type = record_type; |
|
| 1006 | + | ||
| 1007 | + | break; |
|
| 1008 | + | } |
|
| 1009 | + | case NODE_RECORD_TYPE: { |
|
| 1010 | + | type_t *resolved = resolve_type(t, type_node); |
|
| 1011 | + | if (!resolved) |
|
| 1012 | + | return false; |
|
| 1013 | + | ||
| 1014 | + | if (resolved->cls != TYPE_RECORD) |
|
| 1015 | + | return false; |
|
| 1016 | + | ||
| 1017 | + | record_type = resolved; |
|
| 1018 | + | result_type = record_type; |
|
| 1019 | + | ||
| 1020 | + | break; |
|
| 1021 | + | } |
|
| 1022 | + | default: |
|
| 1023 | + | return false; |
|
| 1024 | + | } |
|
| 1025 | + | } else { |
|
| 1026 | + | /* No explicit type: fall back to the expected type from context */ |
|
| 1027 | + | if (!expected) |
|
| 1028 | + | return false; |
|
| 1029 | + | ||
| 1030 | + | if (expected->cls == TYPE_OPT) |
|
| 1031 | + | expected = expected->info.opt.elem; |
|
| 1032 | + | ||
| 1033 | + | if (expected->cls != TYPE_RECORD) |
|
| 1034 | + | return false; |
|
| 1035 | + | ||
| 1036 | + | record_type = expected; |
|
| 1037 | + | result_type = record_type; |
|
| 1038 | + | } |
|
| 1039 | + | ||
| 1040 | + | *out_record = record_type; |
|
| 1041 | + | *out_result = result_type; |
|
| 1042 | + | if (out_variant) |
|
| 1043 | + | *out_variant = variant_sym; |
|
| 1044 | + | ||
| 1045 | + | return true; |
|
| 1046 | + | } |
|
| 1047 | + | ||
| 1048 | + | static bool anonymous_record_equals( |
|
| 1049 | + | resolve_t *t, type_t *typ, record_type_t *stype |
|
| 1050 | + | ) { |
|
| 1051 | + | if (typ->info.srt.nfields != stype->fields.len) |
|
| 1052 | + | return false; |
|
| 1053 | + | ||
| 1054 | + | node_t **fields = nodespan_ptrs(&t->module->parser, stype->fields); |
|
| 1055 | + | for (usize i = 0; i < stype->fields.len; i++) { |
|
| 1056 | + | node_t *field_node = fields[i]; |
|
| 1057 | + | symbol_t *field_sym = typ->info.srt.fields[i]; |
|
| 1058 | + | ||
| 1059 | + | if (field_node->type != field_sym->e.field.typ) |
|
| 1060 | + | return false; |
|
| 1061 | + | ||
| 1062 | + | if (!ident_eq( |
|
| 1063 | + | field_node->val.var.ident, field_sym->name, field_sym->length |
|
| 1064 | + | )) |
|
| 1065 | + | return false; |
|
| 1066 | + | } |
|
| 1067 | + | return true; |
|
| 1068 | + | } |
|
| 1069 | + | ||
| 1070 | + | static type_t *anonymous_record_lookup(resolve_t *t, record_type_t *stype) { |
|
| 1071 | + | for (usize i = 0; i < t->types.nobjects; i++) { |
|
| 1072 | + | type_t *typ = &t->types.objects[i]; |
|
| 1073 | + | ||
| 1074 | + | if (typ->cls != TYPE_RECORD || !typ->info.srt.anonymous) |
|
| 1075 | + | continue; |
|
| 1076 | + | if (anonymous_record_equals(t, typ, stype)) |
|
| 1077 | + | return typ; |
|
| 1078 | + | } |
|
| 1079 | + | return NULL; |
|
| 1080 | + | } |
|
| 1081 | + | ||
| 1082 | + | static bool union_variant_add( |
|
| 1083 | + | resolve_t *t, type_t *typ, node_t *v, usize idx, i32 *iota |
|
| 1084 | + | ) { |
|
| 1085 | + | (void)idx; |
|
| 1086 | + | union_variant_t *variant = &v->val.union_variant; |
|
| 1087 | + | const char *name = variant->name->val.ident.name; |
|
| 1088 | + | const usize length = variant->name->val.ident.length; |
|
| 1089 | + | ||
| 1090 | + | symbol_t *sym = alloc_symbol((symbol_t){ |
|
| 1091 | + | .name = name, |
|
| 1092 | + | .length = length, |
|
| 1093 | + | .node = v, |
|
| 1094 | + | .kind = SYM_VARIANT, |
|
| 1095 | + | }); |
|
| 1096 | + | ||
| 1097 | + | if (variant->type) { |
|
| 1098 | + | type_t *payload = resolve_type(t, variant->type); |
|
| 1099 | + | if (!payload) |
|
| 1100 | + | return false; |
|
| 1101 | + | ||
| 1102 | + | v->type = payload; |
|
| 1103 | + | variant->value = *iota; |
|
| 1104 | + | *iota = variant->value + 1; |
|
| 1105 | + | } else { |
|
| 1106 | + | v->type = t->types.type_void; |
|
| 1107 | + | ||
| 1108 | + | if (variant->value_expr) { |
|
| 1109 | + | if (!resolve_number(t, variant->value_expr, t->types.type_i32)) |
|
| 1110 | + | return false; |
|
| 1111 | + | ||
| 1112 | + | variant->value = variant->value_expr->val.number.value.i; |
|
| 1113 | + | *iota = variant->value + 1; |
|
| 1114 | + | } else { |
|
| 1115 | + | variant->value = *iota; |
|
| 1116 | + | *iota = variant->value + 1; |
|
| 1117 | + | } |
|
| 1118 | + | } |
|
| 1119 | + | assert(typ->info.uni.nvariants < MAX_UNION_VARIANTS); |
|
| 1120 | + | typ->info.uni.variants[typ->info.uni.nvariants++] = sym; |
|
| 1121 | + | update_enum_layout(typ); |
|
| 1122 | + | ||
| 1123 | + | return true; |
|
| 1124 | + | } |
|
| 1125 | + | ||
| 1126 | + | /* Allocate a type. */ |
|
| 1127 | + | static type_t *alloc_type( |
|
| 1128 | + | resolve_t *t, |
|
| 1129 | + | typeclass_t kind, |
|
| 1130 | + | const char *name, |
|
| 1131 | + | usize namelen, |
|
| 1132 | + | i32 size, |
|
| 1133 | + | i32 align |
|
| 1134 | + | ) { |
|
| 1135 | + | if (t->types.nobjects >= MAX_TYPES) { |
|
| 1136 | + | bail("type overflow: too many types"); |
|
| 1137 | + | return NULL; |
|
| 1138 | + | } |
|
| 1139 | + | type_t *slot = &t->types.objects[t->types.nobjects++]; |
|
| 1140 | + | ||
| 1141 | + | slot->name = name; |
|
| 1142 | + | slot->namelen = namelen; |
|
| 1143 | + | slot->cls = kind; |
|
| 1144 | + | slot->size = size; |
|
| 1145 | + | slot->align = align; |
|
| 1146 | + | slot->ptr = NULL; |
|
| 1147 | + | slot->ptr_mut = NULL; |
|
| 1148 | + | slot->slice = NULL; |
|
| 1149 | + | slot->slice_mut = NULL; |
|
| 1150 | + | ||
| 1151 | + | /* For non-pointer types, allocate a pointer type and |
|
| 1152 | + | * link it to the target type. */ |
|
| 1153 | + | if (kind != TYPE_PTR) { |
|
| 1154 | + | slot->ptr = alloc_ptr_type(t, slot, false); |
|
| 1155 | + | } |
|
| 1156 | + | return slot; |
|
| 1157 | + | } |
|
| 1158 | + | ||
| 1159 | + | /* Allocate a slice type. |
|
| 1160 | + | * `base` can be `NULL` for things like `*[u8]` from string literals. */ |
|
| 1161 | + | static type_t *alloc_slice_type( |
|
| 1162 | + | resolve_t *t, type_t *elem, type_t *base, bool mut |
|
| 1163 | + | ) { |
|
| 1164 | + | if (base) { |
|
| 1165 | + | if (!mut && base->slice) { |
|
| 1166 | + | return base->slice; |
|
| 1167 | + | } |
|
| 1168 | + | if (mut && base->slice_mut) { |
|
| 1169 | + | return base->slice_mut; |
|
| 1170 | + | } |
|
| 1171 | + | } else { |
|
| 1172 | + | if (!mut && elem->slice) { |
|
| 1173 | + | return elem->slice; |
|
| 1174 | + | } |
|
| 1175 | + | if (mut && elem->slice_mut) { |
|
| 1176 | + | return elem->slice_mut; |
|
| 1177 | + | } |
|
| 1178 | + | } |
|
| 1179 | + | ||
| 1180 | + | char buf[MAX_STRING_LEN] = { 0 }; |
|
| 1181 | + | if (mut) { |
|
| 1182 | + | snprintf( |
|
| 1183 | + | buf, MAX_STRING_LEN, "*mut [%.*s]", (int)elem->namelen, elem->name |
|
| 1184 | + | ); |
|
| 1185 | + | } else { |
|
| 1186 | + | snprintf( |
|
| 1187 | + | buf, MAX_STRING_LEN, "*[%.*s]", (int)elem->namelen, elem->name |
|
| 1188 | + | ); |
|
| 1189 | + | } |
|
| 1190 | + | const char *name = strings_alloc(buf); |
|
| 1191 | + | ||
| 1192 | + | type_t *typ = |
|
| 1193 | + | alloc_type(t, TYPE_SLICE, name, strlen(name), WORD_SIZE * 2, WORD_SIZE); |
|
| 1194 | + | typ->info.slc.elem = elem; |
|
| 1195 | + | typ->info.slc.base = base; |
|
| 1196 | + | typ->info.slc.mut = mut; |
|
| 1197 | + | ||
| 1198 | + | if (base) { |
|
| 1199 | + | if (!mut) { |
|
| 1200 | + | base->slice = typ; |
|
| 1201 | + | } else { |
|
| 1202 | + | base->slice_mut = typ; |
|
| 1203 | + | } |
|
| 1204 | + | } else { |
|
| 1205 | + | if (!mut) { |
|
| 1206 | + | elem->slice = typ; |
|
| 1207 | + | } else { |
|
| 1208 | + | elem->slice_mut = typ; |
|
| 1209 | + | } |
|
| 1210 | + | } |
|
| 1211 | + | return typ; |
|
| 1212 | + | } |
|
| 1213 | + | ||
| 1214 | + | /* Allocate a pointer type. */ |
|
| 1215 | + | static type_t *alloc_ptr_type(resolve_t *t, type_t *base, bool mut) { |
|
| 1216 | + | if (!mut && base->ptr) { |
|
| 1217 | + | return base->ptr; |
|
| 1218 | + | } |
|
| 1219 | + | if (mut && base->ptr_mut) { |
|
| 1220 | + | return base->ptr_mut; |
|
| 1221 | + | } |
|
| 1222 | + | ||
| 1223 | + | char buf[MAX_STRING_LEN] = { 0 }; |
|
| 1224 | + | if (mut) { |
|
| 1225 | + | snprintf( |
|
| 1226 | + | buf, MAX_STRING_LEN, "*mut %.*s", (int)base->namelen, base->name |
|
| 1227 | + | ); |
|
| 1228 | + | } else { |
|
| 1229 | + | snprintf(buf, MAX_STRING_LEN, "*%.*s", (int)base->namelen, base->name); |
|
| 1230 | + | } |
|
| 1231 | + | const char *name = strings_alloc(buf); |
|
| 1232 | + | ||
| 1233 | + | type_t *typ = |
|
| 1234 | + | alloc_type(t, TYPE_PTR, name, strlen(name), WORD_SIZE, WORD_SIZE); |
|
| 1235 | + | typ->info.ptr.target = base; |
|
| 1236 | + | typ->info.ptr.mut = mut; |
|
| 1237 | + | ||
| 1238 | + | if (!mut) { |
|
| 1239 | + | base->ptr = typ; |
|
| 1240 | + | } else { |
|
| 1241 | + | base->ptr_mut = typ; |
|
| 1242 | + | } |
|
| 1243 | + | return typ; |
|
| 1244 | + | } |
|
| 1245 | + | ||
| 1246 | + | /* Allocate an array type. */ |
|
| 1247 | + | static type_t *alloc_array_type(resolve_t *t, type_t *elem, usize length) { |
|
| 1248 | + | /* First check if we already have this array type */ |
|
| 1249 | + | for (usize i = 0; i < t->types.nobjects; i++) { |
|
| 1250 | + | type_t *typ = &t->types.objects[i]; |
|
| 1251 | + | ||
| 1252 | + | if (typ->cls == TYPE_ARRAY && typ->info.ary.elem == elem && |
|
| 1253 | + | typ->info.ary.length == length) { |
|
| 1254 | + | return typ; |
|
| 1255 | + | } |
|
| 1256 | + | } |
|
| 1257 | + | char buf[MAX_STRING_LEN] = { 0 }; |
|
| 1258 | + | snprintf( |
|
| 1259 | + | buf, |
|
| 1260 | + | MAX_STRING_LEN, |
|
| 1261 | + | "[%.*s; %ld]", |
|
| 1262 | + | (int)elem->namelen, |
|
| 1263 | + | elem->name, |
|
| 1264 | + | length |
|
| 1265 | + | ); |
|
| 1266 | + | const char *name = strings_alloc(buf); |
|
| 1267 | + | ||
| 1268 | + | type_t *array_type = alloc_type(t, TYPE_ARRAY, name, strlen(name), 0, 0); |
|
| 1269 | + | ||
| 1270 | + | array_type->info.ary.elem = elem; |
|
| 1271 | + | array_type->info.ary.length = length; |
|
| 1272 | + | update_array_layout(array_type); |
|
| 1273 | + | ||
| 1274 | + | array_type->slice = alloc_slice_type(t, elem, array_type, false); |
|
| 1275 | + | array_type->ptr = alloc_ptr_type(t, array_type, false); |
|
| 1276 | + | ||
| 1277 | + | return array_type; |
|
| 1278 | + | } |
|
| 1279 | + | ||
| 1280 | + | static type_t *alloc_union_type(resolve_t *t, union_decl_t *uni) { |
|
| 1281 | + | type_t *typ = alloc_type( |
|
| 1282 | + | t, |
|
| 1283 | + | TYPE_UNION, |
|
| 1284 | + | uni->name->val.ident.name, |
|
| 1285 | + | uni->name->val.ident.length, |
|
| 1286 | + | WORD_SIZE, |
|
| 1287 | + | WORD_SIZE |
|
| 1288 | + | ); |
|
| 1289 | + | /* TODO: use correct type based on union variants. |
|
| 1290 | + | * For now, default all enums to an `i32` base type. */ |
|
| 1291 | + | typ->info.uni.decl = uni; |
|
| 1292 | + | typ->info.uni.base = t->types.type_i32; |
|
| 1293 | + | typ->info.uni.variants = types_alloc_sympool(&t->types, MAX_UNION_VARIANTS); |
|
| 1294 | + | typ->info.uni.nvariants = 0; |
|
| 1295 | + | typ->info.uni.variantsize = 0; |
|
| 1296 | + | typ->info.uni.has_payload = false; |
|
| 1297 | + | ||
| 1298 | + | return typ; |
|
| 1299 | + | } |
|
| 1300 | + | ||
| 1301 | + | static type_t *alloc_fn_type( |
|
| 1302 | + | resolve_t *t, node_t *n, type_t *ret, usize nparams |
|
| 1303 | + | ) { |
|
| 1304 | + | type_t *type = alloc_type( |
|
| 1305 | + | t, |
|
| 1306 | + | TYPE_FN, |
|
| 1307 | + | n->sym ? n->sym->name : "#fn", |
|
| 1308 | + | n->sym ? n->sym->length : 3, |
|
| 1309 | + | DEFAULT_SIZE, |
|
| 1310 | + | DEFAULT_ALIGN |
|
| 1311 | + | ); |
|
| 1312 | + | type->info.fun.ret = ret ? ret : t->types.type_void; |
|
| 1313 | + | type->info.fun.params = types_alloc_typepool(&t->types, MAX_FN_PARAMS); |
|
| 1314 | + | type->info.fun.throws = types_alloc_typepool(&t->types, MAX_FN_THROWS); |
|
| 1315 | + | type->info.fun.nparams = nparams; |
|
| 1316 | + | type->info.fun.nthrows = 0; |
|
| 1317 | + | ||
| 1318 | + | return (n->type = type); |
|
| 1319 | + | } |
|
| 1320 | + | ||
| 1321 | + | static type_t *alloc_record_type(resolve_t *t, record_decl_t *srt) { |
|
| 1322 | + | type_t *typ = alloc_type( |
|
| 1323 | + | t, |
|
| 1324 | + | TYPE_RECORD, |
|
| 1325 | + | srt->name->val.ident.name, |
|
| 1326 | + | srt->name->val.ident.length, |
|
| 1327 | + | 0, /* Size will be updated when we add fields */ |
|
| 1328 | + | DEFAULT_ALIGN |
|
| 1329 | + | ); |
|
| 1330 | + | typ->info.srt.fields = types_alloc_sympool(&t->types, MAX_RECORD_FIELDS); |
|
| 1331 | + | typ->info.srt.nfields = 0; |
|
| 1332 | + | typ->info.srt.packedsize = 0; |
|
| 1333 | + | typ->info.srt.anonymous = false; |
|
| 1334 | + | typ->info.srt.tuple = srt->tuple; |
|
| 1335 | + | ||
| 1336 | + | return typ; |
|
| 1337 | + | } |
|
| 1338 | + | ||
| 1339 | + | static type_t *alloc_anonymous_record_type(resolve_t *t) { |
|
| 1340 | + | char buf[32]; |
|
| 1341 | + | snprintf(buf, sizeof(buf), "record#%u", (unsigned)t->recordid++); |
|
| 1342 | + | const char *name = strings_alloc(buf); |
|
| 1343 | + | ||
| 1344 | + | type_t *typ = alloc_type( |
|
| 1345 | + | t, |
|
| 1346 | + | TYPE_RECORD, |
|
| 1347 | + | name, |
|
| 1348 | + | strlen(name), |
|
| 1349 | + | 0, /* Size will be updated when we add fields */ |
|
| 1350 | + | DEFAULT_ALIGN |
|
| 1351 | + | ); |
|
| 1352 | + | typ->info.srt.fields = types_alloc_sympool(&t->types, MAX_RECORD_FIELDS); |
|
| 1353 | + | typ->info.srt.nfields = 0; |
|
| 1354 | + | typ->info.srt.packedsize = 0; |
|
| 1355 | + | typ->info.srt.anonymous = true; |
|
| 1356 | + | ||
| 1357 | + | return typ; |
|
| 1358 | + | } |
|
| 1359 | + | ||
| 1360 | + | /* Allocate an optional type. */ |
|
| 1361 | + | static type_t *alloc_opt_type(resolve_t *t, type_t *elem) { |
|
| 1362 | + | /* First check if we already have this optional type */ |
|
| 1363 | + | for (usize i = 0; i < t->types.nobjects; i++) { |
|
| 1364 | + | type_t *typ = &t->types.objects[i]; |
|
| 1365 | + | ||
| 1366 | + | if (typ->cls == TYPE_OPT && typ->info.opt.elem == elem) { |
|
| 1367 | + | return typ; |
|
| 1368 | + | } |
|
| 1369 | + | } |
|
| 1370 | + | char buf[MAX_STRING_LEN] = { 0 }; |
|
| 1371 | + | snprintf(buf, MAX_STRING_LEN, "?%.*s", (int)elem->namelen, elem->name); |
|
| 1372 | + | const char *name = strings_alloc(buf); |
|
| 1373 | + | ||
| 1374 | + | type_t *opt_type = alloc_type(t, TYPE_OPT, name, strlen(name), 0, 0); |
|
| 1375 | + | ||
| 1376 | + | opt_type->info.opt.elem = elem; |
|
| 1377 | + | update_opt_layout(opt_type); |
|
| 1378 | + | ||
| 1379 | + | return opt_type; |
|
| 1380 | + | } |
|
| 1381 | + | ||
| 1382 | + | static type_t *alloc_result_type(resolve_t *t, type_t *payload, type_t *err) { |
|
| 1383 | + | /* Find existing result type that matches this one. */ |
|
| 1384 | + | for (usize i = 0; i < t->types.nobjects; i++) { |
|
| 1385 | + | type_t *typ = &t->types.objects[i]; |
|
| 1386 | + | ||
| 1387 | + | if (typ->cls == TYPE_RESULT && typ->info.res.payload == payload && |
|
| 1388 | + | typ->info.res.err == err) { |
|
| 1389 | + | return typ; |
|
| 1390 | + | } |
|
| 1391 | + | } |
|
| 1392 | + | ||
| 1393 | + | char buf[MAX_STRING_LEN] = { 0 }; |
|
| 1394 | + | snprintf( |
|
| 1395 | + | buf, |
|
| 1396 | + | MAX_STRING_LEN, |
|
| 1397 | + | "result<%.*s, %.*s>", |
|
| 1398 | + | (int)err->namelen, |
|
| 1399 | + | err->name, |
|
| 1400 | + | (int)payload->namelen, |
|
| 1401 | + | payload->name |
|
| 1402 | + | ); |
|
| 1403 | + | const char *name = strings_alloc(buf); |
|
| 1404 | + | ||
| 1405 | + | type_t *result_typ = alloc_type(t, TYPE_RESULT, name, strlen(name), 0, 0); |
|
| 1406 | + | ||
| 1407 | + | result_typ->info.res.err = err; |
|
| 1408 | + | result_typ->info.res.payload = payload; |
|
| 1409 | + | update_result_layout(t, result_typ); |
|
| 1410 | + | ||
| 1411 | + | return result_typ; |
|
| 1412 | + | } |
|
| 1413 | + | ||
| 1414 | + | static bool resolve_fn_throws( |
|
| 1415 | + | resolve_t *t, type_t *fn_type, nodespan_t throws, type_t *ret_payload |
|
| 1416 | + | ) { |
|
| 1417 | + | usize nthrows = throws.len; |
|
| 1418 | + | if (nthrows == 0) { |
|
| 1419 | + | fn_type->info.fun.ret = ret_payload; |
|
| 1420 | + | return true; |
|
| 1421 | + | } |
|
| 1422 | + | if (nthrows > MAX_FN_THROWS) |
|
| 1423 | + | bail("too many throw types"); |
|
| 1424 | + | ||
| 1425 | + | node_t **throw_nodes = nodespan_ptrs(&t->module->parser, throws); |
|
| 1426 | + | for (usize i = 0; i < nthrows; i++) { |
|
| 1427 | + | node_t *thrown = throw_nodes[i]; |
|
| 1428 | + | type_t *thrown_typ = resolve_type(t, thrown); |
|
| 1429 | + | ||
| 1430 | + | if (!thrown_typ) |
|
| 1431 | + | return false; |
|
| 1432 | + | fn_type->info.fun.throws[i] = thrown_typ; |
|
| 1433 | + | fn_type->info.fun.nthrows++; |
|
| 1434 | + | } |
|
| 1435 | + | type_t *thrown_typ = fn_type->info.fun.throws[0]; |
|
| 1436 | + | type_t *result_typ = alloc_result_type(t, ret_payload, thrown_typ); |
|
| 1437 | + | ||
| 1438 | + | fn_type->info.fun.ret = result_typ; |
|
| 1439 | + | ||
| 1440 | + | return true; |
|
| 1441 | + | } |
|
| 1442 | + | ||
| 1443 | + | static bool union_variant_validate_args( |
|
| 1444 | + | resolve_t *t, node_t *call, symbol_t *variant_sym, node_t **out_arg_expr |
|
| 1445 | + | ) { |
|
| 1446 | + | (void)t; |
|
| 1447 | + | type_t *variant_type = variant_sym->node->type; |
|
| 1448 | + | usize nargs = call->val.call.args.len; |
|
| 1449 | + | ||
| 1450 | + | if (variant_type->cls == TYPE_VOID) { |
|
| 1451 | + | if (out_arg_expr) |
|
| 1452 | + | *out_arg_expr = NULL; |
|
| 1453 | + | return nargs == 0; |
|
| 1454 | + | } |
|
| 1455 | + | if (nargs != 1) |
|
| 1456 | + | return false; |
|
| 1457 | + | ||
| 1458 | + | if (out_arg_expr) |
|
| 1459 | + | *out_arg_expr = |
|
| 1460 | + | nodespan_ptrs(&t->module->parser, call->val.call.args)[0] |
|
| 1461 | + | ->val.call_arg.expr; |
|
| 1462 | + | ||
| 1463 | + | return true; |
|
| 1464 | + | } |
|
| 1465 | + | ||
| 1466 | + | /* Check a union constructor call like `Expr::number(42)`. */ |
|
| 1467 | + | static type_t *resolve_enum_constructor( |
|
| 1468 | + | resolve_t *t, node_t *call, type_t *union_type, symbol_t *variant_sym |
|
| 1469 | + | ) { |
|
| 1470 | + | type_t *variant_type = variant_sym->node->type; |
|
| 1471 | + | node_t *arg_expr = NULL; |
|
| 1472 | + | ||
| 1473 | + | if (!union_variant_validate_args(t, call, variant_sym, &arg_expr)) |
|
| 1474 | + | return NULL; |
|
| 1475 | + | ||
| 1476 | + | if (arg_expr) { |
|
| 1477 | + | if (!resolve_node(t, arg_expr, variant_type)) |
|
| 1478 | + | return NULL; |
|
| 1479 | + | } |
|
| 1480 | + | ||
| 1481 | + | call->sym = variant_sym; |
|
| 1482 | + | call->type = union_type; |
|
| 1483 | + | ||
| 1484 | + | return union_type; |
|
| 1485 | + | } |
|
| 1486 | + | ||
| 1487 | + | /* Check tuple record constructor call */ |
|
| 1488 | + | static type_t *resolve_tuple_record_constructor( |
|
| 1489 | + | resolve_t *t, node_t *call, type_t *record_type |
|
| 1490 | + | ) { |
|
| 1491 | + | usize nfields = record_type->info.srt.nfields; |
|
| 1492 | + | usize nargs = call->val.call.args.len; |
|
| 1493 | + | ||
| 1494 | + | if (nargs != nfields) |
|
| 1495 | + | return NULL; |
|
| 1496 | + | ||
| 1497 | + | /* Type check each argument against the corresponding field type. */ |
|
| 1498 | + | for (usize i = 0; i < nargs; i++) { |
|
| 1499 | + | node_t *arg = nodespan_ptrs(&t->module->parser, call->val.call.args)[i]; |
|
| 1500 | + | symbol_t *field_sym = record_type->info.srt.fields[i]; |
|
| 1501 | + | type_t *field_typ = field_sym->e.field.typ; |
|
| 1502 | + | ||
| 1503 | + | if (!resolve_node(t, arg, field_typ)) |
|
| 1504 | + | return NULL; |
|
| 1505 | + | } |
|
| 1506 | + | call->sym = NULL; |
|
| 1507 | + | ||
| 1508 | + | return (call->type = record_type); |
|
| 1509 | + | } |
|
| 1510 | + | ||
| 1511 | + | static bool symbol_add(resolve_t *t, node_t *ident, node_t *n) { |
|
| 1512 | + | if (ident->cls == NODE_PLACEHOLDER) |
|
| 1513 | + | return true; |
|
| 1514 | + | ||
| 1515 | + | return symtab_add_ident(t->scope, ident, n); |
|
| 1516 | + | } |
|
| 1517 | + | ||
| 1518 | + | static symbol_t *resolve_name(resolve_t *t, node_t *n, symkind_t kind) { |
|
| 1519 | + | n->sym = NULL; |
|
| 1520 | + | ||
| 1521 | + | if (n->cls == NODE_SCOPE) { |
|
| 1522 | + | if (!resolve_scope(t, n) || !n->sym) |
|
| 1523 | + | return NULL; |
|
| 1524 | + | if (kind != SYM_ANY && n->sym->kind != kind) |
|
| 1525 | + | return NULL; |
|
| 1526 | + | return n->sym; |
|
| 1527 | + | } |
|
| 1528 | + | ||
| 1529 | + | symbol_t *sym = |
|
| 1530 | + | symtab_lookup(t->scope, n->val.ident.name, n->val.ident.length, kind); |
|
| 1531 | + | ||
| 1532 | + | if (!sym && kind == SYM_ANY) { |
|
| 1533 | + | sym = symtab_lookup( |
|
| 1534 | + | t->scope, n->val.ident.name, n->val.ident.length, SYM_ANY |
|
| 1535 | + | ); |
|
| 1536 | + | } |
|
| 1537 | + | ||
| 1538 | + | if (sym) { |
|
| 1539 | + | n->sym = sym; |
|
| 1540 | + | ||
| 1541 | + | if (sym->node && sym->node->type && !n->type) |
|
| 1542 | + | n->type = sym->node->type; |
|
| 1543 | + | ||
| 1544 | + | return sym; |
|
| 1545 | + | } |
|
| 1546 | + | return NULL; |
|
| 1547 | + | } |
|
| 1548 | + | ||
| 1549 | + | /* Resolve a type by looking up its definition if necessary, eg. for custom |
|
| 1550 | + | * types defined in the source code. */ |
|
| 1551 | + | static type_t *resolve_type(resolve_t *t, node_t *n) { |
|
| 1552 | + | if (n->type) |
|
| 1553 | + | return n->type; |
|
| 1554 | + | ||
| 1555 | + | switch (n->cls) { |
|
| 1556 | + | case NODE_TYPE: |
|
| 1557 | + | switch (n->val.type.tclass) { |
|
| 1558 | + | case TYPE_U8: |
|
| 1559 | + | return (n->type = t->types.type_u8); |
|
| 1560 | + | case TYPE_U16: |
|
| 1561 | + | return (n->type = t->types.type_u16); |
|
| 1562 | + | case TYPE_U32: |
|
| 1563 | + | return (n->type = t->types.type_u32); |
|
| 1564 | + | case TYPE_I8: |
|
| 1565 | + | return (n->type = t->types.type_i8); |
|
| 1566 | + | case TYPE_I16: |
|
| 1567 | + | return (n->type = t->types.type_i16); |
|
| 1568 | + | case TYPE_I32: |
|
| 1569 | + | return (n->type = t->types.type_i32); |
|
| 1570 | + | case TYPE_BOOL: |
|
| 1571 | + | return (n->type = t->types.type_bool); |
|
| 1572 | + | case TYPE_VOID: |
|
| 1573 | + | return (n->type = t->types.type_void); |
|
| 1574 | + | case TYPE_OPAQUE: |
|
| 1575 | + | return (n->type = t->types.type_opaque); |
|
| 1576 | + | case TYPE_FN: { |
|
| 1577 | + | /* Resolve return type */ |
|
| 1578 | + | type_t *ret_type = n->val.type.info.fn.ret |
|
| 1579 | + | ? resolve_type(t, n->val.type.info.fn.ret) |
|
| 1580 | + | : t->types.type_void; |
|
| 1581 | + | if (!ret_type) |
|
| 1582 | + | return NULL; |
|
| 1583 | + | ||
| 1584 | + | n->type = |
|
| 1585 | + | alloc_fn_type(t, n, ret_type, n->val.type.info.fn.params.len); |
|
| 1586 | + | ||
| 1587 | + | /* Resolve parameter types */ |
|
| 1588 | + | for (usize i = 0; i < n->val.type.info.fn.params.len; i++) { |
|
| 1589 | + | type_t *param_typ = resolve_type( |
|
| 1590 | + | t, |
|
| 1591 | + | nodespan_ptrs( |
|
| 1592 | + | &t->module->parser, n->val.type.info.fn.params |
|
| 1593 | + | )[i] |
|
| 1594 | + | ); |
|
| 1595 | + | if (!param_typ) |
|
| 1596 | + | return NULL; |
|
| 1597 | + | ||
| 1598 | + | n->type->info.fun.params[i] = param_typ; |
|
| 1599 | + | } |
|
| 1600 | + | if (!resolve_fn_throws( |
|
| 1601 | + | t, n->type, n->val.type.info.fn.throws, ret_type |
|
| 1602 | + | )) |
|
| 1603 | + | return NULL; |
|
| 1604 | + | ||
| 1605 | + | return n->type; |
|
| 1606 | + | } |
|
| 1607 | + | case TYPE_ARRAY: { |
|
| 1608 | + | type_t *elem_typ = resolve_type(t, n->val.type.elem_type); |
|
| 1609 | + | ||
| 1610 | + | if (!elem_typ) |
|
| 1611 | + | return NULL; |
|
| 1612 | + | ||
| 1613 | + | node_t *len_node = n->val.type.info.array.length; |
|
| 1614 | + | if (!resolve_node(t, len_node, t->types.type_u32)) |
|
| 1615 | + | return NULL; |
|
| 1616 | + | ||
| 1617 | + | usize len = 0; |
|
| 1618 | + | if (!resolve_const_usize(t, len_node, &len)) |
|
| 1619 | + | return NULL; |
|
| 1620 | + | return (n->type = alloc_array_type(t, elem_typ, len)); |
|
| 1621 | + | } |
|
| 1622 | + | case TYPE_SLICE: { |
|
| 1623 | + | type_t *elem_typ = resolve_type(t, n->val.type.elem_type); |
|
| 1624 | + | if (!elem_typ) |
|
| 1625 | + | return NULL; |
|
| 1626 | + | ||
| 1627 | + | bool mut = n->val.type.info.slice.mut; |
|
| 1628 | + | return (n->type = alloc_slice_type(t, elem_typ, NULL, mut)); |
|
| 1629 | + | } |
|
| 1630 | + | case TYPE_UNION: |
|
| 1631 | + | case TYPE_RESULT: |
|
| 1632 | + | case TYPE_RECORD: |
|
| 1633 | + | abort(); |
|
| 1634 | + | case TYPE_PTR: { |
|
| 1635 | + | type_t *elem_typ = resolve_type(t, n->val.type.elem_type); |
|
| 1636 | + | ||
| 1637 | + | if (!elem_typ) |
|
| 1638 | + | return NULL; |
|
| 1639 | + | bool mut = n->val.type.info.ptr.mut; |
|
| 1640 | + | ||
| 1641 | + | return (n->type = alloc_ptr_type(t, elem_typ, mut)); |
|
| 1642 | + | } |
|
| 1643 | + | case TYPE_OPT: { |
|
| 1644 | + | type_t *elem_typ = resolve_type(t, n->val.type.elem_type); |
|
| 1645 | + | ||
| 1646 | + | if (!elem_typ) |
|
| 1647 | + | return NULL; |
|
| 1648 | + | ||
| 1649 | + | return (n->type = alloc_opt_type(t, elem_typ)); |
|
| 1650 | + | } |
|
| 1651 | + | default: |
|
| 1652 | + | break; |
|
| 1653 | + | } |
|
| 1654 | + | break; |
|
| 1655 | + | case NODE_RECORD_TYPE: { |
|
| 1656 | + | record_type_t *stype = &n->val.record_type; |
|
| 1657 | + | node_t **fields = nodespan_ptrs(&t->module->parser, stype->fields); |
|
| 1658 | + | ||
| 1659 | + | for (usize i = 0; i < stype->fields.len; i++) { |
|
| 1660 | + | node_t *field = fields[i]; |
|
| 1661 | + | type_t *typ = resolve_type(t, field->val.var.type); |
|
| 1662 | + | ||
| 1663 | + | if (!typ) |
|
| 1664 | + | return NULL; |
|
| 1665 | + | ||
| 1666 | + | field->type = typ; |
|
| 1667 | + | } |
|
| 1668 | + | type_t *existing = anonymous_record_lookup(t, stype); |
|
| 1669 | + | if (existing) |
|
| 1670 | + | return (n->type = existing); |
|
| 1671 | + | ||
| 1672 | + | type_t *typ = alloc_anonymous_record_type(t); |
|
| 1673 | + | for (usize i = 0; i < stype->fields.len; i++) { |
|
| 1674 | + | node_t *field = fields[i]; |
|
| 1675 | + | ||
| 1676 | + | if (!record_field_add( |
|
| 1677 | + | t, typ, field, field->val.var.ident, field->type |
|
| 1678 | + | )) |
|
| 1679 | + | return NULL; |
|
| 1680 | + | } |
|
| 1681 | + | ||
| 1682 | + | return (n->type = typ); |
|
| 1683 | + | } |
|
| 1684 | + | case NODE_SCOPE: |
|
| 1685 | + | case NODE_IDENT: { |
|
| 1686 | + | symbol_t *sym = resolve_name(t, n, SYM_TYPE); |
|
| 1687 | + | ||
| 1688 | + | if (!sym) |
|
| 1689 | + | return NULL; |
|
| 1690 | + | ||
| 1691 | + | if (!sym->node || !sym->node->type) |
|
| 1692 | + | bail("type symbol missing type information"); |
|
| 1693 | + | ||
| 1694 | + | return (n->type = sym->node->type); |
|
| 1695 | + | } |
|
| 1696 | + | default: |
|
| 1697 | + | bail("node is not a kind of type, class is %d", n->cls); |
|
| 1698 | + | } |
|
| 1699 | + | return NULL; |
|
| 1700 | + | } |
|
| 1701 | + | ||
| 1702 | + | static type_t *resolve_number(resolve_t *t, node_t *n, type_t *expected) { |
|
| 1703 | + | if (!expected) |
|
| 1704 | + | expected = t->types.type_i32; |
|
| 1705 | + | if (expected->cls == TYPE_OPT) |
|
| 1706 | + | expected = expected->info.opt.elem; |
|
| 1707 | + | ||
| 1708 | + | if (!expected || !type_is_numeric(expected->cls)) |
|
| 1709 | + | return NULL; |
|
| 1710 | + | ||
| 1711 | + | type_t *result_type = expected; |
|
| 1712 | + | typeclass_t tclass = expected->cls; |
|
| 1713 | + | imm_t value = { 0 }; |
|
| 1714 | + | ||
| 1715 | + | /* Create a null-terminated copy of the text for strto* functions. */ |
|
| 1716 | + | static char text[16] = { 0 }; |
|
| 1717 | + | memcpy(text, n->val.number.text, n->val.number.text_len); |
|
| 1718 | + | text[n->val.number.text_len] = '\0'; |
|
| 1719 | + | ||
| 1720 | + | /* Manual binary literal parsing since `strtol` doesn't support 0b in this |
|
| 1721 | + | * environment. */ |
|
| 1722 | + | bool is_binary = (text[0] == '0' && (text[1] == 'b' || text[1] == 'B')); |
|
| 1723 | + | u32 binval = 0; |
|
| 1724 | + | ||
| 1725 | + | if (is_binary) { |
|
| 1726 | + | for (usize i = 2; text[i]; i++) { |
|
| 1727 | + | binval = (binval << 1) + (text[i] - '0'); |
|
| 1728 | + | } |
|
| 1729 | + | } |
|
| 1730 | + | ||
| 1731 | + | /* Parse the number based on the type */ |
|
| 1732 | + | switch (tclass) { |
|
| 1733 | + | case TYPE_I8: |
|
| 1734 | + | case TYPE_I16: |
|
| 1735 | + | case TYPE_I32: { |
|
| 1736 | + | i32 val; |
|
| 1737 | + | if (is_binary) { |
|
| 1738 | + | val = (i32)binval; |
|
| 1739 | + | } else { |
|
| 1740 | + | val = strtol(text, NULL, 0); |
|
| 1741 | + | } |
|
| 1742 | + | value.i = val; |
|
| 1743 | + | break; |
|
| 1744 | + | } |
|
| 1745 | + | case TYPE_U8: |
|
| 1746 | + | case TYPE_U16: |
|
| 1747 | + | case TYPE_U32: { |
|
| 1748 | + | u32 val; |
|
| 1749 | + | if (is_binary) { |
|
| 1750 | + | val = binval; |
|
| 1751 | + | } else { |
|
| 1752 | + | val = strtoul(text, NULL, 0); |
|
| 1753 | + | } |
|
| 1754 | + | value.u = val; |
|
| 1755 | + | break; |
|
| 1756 | + | } |
|
| 1757 | + | default: |
|
| 1758 | + | break; |
|
| 1759 | + | } |
|
| 1760 | + | n->val.number.value = value; |
|
| 1761 | + | ||
| 1762 | + | return (n->type = result_type); |
|
| 1763 | + | } |
|
| 1764 | + | ||
| 1765 | + | static type_t *resolve_builtin(resolve_t *t, node_t *n, type_t *expected) { |
|
| 1766 | + | (void)expected; |
|
| 1767 | + | ||
| 1768 | + | builtin_kind_t kind = n->val.builtin.kind; |
|
| 1769 | + | node_t **args = nodespan_ptrs(&t->module->parser, n->val.builtin.args); |
|
| 1770 | + | type_t *typ; |
|
| 1771 | + | ||
| 1772 | + | /* @sliceOf is handled separately since it takes two runtime arguments */ |
|
| 1773 | + | if (kind == BUILTIN_SLICE_OF) { |
|
| 1774 | + | /* Check first argument (pointer) */ |
|
| 1775 | + | type_t *ptr_type = resolve_node(t, args[0], NULL); |
|
| 1776 | + | if (!ptr_type) |
|
| 1777 | + | return NULL; |
|
| 1778 | + | ||
| 1779 | + | /* Check second argument (length) */ |
|
| 1780 | + | type_t *len_type = resolve_node(t, args[1], t->types.type_u32); |
|
| 1781 | + | if (!len_type) |
|
| 1782 | + | return NULL; |
|
| 1783 | + | ||
| 1784 | + | /* Result is a slice of the pointer's element type */ |
|
| 1785 | + | type_t *elem_type = ptr_type->info.ptr.target; |
|
| 1786 | + | bool mut = ptr_type->info.ptr.mut; |
|
| 1787 | + | ||
| 1788 | + | return (n->type = alloc_slice_type(t, elem_type, NULL, mut)); |
|
| 1789 | + | } |
|
| 1790 | + | ||
| 1791 | + | node_t *expr = args[0]; |
|
| 1792 | + | switch (expr->cls) { |
|
| 1793 | + | case NODE_TYPE: |
|
| 1794 | + | case NODE_RECORD_TYPE: |
|
| 1795 | + | typ = resolve_type(t, expr); |
|
| 1796 | + | break; |
|
| 1797 | + | default: |
|
| 1798 | + | typ = resolve_node(t, expr, NULL); |
|
| 1799 | + | break; |
|
| 1800 | + | } |
|
| 1801 | + | if (!typ) |
|
| 1802 | + | return NULL; |
|
| 1803 | + | ||
| 1804 | + | u32 value = 0; |
|
| 1805 | + | ||
| 1806 | + | switch (kind) { |
|
| 1807 | + | case BUILTIN_SIZE_OF: |
|
| 1808 | + | value = (u32)typ->size; |
|
| 1809 | + | break; |
|
| 1810 | + | case BUILTIN_ALIGN_OF: |
|
| 1811 | + | value = (u32)typ->align; |
|
| 1812 | + | if (expr->sym && expr->sym->kind == SYM_VARIABLE && |
|
| 1813 | + | expr->sym->e.var.align > 0) { |
|
| 1814 | + | value = (u32)expr->sym->e.var.align; |
|
| 1815 | + | } |
|
| 1816 | + | break; |
|
| 1817 | + | case BUILTIN_SLICE_OF: |
|
| 1818 | + | /* Already handled above */ |
|
| 1819 | + | break; |
|
| 1820 | + | } |
|
| 1821 | + | n->cls = NODE_NUMBER; |
|
| 1822 | + | ||
| 1823 | + | n->val.number.text = NULL; |
|
| 1824 | + | n->val.number.text_len = 0; |
|
| 1825 | + | n->val.number.value.u = value; |
|
| 1826 | + | ||
| 1827 | + | return (n->type = t->types.type_u32); |
|
| 1828 | + | } |
|
| 1829 | + | ||
| 1830 | + | /* Bind a pattern variable to a field type. Handles identifiers and |
|
| 1831 | + | * placeholders. If is_ref_match is true, the binding type is wrapped |
|
| 1832 | + | * in a pointer type. */ |
|
| 1833 | + | static bool bind_pattern_var( |
|
| 1834 | + | resolve_t *t, |
|
| 1835 | + | node_t *binding, |
|
| 1836 | + | type_t *field_typ, |
|
| 1837 | + | bool is_ref_match, |
|
| 1838 | + | bool ref_mut |
|
| 1839 | + | ) { |
|
| 1840 | + | type_t *binding_type = |
|
| 1841 | + | is_ref_match ? alloc_ptr_type(t, field_typ, ref_mut) : field_typ; |
|
| 1842 | + | if (binding->cls == NODE_IDENT) { |
|
| 1843 | + | binding->type = binding_type; |
|
| 1844 | + | if (!symbol_add(t, binding, binding)) |
|
| 1845 | + | return false; |
|
| 1846 | + | binding->sym->e.var.typ = binding_type; |
|
| 1847 | + | binding->sym->e.var.align = binding_type->align; |
|
| 1848 | + | binding->sym->scope = t->scope; |
|
| 1849 | + | } |
|
| 1850 | + | return true; |
|
| 1851 | + | } |
|
| 1852 | + | ||
| 1853 | + | /* Bind pattern variables to record fields. Works for both tuple-style S(x, y) |
|
| 1854 | + | * and labeled T { x, y } patterns. Returns false on error. |
|
| 1855 | + | * If is_ref_match is true, bindings are pointer types. */ |
|
| 1856 | + | static bool resolve_record_pattern_bindings( |
|
| 1857 | + | resolve_t *t, |
|
| 1858 | + | node_t *pattern, |
|
| 1859 | + | type_t *rec_type, |
|
| 1860 | + | bool is_ref_match, |
|
| 1861 | + | bool ref_mut |
|
| 1862 | + | ) { |
|
| 1863 | + | if (pattern->cls == NODE_CALL) { |
|
| 1864 | + | usize nargs = pattern->val.call.args.len; |
|
| 1865 | + | for (usize i = 0; i < nargs; i++) { |
|
| 1866 | + | node_t *arg_node = |
|
| 1867 | + | nodespan_ptrs(&t->module->parser, pattern->val.call.args)[i]; |
|
| 1868 | + | node_t *arg = (arg_node->cls == NODE_CALL_ARG) |
|
| 1869 | + | ? arg_node->val.call_arg.expr |
|
| 1870 | + | : arg_node; |
|
| 1871 | + | symbol_t *field_sym = rec_type->info.srt.fields[i]; |
|
| 1872 | + | ||
| 1873 | + | if (!bind_pattern_var( |
|
| 1874 | + | t, arg, field_sym->e.field.typ, is_ref_match, ref_mut |
|
| 1875 | + | )) |
|
| 1876 | + | return false; |
|
| 1877 | + | arg_node->sym = field_sym; |
|
| 1878 | + | } |
|
| 1879 | + | } else if (pattern->cls == NODE_RECORD_LIT) { |
|
| 1880 | + | node_t **fields = |
|
| 1881 | + | nodespan_ptrs(&t->module->parser, pattern->val.record_lit.fields); |
|
| 1882 | + | ||
| 1883 | + | for (usize f = 0; f < pattern->val.record_lit.fields.len; f++) { |
|
| 1884 | + | node_t *field_node = fields[f]; |
|
| 1885 | + | node_t *binding = field_node->val.record_lit_field.value; |
|
| 1886 | + | node_t *name_node = field_node->val.record_lit_field.name; |
|
| 1887 | + | node_t *lookup_node = name_node ? name_node : binding; |
|
| 1888 | + | ||
| 1889 | + | symbol_t *field_sym = record_field_lookup(rec_type, lookup_node); |
|
| 1890 | + | if (!field_sym) |
|
| 1891 | + | return false; |
|
| 1892 | + | ||
| 1893 | + | field_node->sym = field_sym; |
|
| 1894 | + | ||
| 1895 | + | if (!bind_pattern_var( |
|
| 1896 | + | t, binding, field_sym->e.field.typ, is_ref_match, ref_mut |
|
| 1897 | + | )) |
|
| 1898 | + | return false; |
|
| 1899 | + | } |
|
| 1900 | + | } |
|
| 1901 | + | return true; |
|
| 1902 | + | } |
|
| 1903 | + | ||
| 1904 | + | /* Check a match statement case. */ |
|
| 1905 | + | static type_t *resolve_match_case(resolve_t *t, node_t *n, type_t *match_typ) { |
|
| 1906 | + | /* If this is the default (else) case, there are no patterns to check. */ |
|
| 1907 | + | if (!n->val.match_case.patterns.len) { |
|
| 1908 | + | if (!resolve_node(t, n->val.match_case.body, NULL)) |
|
| 1909 | + | return NULL; |
|
| 1910 | + | ||
| 1911 | + | return (n->type = t->types.type_void); |
|
| 1912 | + | } |
|
| 1913 | + | scope_t *prev = t->scope; |
|
| 1914 | + | ||
| 1915 | + | /* Check if matching on a pointer to a union - bindings will be pointers */ |
|
| 1916 | + | bool is_ref_match = false; |
|
| 1917 | + | bool ref_mut = false; |
|
| 1918 | + | type_t *union_typ = match_typ; |
|
| 1919 | + | ||
| 1920 | + | if (match_typ->cls == TYPE_PTR && |
|
| 1921 | + | type_is_union_with_payload(match_typ->info.ptr.target)) { |
|
| 1922 | + | is_ref_match = true; |
|
| 1923 | + | ref_mut = match_typ->info.ptr.mut; |
|
| 1924 | + | union_typ = match_typ->info.ptr.target; |
|
| 1925 | + | } |
|
| 1926 | + | ||
| 1927 | + | if (type_is_union_with_payload((union_typ))) { |
|
| 1928 | + | /* Create a shared scope for all patterns in this case */ |
|
| 1929 | + | scope_t *case_scope = symtab_scope(t->scope, NULL); |
|
| 1930 | + | t->scope = case_scope; |
|
| 1931 | + | n->val.match_case.variable = NULL; |
|
| 1932 | + | ||
| 1933 | + | /* Check each pattern in this case */ |
|
| 1934 | + | node_t **patterns = |
|
| 1935 | + | nodespan_ptrs(&t->module->parser, n->val.match_case.patterns); |
|
| 1936 | + | for (usize p = 0; p < n->val.match_case.patterns.len; p++) { |
|
| 1937 | + | node_t *pattern = patterns[p]; |
|
| 1938 | + | node_t *callee = NULL; |
|
| 1939 | + | bool is_call = (pattern->cls == NODE_CALL); |
|
| 1940 | + | bool is_reclit = (pattern->cls == NODE_RECORD_LIT); |
|
| 1941 | + | ||
| 1942 | + | if (is_call) { |
|
| 1943 | + | callee = pattern->val.call.callee; |
|
| 1944 | + | } else if (is_reclit) { |
|
| 1945 | + | callee = pattern->val.record_lit.type; |
|
| 1946 | + | if (!callee) |
|
| 1947 | + | return NULL; |
|
| 1948 | + | } else if (pattern->cls == NODE_SCOPE) { |
|
| 1949 | + | callee = pattern; |
|
| 1950 | + | } else { |
|
| 1951 | + | return NULL; |
|
| 1952 | + | } |
|
| 1953 | + | ||
| 1954 | + | type_t *parent = resolve_scope(t, callee); |
|
| 1955 | + | node_t *variant = callee->val.access.rval; |
|
| 1956 | + | ||
| 1957 | + | if (!parent) |
|
| 1958 | + | return NULL; |
|
| 1959 | + | ||
| 1960 | + | symbol_t *variant_sym = union_variant_lookup(union_typ, variant); |
|
| 1961 | + | if (!variant_sym) |
|
| 1962 | + | return NULL; |
|
| 1963 | + | variant->sym = variant_sym; |
|
| 1964 | + | ||
| 1965 | + | type_t *variant_type = variant_sym->node->type; |
|
| 1966 | + | ||
| 1967 | + | if (variant_type->cls == TYPE_VOID) { |
|
| 1968 | + | if (is_call) { |
|
| 1969 | + | if (!union_variant_validate_args( |
|
| 1970 | + | t, pattern, variant_sym, NULL |
|
| 1971 | + | )) |
|
| 1972 | + | return NULL; |
|
| 1973 | + | } |
|
| 1974 | + | } else if (is_reclit) { |
|
| 1975 | + | if (variant_type->cls != TYPE_RECORD) |
|
| 1976 | + | return NULL; |
|
| 1977 | + | if (!resolve_record_pattern_bindings( |
|
| 1978 | + | t, pattern, variant_type, is_ref_match, ref_mut |
|
| 1979 | + | )) |
|
| 1980 | + | return NULL; |
|
| 1981 | + | } else { |
|
| 1982 | + | node_t *arg_expr = NULL; |
|
| 1983 | + | ||
| 1984 | + | if (!union_variant_validate_args( |
|
| 1985 | + | t, pattern, variant_sym, &arg_expr |
|
| 1986 | + | )) |
|
| 1987 | + | return NULL; |
|
| 1988 | + | ||
| 1989 | + | node_t *variable = arg_expr; |
|
| 1990 | + | n->val.match_case.variable = variable; |
|
| 1991 | + | ||
| 1992 | + | /* Create scope for the bound variable. |
|
| 1993 | + | * If matching on a pointer to union, binding is a pointer. */ |
|
| 1994 | + | type_t *binding_type = |
|
| 1995 | + | is_ref_match ? alloc_ptr_type(t, variant_type, ref_mut) |
|
| 1996 | + | : variant_type; |
|
| 1997 | + | variable->type = binding_type; |
|
| 1998 | + | ||
| 1999 | + | if (variable->cls == NODE_IDENT) { |
|
| 2000 | + | /* Add the bound variable to the scope */ |
|
| 2001 | + | if (!symbol_add(t, variable, variable)) |
|
| 2002 | + | return NULL; |
|
| 2003 | + | ||
| 2004 | + | variable->sym->e.var.typ = binding_type; |
|
| 2005 | + | variable->sym->e.var.align = binding_type->align; |
|
| 2006 | + | variable->sym->scope = t->scope; |
|
| 2007 | + | } |
|
| 2008 | + | } |
|
| 2009 | + | /* Set the pattern type to the union type. */ |
|
| 2010 | + | pattern->type = match_typ; |
|
| 2011 | + | } |
|
| 2012 | + | } else if (match_typ->cls == TYPE_RECORD) { |
|
| 2013 | + | /* Record pattern matching: `match rec { case T(x) => ... }` */ |
|
| 2014 | + | scope_t *case_scope = symtab_scope(t->scope, NULL); |
|
| 2015 | + | t->scope = case_scope; |
|
| 2016 | + | n->val.match_case.variable = NULL; |
|
| 2017 | + | ||
| 2018 | + | node_t **patterns = |
|
| 2019 | + | nodespan_ptrs(&t->module->parser, n->val.match_case.patterns); |
|
| 2020 | + | for (usize p = 0; p < n->val.match_case.patterns.len; p++) { |
|
| 2021 | + | node_t *pattern = patterns[p]; |
|
| 2022 | + | if (!resolve_record_pattern_bindings( |
|
| 2023 | + | t, pattern, match_typ, false, false |
|
| 2024 | + | )) |
|
| 2025 | + | return NULL; |
|
| 2026 | + | pattern->type = match_typ; |
|
| 2027 | + | } |
|
| 2028 | + | } else { |
|
| 2029 | + | bool pctx = t->ctx; |
|
| 2030 | + | t->ctx = TC_CTX_PATTERN; |
|
| 2031 | + | ||
| 2032 | + | /* Check each pattern in this case */ |
|
| 2033 | + | node_t **patterns2 = |
|
| 2034 | + | nodespan_ptrs(&t->module->parser, n->val.match_case.patterns); |
|
| 2035 | + | for (usize p = 0; p < n->val.match_case.patterns.len; p++) { |
|
| 2036 | + | node_t *pattern = patterns2[p]; |
|
| 2037 | + | if (!resolve_node(t, pattern, match_typ)) |
|
| 2038 | + | return NULL; |
|
| 2039 | + | } |
|
| 2040 | + | t->ctx = pctx; |
|
| 2041 | + | } |
|
| 2042 | + | if (n->val.match_case.guard) { |
|
| 2043 | + | if (!resolve_node(t, n->val.match_case.guard, t->types.type_bool)) |
|
| 2044 | + | return NULL; |
|
| 2045 | + | } |
|
| 2046 | + | /* Check case body */ |
|
| 2047 | + | if (!resolve_node(t, n->val.match_case.body, NULL)) { |
|
| 2048 | + | t->scope = prev; |
|
| 2049 | + | return NULL; |
|
| 2050 | + | } |
|
| 2051 | + | t->scope = prev; |
|
| 2052 | + | ||
| 2053 | + | return (n->type = t->types.type_void); |
|
| 2054 | + | } |
|
| 2055 | + | ||
| 2056 | + | static type_t *resolve_call_fn_ptr(resolve_t *t, symbol_t *sym, node_t *call) { |
|
| 2057 | + | node_t *fn = sym->node; |
|
| 2058 | + | ||
| 2059 | + | if (fn->type->cls != TYPE_FN) |
|
| 2060 | + | return NULL; |
|
| 2061 | + | ||
| 2062 | + | /* Check each argument type. */ |
|
| 2063 | + | for (usize i = 0; i < call->val.call.args.len; i++) { |
|
| 2064 | + | node_t *arg_node = |
|
| 2065 | + | nodespan_ptrs(&t->module->parser, call->val.call.args)[i]; |
|
| 2066 | + | type_t *param_typ = fn->type->info.fun.params[i]; |
|
| 2067 | + | ||
| 2068 | + | if (!resolve_node(t, arg_node->val.call_arg.expr, param_typ)) |
|
| 2069 | + | return NULL; |
|
| 2070 | + | } |
|
| 2071 | + | call->sym = sym; |
|
| 2072 | + | ||
| 2073 | + | return (call->type = fn->type->info.fun.ret); |
|
| 2074 | + | } |
|
| 2075 | + | ||
| 2076 | + | static type_t *resolve_call_fn(resolve_t *t, symbol_t *sym, node_t *call) { |
|
| 2077 | + | node_t *fn = sym->node; |
|
| 2078 | + | ||
| 2079 | + | if (fn->type->cls != TYPE_FN) |
|
| 2080 | + | return NULL; |
|
| 2081 | + | ||
| 2082 | + | sym->e.fn.used = true; |
|
| 2083 | + | ||
| 2084 | + | /* Check each argument type. */ |
|
| 2085 | + | for (usize i = 0; i < call->val.call.args.len; i++) { |
|
| 2086 | + | node_t *arg_node = |
|
| 2087 | + | nodespan_ptrs(&t->module->parser, call->val.call.args)[i]; |
|
| 2088 | + | node_t *param_node = |
|
| 2089 | + | nodespan_ptrs(&sym->scope->mod->parser, fn->val.fn_decl.params)[i]; |
|
| 2090 | + | type_t *param_type = resolve_type(t, param_node->val.param.type); |
|
| 2091 | + | ||
| 2092 | + | if (!resolve_node(t, arg_node->val.call_arg.expr, param_type)) |
|
| 2093 | + | return NULL; |
|
| 2094 | + | } |
|
| 2095 | + | call->sym = sym; |
|
| 2096 | + | ||
| 2097 | + | return (call->type = fn->type->info.fun.ret); |
|
| 2098 | + | } |
|
| 2099 | + | ||
| 2100 | + | /* Helper function to build a module path from NODE_ACCESS nodes */ |
|
| 2101 | + | static void module_scope_path(node_t *node, char *path_str) { |
|
| 2102 | + | if (node->cls == NODE_IDENT) { |
|
| 2103 | + | strncat(path_str, node->val.ident.name, node->val.ident.length); |
|
| 2104 | + | } else if (node->cls == NODE_SUPER) { |
|
| 2105 | + | strlcat(path_str, "super", MAX_PATH_LEN); |
|
| 2106 | + | } else if (node->cls == NODE_SCOPE) { |
|
| 2107 | + | module_scope_path(node->val.access.lval, path_str); |
|
| 2108 | + | strlcat(path_str, "::", MAX_PATH_LEN); |
|
| 2109 | + | module_scope_path(node->val.access.rval, path_str); |
|
| 2110 | + | } else { |
|
| 2111 | + | } |
|
| 2112 | + | } |
|
| 2113 | + | ||
| 2114 | + | static type_t *resolve_use(resolve_t *t, node_t *n) { |
|
| 2115 | + | /* Extract the import path from the `use` node */ |
|
| 2116 | + | node_t *path_node = n->val.use_decl.path; |
|
| 2117 | + | bool wildcard = n->val.use_decl.wildcard; |
|
| 2118 | + | ||
| 2119 | + | /* Get the last component (symbol name) and parent scope */ |
|
| 2120 | + | node_t *last = path_node; |
|
| 2121 | + | node_t *parent = NULL; |
|
| 2122 | + | ||
| 2123 | + | while (last && (last->cls == NODE_SCOPE)) { |
|
| 2124 | + | parent = last->val.access.lval; |
|
| 2125 | + | last = last->val.access.rval; |
|
| 2126 | + | } |
|
| 2127 | + | ||
| 2128 | + | /* Try to find as a module first */ |
|
| 2129 | + | char filepath[MAX_PATH_LEN] = { 0 }; |
|
| 2130 | + | module_scope_path(path_node, filepath); |
|
| 2131 | + | module_t *imported = |
|
| 2132 | + | module_manager_find_relative(t->mm, t->module->path, filepath); |
|
| 2133 | + | ||
| 2134 | + | if (imported) { |
|
| 2135 | + | /* Module import: check both declarations and definitions */ |
|
| 2136 | + | if (!resolve_decls(t, imported)) { |
|
| 2137 | + | return NULL; |
|
| 2138 | + | } |
|
| 2139 | + | bool in_decl_phase = t->module && !t->module->declared; |
|
| 2140 | + | ||
| 2141 | + | if (!in_decl_phase) { |
|
| 2142 | + | if (!resolve_mod_def(t, imported)) { |
|
| 2143 | + | return NULL; |
|
| 2144 | + | } |
|
| 2145 | + | } |
|
| 2146 | + | if (wildcard) { |
|
| 2147 | + | /* Re-export all public symbols from the imported module */ |
|
| 2148 | + | for (usize i = 0; i < imported->scope->nsymbols; i++) { |
|
| 2149 | + | symbol_t *sym = imported->scope->symbols[i]; |
|
| 2150 | + | if (!sym || !sym->node) |
|
| 2151 | + | continue; |
|
| 2152 | + | ||
| 2153 | + | /* Only re-export public symbols */ |
|
| 2154 | + | attrib_t sym_attribs = 0; |
|
| 2155 | + | switch (sym->kind) { |
|
| 2156 | + | case SYM_FUNCTION: |
|
| 2157 | + | sym_attribs = sym->e.fn.attribs; |
|
| 2158 | + | break; |
|
| 2159 | + | case SYM_TYPE: |
|
| 2160 | + | /* Check if it's a record or union declaration */ |
|
| 2161 | + | if (sym->node->cls == NODE_RECORD) { |
|
| 2162 | + | node_t *attribs_node = |
|
| 2163 | + | sym->node->val.record_decl.attribs; |
|
| 2164 | + | if (attribs_node && |
|
| 2165 | + | attribs_node->cls == NODE_ATTRIBUTE) { |
|
| 2166 | + | sym_attribs = attribs_node->val.attrib; |
|
| 2167 | + | } |
|
| 2168 | + | } else if (sym->node->cls == NODE_UNION) { |
|
| 2169 | + | node_t *attribs_node = |
|
| 2170 | + | sym->node->val.union_decl.attribs; |
|
| 2171 | + | if (attribs_node && |
|
| 2172 | + | attribs_node->cls == NODE_ATTRIBUTE) { |
|
| 2173 | + | sym_attribs = attribs_node->val.attrib; |
|
| 2174 | + | } |
|
| 2175 | + | } |
|
| 2176 | + | break; |
|
| 2177 | + | case SYM_MODULE: |
|
| 2178 | + | /* Check module declaration attributes */ |
|
| 2179 | + | if (sym->node->cls == NODE_MOD) { |
|
| 2180 | + | node_t *attribs_node = sym->node->val.mod_decl.attribs; |
|
| 2181 | + | if (attribs_node && |
|
| 2182 | + | attribs_node->cls == NODE_ATTRIBUTE) { |
|
| 2183 | + | sym_attribs = attribs_node->val.attrib; |
|
| 2184 | + | } |
|
| 2185 | + | } |
|
| 2186 | + | break; |
|
| 2187 | + | default: |
|
| 2188 | + | /* Skip other symbol types for now */ |
|
| 2189 | + | continue; |
|
| 2190 | + | } |
|
| 2191 | + | ||
| 2192 | + | if (sym_attribs & ATTRIB_PUB) { |
|
| 2193 | + | if (!symtab_add_alias(t->scope, sym->node, sym)) { |
|
| 2194 | + | /* Symbol already exists, skip it */ |
|
| 2195 | + | } |
|
| 2196 | + | } |
|
| 2197 | + | } |
|
| 2198 | + | return (n->type = t->types.type_void); |
|
| 2199 | + | } else { |
|
| 2200 | + | /* Regular module import */ |
|
| 2201 | + | if (!symbol_add(t, last, n)) { |
|
| 2202 | + | return NULL; |
|
| 2203 | + | } |
|
| 2204 | + | n->sym->e.mod = imported; |
|
| 2205 | + | n->sym->scope = imported->scope; |
|
| 2206 | + | ||
| 2207 | + | return (n->type = t->types.type_void); |
|
| 2208 | + | } |
|
| 2209 | + | } |
|
| 2210 | + | ||
| 2211 | + | /* Try function/symbol import if there's a parent scope */ |
|
| 2212 | + | if (parent) { |
|
| 2213 | + | char modulepath[MAX_PATH_LEN] = { 0 }; |
|
| 2214 | + | module_scope_path(parent, modulepath); |
|
| 2215 | + | module_t *parent_mod = |
|
| 2216 | + | module_manager_find_relative(t->mm, t->module->path, modulepath); |
|
| 2217 | + | ||
| 2218 | + | if (parent_mod && resolve_mod_def(t, parent_mod)) { |
|
| 2219 | + | symbol_t *sym = symtab_scope_lookup( |
|
| 2220 | + | parent_mod->scope, |
|
| 2221 | + | last->val.ident.name, |
|
| 2222 | + | last->val.ident.length, |
|
| 2223 | + | SYM_ANY |
|
| 2224 | + | ); |
|
| 2225 | + | if (sym) { /* Add alias with qualified name */ |
|
| 2226 | + | symtab_add_alias(t->scope, last, sym); |
|
| 2227 | + | ||
| 2228 | + | n->sym = sym; |
|
| 2229 | + | n->sym->scope = parent_mod->scope; |
|
| 2230 | + | ||
| 2231 | + | return (n->type = t->types.type_void); |
|
| 2232 | + | } |
|
| 2233 | + | } |
|
| 2234 | + | } |
|
| 2235 | + | return NULL; |
|
| 2236 | + | } |
|
| 2237 | + | ||
| 2238 | + | /* Scope access, eg. `foo::bar` */ |
|
| 2239 | + | static type_t *resolve_scope(resolve_t *t, node_t *n) { |
|
| 2240 | + | node_t *parent = n->val.access.lval; |
|
| 2241 | + | node_t *child = n->val.access.rval; |
|
| 2242 | + | ||
| 2243 | + | /* Handle absolute path from global root: ::module::symbol */ |
|
| 2244 | + | if (parent == NULL) { |
|
| 2245 | + | /* Look up module in global scope */ |
|
| 2246 | + | symbol_t *sym = symtab_lookup( |
|
| 2247 | + | t->global, |
|
| 2248 | + | child->val.ident.name, |
|
| 2249 | + | child->val.ident.length, |
|
| 2250 | + | SYM_MODULE |
|
| 2251 | + | ); |
|
| 2252 | + | if (sym) { |
|
| 2253 | + | n->sym = sym; |
|
| 2254 | + | return (n->type = t->types.type_void); |
|
| 2255 | + | } |
|
| 2256 | + | return NULL; |
|
| 2257 | + | } |
|
| 2258 | + | ||
| 2259 | + | /* Handle `super::` references */ |
|
| 2260 | + | if (node_is_super(parent)) { |
|
| 2261 | + | if (!t->module) |
|
| 2262 | + | return NULL; |
|
| 2263 | + | ||
| 2264 | + | module_t *target = module_super_ancestor(t->module, 1); |
|
| 2265 | + | if (!target) |
|
| 2266 | + | return NULL; |
|
| 2267 | + | if (!target->declared && target->state != MODULE_STATE_VISITING) { |
|
| 2268 | + | if (!resolve_decls(t, target)) { |
|
| 2269 | + | return NULL; |
|
| 2270 | + | } |
|
| 2271 | + | } |
|
| 2272 | + | if (target->ast && target->ast->sym) { |
|
| 2273 | + | parent->sym = target->ast->sym; |
|
| 2274 | + | parent->type = t->types.type_void; |
|
| 2275 | + | } |
|
| 2276 | + | return module_lookup(t, n, child, target); |
|
| 2277 | + | } |
|
| 2278 | + | ||
| 2279 | + | /* Handle direct module access: module::symbol */ |
|
| 2280 | + | if (parent->cls == NODE_IDENT) { |
|
| 2281 | + | symbol_t *sym = resolve_name(t, parent, SYM_MODULE); |
|
| 2282 | + | if (sym) { |
|
| 2283 | + | return module_lookup(t, n, child, sym->e.mod); |
|
| 2284 | + | } |
|
| 2285 | + | } else if (parent->cls == NODE_SCOPE) { |
|
| 2286 | + | /* Handle recursive scope access: foo::bar::baz */ |
|
| 2287 | + | type_t *parent_type = resolve_scope(t, parent); |
|
| 2288 | + | if (!parent_type) |
|
| 2289 | + | return NULL; |
|
| 2290 | + | ||
| 2291 | + | /* If parent is a module, look up symbol in module scope */ |
|
| 2292 | + | if (parent->sym && parent->sym->kind == SYM_MODULE) { |
|
| 2293 | + | return module_lookup(t, n, child, parent->sym->e.mod); |
|
| 2294 | + | } |
|
| 2295 | + | /* If parent is a union, handle union scope */ |
|
| 2296 | + | if (parent_type->cls == TYPE_UNION) { |
|
| 2297 | + | symbol_t *sym = union_variant_lookup(parent_type, child); |
|
| 2298 | + | ||
| 2299 | + | if (sym) { |
|
| 2300 | + | n->sym = sym; |
|
| 2301 | + | return (n->type = parent_type); |
|
| 2302 | + | } |
|
| 2303 | + | } |
|
| 2304 | + | return NULL; |
|
| 2305 | + | } |
|
| 2306 | + | ||
| 2307 | + | /* If not a module, treat it as a normal type */ |
|
| 2308 | + | type_t *parent_type = resolve_node(t, parent, NULL); |
|
| 2309 | + | if (!parent_type) |
|
| 2310 | + | return NULL; |
|
| 2311 | + | ||
| 2312 | + | /* Handle union variant access */ |
|
| 2313 | + | if (parent_type->cls == TYPE_UNION) { |
|
| 2314 | + | if ((n->sym = union_variant_lookup(parent_type, child))) { |
|
| 2315 | + | /* For unions we store the type of the enum, not the variant */ |
|
| 2316 | + | return (n->type = parent_type); |
|
| 2317 | + | } |
|
| 2318 | + | } |
|
| 2319 | + | ||
| 2320 | + | return NULL; |
|
| 2321 | + | } |
|
| 2322 | + | ||
| 2323 | + | static type_t *resolve_array_repeat(resolve_t *t, node_t *n, type_t *expected) { |
|
| 2324 | + | if (expected->cls == TYPE_OPT) { |
|
| 2325 | + | expected = expected->info.opt.elem; |
|
| 2326 | + | } |
|
| 2327 | + | ||
| 2328 | + | node_t *value = n->val.array_repeat_lit.value; |
|
| 2329 | + | node_t *count = n->val.array_repeat_lit.count; |
|
| 2330 | + | ||
| 2331 | + | /* Type check the value expression */ |
|
| 2332 | + | type_t *expected_typ = expected->info.ary.elem; |
|
| 2333 | + | type_t *value_typ = resolve_node(t, value, expected_typ); |
|
| 2334 | + | ||
| 2335 | + | if (!value_typ) |
|
| 2336 | + | return NULL; |
|
| 2337 | + | ||
| 2338 | + | /* Type check the count expression */ |
|
| 2339 | + | if (!resolve_node(t, count, t->types.type_u32)) |
|
| 2340 | + | return NULL; |
|
| 2341 | + | ||
| 2342 | + | /* Ensure the count is a compile-time constant */ |
|
| 2343 | + | usize length = 0; |
|
| 2344 | + | ||
| 2345 | + | if (!resolve_const_usize(t, count, &length)) |
|
| 2346 | + | return NULL; |
|
| 2347 | + | ||
| 2348 | + | /* For array contexts, use expected type */ |
|
| 2349 | + | if (expected->cls == TYPE_ARRAY) { |
|
| 2350 | + | n->type = expected; |
|
| 2351 | + | } else { |
|
| 2352 | + | /* For slice contexts, create a new array type */ |
|
| 2353 | + | n->type = alloc_array_type(t, expected_typ, length); |
|
| 2354 | + | } |
|
| 2355 | + | return n->type; |
|
| 2356 | + | } |
|
| 2357 | + | ||
| 2358 | + | /* Check expression types. */ |
|
| 2359 | + | static type_t *resolve_node(resolve_t *t, node_t *n, type_t *expected) { |
|
| 2360 | + | /* Short-circuit if we've already traversed this node. */ |
|
| 2361 | + | if (n->type && n->cls != NODE_RECORD && n->cls != NODE_UNION) |
|
| 2362 | + | return n->type; |
|
| 2363 | + | ||
| 2364 | + | switch (n->cls) { |
|
| 2365 | + | case NODE_ARRAY_LIT: { |
|
| 2366 | + | if (expected->cls == TYPE_OPT) { |
|
| 2367 | + | expected = expected->info.opt.elem; |
|
| 2368 | + | } |
|
| 2369 | + | ||
| 2370 | + | usize length = n->val.array_lit.elems.len; |
|
| 2371 | + | if (length == 0) { |
|
| 2372 | + | /* Create an empty array type with the expected element type. */ |
|
| 2373 | + | type_t *elem_type = expected->info.slc.elem; |
|
| 2374 | + | n->type = alloc_array_type(t, elem_type, 0); |
|
| 2375 | + | return n->type; |
|
| 2376 | + | } |
|
| 2377 | + | /* Get the expected element type */ |
|
| 2378 | + | type_t *expected_typ = expected->cls == TYPE_SLICE |
|
| 2379 | + | ? expected->info.slc.elem |
|
| 2380 | + | : expected->info.ary.elem; |
|
| 2381 | + | ||
| 2382 | + | /* Check all elements */ |
|
| 2383 | + | node_t **elems = |
|
| 2384 | + | nodespan_ptrs(&t->module->parser, n->val.array_lit.elems); |
|
| 2385 | + | for (usize i = 0; i < length; i++) { |
|
| 2386 | + | if (!resolve_node(t, elems[i], expected_typ)) |
|
| 2387 | + | return NULL; |
|
| 2388 | + | } |
|
| 2389 | + | if (expected->cls == TYPE_ARRAY) { |
|
| 2390 | + | n->type = expected; |
|
| 2391 | + | } else { |
|
| 2392 | + | /* For slice contexts, create a new array type */ |
|
| 2393 | + | n->type = alloc_array_type(t, expected_typ, length); |
|
| 2394 | + | } |
|
| 2395 | + | return n->type; |
|
| 2396 | + | } |
|
| 2397 | + | ||
| 2398 | + | case NODE_ARRAY_REPEAT_LIT: |
|
| 2399 | + | return resolve_array_repeat(t, n, expected); |
|
| 2400 | + | ||
| 2401 | + | case NODE_ARRAY_INDEX: { |
|
| 2402 | + | type_t *array_typ = resolve_node(t, n->val.access.lval, NULL); |
|
| 2403 | + | if (!array_typ) |
|
| 2404 | + | return NULL; |
|
| 2405 | + | ||
| 2406 | + | if (array_typ->cls == TYPE_PTR) |
|
| 2407 | + | array_typ = deref_type(array_typ); |
|
| 2408 | + | ||
| 2409 | + | node_t *idx_node = n->val.access.rval; |
|
| 2410 | + | ||
| 2411 | + | if (idx_node->cls == NODE_RANGE) { |
|
| 2412 | + | if (array_typ->cls == TYPE_SLICE) { |
|
| 2413 | + | n->type = array_typ; |
|
| 2414 | + | if (array_typ->info.slc.base) |
|
| 2415 | + | array_typ = array_typ->info.slc.base; |
|
| 2416 | + | } |
|
| 2417 | + | if (idx_node->val.range.start) { |
|
| 2418 | + | if (!resolve_node( |
|
| 2419 | + | t, idx_node->val.range.start, t->types.type_u32 |
|
| 2420 | + | )) |
|
| 2421 | + | return NULL; |
|
| 2422 | + | } |
|
| 2423 | + | if (idx_node->val.range.end) { |
|
| 2424 | + | if (!resolve_node( |
|
| 2425 | + | t, idx_node->val.range.end, t->types.type_u32 |
|
| 2426 | + | )) |
|
| 2427 | + | return NULL; |
|
| 2428 | + | } |
|
| 2429 | + | return (n->type = n->type ? n->type : array_typ->slice); |
|
| 2430 | + | } else { |
|
| 2431 | + | type_t *elem_typ = array_typ->cls == TYPE_SLICE |
|
| 2432 | + | ? array_typ->info.slc.elem |
|
| 2433 | + | : array_typ->info.ary.elem; |
|
| 2434 | + | ||
| 2435 | + | if (!resolve_node(t, idx_node, t->types.type_u32)) |
|
| 2436 | + | return NULL; |
|
| 2437 | + | ||
| 2438 | + | return (n->type = elem_typ); |
|
| 2439 | + | } |
|
| 2440 | + | } |
|
| 2441 | + | ||
| 2442 | + | case NODE_UNION: { |
|
| 2443 | + | union_decl_t *decl = &n->val.union_decl; |
|
| 2444 | + | node_t **variants = nodespan_ptrs(&t->module->parser, decl->variants); |
|
| 2445 | + | if (!declare_enum(t, n)) { |
|
| 2446 | + | return NULL; |
|
| 2447 | + | } |
|
| 2448 | + | type_t *typ = n->type; |
|
| 2449 | + | ||
| 2450 | + | /* Add each variant to the union's symbol table. */ |
|
| 2451 | + | i32 iota = 0; |
|
| 2452 | + | for (usize i = 0; i < decl->variants.len; i++) { |
|
| 2453 | + | node_t *v = variants[i]; |
|
| 2454 | + | ||
| 2455 | + | if (!union_variant_add(t, typ, v, i, &iota)) |
|
| 2456 | + | return NULL; |
|
| 2457 | + | } |
|
| 2458 | + | update_enum_layout(typ); |
|
| 2459 | + | n->sym->e.typ.info = typ; |
|
| 2460 | + | ||
| 2461 | + | return (n->type = typ); |
|
| 2462 | + | } |
|
| 2463 | + | ||
| 2464 | + | case NODE_RECORD: { |
|
| 2465 | + | record_decl_t *decl = &n->val.record_decl; |
|
| 2466 | + | node_t **fields = nodespan_ptrs(&t->module->parser, decl->fields); |
|
| 2467 | + | if (!declare_record(t, n)) { |
|
| 2468 | + | return NULL; |
|
| 2469 | + | } |
|
| 2470 | + | type_t *strct_typ = n->type; |
|
| 2471 | + | ||
| 2472 | + | /* Add each field to the record. */ |
|
| 2473 | + | for (usize i = 0; i < decl->fields.len; i++) { |
|
| 2474 | + | node_t *f = fields[i]; |
|
| 2475 | + | var_decl_t *field = &f->val.var; |
|
| 2476 | + | type_t *field_type = resolve_type(t, field->type); |
|
| 2477 | + | ||
| 2478 | + | if (!field_type) |
|
| 2479 | + | return NULL; |
|
| 2480 | + | ||
| 2481 | + | if (!record_field_add(t, strct_typ, f, field->ident, field_type)) { |
|
| 2482 | + | return NULL; |
|
| 2483 | + | } |
|
| 2484 | + | } |
|
| 2485 | + | n->sym->e.typ.info = strct_typ; |
|
| 2486 | + | ||
| 2487 | + | return strct_typ; |
|
| 2488 | + | } |
|
| 2489 | + | ||
| 2490 | + | case NODE_RECORD_TYPE: |
|
| 2491 | + | return resolve_type(t, n); |
|
| 2492 | + | ||
| 2493 | + | case NODE_RECORD_FIELD: |
|
| 2494 | + | /* Record fields are handled when processing the parent record. */ |
|
| 2495 | + | return n->type; |
|
| 2496 | + | ||
| 2497 | + | case NODE_RECORD_LIT_FIELD: |
|
| 2498 | + | return n->type; |
|
| 2499 | + | ||
| 2500 | + | case NODE_RECORD_LIT: { |
|
| 2501 | + | type_t *record_type = NULL; |
|
| 2502 | + | type_t *result_type = NULL; |
|
| 2503 | + | symbol_t *variant_sym = NULL; |
|
| 2504 | + | ||
| 2505 | + | if (!resolve_record_literal_types( |
|
| 2506 | + | t, |
|
| 2507 | + | n->val.record_lit.type, |
|
| 2508 | + | expected, |
|
| 2509 | + | &record_type, |
|
| 2510 | + | &result_type, |
|
| 2511 | + | &variant_sym |
|
| 2512 | + | )) |
|
| 2513 | + | return NULL; |
|
| 2514 | + | ||
| 2515 | + | if (!resolve_record_literal_fields(t, n, record_type)) |
|
| 2516 | + | return NULL; |
|
| 2517 | + | ||
| 2518 | + | if (variant_sym) |
|
| 2519 | + | n->sym = variant_sym; |
|
| 2520 | + | ||
| 2521 | + | return (n->type = result_type); |
|
| 2522 | + | } |
|
| 2523 | + | ||
| 2524 | + | case NODE_NUMBER: |
|
| 2525 | + | return resolve_number(t, n, expected); |
|
| 2526 | + | ||
| 2527 | + | case NODE_CHAR: |
|
| 2528 | + | return (n->type = t->types.type_u8); |
|
| 2529 | + | ||
| 2530 | + | case NODE_STRING: |
|
| 2531 | + | return (n->type = t->types.type_str); |
|
| 2532 | + | ||
| 2533 | + | case NODE_BOOL: |
|
| 2534 | + | return (n->type = t->types.type_bool); |
|
| 2535 | + | ||
| 2536 | + | case NODE_UNDEF: |
|
| 2537 | + | return (n->type = expected); |
|
| 2538 | + | ||
| 2539 | + | case NODE_NIL: |
|
| 2540 | + | if (expected) { |
|
| 2541 | + | if (expected->cls == TYPE_OPT) |
|
| 2542 | + | return (n->type = expected); |
|
| 2543 | + | return (n->type = alloc_opt_type(t, expected)); |
|
| 2544 | + | } |
|
| 2545 | + | return NULL; |
|
| 2546 | + | ||
| 2547 | + | case NODE_SUPER: |
|
| 2548 | + | return NULL; |
|
| 2549 | + | ||
| 2550 | + | case NODE_IDENT: |
|
| 2551 | + | case NODE_SCOPE: { |
|
| 2552 | + | bool pattern_ctx = (t->ctx == TC_CTX_PATTERN && n->cls == NODE_IDENT); |
|
| 2553 | + | symbol_t *sym = resolve_name(t, n, SYM_ANY); |
|
| 2554 | + | ||
| 2555 | + | if (!sym) { |
|
| 2556 | + | if (pattern_ctx) { |
|
| 2557 | + | type_t *bind_type = expected ? expected : t->types.type_void; |
|
| 2558 | + | n->type = bind_type; |
|
| 2559 | + | n->sym = NULL; |
|
| 2560 | + | return bind_type; |
|
| 2561 | + | } |
|
| 2562 | + | return NULL; |
|
| 2563 | + | } |
|
| 2564 | + | type_t *scoped_type = n->type; |
|
| 2565 | + | ||
| 2566 | + | switch (sym->kind) { |
|
| 2567 | + | case SYM_VARIABLE: |
|
| 2568 | + | case SYM_VARIANT: |
|
| 2569 | + | case SYM_CONSTANT: { |
|
| 2570 | + | if (!sym->node) |
|
| 2571 | + | return NULL; |
|
| 2572 | + | if (sym->node->cls == NODE_UNION_VARIANT) { |
|
| 2573 | + | if (!scoped_type || scoped_type->cls != TYPE_UNION) |
|
| 2574 | + | return NULL; |
|
| 2575 | + | type_t *variant_type = sym->node->type; |
|
| 2576 | + | if (variant_type->cls == TYPE_VOID) |
|
| 2577 | + | return (n->type = scoped_type); |
|
| 2578 | + | return NULL; |
|
| 2579 | + | } |
|
| 2580 | + | return (n->type = sym->node->type); |
|
| 2581 | + | } |
|
| 2582 | + | case SYM_FUNCTION: |
|
| 2583 | + | if (!sym->node) |
|
| 2584 | + | return NULL; |
|
| 2585 | + | sym->e.fn.used = true; |
|
| 2586 | + | n->type = sym->node->type; |
|
| 2587 | + | return n->type; |
|
| 2588 | + | case SYM_TYPE: |
|
| 2589 | + | if (!sym->node) |
|
| 2590 | + | return NULL; |
|
| 2591 | + | n->type = sym->node->type; |
|
| 2592 | + | return n->type; |
|
| 2593 | + | default: |
|
| 2594 | + | return NULL; |
|
| 2595 | + | } |
|
| 2596 | + | } |
|
| 2597 | + | ||
| 2598 | + | case NODE_REF: { |
|
| 2599 | + | node_t *target = n->val.ref.target; |
|
| 2600 | + | type_t *target_typ = resolve_node(t, target, expected); |
|
| 2601 | + | ||
| 2602 | + | if (!target_typ) |
|
| 2603 | + | return NULL; |
|
| 2604 | + | ||
| 2605 | + | bool mut_ref = n->val.ref.mut; |
|
| 2606 | + | type_t *exp = expected; |
|
| 2607 | + | ||
| 2608 | + | while (exp && exp->cls == TYPE_OPT) { |
|
| 2609 | + | exp = exp->info.opt.elem; |
|
| 2610 | + | } |
|
| 2611 | + | switch (target->cls) { |
|
| 2612 | + | case NODE_IDENT: { |
|
| 2613 | + | return (n->type = alloc_ptr_type(t, target_typ, mut_ref)); |
|
| 2614 | + | } |
|
| 2615 | + | case NODE_ARRAY_INDEX: { |
|
| 2616 | + | node_t *idx = target->val.access.rval; |
|
| 2617 | + | if (idx->cls == NODE_RANGE) { |
|
| 2618 | + | /* Array slice reference (e.g., &ary[0..3]) */ |
|
| 2619 | + | ||
| 2620 | + | if (target_typ->info.slc.mut == mut_ref) { |
|
| 2621 | + | return (n->type = target_typ); |
|
| 2622 | + | } |
|
| 2623 | + | type_t *slice_type = alloc_slice_type( |
|
| 2624 | + | t, |
|
| 2625 | + | target_typ->info.slc.elem, |
|
| 2626 | + | target_typ->info.slc.base, |
|
| 2627 | + | mut_ref |
|
| 2628 | + | ); |
|
| 2629 | + | return (n->type = slice_type); |
|
| 2630 | + | } else { |
|
| 2631 | + | /* Array element reference (e.g., &ary[3]) */ |
|
| 2632 | + | return (n->type = alloc_ptr_type(t, target_typ, mut_ref)); |
|
| 2633 | + | } |
|
| 2634 | + | } |
|
| 2635 | + | case NODE_ARRAY_LIT: |
|
| 2636 | + | case NODE_ARRAY_REPEAT_LIT: |
|
| 2637 | + | /* Slice literal. */ |
|
| 2638 | + | if (target_typ->cls == TYPE_ARRAY) { |
|
| 2639 | + | type_t *slice_type = alloc_slice_type( |
|
| 2640 | + | t, target_typ->info.ary.elem, target_typ, mut_ref |
|
| 2641 | + | ); |
|
| 2642 | + | return (n->type = slice_type); |
|
| 2643 | + | } else if (target_typ->cls == TYPE_SLICE) { |
|
| 2644 | + | type_t *slice_type = alloc_slice_type( |
|
| 2645 | + | t, |
|
| 2646 | + | target_typ->info.slc.elem, |
|
| 2647 | + | target_typ->info.slc.base, |
|
| 2648 | + | mut_ref |
|
| 2649 | + | ); |
|
| 2650 | + | return (n->type = slice_type); |
|
| 2651 | + | } else { |
|
| 2652 | + | bail("unexpected slice literal type"); |
|
| 2653 | + | } |
|
| 2654 | + | case NODE_ACCESS: |
|
| 2655 | + | /* Field access. */ |
|
| 2656 | + | return (n->type = alloc_ptr_type(t, target_typ, mut_ref)); |
|
| 2657 | + | default: |
|
| 2658 | + | bail("can't take reference of %s", node_names[target->cls]); |
|
| 2659 | + | } |
|
| 2660 | + | } |
|
| 2661 | + | ||
| 2662 | + | case NODE_UNOP: { |
|
| 2663 | + | switch (n->val.unop.op) { |
|
| 2664 | + | case OP_NOT: { |
|
| 2665 | + | type_t *typ = resolve_node(t, n->val.unop.expr, expected); |
|
| 2666 | + | if (!typ) |
|
| 2667 | + | return NULL; |
|
| 2668 | + | return (n->type = typ); |
|
| 2669 | + | } |
|
| 2670 | + | case OP_NEG: { |
|
| 2671 | + | type_t *typ = resolve_node(t, n->val.unop.expr, expected); |
|
| 2672 | + | if (!typ) |
|
| 2673 | + | return NULL; |
|
| 2674 | + | return (n->type = typ); |
|
| 2675 | + | } |
|
| 2676 | + | case OP_DEREF: { |
|
| 2677 | + | type_t *target_typ = resolve_node(t, n->val.unop.expr, NULL); |
|
| 2678 | + | if (!target_typ) |
|
| 2679 | + | return NULL; |
|
| 2680 | + | ||
| 2681 | + | return (n->type = deref_type(target_typ)); |
|
| 2682 | + | } |
|
| 2683 | + | case OP_BNOT: { |
|
| 2684 | + | type_t *typ = resolve_node(t, n->val.unop.expr, expected); |
|
| 2685 | + | if (!typ) |
|
| 2686 | + | return NULL; |
|
| 2687 | + | return (n->type = typ); |
|
| 2688 | + | } |
|
| 2689 | + | default: |
|
| 2690 | + | abort(); |
|
| 2691 | + | } |
|
| 2692 | + | } |
|
| 2693 | + | ||
| 2694 | + | case NODE_BINOP: { |
|
| 2695 | + | node_t *lhs = n->val.binop.left; |
|
| 2696 | + | node_t *rhs = n->val.binop.right; |
|
| 2697 | + | bool left_is_nil = lhs && lhs->cls == NODE_NIL; |
|
| 2698 | + | ||
| 2699 | + | /* Check operands without forcing specific expected types */ |
|
| 2700 | + | type_t *left = NULL; |
|
| 2701 | + | type_t *right = NULL; |
|
| 2702 | + | ||
| 2703 | + | if (left_is_nil && rhs && rhs->cls != NODE_NIL) { |
|
| 2704 | + | right = resolve_node(t, rhs, NULL); |
|
| 2705 | + | left = resolve_node(t, lhs, right); |
|
| 2706 | + | } else { |
|
| 2707 | + | left = resolve_node(t, lhs, NULL); |
|
| 2708 | + | right = resolve_node(t, rhs, left); |
|
| 2709 | + | } |
|
| 2710 | + | type_t *unified = NULL; |
|
| 2711 | + | ||
| 2712 | + | if (!left && !right) |
|
| 2713 | + | return NULL; |
|
| 2714 | + | ||
| 2715 | + | /* Check for pointer arithmetic before type unification */ |
|
| 2716 | + | if (n->val.binop.op == OP_ADD || n->val.binop.op == OP_SUB) { |
|
| 2717 | + | if (left && right) { |
|
| 2718 | + | /* Allow pointer + integer or integer + pointer */ |
|
| 2719 | + | if (left->cls == TYPE_PTR && type_is_int(right->cls)) { |
|
| 2720 | + | return (n->type = left); |
|
| 2721 | + | } |
|
| 2722 | + | if (n->val.binop.op == OP_ADD && right->cls == TYPE_PTR && |
|
| 2723 | + | type_is_int(left->cls)) { |
|
| 2724 | + | return (n->type = right); |
|
| 2725 | + | } |
|
| 2726 | + | } |
|
| 2727 | + | } |
|
| 2728 | + | ||
| 2729 | + | bool coerce = n->val.binop.op == OP_EQ || n->val.binop.op == OP_NE; |
|
| 2730 | + | if (coerce && left && right) { |
|
| 2731 | + | if (left->cls == TYPE_OPT && right->cls != TYPE_OPT) { |
|
| 2732 | + | /* Flip arguments because coercion only applies to the rval */ |
|
| 2733 | + | unified = |
|
| 2734 | + | type_unify(t, right, left, n, coerce, "binary operation"); |
|
| 2735 | + | } else { |
|
| 2736 | + | unified = |
|
| 2737 | + | type_unify(t, left, right, n, coerce, "binary operation"); |
|
| 2738 | + | } |
|
| 2739 | + | } else { |
|
| 2740 | + | unified = type_unify(t, left, right, n, coerce, "binary operation"); |
|
| 2741 | + | } |
|
| 2742 | + | if (!unified) |
|
| 2743 | + | return NULL; |
|
| 2744 | + | ||
| 2745 | + | /* Set operand types to unified type if they were previously NULL */ |
|
| 2746 | + | if (!left) |
|
| 2747 | + | n->val.binop.left->type = unified; |
|
| 2748 | + | if (!right) |
|
| 2749 | + | n->val.binop.right->type = unified; |
|
| 2750 | + | ||
| 2751 | + | /* Check numeric operations. */ |
|
| 2752 | + | if (n->val.binop.op <= OP_MOD) { |
|
| 2753 | + | if (expected) { |
|
| 2754 | + | /* If we have an expected numeric type different from unified, |
|
| 2755 | + | * coerce to it. This will affect the instructions used by the |
|
| 2756 | + | * code generator. Note that we don't try to unify the two types |
|
| 2757 | + | * as this will promote the smaller type to the larger one. */ |
|
| 2758 | + | if (expected != unified) |
|
| 2759 | + | return (n->type = expected); |
|
| 2760 | + | } |
|
| 2761 | + | return (n->type = unified); |
|
| 2762 | + | } |
|
| 2763 | + | ||
| 2764 | + | /* Check comparison operations. */ |
|
| 2765 | + | switch (n->val.binop.op) { |
|
| 2766 | + | case OP_EQ: |
|
| 2767 | + | case OP_NE: |
|
| 2768 | + | case OP_GT: |
|
| 2769 | + | case OP_LT: |
|
| 2770 | + | case OP_LE: |
|
| 2771 | + | case OP_GE: |
|
| 2772 | + | /* Update operand types to unified type for comparison */ |
|
| 2773 | + | n->val.binop.left->type = unified; |
|
| 2774 | + | n->val.binop.right->type = unified; |
|
| 2775 | + | return (n->type = t->types.type_bool); |
|
| 2776 | + | case OP_AND: |
|
| 2777 | + | case OP_OR: |
|
| 2778 | + | return (n->type = unified); |
|
| 2779 | + | case OP_BAND: |
|
| 2780 | + | case OP_BOR: |
|
| 2781 | + | case OP_XOR: |
|
| 2782 | + | case OP_SHL: |
|
| 2783 | + | case OP_SHR: |
|
| 2784 | + | return (n->type = unified); |
|
| 2785 | + | case OP_ADD: |
|
| 2786 | + | case OP_SUB: |
|
| 2787 | + | case OP_MUL: |
|
| 2788 | + | case OP_DIV: |
|
| 2789 | + | case OP_MOD: |
|
| 2790 | + | /* These are handled above in the numeric operations section */ |
|
| 2791 | + | abort(); |
|
| 2792 | + | } |
|
| 2793 | + | return NULL; |
|
| 2794 | + | } |
|
| 2795 | + | ||
| 2796 | + | case NODE_ACCESS: { |
|
| 2797 | + | node_t *expr = n->val.access.lval; |
|
| 2798 | + | node_t *field = n->val.access.rval; |
|
| 2799 | + | ||
| 2800 | + | type_t *decl_type = resolve_node(t, expr, NULL); |
|
| 2801 | + | if (!decl_type) |
|
| 2802 | + | return NULL; |
|
| 2803 | + | ||
| 2804 | + | while (decl_type->cls == TYPE_PTR) |
|
| 2805 | + | decl_type = deref_type(decl_type); |
|
| 2806 | + | ||
| 2807 | + | if (decl_type->cls == TYPE_RECORD) { |
|
| 2808 | + | symbol_t *field_sym = record_field_lookup(decl_type, field); |
|
| 2809 | + | if (!field_sym) |
|
| 2810 | + | return NULL; |
|
| 2811 | + | ||
| 2812 | + | n->sym = field_sym; |
|
| 2813 | + | return (n->type = field_sym->e.field.typ); |
|
| 2814 | + | } else if (decl_type->cls == TYPE_ARRAY) { |
|
| 2815 | + | if (ident_eq(field, LEN_FIELD, LEN_FIELD_LEN)) { |
|
| 2816 | + | n->cls = NODE_NUMBER; |
|
| 2817 | + | n->type = t->types.type_u32; |
|
| 2818 | + | n->val.number.value.u = decl_type->info.ary.length; |
|
| 2819 | + | n->val.number.text = NULL; |
|
| 2820 | + | n->val.number.text_len = 0; |
|
| 2821 | + | return n->type; |
|
| 2822 | + | } |
|
| 2823 | + | return NULL; |
|
| 2824 | + | } else if (decl_type->cls == TYPE_SLICE) { |
|
| 2825 | + | if (ident_eq(field, LEN_FIELD, LEN_FIELD_LEN)) |
|
| 2826 | + | return (n->type = t->types.type_u32); |
|
| 2827 | + | if (ident_eq(field, PTR_FIELD, PTR_FIELD_LEN)) |
|
| 2828 | + | return (n->type = decl_type->info.slc.elem->ptr); |
|
| 2829 | + | return NULL; |
|
| 2830 | + | } |
|
| 2831 | + | return NULL; |
|
| 2832 | + | } |
|
| 2833 | + | ||
| 2834 | + | case NODE_USE: |
|
| 2835 | + | return resolve_use(t, n); |
|
| 2836 | + | ||
| 2837 | + | case NODE_CALL: { |
|
| 2838 | + | node_t *callee = n->val.call.callee; |
|
| 2839 | + | symbol_t *sym = resolve_name(t, callee, SYM_ANY); |
|
| 2840 | + | ||
| 2841 | + | if (!sym) |
|
| 2842 | + | return NULL; |
|
| 2843 | + | ||
| 2844 | + | /* Function call */ |
|
| 2845 | + | if (sym->kind == SYM_FUNCTION) { |
|
| 2846 | + | n->sym = sym; |
|
| 2847 | + | ||
| 2848 | + | return resolve_call_fn(t, sym, n); |
|
| 2849 | + | } |
|
| 2850 | + | /* Tuple record constructor call */ |
|
| 2851 | + | if (sym->kind == SYM_TYPE) { |
|
| 2852 | + | type_t *typ = sym->e.typ.info; |
|
| 2853 | + | ||
| 2854 | + | if (typ && typ->cls == TYPE_RECORD && typ->info.srt.tuple) { |
|
| 2855 | + | return resolve_tuple_record_constructor(t, n, typ); |
|
| 2856 | + | } |
|
| 2857 | + | } |
|
| 2858 | + | /* Function pointer call */ |
|
| 2859 | + | if (sym->kind == SYM_VARIABLE) { |
|
| 2860 | + | if (callee->cls == NODE_IDENT) { |
|
| 2861 | + | if (sym->node->type && sym->node->type->cls == TYPE_FN) { |
|
| 2862 | + | n->sym = sym; |
|
| 2863 | + | return resolve_call_fn_ptr(t, sym, n); |
|
| 2864 | + | } |
|
| 2865 | + | return NULL; |
|
| 2866 | + | } |
|
| 2867 | + | } else if (sym->kind == SYM_VARIANT) { |
|
| 2868 | + | if (callee->cls == NODE_SCOPE) { |
|
| 2869 | + | type_t *scope = resolve_scope(t, callee); |
|
| 2870 | + | ||
| 2871 | + | if (scope && type_is_union_with_payload(scope)) |
|
| 2872 | + | return resolve_enum_constructor(t, n, scope, sym); |
|
| 2873 | + | } |
|
| 2874 | + | } |
|
| 2875 | + | return NULL; |
|
| 2876 | + | } |
|
| 2877 | + | case NODE_BUILTIN: |
|
| 2878 | + | return resolve_builtin(t, n, expected); |
|
| 2879 | + | case NODE_CALL_ARG: |
|
| 2880 | + | return (n->type = resolve_node(t, n->val.call_arg.expr, expected)); |
|
| 2881 | + | ||
| 2882 | + | case NODE_THROW: |
|
| 2883 | + | return resolve_throw(t, n); |
|
| 2884 | + | case NODE_TRY: |
|
| 2885 | + | return resolve_try_expr(t, n, expected); |
|
| 2886 | + | case NODE_CATCH: |
|
| 2887 | + | bail("cannot type check %s", node_names[n->cls]); |
|
| 2888 | + | ||
| 2889 | + | case NODE_RETURN: { |
|
| 2890 | + | type_t *fn_ret = t->fn->node->type->info.fun.ret; |
|
| 2891 | + | type_t *expected = fn_ret; |
|
| 2892 | + | ||
| 2893 | + | if (fn_ret->cls == TYPE_RESULT) |
|
| 2894 | + | expected = fn_ret->info.res.payload; |
|
| 2895 | + | ||
| 2896 | + | if (expected == t->types.type_void) { |
|
| 2897 | + | if (n->val.return_stmt.value) |
|
| 2898 | + | return NULL; |
|
| 2899 | + | return (n->type = fn_ret); |
|
| 2900 | + | } |
|
| 2901 | + | if (n->val.return_stmt.value) { |
|
| 2902 | + | if (!resolve_node(t, n->val.return_stmt.value, expected)) |
|
| 2903 | + | return NULL; |
|
| 2904 | + | } |
|
| 2905 | + | return (n->type = fn_ret); |
|
| 2906 | + | } |
|
| 2907 | + | ||
| 2908 | + | case NODE_IF: |
|
| 2909 | + | if (!resolve_node(t, n->val.if_stmt.cond, t->types.type_bool)) |
|
| 2910 | + | return NULL; |
|
| 2911 | + | ||
| 2912 | + | type_t *result_typ = expected ? expected : t->types.type_void; |
|
| 2913 | + | type_t *lbranch_typ = resolve_node(t, n->val.if_stmt.lbranch, expected); |
|
| 2914 | + | if (!lbranch_typ) |
|
| 2915 | + | return NULL; |
|
| 2916 | + | ||
| 2917 | + | if (n->val.if_stmt.rbranch) { |
|
| 2918 | + | type_t *rbranch_typ = |
|
| 2919 | + | resolve_node(t, n->val.if_stmt.rbranch, expected); |
|
| 2920 | + | if (!rbranch_typ) |
|
| 2921 | + | return NULL; |
|
| 2922 | + | ||
| 2923 | + | if (!expected) { |
|
| 2924 | + | type_t *unified = |
|
| 2925 | + | type_unify(t, lbranch_typ, rbranch_typ, n, false, NULL); |
|
| 2926 | + | if (unified) |
|
| 2927 | + | result_typ = unified; |
|
| 2928 | + | } |
|
| 2929 | + | } |
|
| 2930 | + | return (n->type = result_typ); |
|
| 2931 | + | ||
| 2932 | + | case NODE_IF_LET: { |
|
| 2933 | + | type_t *expr_type = resolve_node(t, n->val.if_let_stmt.expr, NULL); |
|
| 2934 | + | if (!expr_type) |
|
| 2935 | + | return NULL; |
|
| 2936 | + | /* Create scope for the bound variable */ |
|
| 2937 | + | n->val.if_let_stmt.scope = symtab_scope(t->scope, NULL); |
|
| 2938 | + | n->val.if_let_stmt.var->type = expr_type->info.opt.elem; |
|
| 2939 | + | t->scope = n->val.if_let_stmt.scope; |
|
| 2940 | + | ||
| 2941 | + | /* Add the bound variable to the scope */ |
|
| 2942 | + | if (!symbol_add(t, n->val.if_let_stmt.var, n->val.if_let_stmt.var)) |
|
| 2943 | + | return NULL; |
|
| 2944 | + | ||
| 2945 | + | /* Only set symbol data if not a placeholder */ |
|
| 2946 | + | if (n->val.if_let_stmt.var->cls != NODE_PLACEHOLDER) { |
|
| 2947 | + | n->val.if_let_stmt.var->sym->e.var.typ = expr_type->info.opt.elem; |
|
| 2948 | + | n->val.if_let_stmt.var->sym->e.var.align = |
|
| 2949 | + | expr_type->info.opt.elem->align; |
|
| 2950 | + | n->val.if_let_stmt.var->sym->scope = t->scope; |
|
| 2951 | + | } |
|
| 2952 | + | ||
| 2953 | + | if (n->val.if_let_stmt.guard) { |
|
| 2954 | + | if (!resolve_node(t, n->val.if_let_stmt.guard, t->types.type_bool)) |
|
| 2955 | + | return NULL; |
|
| 2956 | + | } |
|
| 2957 | + | ||
| 2958 | + | if (!resolve_block(t, n->val.if_let_stmt.lbranch)) |
|
| 2959 | + | return NULL; |
|
| 2960 | + | ||
| 2961 | + | t->scope = t->scope->parent; |
|
| 2962 | + | ||
| 2963 | + | if (n->val.if_let_stmt.rbranch) { |
|
| 2964 | + | if (!resolve_block(t, n->val.if_let_stmt.rbranch)) |
|
| 2965 | + | return NULL; |
|
| 2966 | + | } |
|
| 2967 | + | return (n->type = t->types.type_void); |
|
| 2968 | + | } |
|
| 2969 | + | ||
| 2970 | + | case NODE_MATCH: { |
|
| 2971 | + | /* Check the match operand */ |
|
| 2972 | + | type_t *match_typ = resolve_node(t, n->val.match_stmt.expr, NULL); |
|
| 2973 | + | if (!match_typ) |
|
| 2974 | + | return NULL; |
|
| 2975 | + | ||
| 2976 | + | /* Check each case to ensure patterns match the |
|
| 2977 | + | * match operand type. */ |
|
| 2978 | + | node_t **cases = |
|
| 2979 | + | nodespan_ptrs(&t->module->parser, n->val.match_stmt.cases); |
|
| 2980 | + | bool all_diverge = n->val.match_stmt.cases.len > 0; |
|
| 2981 | + | ||
| 2982 | + | for (usize i = 0; i < n->val.match_stmt.cases.len; i++) { |
|
| 2983 | + | node_t *c = cases[i]; |
|
| 2984 | + | ||
| 2985 | + | type_t *case_typ = resolve_match_case(t, c, match_typ); |
|
| 2986 | + | if (!case_typ) |
|
| 2987 | + | return NULL; |
|
| 2988 | + | ||
| 2989 | + | /* Check if this case diverges. */ |
|
| 2990 | + | if (!node_diverges(c->val.match_case.body)) |
|
| 2991 | + | all_diverge = false; |
|
| 2992 | + | } |
|
| 2993 | + | /* Match diverges if all cases diverge. */ |
|
| 2994 | + | if (all_diverge) |
|
| 2995 | + | return (n->type = t->types.type_never); |
|
| 2996 | + | ||
| 2997 | + | return (n->type = t->types.type_void); |
|
| 2998 | + | } |
|
| 2999 | + | case NODE_MATCH_CASE: |
|
| 3000 | + | /* Handled in `NODE_MATCH` */ |
|
| 3001 | + | case NODE_BLOCK: |
|
| 3002 | + | return (n->type = resolve_block(t, n)); |
|
| 3003 | + | case NODE_FN: |
|
| 3004 | + | /* Handled at the module level */ |
|
| 3005 | + | case NODE_LOOP: |
|
| 3006 | + | return (n->type = resolve_block(t, n->val.loop_stmt.body)); |
|
| 3007 | + | case NODE_BREAK: |
|
| 3008 | + | return (n->type = t->types.type_never); |
|
| 3009 | + | case NODE_VAR: |
|
| 3010 | + | return resolve_var(t, n); |
|
| 3011 | + | case NODE_CONST: |
|
| 3012 | + | return resolve_const(t, n); |
|
| 3013 | + | case NODE_STATIC: |
|
| 3014 | + | return resolve_static(t, n); |
|
| 3015 | + | case NODE_PARAM: |
|
| 3016 | + | abort(); |
|
| 3017 | + | case NODE_ASSIGN: { |
|
| 3018 | + | type_t *ltype = resolve_node(t, n->val.assign.lval, NULL); |
|
| 3019 | + | if (!ltype) |
|
| 3020 | + | return NULL; |
|
| 3021 | + | ||
| 3022 | + | if (!resolve_node(t, n->val.assign.rval, ltype)) |
|
| 3023 | + | return NULL; |
|
| 3024 | + | ||
| 3025 | + | return (n->type = ltype); |
|
| 3026 | + | } |
|
| 3027 | + | ||
| 3028 | + | case NODE_ATTRIBUTE: |
|
| 3029 | + | return (n->type = t->types.type_void); |
|
| 3030 | + | ||
| 3031 | + | case NODE_EXPR_STMT: { |
|
| 3032 | + | /* Check the expression but don't use its result value. */ |
|
| 3033 | + | type_t *typ = resolve_node(t, n->val.expr_stmt, NULL); |
|
| 3034 | + | if (!typ) |
|
| 3035 | + | return NULL; |
|
| 3036 | + | ||
| 3037 | + | /* Expression statements don't produce values. */ |
|
| 3038 | + | return (n->type = t->types.type_void); |
|
| 3039 | + | } |
|
| 3040 | + | ||
| 3041 | + | case NODE_MOD: |
|
| 3042 | + | return resolve_mod_decl(t, n); |
|
| 3043 | + | ||
| 3044 | + | case NODE_RANGE: { |
|
| 3045 | + | /* Check range start expression if provided */ |
|
| 3046 | + | if (n->val.range.start) { |
|
| 3047 | + | if (!resolve_node(t, n->val.range.start, t->types.type_u32)) |
|
| 3048 | + | return NULL; |
|
| 3049 | + | } |
|
| 3050 | + | /* Check range end expression if provided */ |
|
| 3051 | + | if (n->val.range.end) { |
|
| 3052 | + | if (!resolve_node(t, n->val.range.end, t->types.type_u32)) |
|
| 3053 | + | return NULL; |
|
| 3054 | + | } |
|
| 3055 | + | /* Range nodes don't have a specific type, they're contextual */ |
|
| 3056 | + | return (n->type = t->types.type_void); |
|
| 3057 | + | } |
|
| 3058 | + | ||
| 3059 | + | case NODE_AS: { |
|
| 3060 | + | if (!resolve_node(t, n->val.as_expr.expr, NULL)) |
|
| 3061 | + | return NULL; |
|
| 3062 | + | ||
| 3063 | + | type_t *target_type = resolve_type(t, n->val.as_expr.type); |
|
| 3064 | + | if (!target_type) |
|
| 3065 | + | return NULL; |
|
| 3066 | + | ||
| 3067 | + | return (n->type = target_type); |
|
| 3068 | + | } |
|
| 3069 | + | case NODE_PANIC: |
|
| 3070 | + | return (n->type = t->types.type_never); |
|
| 3071 | + | ||
| 3072 | + | case NODE_WHILE: |
|
| 3073 | + | case NODE_WHILE_LET: |
|
| 3074 | + | case NODE_IF_CASE: |
|
| 3075 | + | case NODE_GUARD_CASE: |
|
| 3076 | + | case NODE_GUARD_LET: |
|
| 3077 | + | case NODE_FOR: |
|
| 3078 | + | ||
| 3079 | + | case NODE_PLACEHOLDER: |
|
| 3080 | + | /* Placeholders don't produce a value, so return NULL type */ |
|
| 3081 | + | return (n->type = NULL); |
|
| 3082 | + | ||
| 3083 | + | case NODE_TYPE: |
|
| 3084 | + | case NODE_UNION_VARIANT: |
|
| 3085 | + | case NODE_PTR: |
|
| 3086 | + | case NODE_MOD_BODY: |
|
| 3087 | + | case NODE_ALIGN: |
|
| 3088 | + | bail("unsupported node type %s", node_names[n->cls]); |
|
| 3089 | + | } |
|
| 3090 | + | return NULL; |
|
| 3091 | + | } |
|
| 3092 | + | ||
| 3093 | + | static node_t *binding_ident(node_t *n) { |
|
| 3094 | + | switch (n->cls) { |
|
| 3095 | + | case NODE_VAR: |
|
| 3096 | + | return n->val.var.ident; |
|
| 3097 | + | case NODE_CONST: |
|
| 3098 | + | return n->val.constant.ident; |
|
| 3099 | + | case NODE_STATIC: |
|
| 3100 | + | return n->val.static_decl.ident; |
|
| 3101 | + | default: |
|
| 3102 | + | bail("unexpected binding node %s", node_names[n->cls]); |
|
| 3103 | + | } |
|
| 3104 | + | } |
|
| 3105 | + | ||
| 3106 | + | static type_t *resolve_binding( |
|
| 3107 | + | resolve_t *t, node_t *n, node_t *val, node_t *typ |
|
| 3108 | + | ) { |
|
| 3109 | + | type_t *declared = NULL; |
|
| 3110 | + | if (typ) { |
|
| 3111 | + | /* Resolve the declared type before checking the value */ |
|
| 3112 | + | if (!(declared = resolve_type(t, typ))) |
|
| 3113 | + | return NULL; |
|
| 3114 | + | } |
|
| 3115 | + | /* Check the value with the declared type as expected type */ |
|
| 3116 | + | type_t *inferred = resolve_node(t, val, declared); |
|
| 3117 | + | if (!inferred) |
|
| 3118 | + | return NULL; |
|
| 3119 | + | ||
| 3120 | + | type_t *final_type = inferred; |
|
| 3121 | + | ||
| 3122 | + | if (declared) { |
|
| 3123 | + | final_type = |
|
| 3124 | + | type_unify(t, inferred, declared, val, true, "variable binding"); |
|
| 3125 | + | ||
| 3126 | + | if (!final_type) |
|
| 3127 | + | return NULL; |
|
| 3128 | + | } |
|
| 3129 | + | ||
| 3130 | + | node_t *ident = binding_ident(n); |
|
| 3131 | + | ||
| 3132 | + | /* symbol_add handles placeholders internally */ |
|
| 3133 | + | if (!symbol_add(t, ident, n)) |
|
| 3134 | + | return NULL; |
|
| 3135 | + | ||
| 3136 | + | /* Only set symbol data if not a placeholder */ |
|
| 3137 | + | if (ident->cls != NODE_PLACEHOLDER) { |
|
| 3138 | + | n->sym->scope = t->scope; |
|
| 3139 | + | n->sym->e.var.typ = final_type; |
|
| 3140 | + | n->sym->e.var.align = final_type->align; |
|
| 3141 | + | } |
|
| 3142 | + | ||
| 3143 | + | return (n->type = final_type); |
|
| 3144 | + | } |
|
| 3145 | + | ||
| 3146 | + | /* Check if a `const` declaration is valid. */ |
|
| 3147 | + | static type_t *resolve_const(resolve_t *t, node_t *n) { |
|
| 3148 | + | return resolve_binding(t, n, n->val.constant.value, n->val.constant.type); |
|
| 3149 | + | } |
|
| 3150 | + | ||
| 3151 | + | static type_t *resolve_static(resolve_t *t, node_t *n) { |
|
| 3152 | + | return resolve_binding( |
|
| 3153 | + | t, n, n->val.static_decl.value, n->val.static_decl.type |
|
| 3154 | + | ); |
|
| 3155 | + | } |
|
| 3156 | + | ||
| 3157 | + | /* Check if a `let` or `mut` declaration is valid. */ |
|
| 3158 | + | static type_t *resolve_var(resolve_t *t, node_t *n) { |
|
| 3159 | + | node_t *type = n->val.var.type; |
|
| 3160 | + | node_t *value = n->val.var.value; |
|
| 3161 | + | ||
| 3162 | + | if (!value) |
|
| 3163 | + | return NULL; |
|
| 3164 | + | ||
| 3165 | + | type_t *var_type = resolve_binding(t, n, value, type); |
|
| 3166 | + | ||
| 3167 | + | if (!var_type) |
|
| 3168 | + | return NULL; |
|
| 3169 | + | ||
| 3170 | + | if (n->val.var.align) { |
|
| 3171 | + | node_t *align = n->val.var.align->val.align; |
|
| 3172 | + | ||
| 3173 | + | if (!resolve_node(t, align, t->types.type_u32)) |
|
| 3174 | + | return NULL; |
|
| 3175 | + | ||
| 3176 | + | usize c = 0; |
|
| 3177 | + | ||
| 3178 | + | if (!resolve_const_usize(t, align, &c)) |
|
| 3179 | + | return NULL; |
|
| 3180 | + | ||
| 3181 | + | n->sym->e.var.align = (i32)c; |
|
| 3182 | + | } |
|
| 3183 | + | return var_type; |
|
| 3184 | + | } |
|
| 3185 | + | ||
| 3186 | + | static bool node_diverges(node_t *n) { |
|
| 3187 | + | if (!n) |
|
| 3188 | + | return false; |
|
| 3189 | + | ||
| 3190 | + | switch (n->cls) { |
|
| 3191 | + | case NODE_RETURN: |
|
| 3192 | + | case NODE_THROW: |
|
| 3193 | + | case NODE_PANIC: |
|
| 3194 | + | return true; |
|
| 3195 | + | case NODE_BLOCK: |
|
| 3196 | + | return n->type && n->type->cls == TYPE_NEVER; |
|
| 3197 | + | case NODE_IF: { |
|
| 3198 | + | node_t *then_branch = n->val.if_stmt.lbranch; |
|
| 3199 | + | node_t *else_branch = n->val.if_stmt.rbranch; |
|
| 3200 | + | ||
| 3201 | + | if (!then_branch || !else_branch) |
|
| 3202 | + | return false; |
|
| 3203 | + | ||
| 3204 | + | return node_diverges(then_branch) && node_diverges(else_branch); |
|
| 3205 | + | } |
|
| 3206 | + | case NODE_IF_LET: |
|
| 3207 | + | case NODE_IF_CASE: { |
|
| 3208 | + | node_t *then_branch = NULL; |
|
| 3209 | + | node_t *else_branch = NULL; |
|
| 3210 | + | ||
| 3211 | + | if (n->cls == NODE_IF_LET) { |
|
| 3212 | + | then_branch = n->val.if_let_stmt.lbranch; |
|
| 3213 | + | else_branch = n->val.if_let_stmt.rbranch; |
|
| 3214 | + | } else { |
|
| 3215 | + | then_branch = n->val.if_case_stmt.lbranch; |
|
| 3216 | + | else_branch = n->val.if_case_stmt.rbranch; |
|
| 3217 | + | } |
|
| 3218 | + | if (!then_branch || !else_branch) |
|
| 3219 | + | return false; |
|
| 3220 | + | ||
| 3221 | + | return node_diverges(then_branch) && node_diverges(else_branch); |
|
| 3222 | + | } |
|
| 3223 | + | case NODE_EXPR_STMT: { |
|
| 3224 | + | node_t *expr = n->val.expr_stmt; |
|
| 3225 | + | ||
| 3226 | + | if (!expr) |
|
| 3227 | + | return false; |
|
| 3228 | + | if (expr->type && expr->type->cls == TYPE_NEVER) |
|
| 3229 | + | return true; |
|
| 3230 | + | ||
| 3231 | + | if (expr->cls == NODE_CALL && expr->sym && |
|
| 3232 | + | expr->sym->kind == SYM_FUNCTION) { |
|
| 3233 | + | const char *qualified = expr->sym->qualified; |
|
| 3234 | + | ||
| 3235 | + | if (qualified && |
|
| 3236 | + | strcmp(qualified, "core::intrinsics::ebreak") == 0) { |
|
| 3237 | + | return true; |
|
| 3238 | + | } |
|
| 3239 | + | } |
|
| 3240 | + | return false; |
|
| 3241 | + | } |
|
| 3242 | + | case NODE_MATCH: |
|
| 3243 | + | /* Match diverges if its type is TYPE_NEVER (all cases diverge). */ |
|
| 3244 | + | return n->type && n->type->cls == TYPE_NEVER; |
|
| 3245 | + | default: |
|
| 3246 | + | break; |
|
| 3247 | + | } |
|
| 3248 | + | return false; |
|
| 3249 | + | } |
|
| 3250 | + | ||
| 3251 | + | /* Check a code block. */ |
|
| 3252 | + | static type_t *resolve_block(resolve_t *t, node_t *n) { |
|
| 3253 | + | /* Create a new scope for this block. */ |
|
| 3254 | + | scope_t *parent = t->scope; |
|
| 3255 | + | n->val.block.scope = symtab_scope(parent, NULL); |
|
| 3256 | + | t->scope = n->val.block.scope; |
|
| 3257 | + | ||
| 3258 | + | /* Check each statement in the block. */ |
|
| 3259 | + | node_t **stmts = nodespan_ptrs(&t->module->parser, n->val.block.stmts); |
|
| 3260 | + | for (usize i = 0; i < n->val.block.stmts.len; i++) { |
|
| 3261 | + | if (!resolve_node(t, stmts[i], NULL)) |
|
| 3262 | + | return NULL; |
|
| 3263 | + | } |
|
| 3264 | + | /* Return to parent scope. */ |
|
| 3265 | + | t->scope = parent; |
|
| 3266 | + | ||
| 3267 | + | type_t *block_type = t->types.type_void; |
|
| 3268 | + | ||
| 3269 | + | if (n->val.block.stmts.len > 0) { |
|
| 3270 | + | node_t *last = stmts[n->val.block.stmts.len - 1]; |
|
| 3271 | + | ||
| 3272 | + | if (node_diverges(last)) |
|
| 3273 | + | block_type = t->types.type_never; |
|
| 3274 | + | } |
|
| 3275 | + | return (n->type = block_type); |
|
| 3276 | + | } |
|
| 3277 | + | ||
| 3278 | + | /* Type check a complete AST, starting from the root module. */ |
|
| 3279 | + | bool resolve_run(resolve_t *t, module_t *root) { |
|
| 3280 | + | if (!resolve_mod_def(t, root)) |
|
| 3281 | + | return false; |
|
| 3282 | + | ||
| 3283 | + | for (usize i = 0; i < t->mm->nmodules; i++) { |
|
| 3284 | + | module_t *mod = &t->mm->modules[i]; |
|
| 3285 | + | ||
| 3286 | + | if (!mod->checked) { |
|
| 3287 | + | if (!resolve_mod_def(t, mod)) { |
|
| 3288 | + | return false; |
|
| 3289 | + | } |
|
| 3290 | + | } |
|
| 3291 | + | } |
|
| 3292 | + | return true; |
|
| 3293 | + | } |
|
| 3294 | + | ||
| 3295 | + | /* Type check a module. */ |
|
| 3296 | + | static bool resolve_mod_def(resolve_t *t, module_t *module) { |
|
| 3297 | + | /* First check all function signatures and type declarations */ |
|
| 3298 | + | if (!resolve_decls(t, module)) { |
|
| 3299 | + | return false; |
|
| 3300 | + | } |
|
| 3301 | + | if (module->state == MODULE_STATE_VISITING) |
|
| 3302 | + | return false; |
|
| 3303 | + | if (module->state == MODULE_STATE_VISITED && module->checked) { |
|
| 3304 | + | return true; |
|
| 3305 | + | } |
|
| 3306 | + | ||
| 3307 | + | module_t *pmodule = t->module; |
|
| 3308 | + | scope_t *pscope = t->scope; |
|
| 3309 | + | ||
| 3310 | + | module->state = MODULE_STATE_VISITING; |
|
| 3311 | + | t->module = module; |
|
| 3312 | + | t->scope = module->scope; |
|
| 3313 | + | ||
| 3314 | + | /* Type check function bodies */ |
|
| 3315 | + | node_t **mod_stmts = |
|
| 3316 | + | nodespan_ptrs(&module->parser, module->ast->val.block.stmts); |
|
| 3317 | + | for (usize i = 0; i < module->ast->val.block.stmts.len; i++) { |
|
| 3318 | + | node_t *stmt = mod_stmts[i]; |
|
| 3319 | + | ||
| 3320 | + | if (stmt->cls == NODE_FN) { |
|
| 3321 | + | if (!resolve_fn_def(t, stmt)) { |
|
| 3322 | + | return false; |
|
| 3323 | + | } |
|
| 3324 | + | if (stmt->val.fn_decl.attribs && |
|
| 3325 | + | stmt->val.fn_decl.attribs->val.attrib & ATTRIB_DEFAULT) { |
|
| 3326 | + | if (module->default_fn == NULL) { |
|
| 3327 | + | module->default_fn = stmt->sym; |
|
| 3328 | + | } |
|
| 3329 | + | } |
|
| 3330 | + | } |
|
| 3331 | + | } |
|
| 3332 | + | if (!module->default_fn) { |
|
| 3333 | + | for (usize i = 0; i < module->ast->val.block.stmts.len; i++) { |
|
| 3334 | + | node_t *stmt = mod_stmts[i]; |
|
| 3335 | + | if (stmt->cls == NODE_FN && stmt->val.fn_decl.attribs && |
|
| 3336 | + | stmt->val.fn_decl.attribs->val.attrib & ATTRIB_DEFAULT && |
|
| 3337 | + | stmt->sym) { |
|
| 3338 | + | module->default_fn = stmt->sym; |
|
| 3339 | + | break; |
|
| 3340 | + | } |
|
| 3341 | + | } |
|
| 3342 | + | } |
|
| 3343 | + | module->checked = true; |
|
| 3344 | + | module->state = MODULE_STATE_VISITED; |
|
| 3345 | + | module->ast->type = t->types.type_void; |
|
| 3346 | + | ||
| 3347 | + | t->module = pmodule; |
|
| 3348 | + | t->scope = pscope; |
|
| 3349 | + | ||
| 3350 | + | return true; |
|
| 3351 | + | } |
|
| 3352 | + | ||
| 3353 | + | /* Check function and type declarations */ |
|
| 3354 | + | static bool resolve_decls(resolve_t *t, module_t *module) { |
|
| 3355 | + | if (module->state == MODULE_STATE_VISITING) |
|
| 3356 | + | return false; |
|
| 3357 | + | if (module->state == MODULE_STATE_VISITED && module->declared) { |
|
| 3358 | + | return true; |
|
| 3359 | + | } |
|
| 3360 | + | ||
| 3361 | + | module_t *parent = t->module; |
|
| 3362 | + | ||
| 3363 | + | module->state = MODULE_STATE_VISITING; |
|
| 3364 | + | module->scope = symtab_scope(t->scope, module); |
|
| 3365 | + | t->module = module; |
|
| 3366 | + | t->scope = module->scope; |
|
| 3367 | + | ||
| 3368 | + | node_t *module_stmts[MAX_BLOCK_STATEMENTS] = { 0 }; |
|
| 3369 | + | module_t *module_refs[MAX_BLOCK_STATEMENTS] = { 0 }; |
|
| 3370 | + | usize nmodules = 0; |
|
| 3371 | + | ||
| 3372 | + | /* Predeclare child modules so their symbols are available early. */ |
|
| 3373 | + | node_t **decl_stmts = |
|
| 3374 | + | nodespan_ptrs(&module->parser, module->ast->val.block.stmts); |
|
| 3375 | + | for (usize i = 0; i < module->ast->val.block.stmts.len; i++) { |
|
| 3376 | + | node_t *stmt = decl_stmts[i]; |
|
| 3377 | + | ||
| 3378 | + | if (stmt->cls != NODE_MOD) |
|
| 3379 | + | continue; |
|
| 3380 | + | ||
| 3381 | + | node_t *name = stmt->val.mod_decl.ident; |
|
| 3382 | + | ||
| 3383 | + | char rel[MAX_PATH_LEN] = { 0 }; |
|
| 3384 | + | strncpy(rel, name->val.ident.name, name->val.ident.length); |
|
| 3385 | + | ||
| 3386 | + | module_t *submod = |
|
| 3387 | + | module_manager_find_relative(t->mm, module->path, rel); |
|
| 3388 | + | if (!submod) { |
|
| 3389 | + | if (stmt->val.mod_decl.attribs && |
|
| 3390 | + | (stmt->val.mod_decl.attribs->val.attrib & ATTRIB_TEST)) |
|
| 3391 | + | continue; |
|
| 3392 | + | return false; |
|
| 3393 | + | } |
|
| 3394 | + | symbol_t *sym = symtab_scope_lookup( |
|
| 3395 | + | module->scope, |
|
| 3396 | + | name->val.ident.name, |
|
| 3397 | + | name->val.ident.length, |
|
| 3398 | + | SYM_MODULE |
|
| 3399 | + | ); |
|
| 3400 | + | if (!sym) { |
|
| 3401 | + | if (!symbol_add(t, name, stmt)) { |
|
| 3402 | + | return false; |
|
| 3403 | + | } |
|
| 3404 | + | sym = stmt->sym; |
|
| 3405 | + | } else { |
|
| 3406 | + | stmt->sym = sym; |
|
| 3407 | + | } |
|
| 3408 | + | sym->e.mod = submod; |
|
| 3409 | + | sym->scope = submod->scope; |
|
| 3410 | + | submod->attribs = stmt->val.mod_decl.attribs |
|
| 3411 | + | ? stmt->val.mod_decl.attribs->val.attrib |
|
| 3412 | + | : ATTRIB_NONE; |
|
| 3413 | + | module_path(submod->qualified, module->qualified); |
|
| 3414 | + | module_qualify(submod->qualified, name); |
|
| 3415 | + | ||
| 3416 | + | module_stmts[nmodules] = stmt; |
|
| 3417 | + | module_refs[nmodules++] = submod; |
|
| 3418 | + | } |
|
| 3419 | + | ||
| 3420 | + | /* Predeclare named types so mutually recursive definitions can resolve. */ |
|
| 3421 | + | for (usize i = 0; i < module->ast->val.block.stmts.len; i++) { |
|
| 3422 | + | node_t *stmt = decl_stmts[i]; |
|
| 3423 | + | ||
| 3424 | + | if (stmt->cls == NODE_RECORD) { |
|
| 3425 | + | if (!declare_record(t, stmt)) { |
|
| 3426 | + | return false; |
|
| 3427 | + | } |
|
| 3428 | + | } else if (stmt->cls == NODE_UNION) { |
|
| 3429 | + | if (!declare_enum(t, stmt)) { |
|
| 3430 | + | return false; |
|
| 3431 | + | } |
|
| 3432 | + | } |
|
| 3433 | + | } |
|
| 3434 | + | ||
| 3435 | + | for (usize i = 0; i < module->ast->val.block.stmts.len; i++) { |
|
| 3436 | + | node_t *stmt = decl_stmts[i]; |
|
| 3437 | + | ||
| 3438 | + | switch (stmt->cls) { |
|
| 3439 | + | case NODE_USE: |
|
| 3440 | + | if (!resolve_use(t, stmt)) { |
|
| 3441 | + | return false; |
|
| 3442 | + | } |
|
| 3443 | + | break; |
|
| 3444 | + | case NODE_FN: |
|
| 3445 | + | if (!resolve_fn_decl(t, stmt)) { |
|
| 3446 | + | return false; |
|
| 3447 | + | } |
|
| 3448 | + | break; |
|
| 3449 | + | case NODE_RECORD: |
|
| 3450 | + | case NODE_UNION: |
|
| 3451 | + | if (!resolve_node(t, stmt, NULL)) { |
|
| 3452 | + | return false; |
|
| 3453 | + | } |
|
| 3454 | + | break; |
|
| 3455 | + | case NODE_MOD: |
|
| 3456 | + | stmt->type = t->types.type_void; |
|
| 3457 | + | break; |
|
| 3458 | + | case NODE_CONST: |
|
| 3459 | + | if (!resolve_const(t, stmt)) { |
|
| 3460 | + | return false; |
|
| 3461 | + | } |
|
| 3462 | + | break; |
|
| 3463 | + | case NODE_STATIC: |
|
| 3464 | + | if (!resolve_static(t, stmt)) { |
|
| 3465 | + | return false; |
|
| 3466 | + | } |
|
| 3467 | + | break; |
|
| 3468 | + | default: |
|
| 3469 | + | break; |
|
| 3470 | + | } |
|
| 3471 | + | } |
|
| 3472 | + | ||
| 3473 | + | /* Check submodule declarations after parent types are sized, |
|
| 3474 | + | * so that `super::` references can resolve to fully-typed symbols. |
|
| 3475 | + | * Skip submodules that are already being visited to avoid false |
|
| 3476 | + | * circular dependency errors when `use X::Y` directly imports a |
|
| 3477 | + | * submodule and that submodule uses `super::`. */ |
|
| 3478 | + | for (usize i = 0; i < nmodules; i++) { |
|
| 3479 | + | module_t *submod = module_refs[i]; |
|
| 3480 | + | ||
| 3481 | + | if (!submod->declared && submod->state != MODULE_STATE_VISITING) { |
|
| 3482 | + | if (!resolve_decls(t, submod)) { |
|
| 3483 | + | return false; |
|
| 3484 | + | } |
|
| 3485 | + | } |
|
| 3486 | + | if (module_stmts[i] && module_stmts[i]->sym) { |
|
| 3487 | + | module_stmts[i]->sym->scope = submod->scope; |
|
| 3488 | + | } |
|
| 3489 | + | } |
|
| 3490 | + | ||
| 3491 | + | for (usize i = 0; i < nmodules; i++) { |
|
| 3492 | + | module_t *submod = module_refs[i]; |
|
| 3493 | + | ||
| 3494 | + | if (!submod) |
|
| 3495 | + | return false; |
|
| 3496 | + | /* Skip submodules that are already being visited to avoid false |
|
| 3497 | + | * circular dependency errors. They will be checked by their |
|
| 3498 | + | * original caller. */ |
|
| 3499 | + | if (submod->state == MODULE_STATE_VISITING) { |
|
| 3500 | + | continue; |
|
| 3501 | + | } |
|
| 3502 | + | if (!resolve_mod_def(t, submod)) { |
|
| 3503 | + | return false; |
|
| 3504 | + | } |
|
| 3505 | + | } |
|
| 3506 | + | finalize_type_layout(t); |
|
| 3507 | + | ||
| 3508 | + | module->declared = true; |
|
| 3509 | + | module->state = MODULE_STATE_VISITED; |
|
| 3510 | + | module->ast->type = t->types.type_void; |
|
| 3511 | + | ||
| 3512 | + | t->scope = t->scope->parent; |
|
| 3513 | + | t->module = parent; |
|
| 3514 | + | ||
| 3515 | + | return true; |
|
| 3516 | + | } |
|
| 3517 | + | ||
| 3518 | + | /* Register a function signature without checking its body */ |
|
| 3519 | + | static type_t *resolve_fn_decl(resolve_t *t, node_t *n) { |
|
| 3520 | + | fn_decl_t *fn = &n->val.fn_decl; |
|
| 3521 | + | ||
| 3522 | + | /* Check attributes. */ |
|
| 3523 | + | if (fn->attribs && !resolve_node(t, fn->attribs, NULL)) |
|
| 3524 | + | return NULL; |
|
| 3525 | + | ||
| 3526 | + | attrib_t attrs = fn->attribs ? fn->attribs->val.attrib : ATTRIB_NONE; |
|
| 3527 | + | ||
| 3528 | + | /* Add function to symbol table */ |
|
| 3529 | + | if (!symbol_add(t, fn->ident, n)) { |
|
| 3530 | + | return NULL; |
|
| 3531 | + | } |
|
| 3532 | + | n->sym->e.fn.attribs = attrs; |
|
| 3533 | + | ||
| 3534 | + | /* Set up the qualified name for the function */ |
|
| 3535 | + | module_path(n->sym->qualified, t->module->qualified); |
|
| 3536 | + | module_qualify(n->sym->qualified, fn->ident); |
|
| 3537 | + | ||
| 3538 | + | /* Initialize usage tracking - mark as used if it's a default function */ |
|
| 3539 | + | n->sym->e.fn.used = (attrs & ATTRIB_DEFAULT) || (attrs & ATTRIB_TEST); |
|
| 3540 | + | ||
| 3541 | + | /* Initialize function type and scope */ |
|
| 3542 | + | type_t *ret_typ = n->val.fn_decl.return_type |
|
| 3543 | + | ? resolve_type(t, n->val.fn_decl.return_type) |
|
| 3544 | + | : t->types.type_void; |
|
| 3545 | + | n->sym->e.fn.scope = symtab_scope(t->scope, NULL); |
|
| 3546 | + | n->type = alloc_fn_type(t, n, ret_typ, n->val.fn_decl.params.len); |
|
| 3547 | + | ||
| 3548 | + | /* Enter function scope temporarily to register parameters */ |
|
| 3549 | + | scope_t *parent = t->scope; |
|
| 3550 | + | t->scope = n->sym->e.fn.scope; |
|
| 3551 | + | ||
| 3552 | + | /* Add parameters to function scope */ |
|
| 3553 | + | for (usize i = 0; i < n->val.fn_decl.params.len; i++) { |
|
| 3554 | + | node_t *param = |
|
| 3555 | + | nodespan_ptrs(&t->module->parser, n->val.fn_decl.params)[i]; |
|
| 3556 | + | ||
| 3557 | + | /* Assign declared type to identifier node. */ |
|
| 3558 | + | node_t *type = param->val.param.type; |
|
| 3559 | + | type_t *declared = resolve_type(t, type); |
|
| 3560 | + | ||
| 3561 | + | if (!declared) { |
|
| 3562 | + | return NULL; |
|
| 3563 | + | } |
|
| 3564 | + | param->type = declared; |
|
| 3565 | + | ||
| 3566 | + | /* Store parameter type in function type for function pointer |
|
| 3567 | + | * compatibility */ |
|
| 3568 | + | n->type->info.fun.params[i] = declared; |
|
| 3569 | + | ||
| 3570 | + | if (!symbol_add(t, param->val.param.ident, param)) { |
|
| 3571 | + | return NULL; |
|
| 3572 | + | } |
|
| 3573 | + | param->sym->e.var.typ = declared; |
|
| 3574 | + | param->sym->e.var.align = declared->align; |
|
| 3575 | + | } |
|
| 3576 | + | t->scope = parent; |
|
| 3577 | + | ||
| 3578 | + | if (!resolve_fn_throws(t, n->type, fn->throws, ret_typ)) |
|
| 3579 | + | return NULL; |
|
| 3580 | + | ||
| 3581 | + | return n->type; |
|
| 3582 | + | } |
|
| 3583 | + | ||
| 3584 | + | /* Type check function body (assumes signature is already registered) */ |
|
| 3585 | + | static type_t *resolve_fn_def(resolve_t *t, node_t *n) { |
|
| 3586 | + | /* Set current function and enter function scope */ |
|
| 3587 | + | t->fn = n->sym; |
|
| 3588 | + | t->scope = n->sym->e.fn.scope; |
|
| 3589 | + | ||
| 3590 | + | /* For extern functions, body will be NULL */ |
|
| 3591 | + | if (n->val.fn_decl.body && !resolve_block(t, n->val.fn_decl.body)) { |
|
| 3592 | + | t->fn = NULL; |
|
| 3593 | + | t->scope = t->scope->parent; |
|
| 3594 | + | return NULL; |
|
| 3595 | + | } |
|
| 3596 | + | t->fn = NULL; |
|
| 3597 | + | t->scope = t->scope->parent; |
|
| 3598 | + | ||
| 3599 | + | return n->type; |
|
| 3600 | + | } |
resolver.h
added
+168 -0
| 1 | + | #ifndef RESOLVER_H |
|
| 2 | + | #define RESOLVER_H |
|
| 3 | + | ||
| 4 | + | #include "ast.h" |
|
| 5 | + | #include "limits.h" |
|
| 6 | + | #include "module.h" |
|
| 7 | + | #include "parser.h" |
|
| 8 | + | #include "riscv.h" |
|
| 9 | + | #include "symtab.h" |
|
| 10 | + | #include "types.h" |
|
| 11 | + | ||
| 12 | + | /* Built-in slice/array fields */ |
|
| 13 | + | #define LEN_FIELD "len" |
|
| 14 | + | #define LEN_FIELD_LEN 3 |
|
| 15 | + | #define PTR_FIELD "ptr" |
|
| 16 | + | #define PTR_FIELD_LEN 3 |
|
| 17 | + | ||
| 18 | + | typedef struct type_t { |
|
| 19 | + | typeclass_t cls; |
|
| 20 | + | ||
| 21 | + | const char *name; /* Type name */ |
|
| 22 | + | u16 namelen; /* Type name length */ |
|
| 23 | + | ||
| 24 | + | union { |
|
| 25 | + | struct { |
|
| 26 | + | union_decl_t *decl; /* AST node for the union. */ |
|
| 27 | + | symbol_t **variants; |
|
| 28 | + | u8 nvariants; |
|
| 29 | + | struct type_t |
|
| 30 | + | *base; /* Underlying scalar type for fieldless unions */ |
|
| 31 | + | i32 variantsize; /* Largest payload size (bytes) */ |
|
| 32 | + | bool has_payload; /* Whether any variant carries data */ |
|
| 33 | + | } uni; |
|
| 34 | + | struct { |
|
| 35 | + | struct type_t *err; /* Error set */ |
|
| 36 | + | struct type_t *payload; /* Success payload type */ |
|
| 37 | + | } res; |
|
| 38 | + | struct { |
|
| 39 | + | symbol_t **fields; /* Fields of the record */ |
|
| 40 | + | u8 nfields; /* Number of fields */ |
|
| 41 | + | u32 packedsize; /* Size if packed */ |
|
| 42 | + | bool anonymous; /* Anonymous record */ |
|
| 43 | + | bool tuple; /* Tuple-style record */ |
|
| 44 | + | } srt; |
|
| 45 | + | struct { |
|
| 46 | + | struct type_t *target; /* Target type. */ |
|
| 47 | + | bool mut; /* Mutable pointer. */ |
|
| 48 | + | } ptr; |
|
| 49 | + | struct { |
|
| 50 | + | struct type_t *ret; /* Return type */ |
|
| 51 | + | struct type_t **params; /* Parameter types */ |
|
| 52 | + | struct type_t **throws; |
|
| 53 | + | u8 nparams; |
|
| 54 | + | u8 nthrows; |
|
| 55 | + | } fun; |
|
| 56 | + | struct { |
|
| 57 | + | struct type_t *elem; /* Type of array elements */ |
|
| 58 | + | u32 length; /* Length for arrays (fixed size) */ |
|
| 59 | + | } ary; |
|
| 60 | + | struct { |
|
| 61 | + | struct type_t *elem; /* Type of slice elements */ |
|
| 62 | + | struct type_t *base; /* Base array type */ |
|
| 63 | + | bool mut; /* Mutable slice pointer. */ |
|
| 64 | + | } slc; |
|
| 65 | + | struct { |
|
| 66 | + | struct type_t *elem; /* Type of the optional value */ |
|
| 67 | + | } opt; |
|
| 68 | + | } info; |
|
| 69 | + | ||
| 70 | + | struct type_t *ptr; /* Pointer type, eg. `*T` for `T`. */ |
|
| 71 | + | struct type_t *ptr_mut; /* Mutable pointer type, eg. `*mut T`. */ |
|
| 72 | + | struct type_t *slice; /* Slice type, eg. *[T] for [T]. */ |
|
| 73 | + | struct type_t *slice_mut; /* Mutable slice type, eg. *mut [T]. */ |
|
| 74 | + | ||
| 75 | + | i32 size; /* Calculated size in bytes. */ |
|
| 76 | + | i32 align; /* Alignment requirements. */ |
|
| 77 | + | } type_t; |
|
| 78 | + | ||
| 79 | + | /* Global type context. */ |
|
| 80 | + | typedef struct { |
|
| 81 | + | /* Built-in types. |
|
| 82 | + | * These point into the `objects` array. */ |
|
| 83 | + | type_t *type_i8; |
|
| 84 | + | type_t *type_u8; |
|
| 85 | + | type_t *type_i16; |
|
| 86 | + | type_t *type_u16; |
|
| 87 | + | type_t *type_i32; |
|
| 88 | + | type_t *type_u32; |
|
| 89 | + | type_t *type_bool; |
|
| 90 | + | type_t *type_char; |
|
| 91 | + | type_t *type_str; |
|
| 92 | + | ||
| 93 | + | /* For statements, which have no type. */ |
|
| 94 | + | type_t *type_void; |
|
| 95 | + | /* Opaque type that can only be used behind pointers. */ |
|
| 96 | + | type_t *type_opaque; |
|
| 97 | + | /* For expressions that never produce a value. */ |
|
| 98 | + | type_t *type_never; |
|
| 99 | + | ||
| 100 | + | /* Type storage across all modules. */ |
|
| 101 | + | type_t objects[MAX_TYPES]; |
|
| 102 | + | u16 nobjects; |
|
| 103 | + | ||
| 104 | + | /* Pools for pointer arrays inside type_t (variants, fields, params, |
|
| 105 | + | * throws). Each type_t stores a pointer into one of these pools. */ |
|
| 106 | + | symbol_t *sympool[MAX_SYMPTR_POOL]; |
|
| 107 | + | u16 nsympool; |
|
| 108 | + | struct type_t *typepool[MAX_TYPEPTR_POOL]; |
|
| 109 | + | u16 ntypepool; |
|
| 110 | + | } types_t; |
|
| 111 | + | ||
/* Context in which a node is being resolved (see `resolve_t.ctx`). */
typedef enum {
    TC_CTX_NORMAL  = 0, /* Default context */
    TC_CTX_PATTERN = 1, /* Presumably: resolving a pattern -- confirm */
    TC_CTX_TRY     = 2, /* Presumably: resolving inside `try` -- confirm */
} resolve_ctx_t;
|
| 117 | + | ||
| 118 | + | /* Type checker state. */ |
|
| 119 | + | typedef struct { |
|
| 120 | + | scope_t *global; |
|
| 121 | + | types_t types; |
|
| 122 | + | symbol_t *fn; /* Track the current function. */ |
|
| 123 | + | scope_t *scope; /* Track the current scope. */ |
|
| 124 | + | module_manager_t *mm; /* Reference to module manager for imports */ |
|
| 125 | + | module_t *module; /* Currently being resolved module */ |
|
| 126 | + | u32 flags; |
|
| 127 | + | u16 recordid; /* Next anonymous record ID */ |
|
| 128 | + | resolve_ctx_t ctx; /* Allow unbound identifiers in patterns */ |
|
| 129 | + | } resolve_t; |
|
| 130 | + | ||
| 131 | + | /* Allocate `n` symbol_t* slots from the type pool. */ |
|
| 132 | + | symbol_t **types_alloc_sympool(types_t *t, u8 n); |
|
| 133 | + | /* Allocate `n` type_t* slots from the type pool. */ |
|
| 134 | + | type_t **types_alloc_typepool(types_t *t, u8 n); |
|
| 135 | + | ||
| 136 | + | /* Dereference a type. */ |
|
| 137 | + | type_t *deref_type(type_t *ref); |
|
| 138 | + | ||
| 139 | + | /* Initialize type checker. */ |
|
| 140 | + | void resolve_init(resolve_t *t, module_manager_t *mm); |
|
| 141 | + | /* Typecheck a complete AST. */ |
|
| 142 | + | bool resolve_run(resolve_t *t, module_t *root); |
|
| 143 | + | ||
| 144 | + | /* Check if a type is numeric, eg. integer or float. */ |
|
| 145 | + | bool type_is_numeric(typeclass_t t); |
|
| 146 | + | /* Check if a type is compound, ie. is made of multiple sub-types. */ |
|
| 147 | + | bool type_is_compound(type_t *t); |
|
| 148 | + | /* Check if a type is an address, eg. a pointer or slice. */ |
|
| 149 | + | bool type_is_address(typeclass_t t); |
|
| 150 | + | /* Check if a type is an integer */ |
|
| 151 | + | bool type_is_int(typeclass_t t); |
|
| 152 | + | /* Check if a type is an unsigned integer */ |
|
| 153 | + | bool type_is_unsigned(typeclass_t t); |
|
| 154 | + | /* Check if a type is primitive, ie. not compound. */ |
|
| 155 | + | bool type_is_primitive(type_t *t); |
|
| 156 | + | /* Check if a type is passed by reference automatically. */ |
|
| 157 | + | bool type_is_passed_by_ref(type_t *t); |
|
| 158 | + | /* Check if a type is a tagged value */ |
|
| 159 | + | bool type_is_tagged_value(type_t *ty); |
|
| 160 | + | /* Check if a type is a union with a payload value */ |
|
| 161 | + | bool type_is_union_with_payload(type_t *ty); |
|
| 162 | + | /* Check if a type is packed, ie. it has no padding */ |
|
| 163 | + | bool type_is_packed(type_t *t); |
|
| 164 | + | /* Check if type `a` can be coerced to type `b`. This handles cases like |
|
| 165 | + | * *mut [T] -> *[T] where the types are structurally compatible. */ |
|
| 166 | + | bool type_coercible(type_t *a, type_t *b); |
|
| 167 | + | ||
| 168 | + | #endif |
riscv.c
added
+280 -0
| 1 | + | /* RISC-V 64-bit (RV64I) instruction builder. */ |
|
| 2 | + | #include <stdlib.h> |
|
| 3 | + | ||
| 4 | + | #include "riscv.h" |
|
| 5 | + | #include "types.h" |
|
| 6 | + | ||
/* Register names, indexed by register number (32 entries). */
const char *reg_names[] = {
    /*  0 -  7 */ "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
    /*  8 - 15 */ "fp",   "s1", "a0", "a1", "a2", "a3", "a4", "a5",
    /* 16 - 23 */ "a6",   "a7", "s2", "s3", "s4", "s5", "s6", "s7",
    /* 24 - 31 */ "s8",   "s9", "s10", "s11", "t3", "t4", "t5", "t6",
};
|
| 12 | + | ||
/* Whether each register, indexed by register number, is treated as
 * caller-saved. Entries not listed are `false`; the highest listed index
 * (31) fixes the table at 32 entries. */
const bool caller_saved_registers[] = {
    /* t0-t2 */
    [5] = true,  [6] = true,  [7] = true,
    /* a0-a7 */
    [10] = true, [11] = true, [12] = true, [13] = true,
    [14] = true, [15] = true, [16] = true, [17] = true,
    /* t3-t6 */
    [28] = true, [29] = true, [30] = true, [31] = true,
};
|
| 18 | + | ||
| 19 | + | const reg_t temp_registers[] = { T1, T2, T3, T4, T5, T6 }; |
|
| 20 | + | ||
| 21 | + | i32 sign_extend(u32 value, int bit_width) { |
|
| 22 | + | if ((value >> (bit_width - 1)) & 1) { |
|
| 23 | + | /* Sign bit is 1, so extend with 1s. */ |
|
| 24 | + | return (i32)(value | (~0u << bit_width)); |
|
| 25 | + | } |
|
| 26 | + | return (i32)value; |
|
| 27 | + | } |
|
| 28 | + | ||
| 29 | + | i32 align(i32 size, i32 alignment) { |
|
| 30 | + | /* Verify alignment is a power of 2. */ |
|
| 31 | + | /* This rounds up to the next multiple of alignment. */ |
|
| 32 | + | return (size + alignment - 1) & ~(alignment - 1); |
|
| 33 | + | } |
|
| 34 | + | ||
| 35 | + | /* Creates an I-type instruction struct. |
|
| 36 | + | * Used for immediate operations like ADDI, SLTI and loads like LW. */ |
|
| 37 | + | static instr_t instr_i( |
|
| 38 | + | opcode_t opcode, funct3_t fn3, reg_t rd, reg_t rs1, i32 imm |
|
| 39 | + | ) { |
|
| 40 | + | return (instr_t){ .i = { .opcode = opcode, |
|
| 41 | + | .rd = rd, |
|
| 42 | + | .rs1 = rs1, |
|
| 43 | + | .funct3 = fn3, |
|
| 44 | + | .imm_11_0 = (u32)imm } }; |
|
| 45 | + | } |
|
| 46 | + | ||
| 47 | + | /* Creates a U-type instruction struct. |
|
| 48 | + | * Used for LUI (Load Upper Immediate) and AUIPC. */ |
|
| 49 | + | static instr_t instr_u(opcode_t opcode, reg_t rd, i32 imm) { |
|
| 50 | + | return (instr_t){ .u = { |
|
| 51 | + | .opcode = opcode, |
|
| 52 | + | .rd = rd, |
|
| 53 | + | .imm_31_12 = (u32)imm & 0xFFFFF, |
|
| 54 | + | } }; |
|
| 55 | + | } |
|
| 56 | + | ||
| 57 | + | /* Creates an R-type instruction struct. |
|
| 58 | + | * Used for register-register operations like ADD, SUB, AND, OR. */ |
|
| 59 | + | static instr_t instr_r( |
|
| 60 | + | opcode_t opcode, funct3_t fn3, funct7_t fn7, reg_t rd, reg_t rs1, reg_t rs2 |
|
| 61 | + | ) { |
|
| 62 | + | return (instr_t){ .r = { .opcode = opcode, |
|
| 63 | + | .rd = rd, |
|
| 64 | + | .rs1 = rs1, |
|
| 65 | + | .rs2 = rs2, |
|
| 66 | + | .funct3 = fn3, |
|
| 67 | + | .funct7 = fn7 } }; |
|
| 68 | + | } |
|
| 69 | + | ||
| 70 | + | /* Creates an S-type instruction struct. |
|
| 71 | + | * Used for store instructions like SW, SH, SB. */ |
|
| 72 | + | static instr_t instr_s( |
|
| 73 | + | opcode_t opcode, funct3_t fn3, reg_t rs1, reg_t rs2, i32 imm |
|
| 74 | + | ) { |
|
| 75 | + | return (instr_t){ .s = { .opcode = opcode, |
|
| 76 | + | .rs1 = rs1, |
|
| 77 | + | .rs2 = rs2, |
|
| 78 | + | .funct3 = fn3, |
|
| 79 | + | .imm_4_0 = (u32)imm & 0x1F, |
|
| 80 | + | .imm_11_5 = ((u32)imm >> 5) & 0x7F } }; |
|
| 81 | + | } |
|
| 82 | + | ||
| 83 | + | /* Creates an SB-type (branch) instruction struct. |
|
| 84 | + | * Modified S-type used for conditional branches like BEQ, BNE. */ |
|
| 85 | + | static instr_t instr_sb( |
|
| 86 | + | opcode_t opcode, funct3_t fn3, reg_t rs1, reg_t rs2, i32 imm |
|
| 87 | + | ) { |
|
| 88 | + | return (instr_t){ .b = { .opcode = opcode, |
|
| 89 | + | .rs1 = rs1, |
|
| 90 | + | .rs2 = rs2, |
|
| 91 | + | .funct3 = fn3, |
|
| 92 | + | .imm_11 = (imm >> 11) & 0x1, |
|
| 93 | + | .imm_4_1 = (imm >> 1) & 0xF, |
|
| 94 | + | .imm_10_5 = (imm >> 5) & 0x3F, |
|
| 95 | + | .imm_12 = (imm >> 12) & 0x1 } }; |
|
| 96 | + | } |
|
| 97 | + | ||
| 98 | + | /* Creates a UJ-type (jump) instruction struct. |
|
| 99 | + | * Modified U-type used for JAL instruction. */ |
|
| 100 | + | static instr_t instr_uj(opcode_t opcode, reg_t rd, i32 imm) { |
|
| 101 | + | return (instr_t){ .j = { .opcode = opcode, |
|
| 102 | + | .rd = rd, |
|
| 103 | + | .imm_20 = (imm >> 20) & 0x1, |
|
| 104 | + | .imm_10_1 = (imm >> 1) & 0x3FF, |
|
| 105 | + | .imm_11 = (imm >> 11) & 0x1, |
|
| 106 | + | .imm_19_12 = (imm >> 12) & 0xFF } }; |
|
| 107 | + | } |
|
| 108 | + | ||
| 109 | + | /* Instruction definitions table. */ |
|
| 110 | + | /* Maps each instruction to its parameters for easy lookup. */ |
|
| 111 | + | typedef struct { |
|
| 112 | + | ifmt_t type; /* Instruction type */ |
|
| 113 | + | opcode_t opcode; /* Opcode value */ |
|
| 114 | + | funct3_t funct3; /* Function3 value (if applicable) */ |
|
| 115 | + | funct7_t funct7; /* Function7 value (if applicable) */ |
|
| 116 | + | bool special; /* Special handling flag */ |
|
| 117 | + | } idef_t; |
|
| 118 | + | ||
| 119 | + | /* Instruction definition table */ |
|
| 120 | + | static const idef_t idefs[] = { |
|
| 121 | + | /* Upper immediate instructions. */ |
|
| 122 | + | [I_LUI] = { IFMT_U, OP_LUI, 0, 0, true }, |
|
| 123 | + | [I_AUIPC] = { IFMT_U, OP_AUIPC, 0, 0, true }, |
|
| 124 | + | /* Jump instructions. */ |
|
| 125 | + | [I_JAL] = { IFMT_J, OP_JAL, 0, 0, true }, |
|
| 126 | + | [I_JALR] = { IFMT_I, OP_JALR, 0, 0, false }, |
|
| 127 | + | /* Branch instructions. */ |
|
| 128 | + | [I_BEQ] = { IFMT_B, OP_BRANCH, FUNCT3_BYTE, 0, false }, |
|
| 129 | + | [I_BNE] = { IFMT_B, OP_BRANCH, FUNCT3_HALF, 0, false }, |
|
| 130 | + | [I_BLT] = { IFMT_B, OP_BRANCH, FUNCT3_BYTE_U, 0, false }, |
|
| 131 | + | [I_BGE] = { IFMT_B, OP_BRANCH, FUNCT3_HALF_U, 0, false }, |
|
| 132 | + | [I_BLTU] = { IFMT_B, OP_BRANCH, FUNCT3_OR, 0, false }, |
|
| 133 | + | [I_BGEU] = { IFMT_B, OP_BRANCH, FUNCT3_AND, 0, false }, |
|
| 134 | + | /* Load instructions. */ |
|
| 135 | + | [I_LB] = { IFMT_I, OP_LOAD, FUNCT3_BYTE, 0, false }, |
|
| 136 | + | [I_LH] = { IFMT_I, OP_LOAD, FUNCT3_HALF, 0, false }, |
|
| 137 | + | [I_LW] = { IFMT_I, OP_LOAD, FUNCT3_WORD, 0, false }, |
|
| 138 | + | [I_LBU] = { IFMT_I, OP_LOAD, FUNCT3_BYTE_U, 0, false }, |
|
| 139 | + | [I_LHU] = { IFMT_I, OP_LOAD, FUNCT3_HALF_U, 0, false }, |
|
| 140 | + | /* Store instructions. */ |
|
| 141 | + | [I_SB] = { IFMT_S, OP_STORE, FUNCT3_BYTE, 0, false }, |
|
| 142 | + | [I_SH] = { IFMT_S, OP_STORE, FUNCT3_HALF, 0, false }, |
|
| 143 | + | [I_SW] = { IFMT_S, OP_STORE, FUNCT3_WORD, 0, false }, |
|
| 144 | + | /* ALU immediate operations. */ |
|
| 145 | + | [I_ADDI] = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, false }, |
|
| 146 | + | [I_SLTI] = { IFMT_I, OP_IMM, FUNCT3_SLT, 0, false }, |
|
| 147 | + | [I_SLTIU] = { IFMT_I, OP_IMM, FUNCT3_SLTU, 0, false }, |
|
| 148 | + | [I_XORI] = { IFMT_I, OP_IMM, FUNCT3_XOR, 0, false }, |
|
| 149 | + | [I_ORI] = { IFMT_I, OP_IMM, FUNCT3_OR, 0, false }, |
|
| 150 | + | [I_ANDI] = { IFMT_I, OP_IMM, FUNCT3_AND, 0, false }, |
|
| 151 | + | [I_SLLI] = { IFMT_I, OP_IMM, FUNCT3_SLL, 0, true }, |
|
| 152 | + | [I_SRLI] = { IFMT_I, OP_IMM, FUNCT3_SRL, 0, true }, |
|
| 153 | + | [I_SRAI] = { IFMT_I, OP_IMM, FUNCT3_SRL, 0, true }, |
|
| 154 | + | /* ALU register operations. */ |
|
| 155 | + | [I_ADD] = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_NORMAL, false }, |
|
| 156 | + | [I_SUB] = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_SUB, false }, |
|
| 157 | + | [I_SLL] = { IFMT_R, OP_OP, FUNCT3_SLL, FUNCT7_NORMAL, false }, |
|
| 158 | + | [I_SLT] = { IFMT_R, OP_OP, FUNCT3_SLT, FUNCT7_NORMAL, false }, |
|
| 159 | + | [I_SLTU] = { IFMT_R, OP_OP, FUNCT3_SLTU, FUNCT7_NORMAL, false }, |
|
| 160 | + | [I_XOR] = { IFMT_R, OP_OP, FUNCT3_XOR, FUNCT7_NORMAL, false }, |
|
| 161 | + | [I_SRL] = { IFMT_R, OP_OP, FUNCT3_SRL, FUNCT7_NORMAL, false }, |
|
| 162 | + | [I_AND] = { IFMT_R, OP_OP, FUNCT3_AND, FUNCT7_NORMAL, false }, |
|
| 163 | + | [I_OR] = { IFMT_R, OP_OP, FUNCT3_OR, FUNCT7_NORMAL, false }, |
|
| 164 | + | /* M extension - multiply and divide. */ |
|
| 165 | + | [I_MUL] = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_MUL, false }, |
|
| 166 | + | [I_MULH] = { IFMT_R, OP_OP, FUNCT3_SLL, FUNCT7_MUL, false }, |
|
| 167 | + | [I_MULHSU] = { IFMT_R, OP_OP, FUNCT3_SLT, FUNCT7_MUL, false }, |
|
| 168 | + | [I_MULHU] = { IFMT_R, OP_OP, FUNCT3_SLTU, FUNCT7_MUL, false }, |
|
| 169 | + | [I_DIV] = { IFMT_R, OP_OP, FUNCT3_XOR, FUNCT7_MUL, false }, |
|
| 170 | + | [I_DIVU] = { IFMT_R, OP_OP, FUNCT3_SRL, FUNCT7_MUL, false }, |
|
| 171 | + | [I_REM] = { IFMT_R, OP_OP, FUNCT3_OR, FUNCT7_MUL, false }, |
|
| 172 | + | [I_REMU] = { IFMT_R, OP_OP, FUNCT3_AND, FUNCT7_MUL, false }, |
|
| 173 | + | /* Pseudo-instructions. */ |
|
| 174 | + | [I_MV] = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, true }, |
|
| 175 | + | [I_JMP] = { IFMT_J, OP_JAL, 0, 0, true }, |
|
| 176 | + | [I_NOP] = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, true }, |
|
| 177 | + | [I_NOT] = { IFMT_I, OP_IMM, FUNCT3_XOR, 0, true }, |
|
| 178 | + | [I_NEG] = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_SUB, true }, |
|
| 179 | + | /* System instructions */ |
|
| 180 | + | [I_EBREAK] = { IFMT_I, OP_SYSTEM, 0, 0, true }, |
|
| 181 | + | [I_ECALL] = { IFMT_I, OP_SYSTEM, 0, 0, true }, |
|
| 182 | + | /* F Extension floating-point instructions */ |
|
| 183 | + | [I_FADD_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FADD_S, false }, |
|
| 184 | + | [I_FSUB_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FSUB_S, false }, |
|
| 185 | + | [I_FMUL_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FMUL_S, false }, |
|
| 186 | + | [I_FDIV_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FDIV_S, false }, |
|
| 187 | + | [I_FEQ_S] = { IFMT_R, OP_OP_FP, FUNCT3_FEQ, FUNCT7_FEQ_S, false }, |
|
| 188 | + | [I_FLT_S] = { IFMT_R, OP_OP_FP, FUNCT3_FLT, FUNCT7_FLT_S, false }, |
|
| 189 | + | [I_FLE_S] = { IFMT_R, OP_OP_FP, FUNCT3_FLE, FUNCT7_FLE_S, false }, |
|
| 190 | + | [I_FLW] = { IFMT_I, OP_LOAD_FP, FUNCT3_WORD_FP, 0, false }, |
|
| 191 | + | [I_FSW] = { IFMT_S, OP_STORE_FP, FUNCT3_WORD_FP, 0, false }, |
|
| 192 | + | /* RV64I load/store */ |
|
| 193 | + | [I_LWU] = { IFMT_I, OP_LOAD, FUNCT3_WORD_U, 0, false }, |
|
| 194 | + | [I_LD] = { IFMT_I, OP_LOAD, FUNCT3_DOUBLE, 0, false }, |
|
| 195 | + | [I_SD] = { IFMT_S, OP_STORE, FUNCT3_DOUBLE, 0, false }, |
|
| 196 | + | /* RV64I immediate W-ops */ |
|
| 197 | + | [I_ADDIW] = { IFMT_I, OP_IMM_32, FUNCT3_ADD, 0, false }, |
|
| 198 | + | [I_SLLIW] = { IFMT_I, OP_IMM_32, FUNCT3_SLL, 0, true }, |
|
| 199 | + | [I_SRLIW] = { IFMT_I, OP_IMM_32, FUNCT3_SRL, 0, true }, |
|
| 200 | + | [I_SRAIW] = { IFMT_I, OP_IMM_32, FUNCT3_SRL, 0, true }, |
|
| 201 | + | /* RV64I register W-ops */ |
|
| 202 | + | [I_ADDW] = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_NORMAL, false }, |
|
| 203 | + | [I_SUBW] = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_SUB, false }, |
|
| 204 | + | [I_SLLW] = { IFMT_R, OP_OP_32, FUNCT3_SLL, FUNCT7_NORMAL, false }, |
|
| 205 | + | [I_SRLW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_NORMAL, false }, |
|
| 206 | + | [I_SRAW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_SRA, false }, |
|
| 207 | + | /* RV64M W-ops */ |
|
| 208 | + | [I_MULW] = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_MUL, false }, |
|
| 209 | + | [I_DIVW] = { IFMT_R, OP_OP_32, FUNCT3_XOR, FUNCT7_MUL, false }, |
|
| 210 | + | [I_DIVUW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_MUL, false }, |
|
| 211 | + | [I_REMW] = { IFMT_R, OP_OP_32, FUNCT3_OR, FUNCT7_MUL, false }, |
|
| 212 | + | [I_REMUW] = { IFMT_R, OP_OP_32, FUNCT3_AND, FUNCT7_MUL, false }, |
|
| 213 | + | }; |
|
| 214 | + | ||
| 215 | + | /* Generates a RISC-V instruction based on the instruction definition */ |
|
| 216 | + | instr_t instr(iname_t iop, reg_t rd, reg_t rs1, reg_t rs2, i32 imm) { |
|
| 217 | + | const idef_t *def = &idefs[iop]; |
|
| 218 | + | ||
| 219 | + | /* Handle special cases that need specific processing. */ |
|
| 220 | + | if (def->special) { |
|
| 221 | + | switch (iop) { |
|
| 222 | + | case I_LUI: |
|
| 223 | + | return instr_u(OP_LUI, rd, imm); |
|
| 224 | + | case I_AUIPC: |
|
| 225 | + | return instr_u(OP_AUIPC, rd, imm); |
|
| 226 | + | case I_JAL: |
|
| 227 | + | return instr_uj(OP_JAL, rd, imm); |
|
| 228 | + | case I_SLLI: |
|
| 229 | + | return instr_i(OP_IMM, FUNCT3_SLL, rd, rs1, imm & 0x3F); |
|
| 230 | + | case I_SRLI: |
|
| 231 | + | return instr_i(OP_IMM, FUNCT3_SRL, rd, rs1, imm & 0x3F); |
|
| 232 | + | case I_SRAI: |
|
| 233 | + | return instr_i(OP_IMM, FUNCT3_SRL, rd, rs1, (imm & 0x3F) + 0x400); |
|
| 234 | + | case I_SLLIW: |
|
| 235 | + | return instr_i(OP_IMM_32, FUNCT3_SLL, rd, rs1, imm & 0x1F); |
|
| 236 | + | case I_SRLIW: |
|
| 237 | + | return instr_i(OP_IMM_32, FUNCT3_SRL, rd, rs1, imm & 0x1F); |
|
| 238 | + | case I_SRAIW: |
|
| 239 | + | return instr_i( |
|
| 240 | + | OP_IMM_32, FUNCT3_SRL, rd, rs1, (imm & 0x1F) + 0x400 |
|
| 241 | + | ); |
|
| 242 | + | case I_MV: |
|
| 243 | + | return instr_i(OP_IMM, FUNCT3_ADD, rd, rs1, 0); |
|
| 244 | + | case I_JMP: |
|
| 245 | + | return instr_uj(OP_JAL, ZERO, imm); |
|
| 246 | + | case I_NOP: |
|
| 247 | + | return instr_i(OP_IMM, FUNCT3_ADD, ZERO, ZERO, 0); |
|
| 248 | + | case I_NOT: |
|
| 249 | + | return instr_i(OP_IMM, FUNCT3_XOR, rd, rs1, 1); |
|
| 250 | + | case I_NEG: |
|
| 251 | + | return instr_r(OP_OP, FUNCT3_ADD, FUNCT7_SUB, rd, ZERO, rs1); |
|
| 252 | + | case I_EBREAK: |
|
| 253 | + | /* EBREAK is encoded as all zeros except for the opcode */ |
|
| 254 | + | return instr_i(OP_SYSTEM, 0, 0, 0, 1); |
|
| 255 | + | case I_ECALL: |
|
| 256 | + | /* ECALL is encoded as all zeros including immediate */ |
|
| 257 | + | return instr_i(OP_SYSTEM, 0, 0, 0, 0); |
|
| 258 | + | default: |
|
| 259 | + | break; |
|
| 260 | + | } |
|
| 261 | + | } |
|
| 262 | + | ||
| 263 | + | /* Regular instructions by type. */ |
|
| 264 | + | switch (def->type) { |
|
| 265 | + | case IFMT_I: |
|
| 266 | + | return instr_i(def->opcode, def->funct3, rd, rs1, imm); |
|
| 267 | + | case IFMT_R: |
|
| 268 | + | return instr_r(def->opcode, def->funct3, def->funct7, rd, rs1, rs2); |
|
| 269 | + | case IFMT_S: |
|
| 270 | + | return instr_s(def->opcode, def->funct3, rs1, rs2, imm); |
|
| 271 | + | case IFMT_B: |
|
| 272 | + | return instr_sb(def->opcode, def->funct3, rs1, rs2, imm); |
|
| 273 | + | case IFMT_U: |
|
| 274 | + | return instr_u(def->opcode, rd, imm); |
|
| 275 | + | case IFMT_J: |
|
| 276 | + | return instr_uj(def->opcode, rd, imm); |
|
| 277 | + | default: |
|
| 278 | + | abort(); |
|
| 279 | + | } |
|
| 280 | + | } |
riscv.h
added
+396 -0
| 1 | + | #ifndef OP_H |
|
| 2 | + | #define OP_H |
|
| 3 | + | ||
| 4 | + | #include "types.h" |
|
| 5 | + | ||
| 6 | + | /* Total number of registers. */ |
|
| 7 | + | #define REGISTERS 32 |
|
| 8 | + | /* Word size of target architecture (RISCV64). */ |
|
| 9 | + | #define WORD_SIZE 8 |
|
| 10 | + | /* Tag size for optional/union discriminants. */ |
|
| 11 | + | #define TAG_SIZE 1 |
|
| 12 | + | /* Instruction size in bytes (always 32-bit, even on RV64). */ |
|
| 13 | + | #define INSTR_SIZE 4 |
|
| 14 | + | /* Stack alignment requirement. */ |
|
| 15 | + | #define STACK_ALIGNMENT 16 |
|
| 16 | + | /* The frame pointer register is set as an alias of `S0`. */ |
|
| 17 | + | #define FP S0 |
|
| 18 | + | ||
| 19 | + | /* Convenient macro wrappers for `instr`. |
|
| 20 | + | * Some of these, such as BLE and BGT are implemented by swapping the operands |
|
| 21 | + | * of other instructions. */ |
|
| 22 | + | #define ADDI(rd, rs1, imm) __instr(I_ADDI, rd, rs1, 0, imm) |
|
| 23 | + | #define SLTI(rd, rs1, imm) __instr(I_SLTI, rd, rs1, 0, imm) |
|
| 24 | + | #define SLTIU(rd, rs1, imm) __instr(I_SLTIU, rd, rs1, 0, imm) |
|
| 25 | + | #define XORI(rd, rs1, imm) __instr(I_XORI, rd, rs1, 0, imm) |
|
| 26 | + | #define ORI(rd, rs1, imm) __instr(I_ORI, rd, rs1, 0, imm) |
|
| 27 | + | #define ANDI(rd, rs1, imm) __instr(I_ANDI, rd, rs1, 0, imm) |
|
| 28 | + | #define SLLI(rd, rs1, imm) __instr(I_SLLI, rd, rs1, 0, imm) |
|
| 29 | + | #define SRLI(rd, rs1, imm) __instr(I_SRLI, rd, rs1, 0, imm) |
|
| 30 | + | #define SRAI(rd, rs1, imm) __instr(I_SRAI, rd, rs1, 0, imm) |
|
| 31 | + | #define JALR(rd, rs1, imm) __instr(I_JALR, rd, rs1, 0, imm) |
|
| 32 | + | #define LB(rd, rs1, imm) __instr(I_LB, rd, rs1, 0, imm) |
|
| 33 | + | #define LH(rd, rs1, imm) __instr(I_LH, rd, rs1, 0, imm) |
|
| 34 | + | #define LW(rd, rs1, imm) __instr(I_LW, rd, rs1, 0, imm) |
|
| 35 | + | #define LWU(rd, rs1, imm) __instr(I_LWU, rd, rs1, 0, imm) |
|
| 36 | + | #define LD(rd, rs1, imm) __instr(I_LD, rd, rs1, 0, imm) |
|
| 37 | + | #define LBU(rd, rs1, imm) __instr(I_LBU, rd, rs1, 0, imm) |
|
| 38 | + | #define LHU(rd, rs1, imm) __instr(I_LHU, rd, rs1, 0, imm) |
|
| 39 | + | #define SB(rs2, rs1, imm) __instr(I_SB, 0, rs1, rs2, imm) |
|
| 40 | + | #define SH(rs2, rs1, imm) __instr(I_SH, 0, rs1, rs2, imm) |
|
| 41 | + | #define SW(rs2, rs1, imm) __instr(I_SW, 0, rs1, rs2, imm) |
|
| 42 | + | #define SD(rs2, rs1, imm) __instr(I_SD, 0, rs1, rs2, imm) |
|
| 43 | + | #define BEQ(rs1, rs2, imm) __instr(I_BEQ, 0, rs1, rs2, imm) |
|
| 44 | + | #define BNE(rs1, rs2, imm) __instr(I_BNE, 0, rs1, rs2, imm) |
|
| 45 | + | #define BLT(rs1, rs2, imm) __instr(I_BLT, 0, rs1, rs2, imm) |
|
| 46 | + | #define BGE(rs1, rs2, imm) __instr(I_BGE, 0, rs1, rs2, imm) |
|
| 47 | + | #define BLTU(rs1, rs2, imm) __instr(I_BLTU, 0, rs1, rs2, imm) |
|
| 48 | + | #define BGEU(rs1, rs2, imm) __instr(I_BGEU, 0, rs1, rs2, imm) |
|
| 49 | + | #define BLE(rs1, rs2, imm) __instr(I_BGE, 0, rs2, rs1, imm) |
|
| 50 | + | #define BGT(rs1, rs2, imm) __instr(I_BLT, 0, rs2, rs1, imm) |
|
| 51 | + | #define ADD(rd, rs1, rs2) __instr(I_ADD, rd, rs1, rs2, 0) |
|
| 52 | + | #define SUB(rd, rs1, rs2) __instr(I_SUB, rd, rs1, rs2, 0) |
|
| 53 | + | #define DIV(rd, rs1, rs2) __instr(I_DIV, rd, rs1, rs2, 0) |
|
| 54 | + | #define DIVU(rd, rs1, rs2) __instr(I_DIVU, rd, rs1, rs2, 0) |
|
| 55 | + | #define REM(rd, rs1, rs2) __instr(I_REM, rd, rs1, rs2, 0) |
|
| 56 | + | #define REMU(rd, rs1, rs2) __instr(I_REMU, rd, rs1, rs2, 0) |
|
| 57 | + | #define MUL(rd, rs1, rs2) __instr(I_MUL, rd, rs1, rs2, 0) |
|
| 58 | + | #define SLL(rd, rs1, rs2) __instr(I_SLL, rd, rs1, rs2, 0) |
|
| 59 | + | #define SLT(rd, rs1, rs2) __instr(I_SLT, rd, rs1, rs2, 0) |
|
| 60 | + | #define SLTU(rd, rs1, rs2) __instr(I_SLTU, rd, rs1, rs2, 0) |
|
| 61 | + | #define XOR(rd, rs1, rs2) __instr(I_XOR, rd, rs1, rs2, 0) |
|
| 62 | + | #define SRL(rd, rs1, rs2) __instr(I_SRL, rd, rs1, rs2, 0) |
|
| 63 | + | #define AND(rd, rs1, rs2) __instr(I_AND, rd, rs1, rs2, 0) |
|
| 64 | + | #define OR(rd, rs1, rs2) __instr(I_OR, rd, rs1, rs2, 0) |
|
| 65 | + | #define LUI(rd, imm) __instr(I_LUI, rd, 0, 0, imm) |
|
| 66 | + | #define AUIPC(rd, imm) __instr(I_AUIPC, rd, 0, 0, imm) |
|
| 67 | + | #define JAL(rd, imm) __instr(I_JAL, rd, 0, 0, imm) |
|
| 68 | + | #define JMP(imm) __instr(I_JMP, 0, 0, 0, imm) |
|
| 69 | + | #define MV(rd, rs1) __instr(I_MV, rd, rs1, 0, 0) |
|
| 70 | + | #define NOT(rd, rs1) __instr(I_NOT, rd, rs1, 0, 0) |
|
| 71 | + | #define NEG(rd, rs1) __instr(I_NEG, rd, rs1, 0, 0) |
|
| 72 | + | #define NOP __instr(I_NOP, 0, 0, 0, 0) |
|
| 73 | + | #define RET __instr(I_JALR, ZERO, RA, 0, 0) |
|
| 74 | + | #define EBREAK __instr(I_EBREAK, 0, 0, 0, 0) |
|
| 75 | + | #define ECALL __instr(I_ECALL, 0, 0, 0, 0) |
|
| 76 | + | /* RV64I word-width (32-bit) operations */ |
|
| 77 | + | #define ADDIW(rd, rs1, imm) __instr(I_ADDIW, rd, rs1, 0, imm) |
|
| 78 | + | #define ADDW(rd, rs1, rs2) __instr(I_ADDW, rd, rs1, rs2, 0) |
|
| 79 | + | #define SUBW(rd, rs1, rs2) __instr(I_SUBW, rd, rs1, rs2, 0) |
|
| 80 | + | #define MULW(rd, rs1, rs2) __instr(I_MULW, rd, rs1, rs2, 0) |
|
| 81 | + | #define DIVW(rd, rs1, rs2) __instr(I_DIVW, rd, rs1, rs2, 0) |
|
| 82 | + | #define DIVUW(rd, rs1, rs2) __instr(I_DIVUW, rd, rs1, rs2, 0) |
|
| 83 | + | #define REMW(rd, rs1, rs2) __instr(I_REMW, rd, rs1, rs2, 0) |
|
| 84 | + | #define REMUW(rd, rs1, rs2) __instr(I_REMUW, rd, rs1, rs2, 0) |
|
| 85 | + | #define SLLIW(rd, rs1, imm) __instr(I_SLLIW, rd, rs1, 0, imm) |
|
| 86 | + | #define SRLIW(rd, rs1, imm) __instr(I_SRLIW, rd, rs1, 0, imm) |
|
| 87 | + | #define SRAIW(rd, rs1, imm) __instr(I_SRAIW, rd, rs1, 0, imm) |
|
| 88 | + | #define SLLW(rd, rs1, rs2) __instr(I_SLLW, rd, rs1, rs2, 0) |
|
| 89 | + | #define SRLW(rd, rs1, rs2) __instr(I_SRLW, rd, rs1, rs2, 0) |
|
| 90 | + | #define SRAW(rd, rs1, rs2) __instr(I_SRAW, rd, rs1, rs2, 0) |
|
| 91 | + | /* F Extension - Floating-point instructions */ |
|
| 92 | + | #define FADD_S(rd, rs1, rs2) __instr(I_FADD_S, rd, rs1, rs2, 0) |
|
| 93 | + | #define FSUB_S(rd, rs1, rs2) __instr(I_FSUB_S, rd, rs1, rs2, 0) |
|
| 94 | + | #define FMUL_S(rd, rs1, rs2) __instr(I_FMUL_S, rd, rs1, rs2, 0) |
|
| 95 | + | #define FDIV_S(rd, rs1, rs2) __instr(I_FDIV_S, rd, rs1, rs2, 0) |
|
| 96 | + | #define FEQ_S(rd, rs1, rs2) __instr(I_FEQ_S, rd, rs1, rs2, 0) |
|
| 97 | + | #define FLT_S(rd, rs1, rs2) __instr(I_FLT_S, rd, rs1, rs2, 0) |
|
| 98 | + | #define FLE_S(rd, rs1, rs2) __instr(I_FLE_S, rd, rs1, rs2, 0) |
|
| 99 | + | #define FLW(rd, rs1, imm) __instr(I_FLW, rd, rs1, 0, imm) |
|
| 100 | + | #define FSW(rs2, rs1, imm) __instr(I_FSW, 0, rs1, rs2, imm) |
|
| 101 | + | ||
| 102 | + | /* String representations of register names. */ |
|
| 103 | + | extern const char *reg_names[]; |
|
| 104 | + | ||
| 105 | + | /* Boolean map of caller-saved registers. |
|
| 106 | + | * True for registers that need to be saved by the caller |
|
| 107 | + | * before a function call. */ |
|
| 108 | + | extern const bool caller_saved_registers[REGISTERS]; |
|
| 109 | + | ||
| 110 | + | /* RISC-V register names. */ |
|
| 111 | + | typedef enum { |
|
| 112 | + | ZERO = 0, /* Hard-wired zero */ |
|
| 113 | + | RA = 1, /* Return address */ |
|
| 114 | + | SP = 2, /* Stack pointer */ |
|
| 115 | + | GP = 3, /* Global pointer */ |
|
| 116 | + | TP = 4, /* Thread pointer */ |
|
| 117 | + | T0 = 5, /* Temporary/alternate link register */ |
|
| 118 | + | T1 = 6, /* Temporary */ |
|
| 119 | + | T2 = 7, /* Temporary */ |
|
| 120 | + | S0 = 8, /* Saved register/frame pointer */ |
|
| 121 | + | S1 = 9, /* Saved register */ |
|
| 122 | + | A0 = 10, /* Function arguments/returns */ |
|
| 123 | + | A1 = 11, |
|
| 124 | + | A2 = 12, /* Function arguments */ |
|
| 125 | + | A3 = 13, |
|
| 126 | + | A4 = 14, |
|
| 127 | + | A5 = 15, |
|
| 128 | + | A6 = 16, |
|
| 129 | + | A7 = 17, |
|
| 130 | + | S2 = 18, /* Saved registers */ |
|
| 131 | + | S3 = 19, |
|
| 132 | + | S4 = 20, |
|
| 133 | + | S5 = 21, |
|
| 134 | + | S6 = 22, |
|
| 135 | + | S7 = 23, |
|
| 136 | + | S8 = 24, |
|
| 137 | + | S9 = 25, |
|
| 138 | + | S10 = 26, |
|
| 139 | + | S11 = 27, |
|
| 140 | + | T3 = 28, /* Temporaries */ |
|
| 141 | + | T4 = 29, |
|
| 142 | + | T5 = 30, |
|
| 143 | + | T6 = 31 |
|
| 144 | + | } reg_t; |
|
| 145 | + | ||
| 146 | + | /* Temporary registers (T1-T6) */ |
|
| 147 | + | extern const reg_t temp_registers[6]; |
|
| 148 | + | ||
| 149 | + | /* Opcodes for RISC-V base instruction set */ |
|
| 150 | + | typedef enum { |
|
| 151 | + | OP_LOAD = 0x03, |
|
| 152 | + | OP_STORE = 0x23, |
|
| 153 | + | OP_BRANCH = 0x63, |
|
| 154 | + | OP_JALR = 0x67, |
|
| 155 | + | OP_JAL = 0x6F, |
|
| 156 | + | OP_OP = 0x33, |
|
| 157 | + | OP_IMM = 0x13, |
|
| 158 | + | OP_AUIPC = 0x17, |
|
| 159 | + | OP_IMM_32 = 0x1B, /* RV64I: ADDIW, SLLIW, SRLIW, SRAIW */ |
|
| 160 | + | OP_OP_32 = 0x3B, /* RV64I: ADDW, SUBW, SLLW, SRLW, SRAW, MULW, DIVW, REMW */ |
|
| 161 | + | OP_LUI = 0x37, |
|
| 162 | + | OP_SYSTEM = 0x73, |
|
| 163 | + | OP_FENCE = 0x0F, |
|
| 164 | + | /* F Extension opcodes */ |
|
| 165 | + | OP_LOAD_FP = 0x07, |
|
| 166 | + | OP_STORE_FP = 0x27, |
|
| 167 | + | OP_OP_FP = 0x53 |
|
| 168 | + | } opcode_t; |
|
| 169 | + | ||
| 170 | + | /* Function3 values */ |
|
| 171 | + | typedef enum { |
|
| 172 | + | /* Memory operations */ |
|
| 173 | + | FUNCT3_BYTE = 0x0, /* LB/SB - Load/Store Byte */ |
|
| 174 | + | FUNCT3_HALF = 0x1, /* LH/SH - Load/Store Halfword */ |
|
| 175 | + | FUNCT3_WORD = 0x2, /* LW/SW - Load/Store Word */ |
|
| 176 | + | FUNCT3_DOUBLE = 0x3, /* LD/SD - Load/Store Doubleword */ |
|
| 177 | + | FUNCT3_BYTE_U = 0x4, /* LBU - Load Byte Unsigned */ |
|
| 178 | + | FUNCT3_HALF_U = 0x5, /* LHU - Load Halfword Unsigned */ |
|
| 179 | + | FUNCT3_WORD_U = 0x6, /* LWU - Load Word Unsigned */ |
|
| 180 | + | ||
| 181 | + | /* ALU operations */ |
|
| 182 | + | FUNCT3_ADD = 0x0, /* ADD/SUB/ADDI */ |
|
| 183 | + | FUNCT3_SLL = 0x1, /* SLL/SLLI */ |
|
| 184 | + | FUNCT3_SLT = 0x2, /* SLT/SLTI */ |
|
| 185 | + | FUNCT3_SLTU = 0x3, /* SLTU/SLTIU */ |
|
| 186 | + | FUNCT3_XOR = 0x4, /* XOR/XORI */ |
|
| 187 | + | FUNCT3_SRL = 0x5, /* SRL/SRA/SRLI/SRAI */ |
|
| 188 | + | FUNCT3_OR = 0x6, /* OR/ORI */ |
|
| 189 | + | FUNCT3_AND = 0x7, /* AND/ANDI */ |
|
| 190 | + | /* F Extension function3 codes */ |
|
| 191 | + | FUNCT3_WORD_FP = 0x2, /* FLW/FSW - Load/Store Single */ |
|
| 192 | + | FUNCT3_FEQ = 0x2, /* FEQ.S */ |
|
| 193 | + | FUNCT3_FLT = 0x1, /* FLT.S */ |
|
| 194 | + | FUNCT3_FLE = 0x0 /* FLE.S */ |
|
| 195 | + | } funct3_t; |
|
| 196 | + | ||
| 197 | + | /* Function7 values */ |
|
| 198 | + | typedef enum { |
|
| 199 | + | FUNCT7_NORMAL = 0x00, |
|
| 200 | + | FUNCT7_SUB = 0x20, |
|
| 201 | + | FUNCT7_SRA = 0x20, |
|
| 202 | + | FUNCT7_MUL = 0x01, |
|
| 203 | + | /* F Extension function codes */ |
|
| 204 | + | FUNCT7_FADD_S = 0x00, |
|
| 205 | + | FUNCT7_FSUB_S = 0x04, |
|
| 206 | + | FUNCT7_FMUL_S = 0x08, |
|
| 207 | + | FUNCT7_FDIV_S = 0x0C, |
|
| 208 | + | FUNCT7_FEQ_S = 0x50, |
|
| 209 | + | FUNCT7_FLT_S = 0x50, |
|
| 210 | + | FUNCT7_FLE_S = 0x50 |
|
| 211 | + | } funct7_t; |
|
| 212 | + | ||
| 213 | + | /* Represents a RISC-V instruction in its various formats */ |
|
| 214 | + | typedef union { |
|
| 215 | + | struct { |
|
| 216 | + | u32 opcode : 7; |
|
| 217 | + | u32 rd : 5; |
|
| 218 | + | u32 funct3 : 3; |
|
| 219 | + | u32 rs1 : 5; |
|
| 220 | + | u32 rs2 : 5; |
|
| 221 | + | u32 funct7 : 7; |
|
| 222 | + | } r; /* Register format */ |
|
| 223 | + | ||
| 224 | + | struct { |
|
| 225 | + | u32 opcode : 7; |
|
| 226 | + | u32 rd : 5; |
|
| 227 | + | u32 funct3 : 3; |
|
| 228 | + | u32 rs1 : 5; |
|
| 229 | + | u32 imm_11_0 : 12; |
|
| 230 | + | } i; /* Immediate format */ |
|
| 231 | + | ||
| 232 | + | struct { |
|
| 233 | + | u32 opcode : 7; |
|
| 234 | + | u32 imm_4_0 : 5; |
|
| 235 | + | u32 funct3 : 3; |
|
| 236 | + | u32 rs1 : 5; |
|
| 237 | + | u32 rs2 : 5; |
|
| 238 | + | u32 imm_11_5 : 7; |
|
| 239 | + | } s; /* Store format */ |
|
| 240 | + | ||
| 241 | + | struct { |
|
| 242 | + | u32 opcode : 7; |
|
| 243 | + | u32 imm_11 : 1; |
|
| 244 | + | u32 imm_4_1 : 4; |
|
| 245 | + | u32 funct3 : 3; |
|
| 246 | + | u32 rs1 : 5; |
|
| 247 | + | u32 rs2 : 5; |
|
| 248 | + | u32 imm_10_5 : 6; |
|
| 249 | + | u32 imm_12 : 1; |
|
| 250 | + | } b; /* Branch format */ |
|
| 251 | + | ||
| 252 | + | struct { |
|
| 253 | + | u32 opcode : 7; |
|
| 254 | + | u32 rd : 5; |
|
| 255 | + | u32 imm_31_12 : 20; |
|
| 256 | + | } u; /* Upper immediate format */ |
|
| 257 | + | ||
| 258 | + | struct { |
|
| 259 | + | u32 opcode : 7; |
|
| 260 | + | u32 rd : 5; |
|
| 261 | + | u32 imm_19_12 : 8; |
|
| 262 | + | u32 imm_11 : 1; |
|
| 263 | + | u32 imm_10_1 : 10; |
|
| 264 | + | u32 imm_20 : 1; |
|
| 265 | + | } j; /* Jump format */ |
|
| 266 | + | ||
| 267 | + | u32 raw; /* Raw 32-bit instruction */ |
|
| 268 | + | } instr_t; |
|
| 269 | + | ||
| 270 | + | /* Instruction type. */ |
|
| 271 | + | typedef enum { |
|
| 272 | + | IFMT_I, /* I-type (immediate) */ |
|
| 273 | + | IFMT_R, /* R-type (register) */ |
|
| 274 | + | IFMT_S, /* S-type (store) */ |
|
| 275 | + | IFMT_B, /* B-type (branch) */ |
|
| 276 | + | IFMT_U, /* U-type (upper immediate) */ |
|
| 277 | + | IFMT_J, /* J-type (jump) */ |
|
| 278 | + | } ifmt_t; |
|
| 279 | + | ||
| 280 | + | /* RISC-V instruction name. */ |
|
| 281 | + | typedef enum { |
|
| 282 | + | I_LUI, |
|
| 283 | + | I_AUIPC, |
|
| 284 | + | I_JAL, |
|
| 285 | + | I_JALR, |
|
| 286 | + | I_BEQ, |
|
| 287 | + | I_BNE, |
|
| 288 | + | I_BLT, |
|
| 289 | + | I_BGE, |
|
| 290 | + | I_BLTU, |
|
| 291 | + | I_BGEU, |
|
| 292 | + | I_LB, |
|
| 293 | + | I_LH, |
|
| 294 | + | I_LW, |
|
| 295 | + | I_LBU, |
|
| 296 | + | I_LHU, |
|
| 297 | + | I_SB, |
|
| 298 | + | I_SH, |
|
| 299 | + | I_SW, |
|
| 300 | + | I_ADDI, |
|
| 301 | + | I_SLTI, |
|
| 302 | + | I_SLTIU, |
|
| 303 | + | I_XORI, |
|
| 304 | + | I_ORI, |
|
| 305 | + | I_ANDI, |
|
| 306 | + | I_SLLI, |
|
| 307 | + | I_SRLI, |
|
| 308 | + | I_SRAI, |
|
| 309 | + | I_ADD, |
|
| 310 | + | I_SUB, |
|
| 311 | + | I_SLL, |
|
| 312 | + | I_SLT, |
|
| 313 | + | I_SLTU, |
|
| 314 | + | I_XOR, |
|
| 315 | + | I_SRL, |
|
| 316 | + | I_SRA, |
|
| 317 | + | I_OR, |
|
| 318 | + | I_AND, |
|
| 319 | + | I_MUL, |
|
| 320 | + | I_MULH, |
|
| 321 | + | I_MULHSU, |
|
| 322 | + | I_MULHU, |
|
| 323 | + | I_DIV, |
|
| 324 | + | I_DIVU, |
|
| 325 | + | I_REM, |
|
| 326 | + | I_REMU, |
|
| 327 | + | I_MV, |
|
| 328 | + | I_JMP, |
|
| 329 | + | I_NOP, |
|
| 330 | + | I_NOT, |
|
| 331 | + | I_NEG, |
|
| 332 | + | I_EBREAK, |
|
| 333 | + | I_ECALL, |
|
| 334 | + | /* F Extension - Floating-point instructions */ |
|
| 335 | + | I_FADD_S, |
|
| 336 | + | I_FSUB_S, |
|
| 337 | + | I_FMUL_S, |
|
| 338 | + | I_FDIV_S, |
|
| 339 | + | I_FEQ_S, |
|
| 340 | + | I_FLT_S, |
|
| 341 | + | I_FLE_S, |
|
| 342 | + | I_FLW, |
|
| 343 | + | I_FSW, |
|
| 344 | + | /* RV64I extensions */ |
|
| 345 | + | I_LWU, |
|
| 346 | + | I_LD, |
|
| 347 | + | I_SD, |
|
| 348 | + | I_ADDIW, |
|
| 349 | + | I_SLLIW, |
|
| 350 | + | I_SRLIW, |
|
| 351 | + | I_SRAIW, |
|
| 352 | + | I_ADDW, |
|
| 353 | + | I_SUBW, |
|
| 354 | + | I_SLLW, |
|
| 355 | + | I_SRLW, |
|
| 356 | + | I_SRAW, |
|
| 357 | + | I_MULW, |
|
| 358 | + | I_DIVW, |
|
| 359 | + | I_DIVUW, |
|
| 360 | + | I_REMW, |
|
| 361 | + | I_REMUW |
|
| 362 | + | } iname_t; |
|
| 363 | + | ||
| 364 | + | /* Returns a RISC-V instruction based on the instruction type. */ |
|
| 365 | + | instr_t instr(iname_t op, reg_t rd, reg_t rs1, reg_t rs2, i32 imm); |
|
| 366 | + | ||
| 367 | + | static inline instr_t __instr( |
|
| 368 | + | iname_t op, reg_t rd, reg_t rs1, reg_t rs2, i32 imm |
|
| 369 | + | ) { |
|
| 370 | + | return instr(op, rd, rs1, rs2, imm); |
|
| 371 | + | } |
|
| 372 | + | ||
| 373 | + | /* Return true when a signed 12-bit immediate can encode `value`. */ |
|
| 374 | + | static inline bool is_small(i32 value) { |
|
| 375 | + | return value >= -2048 && value <= 2047; |
|
| 376 | + | } |
|
| 377 | + | ||
| 378 | + | static inline bool is_branch_imm(i32 value) { |
|
| 379 | + | return value >= -(1 << 12) && value <= ((1 << 12) - 2) && !(value & 1); |
|
| 380 | + | } |
|
| 381 | + | ||
| 382 | + | static inline bool is_jump_imm(i32 value) { |
|
| 383 | + | return value >= -(1 << 20) && value <= ((1 << 20) - 2) && !(value & 1); |
|
| 384 | + | } |
|
| 385 | + | ||
| 386 | + | /* Helper function to sign-extend a value. */ |
|
| 387 | + | i32 sign_extend(u32 value, int bit_width); |
|
| 388 | + | /* Aligns a size to the specified alignment boundary. */ |
|
| 389 | + | i32 align(i32 size, i32 alignment); |
|
| 390 | + | /* Functions to get immediates out of instruction. */ |
|
| 391 | + | i32 get_i_imm(instr_t instr); |
|
| 392 | + | i32 get_s_imm(instr_t instr); |
|
| 393 | + | i32 get_b_imm(instr_t instr); |
|
| 394 | + | i32 get_j_imm(instr_t instr); |
|
| 395 | + | ||
| 396 | + | #endif |
scanner.c
added
+372 -0
| 1 | + | #include <ctype.h> |
|
| 2 | + | #include <string.h> |
|
| 3 | + | ||
| 4 | + | #include "scanner.h" |
|
| 5 | + | #include "types.h" |
|
| 6 | + | ||
| 7 | + | /* Keyword lookup table. */ |
|
| 8 | + | static const struct { |
|
| 9 | + | const char *name; |
|
| 10 | + | usize length; |
|
| 11 | + | tokenclass_t tok; |
|
| 12 | + | } keywords[] = { |
|
| 13 | + | { "fn", 2, T_FN }, { "pub", 3, T_PUB }, |
|
| 14 | + | { "return", 6, T_RETURN }, { "while", 5, T_WHILE }, |
|
| 15 | + | { "mut", 3, T_MUT }, { "let", 3, T_LET }, |
|
| 16 | + | { "static", 6, T_STATIC }, { "if", 2, T_IF }, |
|
| 17 | + | { "else", 4, T_ELSE }, { "i8", 2, T_I8 }, |
|
| 18 | + | { "i16", 3, T_I16 }, { "i32", 3, T_I32 }, |
|
| 19 | + | { "i64", 3, T_I64 }, { "u8", 2, T_U8 }, |
|
| 20 | + | { "u16", 3, T_U16 }, { "u32", 3, T_U32 }, |
|
| 21 | + | { "u64", 3, T_U64 }, { "f32", 3, T_F32 }, |
|
| 22 | + | { "bool", 4, T_BOOL }, { "void", 4, T_VOID }, |
|
| 23 | + | { "true", 4, T_TRUE }, { "false", 5, T_FALSE }, |
|
| 24 | + | { "nil", 3, T_NIL }, { "loop", 4, T_LOOP }, |
|
| 25 | + | { "try", 3, T_TRY }, { "catch", 5, T_CATCH }, |
|
| 26 | + | { "for", 3, T_FOR }, { "in", 2, T_IN }, |
|
| 27 | + | { "const", 5, T_CONST }, { "break", 5, T_BREAK }, |
|
| 28 | + | { "throw", 5, T_THROW }, { "union", 5, T_UNION }, |
|
| 29 | + | { "and", 3, T_AND }, { "or", 2, T_OR }, |
|
| 30 | + | { "not", 3, T_NOT }, { "match", 5, T_MATCH }, |
|
| 31 | + | { "use", 3, T_USE }, { "case", 4, T_CASE }, |
|
| 32 | + | { "extern", 6, T_EXTERN }, { "mod", 3, T_MOD }, |
|
| 33 | + | { "as", 2, T_AS }, { "record", 6, T_RECORD }, |
|
| 34 | + | { "undefined", 9, T_UNDEF }, { "align", 5, T_ALIGN }, |
|
| 35 | + | { "throws", 6, T_THROWS }, { "super", 5, T_SUPER }, |
|
| 36 | + | { "panic", 5, T_PANIC }, { "opaque", 6, T_OPAQUE }, |
|
| 37 | + | }; |
|
| 38 | + | ||
| 39 | + | /* Initialize scanner with source text. */ |
|
| 40 | + | void scanner_init(scanner_t *s, const char *file, const char *source) { |
|
| 41 | + | s->file = file; |
|
| 42 | + | s->source = source; |
|
| 43 | + | s->token = source; |
|
| 44 | + | s->cursor = source; |
|
| 45 | + | } |
|
| 46 | + | ||
| 47 | + | /* Check if we've reached the end. */ |
|
| 48 | + | static bool is_eof(scanner_t *s) { |
|
| 49 | + | return *s->cursor == '\0'; |
|
| 50 | + | } |
|
| 51 | + | ||
| 52 | + | /* Peek at next character. */ |
|
| 53 | + | static char peek(scanner_t *s) { |
|
| 54 | + | if (is_eof(s)) |
|
| 55 | + | return '\0'; |
|
| 56 | + | return s->cursor[1]; |
|
| 57 | + | } |
|
| 58 | + | ||
| 59 | + | /* Advance current position and return previous char. */ |
|
| 60 | + | static char advance(scanner_t *s) { |
|
| 61 | + | s->cursor++; |
|
| 62 | + | return s->cursor[-1]; |
|
| 63 | + | } |
|
| 64 | + | ||
| 65 | + | /* Match expected character. */ |
|
| 66 | + | static bool consume(scanner_t *s, char expected) { |
|
| 67 | + | if (is_eof(s)) |
|
| 68 | + | return false; |
|
| 69 | + | if (*s->cursor != expected) |
|
| 70 | + | return false; |
|
| 71 | + | s->cursor++; |
|
| 72 | + | ||
| 73 | + | return true; |
|
| 74 | + | } |
|
| 75 | + | ||
| 76 | + | /* Create a token of given class. */ |
|
| 77 | + | static token_t tok(scanner_t *s, tokenclass_t cls) { |
|
| 78 | + | token_t t = { .cls = cls, |
|
| 79 | + | .start = s->token, |
|
| 80 | + | .length = (usize)(s->cursor - s->token), |
|
| 81 | + | .position = (usize)(s->token - s->source) }; |
|
| 82 | + | return t; |
|
| 83 | + | } |
|
| 84 | + | ||
| 85 | + | /* Create an error token. */ |
|
| 86 | + | static token_t error_tok( |
|
| 87 | + | scanner_t *s, const char *offset, const char *message |
|
| 88 | + | ) { |
|
| 89 | + | token_t t = { .cls = T_INVALID, |
|
| 90 | + | .start = message, |
|
| 91 | + | .length = strlen(message), |
|
| 92 | + | .position = (usize)(offset - s->source) }; |
|
| 93 | + | return t; |
|
| 94 | + | } |
|
| 95 | + | ||
| 96 | + | /* Skip whitespace and comments. */ |
|
| 97 | + | static void skip_whitespace(scanner_t *s) { |
|
| 98 | + | for (;;) { |
|
| 99 | + | switch (*s->cursor) { |
|
| 100 | + | case ' ': |
|
| 101 | + | case '\r': |
|
| 102 | + | case '\t': |
|
| 103 | + | advance(s); |
|
| 104 | + | break; |
|
| 105 | + | case '\n': |
|
| 106 | + | advance(s); |
|
| 107 | + | break; |
|
| 108 | + | case '/': |
|
| 109 | + | if (peek(s) == '/') { |
|
| 110 | + | /* Comment goes until end of line. */ |
|
| 111 | + | while (*s->cursor != '\n' && !is_eof(s)) |
|
| 112 | + | advance(s); |
|
| 113 | + | } else { |
|
| 114 | + | return; |
|
| 115 | + | } |
|
| 116 | + | break; |
|
| 117 | + | default: |
|
| 118 | + | return; |
|
| 119 | + | } |
|
| 120 | + | } |
|
| 121 | + | } |
|
| 122 | + | ||
/* True for the decimal digits '0' through '9'. */
static bool is_digit(char c) {
    if (c < '0')
        return false;
    return c <= '9';
}
| 127 | + | ||
/* True for hexadecimal digits: 0-9, a-f and A-F. */
static bool is_hex_digit(char c) {
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'f')
        return true;
    return c >= 'A' && c <= 'F';
}
| 133 | + | ||
/* True for the binary digits '0' and '1'. */
static bool is_bin_digit(char c) {
    switch (c) {
    case '0':
    case '1':
        return true;
    default:
        return false;
    }
}
| 138 | + | ||
/* True for the ASCII letters a-z and A-Z. Underscore is not included. */
static bool is_alpha(char c) {
    if (c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
| 143 | + | ||
| 144 | + | /* Scan a number token. */ |
|
| 145 | + | static token_t scan_number(scanner_t *s) { |
|
| 146 | + | bool signed_token = (s->token[0] == '-') || (s->token[0] == '+'); |
|
| 147 | + | ||
| 148 | + | if (signed_token) |
|
| 149 | + | advance(s); /* Consume the leading sign. */ |
|
| 150 | + | ||
| 151 | + | /* Check for hex literal (0x or 0X prefix) */ |
|
| 152 | + | if (s->cursor[-1] == '0' && (*s->cursor == 'x' || *s->cursor == 'X')) { |
|
| 153 | + | advance(s); /* Consume the 'x' or 'X' */ |
|
| 154 | + | /* Must have at least one hex digit after 0x */ |
|
| 155 | + | if (!is_hex_digit(*s->cursor)) |
|
| 156 | + | return error_tok(s, s->token, "invalid hex literal"); |
|
| 157 | + | ||
| 158 | + | while (is_hex_digit(*s->cursor)) |
|
| 159 | + | advance(s); |
|
| 160 | + | ||
| 161 | + | return tok(s, T_NUMBER); |
|
| 162 | + | } |
|
| 163 | + | ||
| 164 | + | /* Check for binary literal (0b or 0B prefix) */ |
|
| 165 | + | if (s->cursor[-1] == '0' && (*s->cursor == 'b' || *s->cursor == 'B')) { |
|
| 166 | + | advance(s); /* Consume the 'b' or 'B' */ |
|
| 167 | + | /* Must have at least one binary digit after 0b */ |
|
| 168 | + | if (!is_bin_digit(*s->cursor)) |
|
| 169 | + | return error_tok(s, s->token, "invalid binary literal"); |
|
| 170 | + | ||
| 171 | + | while (is_bin_digit(*s->cursor)) |
|
| 172 | + | advance(s); |
|
| 173 | + | ||
| 174 | + | return tok(s, T_NUMBER); |
|
| 175 | + | } |
|
| 176 | + | ||
| 177 | + | /* Regular decimal number */ |
|
| 178 | + | while (is_digit(*s->cursor)) |
|
| 179 | + | advance(s); |
|
| 180 | + | ||
| 181 | + | /* Look for decimal part. */ |
|
| 182 | + | if (*s->cursor == '.' && is_digit(peek(s))) { |
|
| 183 | + | advance(s); /* Consume the "." */ |
|
| 184 | + | while (is_digit(*s->cursor)) |
|
| 185 | + | advance(s); |
|
| 186 | + | } |
|
| 187 | + | return tok(s, T_NUMBER); |
|
| 188 | + | } |
|
| 189 | + | ||
| 190 | + | /* Scan a string. */ |
|
| 191 | + | static token_t scan_string(scanner_t *s) { |
|
| 192 | + | while (*s->cursor != '"' && !is_eof(s)) { |
|
| 193 | + | consume(s, '\\'); // Consume escapes. |
|
| 194 | + | advance(s); |
|
| 195 | + | } |
|
| 196 | + | if (!consume(s, '"')) |
|
| 197 | + | return error_tok(s, s->token, "unterminated string"); |
|
| 198 | + | ||
| 199 | + | return tok(s, T_STRING); |
|
| 200 | + | } |
|
| 201 | + | ||
| 202 | + | /* Scan a character, such as: 'z' */ |
|
| 203 | + | static token_t scan_char(scanner_t *s) { |
|
| 204 | + | while (*s->cursor != '\'' && !is_eof(s)) { |
|
| 205 | + | if (!isprint(*s->cursor)) |
|
| 206 | + | return error_tok(s, s->token, "invalid character"); |
|
| 207 | + | ||
| 208 | + | consume(s, '\\'); |
|
| 209 | + | advance(s); |
|
| 210 | + | } |
|
| 211 | + | if (!consume(s, '\'')) |
|
| 212 | + | return error_tok(s, s->token, "unterminated character"); |
|
| 213 | + | ||
| 214 | + | return tok(s, T_CHAR); |
|
| 215 | + | } |
|
| 216 | + | ||
| 217 | + | /* Return a keyword or identifier token. */ |
|
| 218 | + | static tokenclass_t keyword_or_ident(const char *start, usize length) { |
|
| 219 | + | for (usize i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { |
|
| 220 | + | if (length == keywords[i].length && |
|
| 221 | + | memcmp(start, keywords[i].name, length) == 0) { |
|
| 222 | + | return keywords[i].tok; |
|
| 223 | + | } |
|
| 224 | + | } |
|
| 225 | + | return T_IDENT; |
|
| 226 | + | } |
|
| 227 | + | ||
| 228 | + | /* Scan an identifier, label or keyword. */ |
|
| 229 | + | static token_t scan_identifier(scanner_t *s) { |
|
| 230 | + | while (is_alpha(*s->cursor) || is_digit(*s->cursor) || *s->cursor == '_' || |
|
| 231 | + | *s->cursor == '#') |
|
| 232 | + | advance(s); |
|
| 233 | + | ||
| 234 | + | return tok(s, keyword_or_ident(s->token, (usize)(s->cursor - s->token))); |
|
| 235 | + | } |
|
| 236 | + | ||
| 237 | + | /* Scan the next token. */ |
|
| 238 | + | token_t scanner_next(scanner_t *s) { |
|
| 239 | + | skip_whitespace(s); |
|
| 240 | + | s->token = s->cursor; |
|
| 241 | + | ||
| 242 | + | if (is_eof(s)) |
|
| 243 | + | return tok(s, T_EOF); |
|
| 244 | + | ||
| 245 | + | char c = advance(s); |
|
| 246 | + | ||
| 247 | + | if (is_digit(c)) |
|
| 248 | + | return scan_number(s); |
|
| 249 | + | if (is_alpha(c)) |
|
| 250 | + | return scan_identifier(s); |
|
| 251 | + | ||
| 252 | + | switch (c) { |
|
| 253 | + | case '\'': |
|
| 254 | + | return scan_char(s); |
|
| 255 | + | case '"': |
|
| 256 | + | return scan_string(s); |
|
| 257 | + | case '(': |
|
| 258 | + | return tok(s, T_LPAREN); |
|
| 259 | + | case ')': |
|
| 260 | + | return tok(s, T_RPAREN); |
|
| 261 | + | case '{': |
|
| 262 | + | return tok(s, T_LBRACE); |
|
| 263 | + | case '}': |
|
| 264 | + | return tok(s, T_RBRACE); |
|
| 265 | + | case '[': |
|
| 266 | + | return tok(s, T_LBRACKET); |
|
| 267 | + | case ']': |
|
| 268 | + | return tok(s, T_RBRACKET); |
|
| 269 | + | case ';': |
|
| 270 | + | return tok(s, T_SEMICOLON); |
|
| 271 | + | case ',': |
|
| 272 | + | return tok(s, T_COMMA); |
|
| 273 | + | case '.': |
|
| 274 | + | if (*s->cursor == '.') { |
|
| 275 | + | advance(s); |
|
| 276 | + | return tok(s, T_DOT_DOT); |
|
| 277 | + | } |
|
| 278 | + | return tok(s, T_DOT); |
|
| 279 | + | case ':': |
|
| 280 | + | if (*s->cursor == ':') { |
|
| 281 | + | advance(s); |
|
| 282 | + | return tok(s, T_COLON_COLON); |
|
| 283 | + | } |
|
| 284 | + | return tok(s, T_COLON); |
|
| 285 | + | case '-': |
|
| 286 | + | if (*s->cursor == '>') { |
|
| 287 | + | advance(s); |
|
| 288 | + | return tok(s, T_ARROW); |
|
| 289 | + | } |
|
| 290 | + | /* If followed by a digit, scan as negative number */ |
|
| 291 | + | if (is_digit(*s->cursor)) { |
|
| 292 | + | return scan_number(s); |
|
| 293 | + | } |
|
| 294 | + | return tok(s, T_MINUS); |
|
| 295 | + | case '+': |
|
| 296 | + | if (is_digit(*s->cursor)) { |
|
| 297 | + | return scan_number(s); |
|
| 298 | + | } |
|
| 299 | + | return tok(s, T_PLUS); |
|
| 300 | + | case '/': |
|
| 301 | + | return tok(s, T_SLASH); |
|
| 302 | + | case '*': |
|
| 303 | + | return tok(s, T_STAR); |
|
| 304 | + | case '%': |
|
| 305 | + | return tok(s, T_PERCENT); |
|
| 306 | + | case '&': |
|
| 307 | + | return tok(s, T_AMP); |
|
| 308 | + | case '?': |
|
| 309 | + | return tok(s, T_QUESTION); |
|
| 310 | + | case '!': |
|
| 311 | + | return tok(s, consume(s, '=') ? T_BANG_EQ : T_BANG); |
|
| 312 | + | case '=': |
|
| 313 | + | if (*s->cursor == '>') { |
|
| 314 | + | advance(s); |
|
| 315 | + | return tok(s, T_FAT_ARROW); |
|
| 316 | + | } |
|
| 317 | + | return tok(s, consume(s, '=') ? T_EQ_EQ : T_EQ); |
|
| 318 | + | case '<': |
|
| 319 | + | if (*s->cursor == '<') { |
|
| 320 | + | advance(s); |
|
| 321 | + | return tok(s, T_LSHIFT); |
|
| 322 | + | } |
|
| 323 | + | return tok(s, consume(s, '=') ? T_LT_EQ : T_LT); |
|
| 324 | + | case '>': |
|
| 325 | + | if (*s->cursor == '>') { |
|
| 326 | + | advance(s); |
|
| 327 | + | return tok(s, T_RSHIFT); |
|
| 328 | + | } |
|
| 329 | + | return tok(s, consume(s, '=') ? T_GT_EQ : T_GT); |
|
| 330 | + | case '|': |
|
| 331 | + | return tok(s, T_PIPE); |
|
| 332 | + | case '^': |
|
| 333 | + | return tok(s, T_CARET); |
|
| 334 | + | case '~': |
|
| 335 | + | return tok(s, T_TILDE); |
|
| 336 | + | case '@': |
|
| 337 | + | /* Scan @identifier as a single token. */ |
|
| 338 | + | if (!is_alpha(*s->cursor)) |
|
| 339 | + | return error_tok(s, s->token, "expected identifier after `@`"); |
|
| 340 | + | while (is_alpha(*s->cursor)) |
|
| 341 | + | advance(s); |
|
| 342 | + | return tok(s, T_AT_IDENT); |
|
| 343 | + | case '_': |
|
| 344 | + | if (is_alpha(*s->cursor) || is_digit(*s->cursor) || *s->cursor == '_') { |
|
| 345 | + | /* This is part of an identifier like `_foo` or `__start` */ |
|
| 346 | + | return scan_identifier(s); |
|
| 347 | + | } |
|
| 348 | + | return tok(s, T_UNDERSCORE); |
|
| 349 | + | } |
|
| 350 | + | return error_tok(s, s->token, "unexpected character"); |
|
| 351 | + | } |
|
| 352 | + | ||
| 353 | + | /* Get the source code location from a byte offset. */ |
|
| 354 | + | location_t scanner_get_location(scanner_t *s, u32 position) { |
|
| 355 | + | u32 l = 1; |
|
| 356 | + | u32 c = 1; |
|
| 357 | + | ||
| 358 | + | for (u32 i = 0; i < position; i++) { |
|
| 359 | + | if (s->source[i] == '\n') { |
|
| 360 | + | l++; |
|
| 361 | + | c = 1; |
|
| 362 | + | } else { |
|
| 363 | + | c++; |
|
| 364 | + | } |
|
| 365 | + | } |
|
| 366 | + | return (location_t){ |
|
| 367 | + | .file = s->file, |
|
| 368 | + | .src = s->source + position, |
|
| 369 | + | .line = l, |
|
| 370 | + | .col = c, |
|
| 371 | + | }; |
|
| 372 | + | } |
scanner.h
added
+151 -0
| 1 | + | #ifndef SCANNER_H |
|
| 2 | + | #define SCANNER_H |
|
| 3 | + | ||
| 4 | + | #include "types.h" |
|
| 5 | + | ||
/* Token classes produced by the scanner. Enumerators take consecutive values
 * starting at zero, in the order listed here. */
typedef enum {
    /* Produced by the scanner once the input is exhausted. */
    T_EOF,

    /* Error token; its text is an error message, not source code. */
    T_INVALID,

    /* Punctuation and operators that never take a trailing `=`. */
    T_LPAREN,     /* ( */
    T_RPAREN,     /* ) */
    T_LBRACE,     /* { */
    T_RBRACE,     /* } */
    T_LBRACKET,   /* [ */
    T_RBRACKET,   /* ] */
    T_COMMA,      /* , */
    T_DOT,        /* . */
    T_DOT_DOT,    /* .. */
    T_MINUS,      /* - */
    T_PLUS,       /* + */
    T_SEMICOLON,  /* ; */
    T_SLASH,      /* / */
    T_STAR,       /* * */
    T_PERCENT,    /* % */
    T_AMP,        /* & */
    T_PIPE,       /* | */
    T_CARET,      /* ^ */
    T_TILDE,      /* ~ */
    T_UNDERSCORE, /* _ */

    /* Comparison, assignment and shift operators. */
    T_QUESTION, /* ? */
    T_BANG,     /* ! */
    T_BANG_EQ,  /* != */
    T_EQ,       /* = */
    T_EQ_EQ,    /* == */
    T_GT,       /* > */
    T_GT_EQ,    /* >= */
    T_LT,       /* < */
    T_LT_EQ,    /* <= */
    T_LSHIFT,   /* << */
    T_RSHIFT,   /* >> */

    /* Identifiers and literals. */
    T_IDENT,    /* fnord */
    T_AT_IDENT, /* @sizeOf */
    T_STRING,   /* "fnord" */
    T_CHAR,     /* 'f' */
    T_NUMBER,   /* 42, -7, 0xff, 0b101, 1.5 */
    T_TRUE,     /* true */
    T_FALSE,    /* false */
    T_NIL,      /* nil */
    T_UNDEF,    /* undefined */

    /* Keywords. */
    T_IF,
    T_ELSE,
    T_RETURN,
    T_BREAK,
    T_CONTINUE,
    T_THROW,
    T_PANIC,
    T_WHILE,
    T_FOR,
    T_LOOP,
    T_TRY,
    T_CATCH,
    T_IN,
    T_FN,
    T_UNION,
    T_RECORD,
    T_DEFAULT,
    T_PUB,
    T_MUT,
    T_CONST,
    T_STATIC,
    T_LET,
    T_AND,
    T_OR,
    T_NOT,
    T_MATCH,
    T_CASE,
    T_USE,
    T_SUPER,  /* super */
    T_EXTERN, /* extern */
    T_MOD,    /* mod */
    T_AS,     /* as */
    T_ALIGN,  /* align */
    T_THROWS, /* throws */

    /* Type-related tokens. */
    T_COLON,       /* : */
    T_COLON_COLON, /* :: */
    T_ARROW,       /* -> */
    T_FAT_ARROW,   /* => */

    /* Builtin type names. */
    T_I8,
    T_I16,
    T_I32,
    T_I64,
    T_U8,
    T_U16,
    T_U32,
    T_U64,
    T_F32,
    T_BOOL,
    T_VOID,
    T_OPAQUE
} tokenclass_t;
| 117 | + | ||
| 118 | + | /* Code location. */ |
|
| 119 | + | typedef struct { |
|
| 120 | + | const char *src; /* Pointer to source code location. */ |
|
| 121 | + | const char *file; /* File path. */ |
|
| 122 | + | u32 line; /* line number. */ |
|
| 123 | + | u32 col; /* Column number. */ |
|
| 124 | + | } location_t; |
|
| 125 | + | ||
| 126 | + | /* Token structure. */ |
|
| 127 | + | typedef struct { |
|
| 128 | + | tokenclass_t cls; |
|
| 129 | + | const char *start; /* Start of the token in the source code. */ |
|
| 130 | + | u32 length; /* Byte length of token in source code. */ |
|
| 131 | + | u32 position; /* Byte offset in source. */ |
|
| 132 | + | } token_t; |
|
| 133 | + | ||
/* Scanner state. All pointers except `file` point into the same source
 * buffer. */
typedef struct {
    const char *file;   /* Path of the file being scanned. */
    const char *source; /* First byte of the source buffer. */
    const char *token;  /* First byte of the token being scanned. */
    const char *cursor; /* Next byte to be read. */
} scanner_t;
| 141 | + | ||
| 142 | + | /* Initialize scanner with source text. */ |
|
| 143 | + | void scanner_init(scanner_t *s, const char *file, const char *source); |
|
| 144 | + | ||
| 145 | + | /* Get next token from scanner. */ |
|
| 146 | + | token_t scanner_next(scanner_t *s); |
|
| 147 | + | ||
| 148 | + | /* Get file, line and column information for a byte offset into the source. */ |
|
| 149 | + | location_t scanner_get_location(scanner_t *s, u32 position); |
|
| 150 | + | ||
| 151 | + | #endif |
strings.c
added
+100 -0
| 1 | + | #include <string.h> |
|
| 2 | + | ||
| 3 | + | #include "strings.h" |
|
| 4 | + | #include "types.h" |
|
| 5 | + | ||
| 6 | + | /* Global string interning system */ |
|
| 7 | + | static struct { |
|
| 8 | + | char strings[MAX_STRINGS][MAX_STRING_LEN]; |
|
| 9 | + | usize nstrings; |
|
| 10 | + | bool initialized; |
|
| 11 | + | } table = { 0 }; |
|
| 12 | + | ||
| 13 | + | /* Initialize the global string interning system */ |
|
| 14 | + | void strings_init(void) { |
|
| 15 | + | table.nstrings = 0; |
|
| 16 | + | table.initialized = true; |
|
| 17 | + | } |
|
| 18 | + | ||
| 19 | + | /* Process escape sequences in a string */ |
|
| 20 | + | static usize escape(const char *src, usize src_len, char *dst, usize dst_size) { |
|
| 21 | + | usize dst_idx = 0; |
|
| 22 | + | ||
| 23 | + | for (usize i = 0; i < src_len && dst_idx < dst_size - 1; i++) { |
|
| 24 | + | if (src[i] == '\\' && i + 1 < src_len) { |
|
| 25 | + | switch (src[i + 1]) { |
|
| 26 | + | case 'n': |
|
| 27 | + | dst[dst_idx++] = '\n'; |
|
| 28 | + | i++; /* Skip the next character */ |
|
| 29 | + | break; |
|
| 30 | + | case 't': |
|
| 31 | + | dst[dst_idx++] = '\t'; |
|
| 32 | + | i++; |
|
| 33 | + | break; |
|
| 34 | + | case 'r': |
|
| 35 | + | dst[dst_idx++] = '\r'; |
|
| 36 | + | i++; |
|
| 37 | + | break; |
|
| 38 | + | case '\\': |
|
| 39 | + | dst[dst_idx++] = '\\'; |
|
| 40 | + | i++; |
|
| 41 | + | break; |
|
| 42 | + | case '"': |
|
| 43 | + | dst[dst_idx++] = '"'; |
|
| 44 | + | i++; |
|
| 45 | + | break; |
|
| 46 | + | case '0': |
|
| 47 | + | dst[dst_idx++] = '\0'; |
|
| 48 | + | i++; |
|
| 49 | + | break; |
|
| 50 | + | default: |
|
| 51 | + | /* Unknown escape sequence, keep the backslash */ |
|
| 52 | + | dst[dst_idx++] = src[i]; |
|
| 53 | + | break; |
|
| 54 | + | } |
|
| 55 | + | } else { |
|
| 56 | + | dst[dst_idx++] = src[i]; |
|
| 57 | + | } |
|
| 58 | + | } |
|
| 59 | + | dst[dst_idx] = '\0'; |
|
| 60 | + | ||
| 61 | + | return dst_idx; |
|
| 62 | + | } |
|
| 63 | + | ||
| 64 | + | /* Find an existing interned string */ |
|
| 65 | + | static const char *find(const char *str) { |
|
| 66 | + | for (usize i = 0; i < table.nstrings; i++) { |
|
| 67 | + | if (!strcmp(table.strings[i], str)) { |
|
| 68 | + | return table.strings[i]; |
|
| 69 | + | } |
|
| 70 | + | } |
|
| 71 | + | return NULL; |
|
| 72 | + | } |
|
| 73 | + | ||
| 74 | + | /* Intern a raw string without escape sequence processing */ |
|
| 75 | + | static const char *strings_alloc_raw(const char *str) { |
|
| 76 | + | /* Check if already interned */ |
|
| 77 | + | const char *existing = find(str); |
|
| 78 | + | if (existing) { |
|
| 79 | + | return existing; |
|
| 80 | + | } |
|
| 81 | + | ||
| 82 | + | char *slot = table.strings[table.nstrings++]; |
|
| 83 | + | strncpy(slot, str, MAX_STRING_LEN); |
|
| 84 | + | ||
| 85 | + | return slot; |
|
| 86 | + | } |
|
| 87 | + | ||
| 88 | + | /* Intern a string with escape sequence processing */ |
|
| 89 | + | const char *strings_alloc_len(const char *str, u16 len) { |
|
| 90 | + | /* Process escape sequences first */ |
|
| 91 | + | char escaped[MAX_STRING_LEN]; |
|
| 92 | + | escape(str, len, escaped, MAX_STRING_LEN); |
|
| 93 | + | ||
| 94 | + | return strings_alloc_raw(escaped); |
|
| 95 | + | } |
|
| 96 | + | ||
| 97 | + | /* Intern a string with escape sequence processing */ |
|
| 98 | + | const char *strings_alloc(const char *str) { |
|
| 99 | + | return strings_alloc_len(str, strlen(str)); |
|
| 100 | + | } |
strings.h
added
+17 -0
| 1 | + | #ifndef STRINGS_H |
|
| 2 | + | #define STRINGS_H |
|
| 3 | + | ||
| 4 | + | #include "types.h" |
|
| 5 | + | ||
| 6 | + | #define MAX_STRINGS 4096 |
|
| 7 | + | #define MAX_STRING_LEN 64 |
|
| 8 | + | ||
| 9 | + | /* Initialize the global string interning system */ |
|
| 10 | + | void strings_init(void); |
|
| 11 | + | ||
| 12 | + | /* Intern a string with escape sequence processing */ |
|
| 13 | + | const char *strings_alloc(const char *str); |
|
| 14 | + | /* Intern the first `len` bytes of a string, with escape sequence processing */ |
|
| 15 | + | const char *strings_alloc_len(const char *str, u16 len); |
|
| 16 | + | ||
| 17 | + | #endif |
symtab.c
added
+224 -0
| 1 | + | #include <assert.h> |
|
| 2 | + | #include <string.h> |
|
| 3 | + | ||
| 4 | + | #include "ast.h" |
|
| 5 | + | #include "io.h" |
|
| 6 | + | #include "module.h" |
|
| 7 | + | #include "resolver.h" |
|
| 8 | + | #include "scanner.h" |
|
| 9 | + | #include "symtab.h" |
|
| 10 | + | ||
| 11 | + | /* Symbol storage across all scopes. */ |
|
| 12 | + | static symbol_t SYMBOLS[MAX_SYMBOLS] = { 0 }; |
|
| 13 | + | static usize NSYMBOLS = 0; |
|
| 14 | + | ||
| 15 | + | /* Scope storage across all modules/functions. */ |
|
| 16 | + | static scope_t SCOPES[MAX_SCOPES] = { 0 }; |
|
| 17 | + | static usize NSCOPES = 0; |
|
| 18 | + | ||
| 19 | + | static symkind_t symbol_entry(node_t *n) { |
|
| 20 | + | switch (n->cls) { |
|
| 21 | + | case NODE_IDENT: |
|
| 22 | + | case NODE_VAR: |
|
| 23 | + | case NODE_STATIC: |
|
| 24 | + | case NODE_PARAM: |
|
| 25 | + | case NODE_UNION_VARIANT: /* This should use SYM_VARIANT */ |
|
| 26 | + | case NODE_RECORD_FIELD: |
|
| 27 | + | return SYM_VARIABLE; |
|
| 28 | + | case NODE_CONST: |
|
| 29 | + | return SYM_CONSTANT; |
|
| 30 | + | case NODE_FN: |
|
| 31 | + | return SYM_FUNCTION; |
|
| 32 | + | case NODE_UNION: |
|
| 33 | + | case NODE_RECORD: |
|
| 34 | + | case NODE_PTR: |
|
| 35 | + | return SYM_TYPE; |
|
| 36 | + | case NODE_MOD: |
|
| 37 | + | case NODE_USE: |
|
| 38 | + | return SYM_MODULE; |
|
| 39 | + | case NODE_REF: |
|
| 40 | + | return symbol_entry(n->val.ref.target); |
|
| 41 | + | case NODE_RETURN: |
|
| 42 | + | case NODE_BLOCK: |
|
| 43 | + | case NODE_LOOP: |
|
| 44 | + | case NODE_WHILE: |
|
| 45 | + | case NODE_WHILE_LET: |
|
| 46 | + | case NODE_FOR: |
|
| 47 | + | case NODE_IF: |
|
| 48 | + | case NODE_IF_LET: |
|
| 49 | + | case NODE_IF_CASE: |
|
| 50 | + | case NODE_GUARD_CASE: |
|
| 51 | + | case NODE_GUARD_LET: |
|
| 52 | + | case NODE_BINOP: |
|
| 53 | + | case NODE_UNOP: |
|
| 54 | + | case NODE_TYPE: |
|
| 55 | + | case NODE_NUMBER: |
|
| 56 | + | case NODE_CHAR: |
|
| 57 | + | case NODE_NIL: |
|
| 58 | + | case NODE_UNDEF: |
|
| 59 | + | case NODE_STRING: |
|
| 60 | + | case NODE_BOOL: |
|
| 61 | + | case NODE_ASSIGN: |
|
| 62 | + | case NODE_CALL: |
|
| 63 | + | case NODE_CALL_ARG: |
|
| 64 | + | case NODE_BUILTIN: |
|
| 65 | + | case NODE_ATTRIBUTE: |
|
| 66 | + | case NODE_BREAK: |
|
| 67 | + | case NODE_RECORD_TYPE: |
|
| 68 | + | case NODE_RECORD_LIT: |
|
| 69 | + | case NODE_ARRAY_LIT: |
|
| 70 | + | case NODE_ARRAY_REPEAT_LIT: |
|
| 71 | + | case NODE_ACCESS: |
|
| 72 | + | case NODE_RECORD_LIT_FIELD: |
|
| 73 | + | case NODE_EXPR_STMT: |
|
| 74 | + | case NODE_ARRAY_INDEX: |
|
| 75 | + | case NODE_RANGE: |
|
| 76 | + | case NODE_MATCH_CASE: |
|
| 77 | + | case NODE_MATCH: |
|
| 78 | + | case NODE_SCOPE: |
|
| 79 | + | case NODE_MOD_BODY: |
|
| 80 | + | case NODE_AS: |
|
| 81 | + | case NODE_PLACEHOLDER: |
|
| 82 | + | case NODE_ALIGN: |
|
| 83 | + | case NODE_THROW: |
|
| 84 | + | case NODE_TRY: |
|
| 85 | + | case NODE_CATCH: |
|
| 86 | + | case NODE_PANIC: |
|
| 87 | + | case NODE_SUPER: |
|
| 88 | + | break; |
|
| 89 | + | } |
|
| 90 | + | bail("node of class %d cannot have a symbol table entry", n->cls); |
|
| 91 | + | } |
|
| 92 | + | ||
| 93 | + | /* Allocate a scope. */ |
|
| 94 | + | scope_t *symtab_scope(scope_t *parent, module_t *mod) { |
|
| 95 | + | if (NSCOPES >= MAX_SCOPES) { |
|
| 96 | + | bail("scope overflow: too many scopes"); |
|
| 97 | + | return NULL; |
|
| 98 | + | } |
|
| 99 | + | scope_t *slot = &SCOPES[NSCOPES++]; |
|
| 100 | + | ||
| 101 | + | *slot = (scope_t){ |
|
| 102 | + | .mod = mod, |
|
| 103 | + | .parent = parent, |
|
| 104 | + | .symbols = { 0 }, |
|
| 105 | + | .nsymbols = 0, |
|
| 106 | + | }; |
|
| 107 | + | return slot; |
|
| 108 | + | } |
|
| 109 | + | ||
| 110 | + | /* Search for a symbol in the given scope only. */ |
|
| 111 | + | symbol_t *symtab_scope_lookup( |
|
| 112 | + | scope_t *s, const char *name, u16 length, symkind_t kind |
|
| 113 | + | ) { |
|
| 114 | + | for (usize i = 0; i < s->nsymbols; i++) { |
|
| 115 | + | symbol_t *sym = s->symbols[i]; |
|
| 116 | + | ||
| 117 | + | if ((kind == SYM_ANY || sym->kind == kind) && sym->length == length && |
|
| 118 | + | memcmp(sym->name, name, length) == 0) { |
|
| 119 | + | return sym; |
|
| 120 | + | } |
|
| 121 | + | } |
|
| 122 | + | return NULL; |
|
| 123 | + | } |
|
| 124 | + | ||
| 125 | + | /* Search for a symbol from the current to the top-level scope. */ |
|
| 126 | + | symbol_t *symtab_lookup( |
|
| 127 | + | scope_t *s, const char *name, u16 length, symkind_t kind |
|
| 128 | + | ) { |
|
| 129 | + | for (scope_t *scope = s; scope != NULL; scope = scope->parent) { |
|
| 130 | + | symbol_t *sym = NULL; |
|
| 131 | + | ||
| 132 | + | if ((sym = symtab_scope_lookup(scope, name, length, kind))) |
|
| 133 | + | return sym; |
|
| 134 | + | } |
|
| 135 | + | return NULL; |
|
| 136 | + | } |
|
| 137 | + | ||
| 138 | + | /* Add a symbol to the current scope. */ |
|
| 139 | + | bool symtab_add_ident(scope_t *s, node_t *ident, node_t *n) { |
|
| 140 | + | symkind_t kind = symbol_entry(n); |
|
| 141 | + | const char *name = ident->val.ident.name; |
|
| 142 | + | u16 length = ident->val.ident.length; |
|
| 143 | + | ||
| 144 | + | if (symtab_scope_lookup(s, name, length, kind)) |
|
| 145 | + | return false; /* Variable already defined in this scope. */ |
|
| 146 | + | ||
| 147 | + | symbol_t *sym = alloc_symbol((symbol_t){ |
|
| 148 | + | .name = name, |
|
| 149 | + | .length = length, |
|
| 150 | + | .node = n, |
|
| 151 | + | .kind = kind, |
|
| 152 | + | .scope = s, |
|
| 153 | + | }); |
|
| 154 | + | ||
| 155 | + | /* Nb. static variables are never exposed. */ |
|
| 156 | + | if (kind == SYM_FUNCTION || kind == SYM_CONSTANT) { |
|
| 157 | + | /* Copy qualified name for top-level symbols */ |
|
| 158 | + | ||
| 159 | + | module_path(sym->qualified, s->mod->qualified); |
|
| 160 | + | module_qualify_str(sym->qualified, sym->name, sym->length); |
|
| 161 | + | } |
|
| 162 | + | assert(s->nsymbols < MAX_SCOPE_SYMBOLS); |
|
| 163 | + | s->symbols[s->nsymbols++] = sym; |
|
| 164 | + | n->sym = sym; |
|
| 165 | + | ||
| 166 | + | return true; |
|
| 167 | + | } |
|
| 168 | + | ||
| 169 | + | /* Add a symbol to the current scope. */ |
|
| 170 | + | bool symtab_insert(scope_t *s, const char *name, u16 length, node_t *n) { |
|
| 171 | + | symkind_t kind = symbol_entry(n); |
|
| 172 | + | ||
| 173 | + | if (symtab_lookup(s, name, length, kind)) { |
|
| 174 | + | return false; |
|
| 175 | + | } |
|
| 176 | + | symbol_t *sym = alloc_symbol((symbol_t){ |
|
| 177 | + | .name = name, |
|
| 178 | + | .length = length, |
|
| 179 | + | .node = n, |
|
| 180 | + | .kind = kind, |
|
| 181 | + | }); |
|
| 182 | + | ||
| 183 | + | assert(s->nsymbols < MAX_SCOPE_SYMBOLS); |
|
| 184 | + | s->symbols[s->nsymbols++] = sym; |
|
| 185 | + | n->sym = sym; |
|
| 186 | + | ||
| 187 | + | return true; |
|
| 188 | + | } |
|
| 189 | + | ||
| 190 | + | /* Allocate a symbol. */ |
|
| 191 | + | symbol_t *alloc_symbol(symbol_t sym) { |
|
| 192 | + | if (NSYMBOLS >= MAX_SYMBOLS) { |
|
| 193 | + | bail("symbol table overflow: too many symbols"); |
|
| 194 | + | return NULL; |
|
| 195 | + | } |
|
| 196 | + | symbol_t *slot = &SYMBOLS[NSYMBOLS++]; |
|
| 197 | + | *slot = sym; |
|
| 198 | + | ||
| 199 | + | strncpy(slot->qualified, slot->name, slot->length); |
|
| 200 | + | ||
| 201 | + | return slot; |
|
| 202 | + | } |
|
| 203 | + | ||
| 204 | + | /* Add an imported symbol as an alias in the current scope. */ |
|
| 205 | + | bool symtab_add_alias(scope_t *s, node_t *ident, symbol_t *original) { |
|
| 206 | + | /* Check for conflicts */ |
|
| 207 | + | if (symtab_scope_lookup( |
|
| 208 | + | s, ident->val.ident.name, ident->val.ident.length, original->kind |
|
| 209 | + | )) { |
|
| 210 | + | return false; |
|
| 211 | + | } |
|
| 212 | + | /* Create alias that points to the original symbol instead of copying it */ |
|
| 213 | + | assert(s->nsymbols < MAX_SCOPE_SYMBOLS); |
|
| 214 | + | s->symbols[s->nsymbols++] = original; |
|
| 215 | + | ident->sym = original; |
|
| 216 | + | ||
| 217 | + | return true; |
|
| 218 | + | } |
|
| 219 | + | ||
| 220 | + | /* Add a symbol directly to a scope. */ |
|
| 221 | + | void symtab_add_symbol(scope_t *s, symbol_t *sym) { |
|
| 222 | + | assert(s->nsymbols < MAX_SCOPE_SYMBOLS); |
|
| 223 | + | s->symbols[s->nsymbols++] = sym; |
|
| 224 | + | } |
symtab.h
added
+170 -0
| 1 | + | #ifndef SYMTAB_H |
|
| 2 | + | #define SYMTAB_H |
|
| 3 | + | ||
| 4 | + | #include <stdio.h> |
|
| 5 | + | ||
| 6 | + | #include "limits.h" |
|
| 7 | + | #include "riscv.h" |
|
| 8 | + | #include "types.h" |
|
| 9 | + | ||
| 10 | + | struct symbol_t; |
|
| 11 | + | struct node_t; |
|
| 12 | + | struct scope_t; |
|
| 13 | + | struct type_t; |
|
| 14 | + | ||
/* Classes of types that values can have. The numeric values are spelled out
 * explicitly. */
typedef enum {
    /* Special value for nodes without a type. */
    TYPE_VOID = 0,

    /* Primitive types. */
    TYPE_I8   = 1,
    TYPE_I16  = 2,
    TYPE_I32  = 3,
    TYPE_U8   = 4,
    TYPE_U16  = 5,
    TYPE_U32  = 6,
    TYPE_F32  = 7,
    TYPE_BOOL = 8,

    /* Complex types. */
    TYPE_FN     = 9,
    TYPE_UNION  = 10,
    TYPE_RESULT = 11,
    TYPE_RECORD = 12,
    TYPE_ARRAY  = 13,
    TYPE_PTR    = 14,
    TYPE_SLICE  = 15,
    TYPE_OPT    = 16,
    TYPE_NEVER  = 17,
    TYPE_OPAQUE = 18
} typeclass_t;
| 37 | + | ||
/* Kinds of symbol table entries. SYM_ANY is only meaningful as a lookup
 * filter, where it matches every kind. */
typedef enum {
    SYM_ANY = 0,  /* Match any symbol kind. */
    SYM_VARIABLE, /* Variable. */
    SYM_CONSTANT, /* Constant value. */
    SYM_FIELD,    /* Field. */
    SYM_VARIANT,  /* Variant. */
    SYM_FUNCTION, /* Function. */
    SYM_TYPE,     /* Type. */
    SYM_MODULE,   /* Module. */
} symkind_t;
| 48 | + | ||
| 49 | + | /* Symbol table scope. */ |
|
| 50 | + | typedef struct scope_t { |
|
| 51 | + | struct module_t *mod; |
|
| 52 | + | struct scope_t *parent; |
|
| 53 | + | struct symbol_t *symbols[MAX_SCOPE_SYMBOLS]; |
|
| 54 | + | u16 nsymbols; |
|
| 55 | + | } scope_t; |
|
| 56 | + | ||
/* Field, function or module attributes. Each attribute is a distinct bit,
 * so several can be combined in one value. */
typedef enum {
    ATTRIB_NONE      = 0,
    ATTRIB_PUB       = 0x01,
    ATTRIB_DEFAULT   = 0x02,
    ATTRIB_EXTERN    = 0x04,
    ATTRIB_TEST      = 0x08,
    ATTRIB_INTRINSIC = 0x10,
} attrib_t;
| 66 | + | ||
| 67 | + | /* Stack frame. */ |
|
| 68 | + | typedef struct frame_t { |
|
| 69 | + | i32 size; /* Maximum temporary stack usage (includes frame header). */ |
|
| 70 | + | i32 sp; /* Temporary allocation cursor. */ |
|
| 71 | + | } frame_t; |
|
| 72 | + | ||
/* Where a value is stored. Selects the active member of the `as` union in
 * a value. */
typedef enum {
    LOC_NONE = 0, /* No location. */
    LOC_REG,      /* In a register. */
    LOC_STACK,    /* On the stack. */
    LOC_IMM,      /* An immediate. */
    LOC_ADDR      /* At an address in memory. */
} memloc_t;
| 75 | + | ||
| 76 | + | /* Offset from register, on stack. */ |
|
| 77 | + | typedef struct { |
|
| 78 | + | reg_t base; |
|
| 79 | + | int offset; |
|
| 80 | + | } offset_t; |
|
| 81 | + | ||
| 82 | + | /* Address in memory with optional offset */ |
|
| 83 | + | typedef struct { |
|
| 84 | + | usize base; |
|
| 85 | + | int offset; |
|
| 86 | + | } addr_t; |
|
| 87 | + | ||
| 88 | + | /* Immediates. */ |
|
| 89 | + | typedef union { |
|
| 90 | + | bool b; |
|
| 91 | + | i32 i; |
|
| 92 | + | u32 u; |
|
| 93 | + | f32 f; |
|
| 94 | + | } imm_t; |
|
| 95 | + | ||
| 96 | + | /* Value handled by code generator. */ |
|
| 97 | + | typedef struct value_t { |
|
| 98 | + | struct type_t *type; |
|
| 99 | + | memloc_t loc; |
|
| 100 | + | bool temp; |
|
| 101 | + | union { |
|
| 102 | + | reg_t reg; /* Register. */ |
|
| 103 | + | offset_t off; /* Offset from a base register. */ |
|
| 104 | + | imm_t imm; /* Stored as an immediate. */ |
|
| 105 | + | addr_t adr; /* Stored at a static address */ |
|
| 106 | + | } as; |
|
| 107 | + | } value_t; |
|
| 108 | + | ||
| 109 | + | typedef struct symbol_t { /* Symbol table entry: common header followed by a kind-specific payload in `e`. */ |
|
| 110 | + | const char *name; /* Symbol name. NOTE(review): carried with an explicit `length`, so possibly not NUL-terminated -- confirm. */ |
|
| 111 | + | u16 length; /* Symbol name length. */ |
|
| 112 | + | char qualified[MAX_QUALIFIED_NAME]; /* Fully qualified name, stored inline in a fixed-size buffer. */ |
|
| 113 | + | struct node_t *node; /* Node defining the symbol. */ |
|
| 114 | + | struct scope_t *scope; /* Scope where the symbol is defined. */ |
|
| 115 | + | symkind_t kind; /* Kind of symbol. NOTE(review): presumably selects the active member of `e` -- confirm. */ |
|
| 116 | + | union { |
|
| 117 | + | struct { /* Variable entry. */ |
|
| 118 | + | struct type_t *typ; /* Variable type. */ |
|
| 119 | + | struct value_t val; /* Variable memory value. */ |
|
| 120 | + | i32 align; /* Alignment override (bytes). */ |
|
| 121 | + | } var; |
|
| 122 | + | ||
| 123 | + | struct { /* Field entry. */ |
|
| 124 | + | struct type_t *typ; /* Field type. */ |
|
| 125 | + | i32 offset; /* Offset from start of object. */ |
|
| 126 | + | } field; |
|
| 127 | + | ||
| 128 | + | struct { /* Variant entry. */ |
|
| 129 | + | struct type_t *typ; /* Type associated with the variant. */ |
|
| 130 | + | i32 tag; /* NOTE(review): presumably the variant's discriminant value -- confirm. */ |
|
| 131 | + | } variant; |
|
| 132 | + | ||
| 133 | + | struct { /* Function entry. */ |
|
| 134 | + | scope_t *scope; /* Inner scope. */ |
|
| 135 | + | usize addr; /* Address in memory. */ |
|
| 136 | + | frame_t frame; /* Stack frame information. */ |
|
| 137 | + | attrib_t attribs; /* Attributes. */ |
|
| 138 | + | bool used; /* Whether function is called/used. */ |
|
| 139 | + | } fn; |
|
| 140 | + | ||
| 141 | + | struct { /* Type entry. */ |
|
| 142 | + | struct type_t *info; /* Type described by this symbol. */ |
|
| 143 | + | } typ; |
|
| 144 | + | ||
| 145 | + | struct module_t *mod; /* Module entry. */ |
|
| 146 | + | } e; |
|
| 147 | + | } symbol_t; |
|
| 148 | + | ||
| 149 | + | /* Allocate a symbol. Takes the initial contents by value and returns a pointer. NOTE(review): ownership and lifetime of the result are defined by the implementation -- confirm there. */ |
|
| 150 | + | symbol_t *alloc_symbol(symbol_t); |
|
| 151 | + | /* Insert a symbol into a scope, named by `name`/`length` and defined by node `n`. NOTE(review): the bool result presumably reports success (e.g. no duplicate) -- confirm. */ |
|
| 152 | + | bool symtab_insert(scope_t *s, const char *name, u16 length, struct node_t *n); |
|
| 153 | + | /* Insert an identifier into a scope; the name comes from the `ident` node and `n` is the defining node. */ |
|
| 154 | + | bool symtab_add_ident(scope_t *s, struct node_t *ident, struct node_t *n); |
|
| 155 | + | /* Add an imported symbol as an alias in the current scope; `ident` supplies the local name for `original`. */ |
|
| 156 | + | bool symtab_add_alias(scope_t *s, struct node_t *ident, symbol_t *original); |
|
| 157 | + | /* Add a symbol directly to a scope. Returns nothing, so failures (if any) are not reported to the caller. */ |
|
| 158 | + | void symtab_add_symbol(scope_t *s, symbol_t *sym); |
|
| 159 | + | /* Lookup a symbol in the given scope only. NOTE(review): presumably returns NULL when nothing matches -- confirm. */ |
|
| 160 | + | symbol_t *symtab_scope_lookup( |
|
| 161 | + | scope_t *s, const char *name, u16 length, symkind_t kind |
|
| 162 | + | ); |
|
| 163 | + | /* Lookup a symbol in current and parent scopes. NOTE(review): presumably returns NULL when nothing matches -- confirm. */ |
|
| 164 | + | symbol_t *symtab_lookup( |
|
| 165 | + | scope_t *s, const char *name, u16 length, symkind_t kind |
|
| 166 | + | ); |
|
| 167 | + | /* Create a new scope with the given parent, associated with module `mod`. */ |
|
| 168 | + | scope_t *symtab_scope(scope_t *parent, struct module_t *mod); |
|
| 169 | + | ||
| 170 | + | #endif |
types.h
added
+49 -0
| 1 | + | #ifndef TYPES_H |
|
| 2 | + | #define TYPES_H |
|
| 3 | + | ||
| 4 | + | typedef unsigned char u8; /* Fixed-width aliases. The widths in the names are assumed, not checked: they hold on common ILP32/LP64 ABIs. */ |
|
| 5 | + | typedef unsigned short u16; |
|
| 6 | + | typedef unsigned int u32; |
|
| 7 | + | typedef unsigned long long u64; |
|
| 8 | + | typedef signed char i8; |
|
| 9 | + | typedef short i16; |
|
| 10 | + | typedef int i32; |
|
| 11 | + | typedef long long i64; |
|
| 12 | + | typedef float f32; |
|
| 13 | + | typedef double f64; |
|
| 14 | + | ||
| 15 | + | typedef unsigned long usize; /* Size/address type. `unsigned long` is narrower than a pointer on LLP64 targets (e.g. 64-bit Windows). */ |
|
| 16 | + | typedef long isize; /* Signed counterpart of usize. */ |
|
| 17 | + | ||
| 18 | + | typedef u8 bool; /* Plain byte alias, not C's _Bool: conversions are not normalized to 0/1 (e.g. 256 becomes 0). */ |
|
| 19 | + | ||
| 20 | + | #define true 1 /* Defined unconditionally; clashes with <stdbool.h> and with the C23 keywords. */ |
|
| 21 | + | #define false 0 |
|
| 22 | + | ||
| 23 | + | #ifndef NULL /* Only supply NULL when no standard header has defined it already. */ |
|
| 24 | + | #define NULL ((void *)0) |
|
| 25 | + | #endif |
|
| 26 | + | ||
/* Value ranges of the fixed-width integer aliases declared above. */
#define U8_MIN  0
#define U8_MAX  255
#define U16_MIN 0
#define U16_MAX 65535
#define U32_MIN 0
#define U32_MAX 4294967295U

#define I8_MIN  (-128)
#define I8_MAX  127
#define I16_MIN (-32768)
#define I16_MAX 32767
/* Written as (-MAX - 1) on purpose: the literal 2147483648 does not fit in a
 * 32-bit `int`, so `-2147483648` is a negated `long`/`long long` rather than
 * an `int`. This form keeps the constant an `int`, and the parentheses make
 * it safe inside larger expressions. */
#define I32_MIN (-2147483647 - 1)
#define I32_MAX 2147483647
|
| 40 | + | ||
/* Use appropriate syntax for no-discard function attribute, depending
 * on C standard used.
 *
 * The `[[nodiscard]]` spelling is only standard from C23 onwards
 * (__STDC_VERSION__ == 202311L); earlier language modes fall back to the
 * GNU attribute where the compiler supports it, and to nothing otherwise so
 * that annotated declarations still compile. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define __nodiscard [[nodiscard]]
#elif defined(__GNUC__) || defined(__clang__)
#define __nodiscard __attribute__((warn_unused_result))
#else
#define __nodiscard
#endif
|
| 48 | + | ||
| 49 | + | #endif |
util.h
added
+59 -0
| 1 | + | #ifndef UTIL_H |
|
| 2 | + | #define UTIL_H |
|
| 3 | + | ||
| 4 | + | #include <stdlib.h> |
|
| 5 | + | #include <string.h> |
|
| 6 | + | ||
| 7 | + | #include "types.h" |
|
| 8 | + | ||
| 9 | + | /** |
|
| 10 | + | * Concatenate string src to the end of dst. |
|
| 11 | + | */ |
|
| 12 | + | static inline usize strlcat(char *dst, const char *src, usize dsize) { |
|
| 13 | + | usize dst_len = strlen(dst); |
|
| 14 | + | usize src_len = strlen(src); |
|
| 15 | + | ||
| 16 | + | /* If destination buffer is already full or too small, can't append */ |
|
| 17 | + | if (dst_len >= dsize) { |
|
| 18 | + | return dst_len + src_len; /* Return what length would be */ |
|
| 19 | + | } |
|
| 20 | + | ||
| 21 | + | /* Calculate remaining space in destination */ |
|
| 22 | + | usize remaining = dsize - dst_len - 1; /* -1 for null terminator */ |
|
| 23 | + | ||
| 24 | + | if (remaining > 0) { |
|
| 25 | + | /* Use strncat to append, but limit to remaining space */ |
|
| 26 | + | strncat(dst, src, remaining); |
|
| 27 | + | } |
|
| 28 | + | /* Return total length that would be created */ |
|
| 29 | + | return dst_len + src_len; |
|
| 30 | + | } |
|
| 31 | + | ||
/* Copy a string safely: copy `src` into the caller-provided buffer `dst`,
 * which has room for `maxlen` bytes, truncating if necessary and always
 * NUL-terminating the result.
 *
 * Does nothing when `dst` or `src` is NULL, or when `maxlen` is 0: with no
 * room for even the terminator, any write would overflow the buffer.
 *
 * NOTE: despite its name this helper does not allocate, and its signature
 * differs from POSIX strndup(3). */
static inline void strndup(char *dst, const char *src, size_t maxlen) {
    if (!dst || !src || maxlen == 0) {
        return;
    }

    size_t srclen  = strlen(src);
    size_t copylen = srclen < maxlen - 1 ? srclen : maxlen - 1;

    memcpy(dst, src, copylen);
    dst[copylen] = '\0';
}
|
| 43 | + | ||
/* Like `strstr` but find the _last_ occurrence.
 *
 * Returns a pointer to the start of the final match of `needle` inside
 * `haystack`, or NULL when there is no match. An empty `needle` matches at
 * the terminating NUL of `haystack`. Overlapping matches are considered,
 * because the search resumes one character after each hit. */
static inline char *strrstr(const char *haystack, const char *needle) {
    if (needle[0] == '\0') {
        return (char *)haystack + strlen(haystack);
    }

    char *last = NULL;

    for (char *hit = strstr(haystack, needle); hit != NULL;
         hit       = strstr(hit + 1, needle)) {
        last = hit;
    }
    return last;
}
|
| 58 | + | ||
| 59 | + | #endif |