gen/
data.c
10.9 KiB
data.h
2.5 KiB
emit.c
37.2 KiB
emit.h
4.6 KiB
.clang-format
570 B
.gitignore
30 B
.gitsigners
112 B
LICENSE
1.1 KiB
Makefile
911 B
README
1.8 KiB
ast.c
5.0 KiB
ast.h
15.1 KiB
desugar.c
23.1 KiB
desugar.h
286 B
gen.c
108.5 KiB
gen.h
4.9 KiB
io.c
1.1 KiB
io.h
444 B
limits.h
1.3 KiB
module.c
10.0 KiB
module.h
2.2 KiB
options.c
1.4 KiB
options.h
472 B
parser.c
68.3 KiB
parser.h
942 B
radiance.c
3.7 KiB
ralloc.c
2.0 KiB
ralloc.h
1.1 KiB
resolver.c
109.7 KiB
resolver.h
5.6 KiB
riscv.c
12.0 KiB
riscv.h
12.0 KiB
scanner.c
10.2 KiB
scanner.h
3.2 KiB
strings.c
2.6 KiB
strings.h
407 B
symtab.c
5.7 KiB
symtab.h
4.6 KiB
types.h
1.0 KiB
util.h
1.5 KiB
gen/data.c
raw
| 1 | #include <assert.h> |
| 2 | #include <stdio.h> |
| 3 | #include <string.h> |
| 4 | |
| 5 | #include "../ast.h" |
| 6 | #include "../gen.h" |
| 7 | #include "../io.h" |
| 8 | #include "../limits.h" |
| 9 | #include "../parser.h" |
| 10 | #include "../riscv.h" |
| 11 | #include "../strings.h" |
| 12 | |
| 13 | #include "data.h" |
| 14 | |
| 15 | static void emit_node_data( |
| 16 | node_t *n, parser_t *p, FILE *out, data_section_t *d |
| 17 | ); |
| 18 | static void emit_array_data( |
| 19 | node_t *n, parser_t *p, FILE *out, data_section_t *d |
| 20 | ); |
| 21 | |
/* Write `v` to `out` as four bytes, least-significant byte first. */
static void write_le32(FILE *out, u32 v) {
    u8 buf[4];

    /* Byte `k` of the output carries bits [8k, 8k+8) of the value. */
    for (unsigned k = 0; k < sizeof(buf); k++) {
        buf[k] = (u8)((v >> (8 * k)) & 0xFF);
    }
    fwrite(buf, sizeof(buf), 1, out);
}
| 33 | |
/* Write `v` to `out` as eight bytes, least-significant byte first. */
static void write_le64(FILE *out, u64 v) {
    u8  buf[8];
    u64 rest = v;

    /* Peel one byte off the low end per iteration. */
    for (unsigned k = 0; k < sizeof(buf); k++) {
        buf[k] = (u8)(rest & 0xFF);
        rest >>= 8;
    }
    fwrite(buf, sizeof(buf), 1, out);
}
| 48 | |
| 49 | /* Add a string literal to the data section. |
| 50 | * Returns the offset in the data section. */ |
| 51 | usize data_string(data_section_t *d, const char *str, usize len) { |
| 52 | /* Check if this string already exists in the data section */ |
| 53 | for (usize i = 0; i < d->nstrings; i++) { |
| 54 | string_data_t *existing = &d->strings[i]; |
| 55 | |
| 56 | if (existing->length == len && memcmp(existing->data, str, len) == 0) { |
| 57 | return d->ro_offset + existing->offset; |
| 58 | } |
| 59 | } |
| 60 | d->ro_size = align(d->ro_size, WORD_SIZE); |
| 61 | |
| 62 | /* Store the string information */ |
| 63 | string_data_t *s = &d->strings[d->nstrings++]; |
| 64 | s->data = str; |
| 65 | s->length = len; |
| 66 | s->offset = d->ro_size; |
| 67 | |
| 68 | usize data = len + 1; /* Account for `NULL` terminator */ |
| 69 | usize padded = align(data, WORD_SIZE); /* Align to word boundary */ |
| 70 | d->ro_size += padded; |
| 71 | |
| 72 | return d->ro_offset + s->offset; |
| 73 | } |
| 74 | |
| 75 | /* Add a node value to the data section. |
| 76 | * Initialized items are placed in [0, rw_init_total), BSS items in |
| 77 | * [rw_init_total, ...). Returns the offset in the data section. */ |
| 78 | usize data_node( |
| 79 | data_section_t *d, |
| 80 | parser_t *p, |
| 81 | node_t *node, |
| 82 | const char *name, |
| 83 | usize name_len |
| 84 | ) { |
| 85 | bool bss = node->cls == NODE_UNDEF; |
| 86 | |
| 87 | /* Store the constant information */ |
| 88 | data_item_t *item = &d->items[d->nitems++]; |
| 89 | item->kind = DATA_CONST; |
| 90 | item->node = node; |
| 91 | item->parser = p; |
| 92 | item->as.constant.name = name; |
| 93 | item->as.constant.name_len = name_len; |
| 94 | |
| 95 | if (bss) { |
| 96 | d->rw_bss_size = align(d->rw_bss_size, WORD_SIZE); |
| 97 | item->offset = d->rw_init_total + d->rw_bss_size; |
| 98 | d->rw_bss_size += node->type->size; |
| 99 | } else { |
| 100 | d->rw_init_size = align(d->rw_init_size, WORD_SIZE); |
| 101 | item->offset = d->rw_init_size; |
| 102 | d->rw_init_size += node->type->size; |
| 103 | } |
| 104 | return d->rw_offset + item->offset; |
| 105 | } |
| 106 | |
| 107 | /* Add array data for a slice to the data section. |
| 108 | * Returns the offset in the data section. */ |
| 109 | usize data_array(data_section_t *d, parser_t *p, node_t *n) { |
| 110 | /* Check if this array already exists in the data section */ |
| 111 | for (usize i = 0; i < d->nitems; i++) { |
| 112 | data_item_t *existing = &d->items[i]; |
| 113 | |
| 114 | if (existing->kind == DATA_ARRAY && existing->node == n) { |
| 115 | return d->rw_offset + existing->offset; |
| 116 | } |
| 117 | } |
| 118 | d->rw_init_size = align(d->rw_init_size, WORD_SIZE); |
| 119 | |
| 120 | /* Store the array information */ |
| 121 | data_item_t *item = &d->items[d->nitems++]; |
| 122 | item->kind = DATA_ARRAY; |
| 123 | item->offset = d->rw_init_size; |
| 124 | item->node = n; |
| 125 | item->parser = p; |
| 126 | item->as.array.length = n->val.array_lit.elems.len; |
| 127 | item->as.array.elem = n->type->info.slc.elem; |
| 128 | |
| 129 | d->rw_init_size += n->type->size; |
| 130 | |
| 131 | return d->rw_offset + item->offset; |
| 132 | } |
| 133 | |
| 134 | /* Initialize data section */ |
| 135 | void data_init(data_section_t *d) { |
| 136 | d->nstrings = 0; |
| 137 | d->nitems = 0; |
| 138 | d->ro_size = 0; |
| 139 | d->rw_init_total = 0; |
| 140 | d->rw_init_size = 0; |
| 141 | d->rw_bss_size = 0; |
| 142 | d->rw_offset = DATA_RW_OFFSET; |
| 143 | d->ro_offset = DATA_RO_OFFSET; |
| 144 | } |
| 145 | |
| 146 | /* For slices, we need to emit: |
| 147 | * 1. The data pointer; points to array data |
| 148 | * 2. The length */ |
| 149 | static void emit_slice_data(node_t *n, FILE *out, data_section_t *d) { |
| 150 | if (n->cls == NODE_STRING) { |
| 151 | /* For string literals, look up or register the string offset */ |
| 152 | usize addr = 0; |
| 153 | |
| 154 | for (usize i = 0; i < d->nstrings; i++) { |
| 155 | string_data_t *s = &d->strings[i]; |
| 156 | if (s->length == n->val.string_lit.length && |
| 157 | memcmp(s->data, n->val.string_lit.data, s->length) == 0) { |
| 158 | addr = s->offset + d->ro_offset; |
| 159 | break; |
| 160 | } |
| 161 | } |
| 162 | /* If string not found, register it now */ |
| 163 | if (!addr) { |
| 164 | addr = data_string( |
| 165 | d, n->val.string_lit.data, n->val.string_lit.length |
| 166 | ); |
| 167 | } |
| 168 | write_le64(out, addr); |
| 169 | write_le64(out, n->val.string_lit.length); |
| 170 | } else { |
| 171 | bail("unsupported slice node %s", node_names[n->cls]); |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | static void emit_node_data( |
| 176 | node_t *n, parser_t *p, FILE *out, data_section_t *d |
| 177 | ) { |
| 178 | /* Handle undefined nodes by emitting zeros for their type size */ |
| 179 | if (n->cls == NODE_UNDEF) { |
| 180 | for (i32 i = 0; i < n->type->size; i++) { |
| 181 | fputc(0, out); |
| 182 | } |
| 183 | return; |
| 184 | } |
| 185 | /* Resolve identifiers that reference constants */ |
| 186 | if (n->cls == NODE_IDENT && n->sym && n->sym->kind == SYM_CONSTANT) { |
| 187 | emit_node_data(n->sym->node->val.constant.value, p, out, d); |
| 188 | return; |
| 189 | } |
| 190 | switch (n->type->cls) { |
| 191 | case TYPE_ARRAY: { |
| 192 | if (n->cls == NODE_ARRAY_LIT) { |
| 193 | node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems); |
| 194 | for (usize j = 0; j < n->val.array_lit.elems.len; j++) { |
| 195 | emit_node_data(elems[j], p, out, d); |
| 196 | } |
| 197 | } else if (n->cls == NODE_ARRAY_REPEAT_LIT) { |
| 198 | /* Array repeat literal: emit the same value N times */ |
| 199 | usize count = n->val.array_repeat_lit.count->val.number.value.u; |
| 200 | for (usize j = 0; j < count; j++) { |
| 201 | emit_node_data(n->val.array_repeat_lit.value, p, out, d); |
| 202 | } |
| 203 | } else { |
| 204 | bail("unsupported array node %s", node_names[n->cls]); |
| 205 | } |
| 206 | break; |
| 207 | } |
| 208 | case TYPE_RECORD: { |
| 209 | /* Emit record fields in order, with proper inter-field padding. */ |
| 210 | node_t **fields = nodespan_ptrs(p, n->val.record_lit.fields); |
| 211 | i32 pos = 0; |
| 212 | |
| 213 | for (usize i = 0; i < n->val.record_lit.fields.len; i++) { |
| 214 | node_t *field = fields[i]; |
| 215 | node_t *field_val = field->val.record_lit_field.value; |
| 216 | symbol_t *field_sym = field->sym; |
| 217 | i32 field_off = field_sym->e.field.offset; |
| 218 | |
| 219 | /* Emit padding bytes to reach the field's offset. */ |
| 220 | while (pos < field_off) { |
| 221 | fputc(0, out); |
| 222 | pos++; |
| 223 | } |
| 224 | emit_node_data(field_val, p, out, d); |
| 225 | pos += field_val->type->size; |
| 226 | } |
| 227 | /* Emit trailing padding to reach the full record size. */ |
| 228 | while (pos < n->type->size) { |
| 229 | fputc(0, out); |
| 230 | pos++; |
| 231 | } |
| 232 | break; |
| 233 | } |
| 234 | case TYPE_SLICE: |
| 235 | emit_slice_data(n, out, d); |
| 236 | break; |
| 237 | case TYPE_UNION: { |
| 238 | assert(n->sym); |
| 239 | assert(n->sym->node); |
| 240 | /* For union types, write the tag byte and pad to the type size. */ |
| 241 | fputc((u8)n->sym->node->val.union_variant.value, out); |
| 242 | for (i32 i = 1; i < n->type->size; i++) { |
| 243 | fputc(0, out); |
| 244 | } |
| 245 | break; |
| 246 | } |
| 247 | case TYPE_BOOL: |
| 248 | fputc(n->val.bool_lit ? 1 : 0, out); |
| 249 | break; |
| 250 | case TYPE_U8: { |
| 251 | u8 value = (u8)n->val.number.value.u; |
| 252 | if (n->cls == NODE_CHAR) |
| 253 | value = (u8)n->val.char_lit; |
| 254 | fputc(value, out); |
| 255 | break; |
| 256 | } |
| 257 | case TYPE_U16: { |
| 258 | u16 value = (u16)n->val.number.value.u; |
| 259 | fputc(value & 0xFF, out); |
| 260 | fputc((value >> 8) & 0xFF, out); |
| 261 | break; |
| 262 | } |
| 263 | case TYPE_U32: |
| 264 | write_le32(out, n->val.number.value.u); |
| 265 | break; |
| 266 | case TYPE_I8: |
| 267 | fputc((u8)n->val.number.value.i, out); |
| 268 | break; |
| 269 | case TYPE_I16: { |
| 270 | i16 value = (i16)n->val.number.value.i; |
| 271 | fputc(value & 0xFF, out); |
| 272 | fputc((value >> 8) & 0xFF, out); |
| 273 | break; |
| 274 | } |
| 275 | case TYPE_I32: |
| 276 | write_le32(out, n->val.number.value.i); |
| 277 | break; |
| 278 | default: |
| 279 | break; |
| 280 | } |
| 281 | |
| 282 | /* Add padding to ensure alignment */ |
| 283 | usize size = n->type->size; |
| 284 | usize aligned = align(size, n->type->align); |
| 285 | usize padding = aligned - size; |
| 286 | |
| 287 | for (usize i = 0; i < padding; i++) { |
| 288 | fputc(0, out); |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | /* Helper function to emit array data */ |
| 293 | static void emit_array_data( |
| 294 | node_t *n, parser_t *p, FILE *out, data_section_t *d |
| 295 | ) { |
| 296 | /* Emit each array element */ |
| 297 | node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems); |
| 298 | for (usize i = 0; i < n->val.array_lit.elems.len; i++) { |
| 299 | emit_node_data(elems[i], p, out, d); |
| 300 | } |
| 301 | /* Padding */ |
| 302 | usize aligned = align(n->type->size, n->type->align); |
| 303 | usize padding = aligned - n->type->size; |
| 304 | |
| 305 | for (usize i = 0; i < padding; i++) { |
| 306 | fputc(0, out); |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | /* Emit the data section to the output file */ |
| 311 | void data_emit_ro(data_section_t *d, FILE *out) { |
| 312 | if (!out || d->ro_size == 0) { |
| 313 | return; /* No data to emit */ |
| 314 | } |
| 315 | for (usize i = 0; i < d->nstrings; i++) { |
| 316 | string_data_t *s = &d->strings[i]; |
| 317 | |
| 318 | /* Write string data */ |
| 319 | fwrite(s->data, 1, s->length, out); |
| 320 | fputc(0, out); /* NULL terminator */ |
| 321 | |
| 322 | /* Write padding */ |
| 323 | u32 padding = (WORD_SIZE - ((s->length + 1) % WORD_SIZE)) % WORD_SIZE; |
| 324 | |
| 325 | for (usize j = 0; j < padding; j++) { |
| 326 | fputc(0, out); |
| 327 | } |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | void data_emit_rw(data_section_t *d, FILE *out) { |
| 332 | if (!out || d->rw_init_total == 0) { |
| 333 | return; /* No initialized data to emit */ |
| 334 | } |
| 335 | /* Emit only initialized items (offsets < rw_init_total). |
| 336 | * BSS items (offsets >= rw_init_total) are zero-initialized by the |
| 337 | * runtime and are not written to the file. */ |
| 338 | usize current = 0; |
| 339 | |
| 340 | for (usize i = 0; i < d->nitems; i++) { |
| 341 | data_item_t *item = &d->items[i]; |
| 342 | |
| 343 | /* Skip BSS items */ |
| 344 | if (item->offset >= d->rw_init_total) |
| 345 | continue; |
| 346 | |
| 347 | /* Pad to reach the item's offset */ |
| 348 | while (current < item->offset) { |
| 349 | fputc(0, out); |
| 350 | current++; |
| 351 | } |
| 352 | if (item->kind == DATA_ARRAY) { |
| 353 | emit_array_data(item->node, item->parser, out, d); |
| 354 | } else { |
| 355 | emit_node_data(item->node, item->parser, out, d); |
| 356 | } |
| 357 | current = ftell(out); |
| 358 | } |
| 359 | /* Pad to the full initialized size */ |
| 360 | while (current < d->rw_init_total) { |
| 361 | fputc(0, out); |
| 362 | current++; |
| 363 | } |
| 364 | } |