#include #include #include #include "../ast.h" #include "../gen.h" #include "../io.h" #include "../limits.h" #include "../parser.h" #include "../riscv.h" #include "../strings.h" #include "data.h" static void emit_node_data( node_t *n, parser_t *p, FILE *out, data_section_t *d ); static void emit_array_data( node_t *n, parser_t *p, FILE *out, data_section_t *d ); /* Write a little-endian word */ static void write_le32(FILE *out, u32 v) { u8 bytes[4]; bytes[0] = (v & 0xFF); bytes[1] = ((v >> 8) & 0xFF); bytes[2] = ((v >> 16) & 0xFF); bytes[3] = ((v >> 24) & 0xFF); fwrite(bytes, sizeof(bytes), 1, out); } static void write_le64(FILE *out, u64 v) { u8 bytes[8]; bytes[0] = (v & 0xFF); bytes[1] = ((v >> 8) & 0xFF); bytes[2] = ((v >> 16) & 0xFF); bytes[3] = ((v >> 24) & 0xFF); bytes[4] = ((v >> 32) & 0xFF); bytes[5] = ((v >> 40) & 0xFF); bytes[6] = ((v >> 48) & 0xFF); bytes[7] = ((v >> 56) & 0xFF); fwrite(bytes, sizeof(bytes), 1, out); } /* Add a string literal to the data section. * Returns the offset in the data section. */ usize data_string(data_section_t *d, const char *str, usize len) { /* Check if this string already exists in the data section */ for (usize i = 0; i < d->nstrings; i++) { string_data_t *existing = &d->strings[i]; if (existing->length == len && memcmp(existing->data, str, len) == 0) { return d->ro_offset + existing->offset; } } d->ro_size = align(d->ro_size, WORD_SIZE); /* Store the string information */ string_data_t *s = &d->strings[d->nstrings++]; s->data = str; s->length = len; s->offset = d->ro_size; usize data = len + 1; /* Account for `NULL` terminator */ usize padded = align(data, WORD_SIZE); /* Align to word boundary */ d->ro_size += padded; return d->ro_offset + s->offset; } /* Add a node value to the data section. * Initialized items are placed in [0, rw_init_total), BSS items in * [rw_init_total, ...). Returns the offset in the data section. */ usize data_node( data_section_t *d, parser_t *p, node_t *node, const char *name, usize name_len ) { bool bss = node->cls == NODE_UNDEF; /* Store the constant information */ data_item_t *item = &d->items[d->nitems++]; item->kind = DATA_CONST; item->node = node; item->parser = p; item->as.constant.name = name; item->as.constant.name_len = name_len; if (bss) { d->rw_bss_size = align(d->rw_bss_size, WORD_SIZE); item->offset = d->rw_init_total + d->rw_bss_size; d->rw_bss_size += node->type->size; } else { d->rw_init_size = align(d->rw_init_size, WORD_SIZE); item->offset = d->rw_init_size; d->rw_init_size += node->type->size; } return d->rw_offset + item->offset; } /* Add array data for a slice to the data section. * Returns the offset in the data section. */ usize data_array(data_section_t *d, parser_t *p, node_t *n) { /* Check if this array already exists in the data section */ for (usize i = 0; i < d->nitems; i++) { data_item_t *existing = &d->items[i]; if (existing->kind == DATA_ARRAY && existing->node == n) { return d->rw_offset + existing->offset; } } d->rw_init_size = align(d->rw_init_size, WORD_SIZE); /* Store the array information */ data_item_t *item = &d->items[d->nitems++]; item->kind = DATA_ARRAY; item->offset = d->rw_init_size; item->node = n; item->parser = p; item->as.array.length = n->val.array_lit.elems.len; item->as.array.elem = n->type->info.slc.elem; d->rw_init_size += n->type->size; return d->rw_offset + item->offset; } /* Initialize data section */ void data_init(data_section_t *d) { d->nstrings = 0; d->nitems = 0; d->ro_size = 0; d->rw_init_total = 0; d->rw_init_size = 0; d->rw_bss_size = 0; d->rw_offset = DATA_RW_OFFSET; d->ro_offset = DATA_RO_OFFSET; } /* For slices, we need to emit: * 1. The data pointer; points to array data * 2. The length */ static void emit_slice_data(node_t *n, FILE *out, data_section_t *d) { if (n->cls == NODE_STRING) { /* For string literals, look up or register the string offset */ usize addr = 0; for (usize i = 0; i < d->nstrings; i++) { string_data_t *s = &d->strings[i]; if (s->length == n->val.string_lit.length && memcmp(s->data, n->val.string_lit.data, s->length) == 0) { addr = s->offset + d->ro_offset; break; } } /* If string not found, register it now */ if (!addr) { addr = data_string( d, n->val.string_lit.data, n->val.string_lit.length ); } write_le64(out, addr); write_le64(out, n->val.string_lit.length); } else { bail("unsupported slice node %s", node_names[n->cls]); } } static void emit_node_data( node_t *n, parser_t *p, FILE *out, data_section_t *d ) { /* Handle undefined nodes by emitting zeros for their type size */ if (n->cls == NODE_UNDEF) { for (i32 i = 0; i < n->type->size; i++) { fputc(0, out); } return; } /* Resolve identifiers that reference constants */ if (n->cls == NODE_IDENT && n->sym && n->sym->kind == SYM_CONSTANT) { emit_node_data(n->sym->node->val.constant.value, p, out, d); return; } switch (n->type->cls) { case TYPE_ARRAY: { if (n->cls == NODE_ARRAY_LIT) { node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems); for (usize j = 0; j < n->val.array_lit.elems.len; j++) { emit_node_data(elems[j], p, out, d); } } else if (n->cls == NODE_ARRAY_REPEAT_LIT) { /* Array repeat literal: emit the same value N times */ usize count = n->val.array_repeat_lit.count->val.number.value.u; for (usize j = 0; j < count; j++) { emit_node_data(n->val.array_repeat_lit.value, p, out, d); } } else { bail("unsupported array node %s", node_names[n->cls]); } break; } case TYPE_RECORD: { /* Emit record fields in order, with proper inter-field padding. */ node_t **fields = nodespan_ptrs(p, n->val.record_lit.fields); i32 pos = 0; for (usize i = 0; i < n->val.record_lit.fields.len; i++) { node_t *field = fields[i]; node_t *field_val = field->val.record_lit_field.value; symbol_t *field_sym = field->sym; i32 field_off = field_sym->e.field.offset; /* Emit padding bytes to reach the field's offset. */ while (pos < field_off) { fputc(0, out); pos++; } emit_node_data(field_val, p, out, d); pos += field_val->type->size; } /* Emit trailing padding to reach the full record size. */ while (pos < n->type->size) { fputc(0, out); pos++; } break; } case TYPE_SLICE: emit_slice_data(n, out, d); break; case TYPE_UNION: { assert(n->sym); assert(n->sym->node); /* For union types, write the tag byte and pad to the type size. */ fputc((u8)n->sym->node->val.union_variant.value, out); for (i32 i = 1; i < n->type->size; i++) { fputc(0, out); } break; } case TYPE_BOOL: fputc(n->val.bool_lit ? 1 : 0, out); break; case TYPE_U8: { u8 value = (u8)n->val.number.value.u; if (n->cls == NODE_CHAR) value = (u8)n->val.char_lit; fputc(value, out); break; } case TYPE_U16: { u16 value = (u16)n->val.number.value.u; fputc(value & 0xFF, out); fputc((value >> 8) & 0xFF, out); break; } case TYPE_U32: write_le32(out, n->val.number.value.u); break; case TYPE_I8: fputc((u8)n->val.number.value.i, out); break; case TYPE_I16: { i16 value = (i16)n->val.number.value.i; fputc(value & 0xFF, out); fputc((value >> 8) & 0xFF, out); break; } case TYPE_I32: write_le32(out, n->val.number.value.i); break; default: break; } /* Add padding to ensure alignment */ usize size = n->type->size; usize aligned = align(size, n->type->align); usize padding = aligned - size; for (usize i = 0; i < padding; i++) { fputc(0, out); } } /* Helper function to emit array data */ static void emit_array_data( node_t *n, parser_t *p, FILE *out, data_section_t *d ) { /* Emit each array element */ node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems); for (usize i = 0; i < n->val.array_lit.elems.len; i++) { emit_node_data(elems[i], p, out, d); } /* Padding */ usize aligned = align(n->type->size, n->type->align); usize padding = aligned - n->type->size; for (usize i = 0; i < padding; i++) { fputc(0, out); } } /* Emit the data section to the output file */ void data_emit_ro(data_section_t *d, FILE *out) { if (!out || d->ro_size == 0) { return; /* No data to emit */ } for (usize i = 0; i < d->nstrings; i++) { string_data_t *s = &d->strings[i]; /* Write string data */ fwrite(s->data, 1, s->length, out); fputc(0, out); /* NULL terminator */ /* Write padding */ u32 padding = (WORD_SIZE - ((s->length + 1) % WORD_SIZE)) % WORD_SIZE; for (usize j = 0; j < padding; j++) { fputc(0, out); } } } void data_emit_rw(data_section_t *d, FILE *out) { if (!out || d->rw_init_total == 0) { return; /* No initialized data to emit */ } /* Emit only initialized items (offsets < rw_init_total). * BSS items (offsets >= rw_init_total) are zero-initialized by the * runtime and are not written to the file. */ usize current = 0; for (usize i = 0; i < d->nitems; i++) { data_item_t *item = &d->items[i]; /* Skip BSS items */ if (item->offset >= d->rw_init_total) continue; /* Pad to reach the item's offset */ while (current < item->offset) { fputc(0, out); current++; } if (item->kind == DATA_ARRAY) { emit_array_data(item->node, item->parser, out, d); } else { emit_node_data(item->node, item->parser, out, d); } current = ftell(out); } /* Pad to the full initialized size */ while (current < d->rw_init_total) { fputc(0, out); current++; } }