gen/data.c 10.9 KiB raw
1
#include <assert.h>
2
#include <stdio.h>
3
#include <string.h>
4
5
#include "../ast.h"
6
#include "../gen.h"
7
#include "../io.h"
8
#include "../limits.h"
9
#include "../parser.h"
10
#include "../riscv.h"
11
#include "../strings.h"
12
13
#include "data.h"
14
15
static void emit_node_data(
16
    node_t *n, parser_t *p, FILE *out, data_section_t *d
17
);
18
static void emit_array_data(
19
    node_t *n, parser_t *p, FILE *out, data_section_t *d
20
);
21
22
/* Write a little-endian word */
23
static void write_le32(FILE *out, u32 v) {
24
    u8 bytes[4];
25
26
    bytes[0] = (v & 0xFF);
27
    bytes[1] = ((v >> 8) & 0xFF);
28
    bytes[2] = ((v >> 16) & 0xFF);
29
    bytes[3] = ((v >> 24) & 0xFF);
30
31
    fwrite(bytes, sizeof(bytes), 1, out);
32
}
33
34
/* Write `v` to `out` as eight bytes, least-significant byte first.
 * The bytes are staged in a local buffer and written with one fwrite call. */
static void write_le64(FILE *out, u64 v) {
    u8 buf[8];

    for (usize i = 0; i < sizeof(buf); i++) {
        buf[i] = (v >> (8 * i)) & 0xFF;
    }
    fwrite(buf, sizeof(buf), 1, out);
}
48
49
/* Add a string literal to the data section.
50
 * Returns the offset in the data section. */
51
usize data_string(data_section_t *d, const char *str, usize len) {
52
    /* Check if this string already exists in the data section */
53
    for (usize i = 0; i < d->nstrings; i++) {
54
        string_data_t *existing = &d->strings[i];
55
56
        if (existing->length == len && memcmp(existing->data, str, len) == 0) {
57
            return d->ro_offset + existing->offset;
58
        }
59
    }
60
    d->ro_size = align(d->ro_size, WORD_SIZE);
61
62
    /* Store the string information */
63
    string_data_t *s = &d->strings[d->nstrings++];
64
    s->data          = str;
65
    s->length        = len;
66
    s->offset        = d->ro_size;
67
68
    usize data    = len + 1;                /* Account for `NULL` terminator */
69
    usize padded  = align(data, WORD_SIZE); /* Align to word boundary */
70
    d->ro_size   += padded;
71
72
    return d->ro_offset + s->offset;
73
}
74
75
/* Add a node value to the data section.
76
 * Initialized items are placed in [0, rw_init_total), BSS items in
77
 * [rw_init_total, ...).  Returns the offset in the data section. */
78
usize data_node(
79
    data_section_t *d,
80
    parser_t       *p,
81
    node_t         *node,
82
    const char     *name,
83
    usize           name_len
84
) {
85
    bool bss = node->cls == NODE_UNDEF;
86
87
    /* Store the constant information */
88
    data_item_t *item          = &d->items[d->nitems++];
89
    item->kind                 = DATA_CONST;
90
    item->node                 = node;
91
    item->parser               = p;
92
    item->as.constant.name     = name;
93
    item->as.constant.name_len = name_len;
94
95
    if (bss) {
96
        d->rw_bss_size  = align(d->rw_bss_size, WORD_SIZE);
97
        item->offset    = d->rw_init_total + d->rw_bss_size;
98
        d->rw_bss_size += node->type->size;
99
    } else {
100
        d->rw_init_size  = align(d->rw_init_size, WORD_SIZE);
101
        item->offset     = d->rw_init_size;
102
        d->rw_init_size += node->type->size;
103
    }
104
    return d->rw_offset + item->offset;
105
}
106
107
/* Add array data for a slice to the data section.
108
 * Returns the offset in the data section. */
109
usize data_array(data_section_t *d, parser_t *p, node_t *n) {
110
    /* Check if this array already exists in the data section */
111
    for (usize i = 0; i < d->nitems; i++) {
112
        data_item_t *existing = &d->items[i];
113
114
        if (existing->kind == DATA_ARRAY && existing->node == n) {
115
            return d->rw_offset + existing->offset;
116
        }
117
    }
118
    d->rw_init_size = align(d->rw_init_size, WORD_SIZE);
119
120
    /* Store the array information */
121
    data_item_t *item     = &d->items[d->nitems++];
122
    item->kind            = DATA_ARRAY;
123
    item->offset          = d->rw_init_size;
124
    item->node            = n;
125
    item->parser          = p;
126
    item->as.array.length = n->val.array_lit.elems.len;
127
    item->as.array.elem   = n->type->info.slc.elem;
128
129
    d->rw_init_size += n->type->size;
130
131
    return d->rw_offset + item->offset;
132
}
133
134
/* Initialize data section */
135
void data_init(data_section_t *d) {
136
    d->nstrings      = 0;
137
    d->nitems        = 0;
138
    d->ro_size       = 0;
139
    d->rw_init_total = 0;
140
    d->rw_init_size  = 0;
141
    d->rw_bss_size   = 0;
142
    d->rw_offset     = DATA_RW_OFFSET;
143
    d->ro_offset     = DATA_RO_OFFSET;
144
}
145
146
/* For slices, we need to emit:
147
 * 1. The data pointer; points to array data
148
 * 2. The length */
149
static void emit_slice_data(node_t *n, FILE *out, data_section_t *d) {
150
    if (n->cls == NODE_STRING) {
151
        /* For string literals, look up or register the string offset */
152
        usize addr = 0;
153
154
        for (usize i = 0; i < d->nstrings; i++) {
155
            string_data_t *s = &d->strings[i];
156
            if (s->length == n->val.string_lit.length &&
157
                memcmp(s->data, n->val.string_lit.data, s->length) == 0) {
158
                addr = s->offset + d->ro_offset;
159
                break;
160
            }
161
        }
162
        /* If string not found, register it now */
163
        if (!addr) {
164
            addr = data_string(
165
                d, n->val.string_lit.data, n->val.string_lit.length
166
            );
167
        }
168
        write_le64(out, addr);
169
        write_le64(out, n->val.string_lit.length);
170
    } else {
171
        bail("unsupported slice node %s", node_names[n->cls]);
172
    }
173
}
174
175
static void emit_node_data(
176
    node_t *n, parser_t *p, FILE *out, data_section_t *d
177
) {
178
    /* Handle undefined nodes by emitting zeros for their type size */
179
    if (n->cls == NODE_UNDEF) {
180
        for (i32 i = 0; i < n->type->size; i++) {
181
            fputc(0, out);
182
        }
183
        return;
184
    }
185
    /* Resolve identifiers that reference constants */
186
    if (n->cls == NODE_IDENT && n->sym && n->sym->kind == SYM_CONSTANT) {
187
        emit_node_data(n->sym->node->val.constant.value, p, out, d);
188
        return;
189
    }
190
    switch (n->type->cls) {
191
    case TYPE_ARRAY: {
192
        if (n->cls == NODE_ARRAY_LIT) {
193
            node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems);
194
            for (usize j = 0; j < n->val.array_lit.elems.len; j++) {
195
                emit_node_data(elems[j], p, out, d);
196
            }
197
        } else if (n->cls == NODE_ARRAY_REPEAT_LIT) {
198
            /* Array repeat literal: emit the same value N times */
199
            usize count = n->val.array_repeat_lit.count->val.number.value.u;
200
            for (usize j = 0; j < count; j++) {
201
                emit_node_data(n->val.array_repeat_lit.value, p, out, d);
202
            }
203
        } else {
204
            bail("unsupported array node %s", node_names[n->cls]);
205
        }
206
        break;
207
    }
208
    case TYPE_RECORD: {
209
        /* Emit record fields in order, with proper inter-field padding. */
210
        node_t **fields = nodespan_ptrs(p, n->val.record_lit.fields);
211
        i32      pos    = 0;
212
213
        for (usize i = 0; i < n->val.record_lit.fields.len; i++) {
214
            node_t   *field     = fields[i];
215
            node_t   *field_val = field->val.record_lit_field.value;
216
            symbol_t *field_sym = field->sym;
217
            i32       field_off = field_sym->e.field.offset;
218
219
            /* Emit padding bytes to reach the field's offset. */
220
            while (pos < field_off) {
221
                fputc(0, out);
222
                pos++;
223
            }
224
            emit_node_data(field_val, p, out, d);
225
            pos += field_val->type->size;
226
        }
227
        /* Emit trailing padding to reach the full record size. */
228
        while (pos < n->type->size) {
229
            fputc(0, out);
230
            pos++;
231
        }
232
        break;
233
    }
234
    case TYPE_SLICE:
235
        emit_slice_data(n, out, d);
236
        break;
237
    case TYPE_UNION: {
238
        assert(n->sym);
239
        assert(n->sym->node);
240
        /* For union types, write the tag byte and pad to the type size. */
241
        fputc((u8)n->sym->node->val.union_variant.value, out);
242
        for (i32 i = 1; i < n->type->size; i++) {
243
            fputc(0, out);
244
        }
245
        break;
246
    }
247
    case TYPE_BOOL:
248
        fputc(n->val.bool_lit ? 1 : 0, out);
249
        break;
250
    case TYPE_U8: {
251
        u8 value = (u8)n->val.number.value.u;
252
        if (n->cls == NODE_CHAR)
253
            value = (u8)n->val.char_lit;
254
        fputc(value, out);
255
        break;
256
    }
257
    case TYPE_U16: {
258
        u16 value = (u16)n->val.number.value.u;
259
        fputc(value & 0xFF, out);
260
        fputc((value >> 8) & 0xFF, out);
261
        break;
262
    }
263
    case TYPE_U32:
264
        write_le32(out, n->val.number.value.u);
265
        break;
266
    case TYPE_I8:
267
        fputc((u8)n->val.number.value.i, out);
268
        break;
269
    case TYPE_I16: {
270
        i16 value = (i16)n->val.number.value.i;
271
        fputc(value & 0xFF, out);
272
        fputc((value >> 8) & 0xFF, out);
273
        break;
274
    }
275
    case TYPE_I32:
276
        write_le32(out, n->val.number.value.i);
277
        break;
278
    default:
279
        break;
280
    }
281
282
    /* Add padding to ensure alignment */
283
    usize size    = n->type->size;
284
    usize aligned = align(size, n->type->align);
285
    usize padding = aligned - size;
286
287
    for (usize i = 0; i < padding; i++) {
288
        fputc(0, out);
289
    }
290
}
291
292
/* Helper function to emit array data */
293
static void emit_array_data(
294
    node_t *n, parser_t *p, FILE *out, data_section_t *d
295
) {
296
    /* Emit each array element */
297
    node_t **elems = nodespan_ptrs(p, n->val.array_lit.elems);
298
    for (usize i = 0; i < n->val.array_lit.elems.len; i++) {
299
        emit_node_data(elems[i], p, out, d);
300
    }
301
    /* Padding */
302
    usize aligned = align(n->type->size, n->type->align);
303
    usize padding = aligned - n->type->size;
304
305
    for (usize i = 0; i < padding; i++) {
306
        fputc(0, out);
307
    }
308
}
309
310
/* Emit the data section to the output file */
311
void data_emit_ro(data_section_t *d, FILE *out) {
312
    if (!out || d->ro_size == 0) {
313
        return; /* No data to emit */
314
    }
315
    for (usize i = 0; i < d->nstrings; i++) {
316
        string_data_t *s = &d->strings[i];
317
318
        /* Write string data */
319
        fwrite(s->data, 1, s->length, out);
320
        fputc(0, out); /* NULL terminator */
321
322
        /* Write padding */
323
        u32 padding = (WORD_SIZE - ((s->length + 1) % WORD_SIZE)) % WORD_SIZE;
324
325
        for (usize j = 0; j < padding; j++) {
326
            fputc(0, out);
327
        }
328
    }
329
}
330
331
/* Write the initialized part of the read-write data section to `out`.
 *
 * Items whose offset is below `rw_init_total` are emitted in registration
 * order, with zero bytes inserted so that each item starts at its recorded
 * offset, and the output is zero-padded up to `rw_init_total`. Items at or
 * beyond `rw_init_total` (BSS) are not written to the file. Nothing is
 * written when `out` is NULL or there is no initialized data.
 *
 * Offsets are measured relative to the stream position at entry, so the
 * section may start anywhere in `out`. The stream position must be
 * queryable with ftell(); a failure is reported through bail(). */
void data_emit_rw(data_section_t *d, FILE *out) {
    if (!out || d->rw_init_total == 0) {
        return; /* No initialized data to emit */
    }

    /* Remember where the section starts in the stream. */
    long base = ftell(out);
    if (base < 0) {
        bail("cannot determine data output position (%ld)", base);
        return;
    }

    /* Number of section bytes written so far. */
    usize current = 0;

    for (usize i = 0; i < d->nitems; i++) {
        data_item_t *item = &d->items[i];

        /* Skip BSS items */
        if (item->offset >= d->rw_init_total) {
            continue;
        }

        /* Pad to reach the item's offset */
        while (current < item->offset) {
            fputc(0, out);
            current++;
        }
        if (item->kind == DATA_ARRAY) {
            emit_array_data(item->node, item->parser, out, d);
        } else {
            emit_node_data(item->node, item->parser, out, d);
        }

        /* The emitters do not report how much they wrote; derive it from
         * the stream position, relative to the section start. */
        long pos = ftell(out);
        if (pos < base) {
            bail("cannot determine data output position (%ld)", pos);
            return;
        }
        current = (usize)(pos - base);
    }
    /* Pad to the full initialized size */
    while (current < d->rw_init_total) {
        fputc(0, out);
        current++;
    }
}