gen/emit.c 37.2 KiB raw
1
#include <stdio.h>
2
#include <stdlib.h>
3
4
#include "emit.h"
5
6
/* Split a 32-bit immediate into a 20-bit upper part and a signed 12-bit
 * lower part such that (hi << 12) + sign_extend(lo) reconstructs `imm`. */
void split_imm(i32 imm, i32 *hi, i32 *lo) {
    /* Adding 0x800 before the shift rounds the upper part up whenever
     * the lower 12 bits will sign-extend to a negative value. */
    i32 upper = ((imm + 0x800) >> 12) & 0xFFFFF;
    i32 lower = imm & 0xFFF;

    /* Bit 11 set: consumers (ADDI et al.) sign-extend the low part,
     * so mirror that extension here to keep hi/lo consistent. */
    if (lower & 0x800)
        lower |= ~0xFFF;

    *hi = upper;
    *lo = lower;
}
16
17
/* Load immediate `imm` into register `rd`. Small immediates use a single
 * ADDI from x0; larger ones use the canonical LUI + ADDIW pair. */
void emit_li(gen_t *g, reg_t rd, i32 imm) {
    if (is_small(imm)) {
        emit(g, instr(I_ADDI, rd, 0, 0, imm));
        return;
    }
    i32 hi, lo;
    split_imm(imm, &hi, &lo);

    emit(g, instr(I_LUI, rd, 0, 0, hi));
    /* Always follow with ADDIW: it adds the low part and sign-extends the
     * 32-bit result on RV64, so LUI's upper-bit extension never leaves
     * garbage in bits 63:32. Even when lo == 0 this is required, since
     * ADDIW rd, rd, 0 is exactly SEXT.W. (The previous code emitted the
     * same instruction on both sides of an `if (lo != 0)`; the branch
     * was redundant and has been collapsed.) */
    emit(g, instr(I_ADDIW, rd, rd, 0, lo));
}
36
37
/* Copy `src` into `dst`; a self-move is a no-op and emits nothing. */
void emit_mv(gen_t *g, reg_t dst, reg_t src) {
    if (dst == src)
        return;
    emit(g, instr(I_MV, dst, src, 0, 0));
}
42
43
/* Emit an unconditional jump to instruction index `offset`;
 * returns the index of the emitted jump instruction. */
usize emit_jump(gen_t *g, usize offset) {
    i32 rel = jump_offset(g->ninstrs, offset);
    return emit(g, JMP(rel));
}
46
47
/* Compute hi/lo split for PC-relative offset to target address. */
48
static void pc_rel_offset(gen_t *g, usize addr, i32 *hi, i32 *lo) {
49
    i32 target_addr  = (i32)(addr * INSTR_SIZE);
50
    i32 current_addr = (i32)(g->ninstrs * INSTR_SIZE);
51
    i32 offset       = target_addr - current_addr;
52
    split_imm(offset, hi, lo);
53
}
54
55
/* Materialize the address of instruction index `addr` into `rd` using
 * the standard AUIPC + ADDI pc-relative addressing pair. */
void emit_pc_rel_addr(gen_t *g, reg_t rd, usize addr) {
    i32 hi, lo;
    pc_rel_offset(g, addr, &hi, &lo);

    emit(g, AUIPC(rd, hi));
    emit(g, ADDI(rd, rd, lo));
}
61
62
/* Call a target outside JAL range: AUIPC + JALR through `scratch`.
 * Returns the index of the AUIPC instruction (the call's anchor). */
static usize emit_call_far(gen_t *g, usize addr, reg_t scratch) {
    i32 hi, lo;
    pc_rel_offset(g, addr, &hi, &lo);

    usize anchor = emit(g, AUIPC(scratch, hi));
    emit(g, JALR(RA, scratch, lo));
    return anchor;
}
69
70
/* Emit a call to instruction index `addr`. Prefers a single JAL when the
 * target is in range; otherwise falls back to the far-call sequence.
 * Returns the index of the first instruction of the call. */
usize emit_call(gen_t *g, usize addr) {
    i32 rel = jump_offset(g->ninstrs, addr);
    if (is_jump_imm(rel)) {
        return emit(g, JAL(RA, rel));
    }

    /* Out of range: route through a scratch register. */
    reg_t scratch = nextreg(g);
    usize pc      = emit_call_far(g, addr, scratch);
    freereg(g, scratch);
    return pc;
}
80
81
/* Copy a record field-by-field so that nested compound fields get their
 * own type-aware copy logic. */
void emit_record_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
    usize nfields = ty->info.srt.nfields;

    for (usize i = 0; i < nfields; i++) {
        symbol_t *field = ty->info.srt.fields[i];
        i32       off   = field->e.field.offset;

        emit_memcopy(
            g,
            OFFSET(src.base, src.offset + off),
            OFFSET(dst.base, dst.offset + off),
            field->e.field.typ
        );
    }
}
92
93
/* Derive a value describing the member at byte offset `off` inside the
 * compound value `sval`. Aborts on locations that cannot hold a record
 * (immediates, none). */
static value_t emit_field_get(value_t sval, i32 off, type_t *typ) {
    switch (sval.loc) {
    case LOC_REG:
        /* The register holds the record's base address. */
        return value_stack(OFFSET(sval.as.reg, off), typ);
    case LOC_STACK: {
        offset_t at = OFFSET(sval.as.off.base, sval.as.off.offset + off);
        return value_stack(at, typ);
    }
    case LOC_ADDR:
        return value_addr(sval.as.adr.base, sval.as.adr.offset + off, typ);
    case LOC_NONE:
    case LOC_IMM:
        break;
    }
    abort();
}
109
110
/* RISC-V load/store immediates are limited to signed 12 bits. This helper folds
111
 * large displacements into a temporary register so the generated instruction
112
 * still uses the small-immediate forms, keeping the addressing logic in callers
113
 * simple. */
114
static addr_adj_t adjust_addr_avoid(
115
    gen_t *g, reg_t base, i32 *offset, reg_t avoid
116
) {
117
    if (is_small(*offset))
118
        return (addr_adj_t){ base, false };
119
120
    reg_t tmp = avoid ? nextreg_except(g, avoid) : nextreg(g);
121
122
    emit_li(g, tmp, *offset);
123
    emit(g, ADD(tmp, base, tmp));
124
    *offset = 0;
125
126
    return (addr_adj_t){ tmp, true };
127
}
128
129
/* Fold a large displacement with no register to protect. */
static addr_adj_t adjust_addr(gen_t *g, reg_t base, i32 *offset) {
    return adjust_addr_avoid(g, base, offset, 0);
}
132
133
/* Release any temporary register created by `adjust_addr`. */
134
static void release_addr(gen_t *g, addr_adj_t adj) {
135
    if (adj.temp)
136
        freereg(g, adj.base);
137
}
138
139
/* dst = base + offset. Offsets outside ADDI's 12-bit range are first
 * materialized in a temporary register and added with ADD. */
void emit_addr_offset(gen_t *g, reg_t dst, reg_t base, i32 offset) {
    if (is_small(offset)) {
        emit(g, ADDI(dst, base, offset));
    } else {
        reg_t tmp = nextreg(g);
        emit_li(g, tmp, offset);
        emit(g, ADD(dst, base, tmp));
        freereg(g, tmp);
    }
}
149
150
value_t emit_slice_lit(
151
    gen_t *g, i32 offset, usize ptr, usize len, type_t *typ
152
) {
153
    static type_t ptr_type = { .cls = TYPE_PTR };
154
    imm_t         imm_ptr  = (imm_t){ .u = ptr }; /* Slice pointer */
155
    imm_t         imm_len  = (imm_t){ .u = len }; /* Slice length */
156
157
    emit_store(
158
        g, value_imm(imm_ptr, &ptr_type), FP, offset + SLICE_FIELD_PTR_OFFSET
159
    );
160
    emit_store(
161
        g, value_imm(imm_len, &ptr_type), FP, offset + SLICE_FIELD_LEN_OFFSET
162
    );
163
    return value_stack(OFFSET(FP, offset), typ);
164
}
165
166
/* Resolve the value of `field` within the record value `sval`.
 * NOTE(review): the field type is taken from field->node->type here,
 * while other paths use field->e.field.typ — confirm these always agree. */
value_t emit_record_field_get(value_t sval, symbol_t *field) {
    return emit_field_get(sval, field->e.field.offset, field->node->type);
}
172
173
void emit_record_field_set(
174
    gen_t *g, value_t val, reg_t base, i32 record_offset, symbol_t *field
175
) {
176
    i32 field_offset  = field->e.field.offset;
177
    i32 target_offset = record_offset + field_offset;
178
179
    value_t dest = value_stack(OFFSET(base, target_offset), field->e.field.typ);
180
181
    emit_replace(g, dest, val);
182
}
183
184
/* Zero `size` bytes starting at dst (base register + byte offset).
 * The bulk is cleared with a word-granular (8-byte SD) loop; the tail is
 * finished with at most one SW, one SH, and one SB. */
void emit_memzero(gen_t *g, offset_t dst, i32 size) {
    if (size == 0) /* Nothing to do for zero-sized regions */
        return;

    /* Cursor register walks through the region being cleared. */
    reg_t cursor = nextreg(g);
    emit_addr_offset(g, cursor, dst.base, dst.offset);

    /* Calculate word-aligned size and remainder */
    /* assumes align_stack rounds down for positive sizes, so remainder
     * lands in [0, WORD_SIZE) — TODO confirm against align_stack */
    i32 aligned_size = align_stack(size, WORD_SIZE);
    i32 remainder    = size - aligned_size;

    reg_t end = ZERO;

    /* Only use the word-based loop if we have at least one complete word */
    if (aligned_size > 0) {
        end = nextreg(g);
        emit_addr_offset(g, end, cursor, aligned_size);

        usize loop_start = g->ninstrs;
        /* NOP placeholder; patched below once the loop-exit target is known. */
        usize branch_end = emit(g, NOP);

        /* Store zero to current address and increment by word size */
        emit(g, SD(ZERO, cursor, 0));
        emit(g, ADDI(cursor, cursor, WORD_SIZE));
        emit(g, JMP(jump_offset(g->ninstrs, loop_start))); /* Loop back */

        /* Patch the placeholder: exit the loop once cursor >= end. */
        g->instrs[branch_end] =
            BGE(cursor, end, jump_offset(branch_end, g->ninstrs));
    }

    /* At least four bytes left */
    if (remainder >= 4) { /* Store a word (4 bytes) */
        emit(g, SW(ZERO, cursor, 0));
        emit(g, ADDI(cursor, cursor, 4));
        remainder -= 4;
    }
    /* At least two bytes left */
    if (remainder >= 2) { /* Store a halfword (2 bytes) */
        emit(g, SH(ZERO, cursor, 0));
        emit(g, ADDI(cursor, cursor, 2));
        remainder -= 2;
    }
    /* One byte left */
    if (remainder == 1) {
        emit(g, SB(ZERO, cursor, 0));
    }
    freereg(g, cursor);
    if (aligned_size > 0)
        freereg(g, end);
}
234
235
/* Assign `new` into the storage currently described by `old`.
 * Tagged destinations (optionals, results) accept either a like-tagged
 * source (byte copy) or a coercible payload (T -> ?T, payload/err -> result).
 * Plain destinations dispatch on their storage location. Afterwards the
 * source register is freed when it was only a temporary. */
void emit_replace(gen_t *g, value_t old, value_t new) {
    if (old.type->cls == TYPE_OPT) {
        if (new.type->cls == TYPE_OPT) {
            /* Optional-to-optional: copy tag and payload wholesale. */
            switch (old.loc) {
            case LOC_STACK:
                emit_memcopy(g, new.as.off, old.as.off, old.type);
                break;
            case LOC_ADDR: {
                /* Handle assignment to LOC_ADDR optional */
                reg_t base = nextreg(g);
                emit_li(g, base, old.as.adr.base);
                emit_store(g, new, base, old.as.adr.offset);
                freereg(g, base);
                break;
            }
            default:
                bail(
                    "can't replace tagged value with storage location %d",
                    old.loc
                );
            }
        } else if (new.type->cls == old.type->info.opt.elem->cls) {
            /* T -> ?T coercion: create some value */
            tval_store(g, old, new, 1);
        } else {
            bail(
                "cannot assign %s to %s; type mismatch",
                type_names[new.type->cls],
                type_names[old.type->cls]
            );
        }
    } else if (old.type->cls == TYPE_RESULT) {
        type_t *payload = old.type->info.res.payload;
        type_t *err     = old.type->info.res.err;

        if (new.type->cls == TYPE_RESULT) {
            /* Result-to-result: copy tag and payload wholesale. */
            switch (old.loc) {
            case LOC_STACK:
                emit_memcopy(g, new.as.off, old.as.off, old.type);
                break;
            case LOC_ADDR: {
                /* Handle assignment to LOC_ADDR result */
                reg_t base = nextreg(g);
                emit_li(g, base, old.as.adr.base);
                emit_store(g, new, base, old.as.adr.offset);
                freereg(g, base);
                break;
            }
            default:
                bail(
                    "can't replace tagged value with storage location %d",
                    old.loc
                );
            }
        } else if (new.type == payload) {
            /* Success-payload coercion into the result slot. */
            emit_result_store_success(g, old, new);
        } else if (new.type == err) {
            /* Error coercion into the result slot. */
            emit_result_store_error(g, old, new);
        } else {
            bail(
                "cannot assign %s to %s; type mismatch",
                type_names[new.type->cls],
                type_names[old.type->cls]
            );
        }
    } else {
        /* Non-optional assignments (original logic) */
        switch (old.loc) {
        case LOC_REG:
            /* Load the new value directly into the register of
             * the old value. */
            emit_load_into(g, old.as.reg, new);
            break;
        case LOC_STACK:
            emit_store(g, new, old.as.off.base, old.as.off.offset);
            break;
        case LOC_ADDR: {
            /* NOTE(review): this path wraps nextreg in usereg, unlike the
             * tagged LOC_ADDR paths above — confirm whether they should
             * match. */
            reg_t base = usereg(g, nextreg(g));
            emit_li(g, base, old.as.adr.base);
            emit_store(g, new, base, old.as.adr.offset);
            freereg(g, base);
            break;
        }
        default:
            bail("can't replace variable with storage location %d", old.loc);
        }
    }

    /* Free the new location and update the value, since we don't
     * need two copies of the value. Only free temporaries so we don't
     * invalidate live values that are intentionally kept in registers
     * (eg. function parameters). */
    if (new.loc == LOC_REG && new.temp) {
        freereg(g, new.as.reg);
    }
}
331
332
/* Copy an array element-by-element so nested compound elements receive
 * type-aware copies. */
void emit_array_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
    type_t *elem = ty->info.ary.elem;

    for (usize i = 0; i < ty->info.ary.length; i++) {
        i32 off = (i32)(i * elem->size);

        emit_memcopy(
            g,
            OFFSET(src.base, src.offset + off),
            OFFSET(dst.base, dst.offset + off),
            elem
        );
    }
}
344
345
/* Copy single value between offsets, via register */
346
static void emit_offset_copy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
347
    reg_t rs = emit_load(g, value_stack(src, ty));
348
    emit_regstore(g, rs, dst.base, dst.offset, ty);
349
    freereg(g, rs);
350
}
351
352
/* Copy a full machine word (WORD_SIZE bytes) using LD/SD. */
353
static void emit_dword_copy(gen_t *g, offset_t src, offset_t dst) {
354
    reg_t      tmp     = nextreg(g);
355
    i32        src_off = src.offset;
356
    i32        dst_off = dst.offset;
357
    addr_adj_t src_adj = adjust_addr(g, src.base, &src_off);
358
    emit(g, LD(tmp, src_adj.base, src_off));
359
    release_addr(g, src_adj);
360
    addr_adj_t dst_adj = adjust_addr(g, dst.base, &dst_off);
361
    emit(g, SD(tmp, dst_adj.base, dst_off));
362
    release_addr(g, dst_adj);
363
    freereg(g, tmp);
364
}
365
366
/* Copy tagged values (optional and payload unions).
 * Copies the 1-byte tag, zeroes any padding between tag and payload, then
 * copies the payload: recursively via emit_memcopy when `value_type` is
 * known (optionals), or as raw 8/4/2/1-byte chunks when it is NULL
 * (payload unions). `size` is the payload byte count and `val_offset` the
 * payload's offset from the start of the tagged value. */
static void emit_tval_copy(
    gen_t   *g,
    offset_t src,
    offset_t dst,
    usize    size,
    i32      val_offset,
    type_t  *value_type
) {
    /* Copy tag byte */
    emit_offset_copy(g, src, dst, g->types->type_u8);

    /* Zero padding between tag (1 byte) and payload start, so that
     * byte-level equality comparisons of tagged values work correctly
     * even when the destination was previously uninitialized. */
    if (val_offset > TAG_SIZE) {
        emit_memzero(
            g, OFFSET(dst.base, dst.offset + TAG_SIZE), val_offset - TAG_SIZE
        );
    }

    if (size == 0)
        return; /* Tag-only value: no payload to copy. */

    offset_t val_src = OFFSET(src.base, src.offset + val_offset);
    offset_t val_dst = OFFSET(dst.base, dst.offset + val_offset);

    if (value_type) {
        /* Use recursive memcopy for typed data (optionals) */
        emit_memcopy(g, val_src, val_dst, value_type);
        return;
    }
    /* Copy raw bytes for untyped data (payload unions) */
    usize copied = 0;

    /* Copy whole dwords (8 bytes) */
    while (copied + WORD_SIZE <= size) {
        emit_dword_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied)
        );
        copied += WORD_SIZE;
    }
    /* Copy remaining word (4 bytes) if present */
    if (size - copied >= 4) {
        emit_offset_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied),
            g->types->type_i32
        );
        copied += 4;
    }
    /* Copy remaining halfword if present */
    if (size - copied >= 2) {
        emit_offset_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied),
            g->types->type_u16
        );
        copied += 2;
    }
    /* Copy remaining byte if present */
    if (size - copied == 1) {
        emit_offset_copy(
            g,
            OFFSET(val_src.base, val_src.offset + (i32)copied),
            OFFSET(val_dst.base, val_dst.offset + (i32)copied),
            g->types->type_u8
        );
    }
}
440
441
/* Type-aware copy between two memory regions. Compound types dispatch to
 * their specialized copiers; primitives fall through to a single register
 * round-trip. Copying a location onto itself emits nothing. */
void emit_memcopy(gen_t *g, offset_t src, offset_t dst, type_t *ty) {
    if (src.base == dst.base && src.offset == dst.offset)
        return; /* Source and destination alias exactly. */

    switch (ty->cls) {
    case TYPE_RECORD:
        emit_record_copy(g, src, dst, ty);
        return;
    case TYPE_ARRAY:
        emit_array_copy(g, src, dst, ty);
        return;
    case TYPE_OPT: {
        /* Optionals: tag byte plus a typed payload. */
        type_t *elem = ty->info.opt.elem;
        emit_tval_copy(
            g, src, dst, elem->size, align(TAG_SIZE, elem->align), elem
        );
        return;
    }
    case TYPE_UNION:
        if (ty->info.uni.has_payload) {
            /* Copy the full payload area including alignment padding,
             * so that byte-level equality comparisons work correctly. */
            i32 val_off = align(TAG_SIZE, ty->align);
            emit_tval_copy(g, src, dst, ty->size - val_off, val_off, NULL);
            return;
        }
        break; /* Tag-only union: copy like its base primitive. */
    case TYPE_SLICE:
        /* Slices are two dwords: data pointer followed by length. */
        emit_dword_copy(g, src, dst);
        emit_dword_copy(
            g,
            OFFSET(src.base, src.offset + WORD_SIZE),
            OFFSET(dst.base, dst.offset + WORD_SIZE)
        );
        return;
    case TYPE_RESULT:
        bail("result types are never materialized");
    default:
        break;
    }
    /* For primitive types, just copy via a register. */
    emit_offset_copy(g, src, dst, ty);
}
489
490
/* Store the value `v` into memory at base+offset and return a stack value
 * describing the new location. Compound values are copied byte-wise;
 * scalar values go through a register store. */
value_t emit_store(gen_t *g, value_t v, reg_t base, int offset) {
    switch (v.loc) {
    case LOC_IMM: {
        /* Materialize the immediate, store it, release the register. */
        reg_t tmp = nextreg(g);
        emit_load_into(g, tmp, v);
        emit_regstore(g, tmp, base, offset, v.type);
        freereg(g, tmp);
        break;
    }
    case LOC_REG:
        if (type_is_passed_by_ref(v.type)) {
            /* The register holds the address of a compound value. */
            emit_memcopy(g, OFFSET(v.as.reg, 0), OFFSET(base, offset), v.type);
        } else {
            emit_regstore(g, v.as.reg, base, offset, v.type);
        }
        break;
    case LOC_STACK:
        emit_memcopy(g, v.as.off, OFFSET(base, offset), v.type);
        break;
    case LOC_ADDR: {
        /* Copy from data section into stack */
        reg_t addr = nextreg(g);
        emit_li(g, addr, v.as.adr.base);
        emit_memcopy(
            g, OFFSET(addr, v.as.adr.offset), OFFSET(base, offset), v.type
        );
        freereg(g, addr);
        break;
    }
    case LOC_NONE:
        break;
    }
    return value_stack(OFFSET(base, offset), v.type);
}
527
528
/* Load `v` into a register. A temporary already held in a register is
 * returned as-is; anything else is loaded into a fresh register. */
reg_t emit_load(gen_t *g, value_t v) {
    bool reusable = (v.loc == LOC_REG) && v.temp;
    return reusable ? v.as.reg : emit_load_into(g, nextreg(g), v);
}
535
536
/* Load a full machine dword (WORD_SIZE = 8 bytes) from a value. */
537
reg_t emit_load_dword(gen_t *g, value_t v) {
538
    /* Use TYPE_PTR to trigger LD (8-byte load). */
539
    type_t ptr_type = { .cls = TYPE_PTR };
540
    return emit_load(
541
        g, (value_t){ .loc = v.loc, .as = v.as, .type = &ptr_type }
542
    );
543
}
544
545
/* Load the dword located `offset` bytes past the address described by `v`
 * into a fresh register and return it. Only address-like locations are
 * supported; immediates and LOC_NONE abort. */
reg_t emit_load_offset(gen_t *g, value_t v, i32 offset) {
    reg_t rd = nextreg(g);
    switch (v.loc) {
    case LOC_REG: {
        /* Fold large offsets into a temporary, mirroring the LOC_STACK
         * path. Previously a non-small `offset` was passed straight to
         * LD here, exceeding the signed 12-bit immediate range. */
        i32        off = offset;
        addr_adj_t adj = adjust_addr(g, v.as.reg, &off);
        emit(g, LD(rd, adj.base, off));
        release_addr(g, adj);
        break;
    }
    case LOC_STACK: {
        i32        combined_offset = v.as.off.offset + offset;
        addr_adj_t adj = adjust_addr(g, v.as.off.base, &combined_offset);
        emit(g, LD(rd, adj.base, combined_offset));
        release_addr(g, adj);
        break;
    }
    case LOC_ADDR: {
        /* Materialize the absolute base address first. */
        reg_t base = nextreg(g);
        emit_li(g, base, v.as.adr.base);
        i32        combined_offset = v.as.adr.offset + offset;
        addr_adj_t adj             = adjust_addr(g, base, &combined_offset);
        emit(g, LD(rd, adj.base, combined_offset));
        release_addr(g, adj);
        freereg(g, base);
        break;
    }
    case LOC_IMM:
    case LOC_NONE:
        abort(); /* No memory to load from. */
    }
    return rd;
}
574
575
/* Reserve fresh stack space for `v`'s type and store the value there.
 * Each variable gets its own slot — space is never shared. */
value_t emit_push(gen_t *g, value_t v) {
    int slot = reserve(g, v.type);
    return emit_store(g, v, FP, slot);
}
581
582
/* Compute the address of element `index` inside `array_val` (array, slice,
 * or pointer to either). Slices get a bounds check that traps via EBREAK.
 * With ref == true, returns a value aliasing the element in place;
 * otherwise the element is copied to a fresh stack slot. */
value_t emit_array_index(gen_t *g, value_t array_val, value_t index, bool ref) {
    reg_t   elem_siz    = nextreg(g);
    reg_t   data_adr    = ZERO; /* Slice data pointer, if any. */
    reg_t   base_reg    = ZERO;
    reg_t   base_alloc  = ZERO; /* Register we allocated and must free. */
    i32     base_offset = 0;
    type_t *elem_type;
    type_t *arr_type = array_val.type;
    /* See through a pointer to the underlying array/slice type. */
    if (arr_type->cls == TYPE_PTR) {
        arr_type = arr_type->info.ptr.target;
    }

    /* Handle different storage locations */
    if (array_val.type->cls == TYPE_PTR) {
        /* Dereference pointers up front to get the actual base address. */
        base_reg    = emit_load_dword(g, array_val);
        base_offset = 0;
    } else if (array_val.loc == LOC_REG) {
        base_reg    = array_val.as.reg;
        base_offset = 0;
    } else if (array_val.loc == LOC_STACK) {
        base_reg    = array_val.as.off.base;
        base_offset = array_val.as.off.offset;
    } else if (array_val.loc == LOC_ADDR) {
        /* For constants in the data section, load the address but don't
         * dereference it. This way we get the actual array base address for
         * indexing. */
        base_reg = nextreg(g);
        emit_li(g, base_reg, array_val.as.adr.base);
        base_offset = array_val.as.adr.offset;
        base_alloc  = base_reg;
    } else {
        bail("cannot index array/slice at this location");
    }
    /* Load index into a register. Will hold final output */
    reg_t rd = emit_load(g, index);

    if (arr_type->cls == TYPE_SLICE) {
        /* Adjust base_offset for large offsets before loading slice fields */
        i32        ptr_offset = base_offset;
        addr_adj_t adj        = adjust_addr(g, base_reg, &ptr_offset);

        /* Load data pointer (first dword of slice) */
        /* and use it as our new base. */
        data_adr = nextreg(g);
        emit(g, LD(data_adr, adj.base, ptr_offset));

        /* Load slice length (second dword of slice) for bounds checking */
        reg_t len = nextreg(g);
        emit(g, LD(len, adj.base, ptr_offset + WORD_SIZE));

        release_addr(g, adj);

        /* Bounds check: if index >= length, emit EBREAK */
        /* Skip EBREAK if index < length (jump 2 instructions) */
        emit(g, BLTU(rd, len, INSTR_SIZE * 2));
        emit(g, EBREAK);

        freereg(g, len);

        base_reg    = data_adr;
        base_offset = 0;
        elem_type   = arr_type->info.slc.elem;
    } else {
        elem_type = arr_type->info.ary.elem;
    }

    /* Get element size */
    /* rd = base + index * sizeof(elem): scale, then add the base. */
    emit_li(g, elem_siz, elem_type->size);
    emit(g, MUL(rd, rd, elem_siz)); /* Relative offset. */
    emit(g, ADD(rd, rd, base_reg));

    freereg(g, elem_siz);
    freereg(g, data_adr);
    if (base_alloc)
        freereg(g, base_alloc);

    /* Fold a large remaining displacement into rd so downstream
     * loads/stores can use small immediates. */
    if (base_offset != 0 && !is_small(base_offset)) {
        emit_addr_offset(g, rd, rd, base_offset);
        base_offset = 0;
    }

    if (ref) {
        return value_stack(OFFSET(rd, base_offset), elem_type);
    } else {
        /* Reserve space on stack for the element */
        i32 stack_offset = reserve(g, elem_type);

        /* Copy element from array to stack using memcopy */
        offset_t src = OFFSET(rd, base_offset);  /* Source: element in array */
        offset_t dst = OFFSET(FP, stack_offset); /* Destination: stack */
        emit_memcopy(g, src, dst, elem_type);

        freereg(g, rd);

        /* Return a stack-based value pointing to the array element. */
        return value_stack(dst, elem_type);
    }
}
681
682
/* Store register `src` to base+offset using the store width appropriate
 * for `ty`. Returns the instruction index of the emitted store.
 * Large offsets are folded via adjust_addr_avoid, protecting `src` from
 * being reused as the address temporary. */
usize emit_regstore(gen_t *g, reg_t src, reg_t base, i32 offset, type_t *ty) {
    reg_t      orig_base   = base;
    i32        orig_offset = offset;
    addr_adj_t adj         = adjust_addr_avoid(g, base, &offset, src);
    reg_t      addr        = adj.base;
    usize      idx         = 0;

    switch (ty->cls) {
    case TYPE_BOOL:
    case TYPE_I8:
    case TYPE_U8:
        idx = emit(g, SB(src, addr, offset));
        break;
    case TYPE_I16:
    case TYPE_U16:
        idx = emit(g, SH(src, addr, offset));
        break;
    case TYPE_I32:
    case TYPE_U32:
        idx = emit(g, SW(src, addr, offset));
        break;
    case TYPE_PTR: /* References are pointers, so store as a dword. */
    case TYPE_FN:  /* Function pointers are addresses, so store as a dword. */
        idx = emit(g, SD(src, addr, offset));
        break;
    case TYPE_UNION:
        if (ty->info.uni.has_payload) {
            /* Tag is 1 byte. */
            idx = emit(g, SB(src, addr, offset));
            break;
        }
        /* Tag-only unions store as their base type; retry with the
         * original base/offset so the adjustment is redone cleanly. */
        release_addr(g, adj);
        return emit_regstore(g, src, orig_base, orig_offset, ty->info.uni.base);
    case TYPE_ARRAY:
    case TYPE_RECORD:
    case TYPE_OPT:
        /* Structs, arrays, optional types are stored by reference, so
         * just store the address (pointer). */
        idx = emit(g, SD(src, addr, offset));
        break;
    case TYPE_SLICE:
        release_addr(g, adj);
        bail("storing slices via register store is unsupported");
    default:
        bail("storing unsupported type `%s`", type_names[ty->cls]);
    }
    release_addr(g, adj);

    return idx;
}
732
733
/* Store the tag byte held in `tag_reg` into the tag slot of `tv`. */
void emit_store_tag(gen_t *g, tval_t tv, reg_t tag_reg) {
    i32        disp = tv.tag.as.off.offset;
    addr_adj_t adj  = adjust_addr(g, tv.tag.as.off.base, &disp);

    emit(g, SB(tag_reg, adj.base, disp));
    release_addr(g, adj);
}
740
741
/* Load the value at base+offset into register `dst` using the load width
 * (and sign/zero extension) appropriate for `ty`. Reference-like compound
 * types load the address instead of the contents. Returns the instruction
 * index of the emitted load. */
usize emit_regload(gen_t *g, reg_t dst, reg_t base, i32 offset, type_t *ty) {
    reg_t      orig_base   = base;
    i32        orig_offset = offset;
    addr_adj_t adj         = adjust_addr(g, base, &offset);
    reg_t      addr        = adj.base;
    usize      idx         = 0;

    switch (ty->cls) {
    case TYPE_BOOL:
    case TYPE_U8:
        /* Unsigned byte: zero-extend. */
        idx = emit(g, LBU(dst, addr, offset));
        break;
    case TYPE_I8:
        /* Signed byte: sign-extend. */
        idx = emit(g, LB(dst, addr, offset));
        break;
    case TYPE_U16:
        idx = emit(g, LHU(dst, addr, offset));
        break;
    case TYPE_I16:
        idx = emit(g, LH(dst, addr, offset));
        break;
    case TYPE_I32:
        idx = emit(g, LW(dst, addr, offset));
        break;
    case TYPE_U32:
        idx = emit(g, LWU(dst, addr, offset));
        break;
    case TYPE_PTR: /* Raw pointer values occupy one 64-bit dword. */
    case TYPE_FN:  /* Function pointers are addresses, so load as a dword. */
        idx = emit(g, LD(dst, addr, offset));
        break;
    case TYPE_UNION:
        if (ty->info.uni.has_payload) {
            /* Payload unions are handled by address. */
            idx = emit(g, ADDI(dst, addr, offset));
            break;
        }
        /* Tag-only unions load as their base type; retry with the
         * original base/offset so the adjustment is redone cleanly. */
        release_addr(g, adj);
        return emit_regload(g, dst, orig_base, orig_offset, ty->info.uni.base);
    case TYPE_ARRAY:
    case TYPE_RECORD:
    case TYPE_SLICE:
    case TYPE_OPT:
        /* For records, arrays, optional types, we load the address in the
         * register. */
        idx = emit(g, ADDI(dst, addr, offset));
        break;
    default:
        release_addr(g, adj);
        bail("loading unsupported type `%s`", type_names[ty->cls]);
    }
    release_addr(g, adj);

    return idx;
}
795
796
/* Spill register `src` into a freshly reserved stack slot for type `ty`;
 * returns the frame offset of the slot. */
int emit_regpush(gen_t *g, reg_t src, type_t *ty) {
    int slot = reserve(g, ty);
    emit_regstore(g, src, FP, slot, ty);
    return slot;
}
803
804
/* Reserve `ty->size` bytes on the current function's frame, aligned to
 * `align` bytes. Returns the (negative, FP-relative) offset of the slot.
 * Non-packed types have their slot zeroed. */
i32 reserve_aligned(gen_t *g, type_t *ty, i32 align) {
    frame_t *frame = &g->fn.current->e.fn.frame;

    /* Zero-sized types (e.g. empty arrays) don't need stack space. */
    if (ty->size == 0) {
        return frame->sp;
    }
    /* Stack grows downward: move sp down by the size, then align. */
    frame->sp = align_stack(frame->sp - ty->size, align);

    if (-frame->sp >= MAX_FRAME_SIZE)
        bail("stack frame overflow");
    /* sp must stay non-positive; a positive sp means bookkeeping broke. */
    if (-frame->sp < 0)
        bail("stack frame underflow");

    /* Track the high-water mark for the prologue's frame allocation. */
    if (-frame->sp > frame->size)
        frame->size = -frame->sp;

    /* Zero memory for non-packed types to ensure clean initialization.
     * Packed types are skipped as they are densely packed without padding. */
    if (!type_is_packed(ty)) {
        emit_memzero(g, OFFSET(FP, frame->sp), ty->size);
    }

    return frame->sp;
}
829
830
/* Load value `src` into the specific register `dst` and return `dst`.
 * Immediates are materialized, stack/addr locations are loaded (or their
 * address taken for by-reference types), and register sources are moved. */
reg_t emit_load_into(gen_t *g, reg_t dst, value_t src) {
    switch (src.loc) {
    case LOC_IMM:
        /* Pick the immediate field matching the value's type class. */
        switch (src.type->cls) {
        case TYPE_UNION: /* Unions default to i32 base type. */
        case TYPE_I8:
        case TYPE_I16:
        case TYPE_I32:
            emit_li(g, dst, src.as.imm.i);
            break;
        case TYPE_U8:
        case TYPE_U16:
        case TYPE_U32:
        case TYPE_PTR:
        case TYPE_FN:
            emit_li(g, dst, src.as.imm.u);
            break;
        case TYPE_BOOL:
            emit_li(g, dst, src.as.imm.b);
            break;
        default:
            bail("unsupported type `%s`", type_names[src.type->cls]);
        }
        break;
    case LOC_STACK:
        /* For types passed by reference, load the address
         * instead of the value. */
        if (type_is_passed_by_ref(src.type)) {
            i32        off = src.as.off.offset;
            addr_adj_t adj = adjust_addr(g, src.as.off.base, &off);
            emit(g, ADDI(dst, adj.base, off));
            release_addr(g, adj);
        } else {
            emit_regload(g, dst, src.as.off.base, src.as.off.offset, src.type);
        }
        break;
    case LOC_REG: {
        reg_t rs = src.as.reg;
        if (rs == dst) {
            /* Already in place: emit nothing. */
            break;
        }
        /* Free the source register before the move when it was only a
         * temporary, so it becomes available again immediately. */
        if (src.temp)
            freereg(g, rs);

        emit(g, MV(dst, rs));
        break;
    }
    case LOC_ADDR: {
        /* Start by loading the address into the register */
        emit_li(g, dst, src.as.adr.base);

        /* For non-compound types, we need to load the value from the address.
         * For compound types, we keep the address itself. */
        if (!type_is_passed_by_ref(src.type)) {
            emit_regload(g, dst, dst, src.as.adr.offset, src.type);
        } else {
            /* For compound types passed by reference, add the offset to get
             * the actual address. */
            if (src.as.adr.offset != 0) {
                emit(g, ADDI(dst, dst, src.as.adr.offset));
            }
        }
        break;
    }
    case LOC_NONE:
        break;
    }
    return dst;
}
899
900
/* Compare values at two memory addresses and accumulate result.
901
 * Loads values from memory, compares them, and ANDs the comparison result
902
 * with the accumulating result register. */
903
static void emit_cmp_step(
904
    gen_t  *g,
905
    reg_t   left_val,   /* Register to hold left value during comparison */
906
    reg_t   right_val,  /* Register to hold right value during comparison */
907
    reg_t   left_addr,  /* Base address register for left operand */
908
    reg_t   right_addr, /* Base address register for right operand */
909
    usize   offset,     /* Byte offset from base addresses to load from */
910
    reg_t   result,     /* Register that accumulates comparison results */
911
    type_t *val_typ     /* Type information for loading value */
912
) {
913
    /* Load values from both memory addresses at the given offset */
914
    emit_regload(g, left_val, left_addr, offset, val_typ);
915
    emit_regload(g, right_val, right_addr, offset, val_typ);
916
917
    /* XOR the two loaded values: left_val = left_val ^ right_val
918
     * If values are equal, result will be 0 (equal values XOR to 0)
919
     * If values differ, result will be non-zero */
920
    emit(g, XOR(left_val, left_val, right_val));
921
    /* Convert XOR result to 1 (equal) or 0 (not equal) */
922
    emit(g, SLTIU(left_val, left_val, 1));
923
    /* Accumulate the result with a previous result.
924
     * If any comparison fails, the final result becomes 0 */
925
    emit(g, AND(result, result, left_val));
926
}
927
928
/* Compare raw bytes at two memory addresses.
929
 * Sets result = 1 if all bytes match. */
930
void emit_bytes_equal(
931
    gen_t *g, reg_t left, reg_t right, usize size, reg_t result
932
) {
933
    /* Start assuming they're equal */
934
    emit_li(g, result, 1);
935
936
    if (size == 0)
937
        return; /* Zero bytes are always equal */
938
939
    reg_t left_val  = nextreg(g);
940
    reg_t right_val = nextreg(g);
941
942
    /* Compare dword by dword (8 bytes) */
943
    usize i, remaining = size;
944
945
    for (i = 0; i + WORD_SIZE <= size; i += WORD_SIZE) {
946
        /* Load 8-byte dwords directly with LD */
947
        i32        off_l = (i32)i, off_r = (i32)i;
948
        addr_adj_t adj_l = adjust_addr(g, left, &off_l);
949
        emit(g, LD(left_val, adj_l.base, off_l));
950
        release_addr(g, adj_l);
951
        addr_adj_t adj_r = adjust_addr(g, right, &off_r);
952
        emit(g, LD(right_val, adj_r.base, off_r));
953
        release_addr(g, adj_r);
954
        emit(g, XOR(left_val, left_val, right_val));
955
        emit(g, SLTIU(left_val, left_val, 1));
956
        emit(g, AND(result, result, left_val));
957
    }
958
    remaining -= i;
959
960
    if (remaining >= 4) {
961
        emit_cmp_step(
962
            g, left_val, right_val, left, right, i, result, g->types->type_u32
963
        );
964
        i         += 4;
965
        remaining -= 4;
966
    }
967
968
    if (remaining >= 2) {
969
        emit_cmp_step(
970
            g, left_val, right_val, left, right, i, result, g->types->type_u16
971
        );
972
        i         += 2;
973
        remaining -= 2;
974
    }
975
    if (remaining == 1) {
976
        emit_cmp_step(
977
            g, left_val, right_val, left, right, i, result, g->types->type_u8
978
        );
979
    }
980
    freereg(g, left_val);
981
    freereg(g, right_val);
982
}
983
984
void emit_memequal(
985
    gen_t *g, reg_t left, reg_t right, type_t *ty, reg_t result
986
) {
987
    switch (ty->cls) {
988
    case TYPE_OPT: { /* For optional types, compare tag and value */
989
        reg_t left_tag  = nextreg(g);
990
        reg_t right_tag = nextreg(g);
991
992
        /* Load tags (first byte) */
993
        emit(g, LBU(left_tag, left, 0));
994
        emit(g, LBU(right_tag, right, 0));
995
996
        /* Compare tags directly - if different, optionals are not equal */
997
        emit_li(g, result, 0); /* Assume not equal */
998
        usize jump_to_end = emit(g, NOP);
999
1000
        /* If both are nil (tag == 0), they're equal */
1001
        emit_li(g, result, 1); /* Set equal */
1002
        usize skip_value_check = emit(g, NOP);
1003
1004
        /* Compare values (past tag) */
1005
        type_t *inner_type = ty->info.opt.elem;
1006
        i32     val_off    = align(TAG_SIZE, inner_type->align);
1007
        reg_t   left_val   = nextreg(g);
1008
        reg_t   right_val  = nextreg(g);
1009
1010
        /* Calculate value addresses (skip tag) */
1011
        emit(g, ADDI(left_val, left, val_off));
1012
        emit(g, ADDI(right_val, right, val_off));
1013
1014
        /* Load values if primitive type */
1015
        if (type_is_primitive(inner_type)) {
1016
            emit_regload(g, left_val, left_val, 0, inner_type);
1017
            emit_regload(g, right_val, right_val, 0, inner_type);
1018
        }
1019
1020
        /* Compare the values recursively */
1021
        emit_memequal(g, left_val, right_val, inner_type, result);
1022
1023
        /* Patch skip_value_check: jump here if both tags are 0 (nil) */
1024
        g->instrs[skip_value_check] =
1025
            BEQ(left_tag, ZERO, jump_offset(skip_value_check, g->ninstrs));
1026
1027
        /* Patch jump_to_end: jump here if tags are different */
1028
        g->instrs[jump_to_end] =
1029
            BNE(left_tag, right_tag, jump_offset(jump_to_end, g->ninstrs));
1030
1031
        freereg(g, left_tag);
1032
        freereg(g, right_tag);
1033
        freereg(g, left_val);
1034
        freereg(g, right_val);
1035
1036
        break;
1037
    }
1038
    case TYPE_I8:
1039
    case TYPE_I16:
1040
    case TYPE_I32:
1041
        /* For primitive types, compare directly */
1042
        emit(g, SUB(result, left, right));
1043
        emit(g, SLTIU(result, result, 1));
1044
        break;
1045
    case TYPE_U8:
1046
    case TYPE_U16:
1047
    case TYPE_U32:
1048
    case TYPE_BOOL:
1049
    case TYPE_PTR:
1050
        /* For primitive types, compare directly */
1051
        emit(g, XOR(result, left, right));
1052
        emit(g, SLTIU(result, result, 1));
1053
        break;
1054
    case TYPE_UNION:
1055
        if (!ty->info.uni.has_payload) {
1056
            type_t *base =
1057
                ty->info.uni.base ? ty->info.uni.base : g->types->type_i32;
1058
            emit_memequal(g, left, right, base, result);
1059
        } else {
1060
            emit_bytes_equal(g, left, right, ty->size, result);
1061
        }
1062
        break;
1063
    case TYPE_ARRAY:
1064
    case TYPE_RECORD:
1065
    case TYPE_SLICE:
1066
        emit_bytes_equal(g, left, right, ty->size, result);
1067
        break;
1068
    default:
1069
        bail("equality is not supported for type `%s`", ty->name);
1070
    }
1071
}
1072
1073
void emit_copy_by_ref(gen_t *g, value_t src, value_t dst) {
1074
    static type_t ptr_type = { .cls = TYPE_PTR };
1075
1076
    if (src.loc == LOC_REG && dst.loc == LOC_REG) {
1077
        emit_mv(g, dst.as.reg, src.as.reg);
1078
    } else if (src.loc == LOC_REG && dst.loc == LOC_STACK) {
1079
        i32     dst_off  = dst.as.off.offset;
1080
        type_t *store_ty = dst.type;
1081
1082
        if (dst.type->cls == TYPE_SLICE) {
1083
            /* Slice fat pointers live on the stack; only copy the address. */
1084
            dst_off  += SLICE_FIELD_PTR_OFFSET;
1085
            store_ty  = &ptr_type;
1086
        }
1087
        emit_regstore(g, src.as.reg, dst.as.off.base, dst_off, store_ty);
1088
    } else if (src.loc == LOC_STACK && dst.loc == LOC_REG) {
1089
        type_t *load_ty = dst.type;
1090
        i32     src_off = src.as.off.offset;
1091
1092
        if (dst.type->cls == TYPE_SLICE) {
1093
            load_ty  = &ptr_type;
1094
            src_off += SLICE_FIELD_PTR_OFFSET;
1095
        }
1096
        emit_regload(g, dst.as.reg, src.as.off.base, src_off, load_ty);
1097
    } else if (src.loc == LOC_STACK && dst.loc == LOC_STACK) {
1098
        i32        src_off = src.as.off.offset;
1099
        addr_adj_t src_adj = adjust_addr(g, src.as.off.base, &src_off);
1100
        reg_t      adr     = nextreg(g);
1101
1102
        emit(g, ADDI(adr, src_adj.base, src_off));
1103
1104
        i32        dst_off = dst.as.off.offset;
1105
        addr_adj_t dst_adj = adjust_addr(g, dst.as.off.base, &dst_off);
1106
1107
        if (dst.type->cls == TYPE_SLICE)
1108
            dst_off += SLICE_FIELD_PTR_OFFSET;
1109
1110
        emit(g, SD(adr, dst_adj.base, dst_off));
1111
1112
        release_addr(g, dst_adj);
1113
        release_addr(g, src_adj);
1114
        freereg(g, adr);
1115
    } else if (src.loc == LOC_ADDR && dst.loc == LOC_STACK) {
1116
        reg_t adr = nextreg(g);
1117
        /* Load the absolute address into a register. */
1118
        emit_li(g, adr, (i32)(src.as.adr.base + src.as.adr.offset));
1119
        i32     dst_off  = dst.as.off.offset;
1120
        type_t *store_ty = dst.type;
1121
1122
        if (dst.type->cls == TYPE_SLICE) {
1123
            dst_off  += SLICE_FIELD_PTR_OFFSET;
1124
            store_ty  = &ptr_type;
1125
        }
1126
        emit_regstore(g, adr, dst.as.off.base, dst_off, store_ty);
1127
        freereg(g, adr);
1128
    } else {
1129
        bail("don't know how to copy between these slots");
1130
    }
1131
}
1132
1133
/* Write a successful result tag (0) and copy the payload if present. */
1134
void emit_result_store_success(gen_t *g, value_t dest, value_t value) {
1135
    tval_t tv  = tval_from_val(g, dest);
1136
    reg_t  tag = nextreg(g);
1137
1138
    emit_li(g, tag, 0);
1139
    emit_store_tag(g, tv, tag);
1140
    freereg(g, tag);
1141
1142
    type_t *payload = dest.type->info.res.payload;
1143
1144
    /* Nb. We don't memzero, since result types are always unwrapped to
1145
     * one of their payloads. */
1146
1147
    if (payload->size > 0) {
1148
        /* Check if we need to wrap the value in an optional. */
1149
        if (payload->cls == TYPE_OPT && value.type->cls != TYPE_OPT) {
1150
            /* Wrap non-optional value in an optional */
1151
            value_t payload_val = value_stack(
1152
                OFFSET(tv.val.as.off.base, tv.val.as.off.offset), payload
1153
            );
1154
            tval_store(g, payload_val, value, 1);
1155
        } else {
1156
            emit_store(g, value, tv.val.as.off.base, tv.val.as.off.offset);
1157
        }
1158
    }
1159
}
1160
1161
/* Write an error Result tag (1) and copy the error payload. */
1162
void emit_result_store_error(gen_t *g, value_t dest, value_t err) {
1163
    tval_t tv  = tval_from_val(g, dest);
1164
    reg_t  tag = nextreg(g);
1165
1166
    emit_li(g, tag, 1);
1167
    emit_store_tag(g, tv, tag);
1168
    freereg(g, tag);
1169
1170
    /* Nb. We don't memzero, since result types are always unwrapped to
1171
     * one of their payloads. */
1172
1173
    if (err.type->cls != TYPE_VOID) {
1174
        emit_store(g, err, tv.val.as.off.base, tv.val.as.off.offset);
1175
    }
1176
}