lib/std/arch/rv64/isel.rad 41.1 KiB raw
1
//! RV64 instruction selection.
2
//!
3
//! Walks IL and selects RV64 instructions for each operation.
4
//!
5
//! *Register resolution hierarchy*
6
//!
7
//!   getReg(ssa) -> Reg
8
//!     Primitive physical register lookup. Panics if the register is spilled.
9
//!     Used as a building block by the functions below.
10
//!
11
//!   getSrcReg(ssa, scratch) -> Reg
12
//!     Source register for an [`il::Reg`] operand. Returns the physical register,
13
//!     or loads a spilled value into `scratch`. Used for instruction fields
14
//!     typed as [`il::Reg`] (e.g. base addresses in Load/Store/Blit).
15
//!
16
//!   getDstReg(ssa, scratch) -> Reg
17
//!     Destination register for an instruction result. Returns the physical
18
//!     register, or records a pending spill and returns `scratch`. The pending
19
//!     spill is flushed by [`selectBlock`] after each instruction.
20
//!
21
//!   resolveVal(scratch, val) -> Reg
22
//!     Resolve an [`il::Val`] to whatever register holds it. Delegates to [`getSrcReg`]
23
//!     for register values; materializes immediates and symbols into `scratch`.
24
//!     Used for operands that can be consumed from any register.
25
//!
26
//!   loadVal(rd, val) -> Reg
27
//!     Force an [`il::Val`] into a specific register `rd`. Built on [`resolveVal`] + [`emitMv`].
28
//!     Used when the instruction requires the value in `rd` (e.g. `sub rd, rd, rs2`).
29
30
use std::mem;
31
use std::lang::il;
32
use std::lang::gen::regalloc;
33
use std::lang::gen::labels;
34
35
use super::encode;
36
use super::emit;
37
38
///////////////
// Constants //
///////////////

/// Shift amount for byte sign/zero extension (64 - 8).
/// Shifting left then right by this amount keeps only the low 8 bits.
const SHIFT_W8: i32 = 56;
/// Shift amount for halfword sign/zero extension (64 - 16).
const SHIFT_W16: i32 = 48;
/// Shift amount for word sign/zero extension (64 - 32).
const SHIFT_W32: i32 = 32;
/// Mask for extracting byte value. Fits a 12-bit immediate, so byte
/// zero-extension can use a single `andi` instead of a shift pair.
const MASK_W8: i32 = 0xFF;
/// Maximum number of block arguments supported.
const MAX_BLOCK_ARGS: u32 = 16;
52
53
/// Binary operation selected against an I-type immediate form when the
/// operand fits (see `selectBinOp`: addi/andi/ori/xori).
union BinOp { Add, And, Or, Xor }
/// Shift operation (logical left, logical right, arithmetic right).
union ShiftOp { Sll, Srl, Sra }
/// Compare operation (signed / unsigned less-than).
union CmpOp { Slt, Ult }
59
60
/// Selector errors.
pub union SelectorError {
    /// Internal invariant violation (e.g. a data symbol that cannot be
    /// resolved in the data-symbol table; see [`lookupDataSym`]).
    Internal,
}
64
65
/// A pending spill store to be flushed after instruction selection.
/// Recorded by [`getDstReg`] when the destination SSA register lives in a
/// spill slot; [`selectBlock`] emits the actual store after the instruction.
record PendingSpill {
    /// The SSA register that was spilled.
    ssa: il::Reg,
    /// The physical register holding the value to store.
    rd: super::Reg,
}
72
73
////////////////////
// Selector State //
////////////////////

/// Instruction selector state.
pub record Selector {
    /// Emitter for outputting instructions.
    e: *mut emit::Emitter,
    /// Register allocation result.
    ralloc: *regalloc::AllocResult,
    /// Data symbols for resolving symbol addresses.
    dataSyms: *[super::DataSym],
    /// Total stack frame size (used by [`spillOffset`] to translate
    /// spill-slot offsets into FP-relative load/store offsets).
    frameSize: i32,
    /// Running offset into the reserve region of the frame.
    /// Tracks current position within the pre-allocated reserve slots.
    /// Must advance in the same order [`computeReserveSize`] pre-scanned.
    reserveOffset: i32,
    /// Pending spill store, auto-committed after each instruction.
    pendingSpill: ?PendingSpill,
    /// Next synthetic block index for skip-branch targets
    /// (used by Switch case-argument lowering in [`selectInstr`]).
    nextSynthBlock: u32,
}
95
96
/////////////////////////
97
// Register Allocation //
98
/////////////////////////
99
100
/// Get the physical register for an already-allocated SSA register.
/// This is the primitive lookup: it must only be called for registers
/// known to have a physical assignment. Spilled registers (no assignment)
/// are a caller bug and panic; use [`getSrcReg`]/[`getDstReg`] to handle
/// spills gracefully.
fn getReg(s: *Selector, ssa: il::Reg) -> super::Reg {
    let phys = s.ralloc.assignments[ssa.n] else {
        panic "getReg: spilled register has no physical assignment";
    };
    return super::Reg { n: phys };
}
107
108
/// Compute the FP-relative offset for a spill slot.
/// Spill slots live at the bottom of the frame, and `FP = SP + totalFrameSize`,
/// so a slot's offset from FP is its slot offset minus the total frame size
/// (always negative: below the frame pointer).
fn spillOffset(s: *Selector, slot: i32) -> i32 {
    let fpRelative = slot - s.frameSize;
    return fpRelative;
}
114
115
/// Get the destination register for an SSA register.
/// If the register is spilled, records a pending spill and returns the scratch
/// register. The pending spill is auto-committed by [`selectBlock`] after each
/// instruction. If not spilled, returns the physical register.
fn getDstReg(s: *mut Selector, ssa: il::Reg, scratch: super::Reg) -> super::Reg {
    // The slot value itself is unused here (`_`); this is only a
    // spilled-or-not test. selectBlock re-queries the slot when flushing.
    if let _ = regalloc::spill::spillSlot(&s.ralloc.spill, ssa) {
        s.pendingSpill = PendingSpill { ssa, rd: scratch };
        return scratch;
    }
    return getReg(s, ssa);
}
126
127
/// Get the source register for an SSA register.
/// If the register is spilled, loads the value from the spill slot into the
/// scratch register and returns it. Otherwise returns the physical register.
/// Note: a load is emitted immediately, so the caller must not have live
/// data in `scratch` at this point.
fn getSrcReg(s: *mut Selector, ssa: il::Reg, scratch: super::Reg) -> super::Reg {
    if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, ssa) {
        emit::emitLd(s.e, scratch, super::FP, spillOffset(s, slot));
        return scratch;
    }
    return getReg(s, ssa);
}
137
138
/// Look up symbol address in data map.
/// Linear scan over the data-symbol table; throws
/// [`SelectorError::Internal`] when the name is not present.
fn lookupDataSym(s: *Selector, name: *[u8]) -> u32 throws (SelectorError) {
    for sym in s.dataSyms {
        if mem::eq(sym.name, name) {
            return sym.addr;
        }
    }
    throw SelectorError::Internal;
}
147
148
/// Resolve an IL value to the physical register holding it.
/// For non-spilled register values, returns the physical register directly.
/// For immediates, symbols, and spilled registers, materializes into `scratch`.
/// Callers therefore must not assume the result register is writable:
/// it may alias a live physical register.
fn resolveVal(s: *mut Selector, scratch: super::Reg, val: il::Val) -> super::Reg {
    match val {
        case il::Val::Reg(r) => {
            return getSrcReg(s, r, scratch);
        },
        case il::Val::Imm(imm) => {
            emit::loadImm(s.e, scratch, imm);
            return scratch;
        },
        case il::Val::DataSym(name) => {
            // Data symbols have fixed addresses known at selection time;
            // a missing symbol is a compiler bug, hence the panic.
            let addr = try lookupDataSym(s, name) catch {
                panic "resolveVal: data symbol not found";
            };
            emit::loadImm(s.e, scratch, addr as i64);
            return scratch;
        },
        case il::Val::FnAddr(name) => {
            // Function addresses are not known yet; record a fixup that
            // patches the address load later.
            emit::recordAddrLoad(s.e, name, scratch);
            return scratch;
        },
        case il::Val::Undef => {
            // Undefined value: any register contents will do; return
            // `scratch` without emitting anything.
            return scratch;
        }
    }
}
176
177
/// Force an IL value into the specific physical register `rd`.
/// Like [`resolveVal`], but guarantees the value ends up in `rd`.
fn loadVal(s: *mut Selector, rd: super::Reg, val: il::Val) -> super::Reg {
    // Resolve using `rd` itself as the scratch register: immediates and
    // symbols are materialized directly into `rd`, making the trailing
    // move a no-op in those cases.
    let src = resolveVal(s, rd, val);
    emitMv(s, rd, src);
    return rd;
}
184
185
/// Emit a register-to-register move, eliding it when source and
/// destination are already the same physical register.
fn emitMv(s: *mut Selector, rd: super::Reg, rs: super::Reg) {
    if rd.n == rs.n {
        return;
    }
    emit::emit(s.e, encode::mv(rd, rs));
}
191
192
/// Emit zero-extension from a sub-word type to the full register width.
/// W8 uses a single `andi` (the 0xFF mask fits a 12-bit immediate);
/// W16/W32 masks do not fit, so they use a shift-left/shift-right pair.
/// W64 is already full width and emits nothing.
fn emitZext(e: *mut emit::Emitter, rd: super::Reg, rs: super::Reg, typ: il::Type) {
    match typ {
        case il::Type::W8 => emit::emit(e, encode::andi(rd, rs, MASK_W8)),
        case il::Type::W16 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W16));
            emit::emit(e, encode::srli(rd, rd, SHIFT_W16));
        },
        case il::Type::W32 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W32));
            emit::emit(e, encode::srli(rd, rd, SHIFT_W32));
        },
        case il::Type::W64 => {}
    }
}
207
208
/// Emit sign-extension from a sub-word type to the full register width.
/// W8/W16 use a shift-left / arithmetic-shift-right pair; W32 uses
/// `addiw rd, rs, 0` (the standard RV64 `sext.w` idiom). W64 is already
/// full width and emits nothing.
fn emitSext(e: *mut emit::Emitter, rd: super::Reg, rs: super::Reg, typ: il::Type) {
    match typ {
        case il::Type::W8 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W8));
            emit::emit(e, encode::srai(rd, rd, SHIFT_W8));
        },
        case il::Type::W16 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W16));
            emit::emit(e, encode::srai(rd, rd, SHIFT_W16));
        },
        case il::Type::W32 => {
            emit::emit(e, encode::addiw(rd, rs, 0));
        },
        case il::Type::W64 => {}
    }
}
225
226
////////////////////////
// Instruction Select //
////////////////////////

/// Pre-scan all blocks for constant-sized reserve instructions.
/// Returns the total size needed for all static reserves, respecting alignment.
///
/// IMPORTANT: this must visit instructions in the same order that
/// [`selectInstr`] processes them (blocks in index order, instructions in
/// program order), because the `Reserve` lowering replays the identical
/// align-then-advance computation against `Selector.reserveOffset` and the
/// per-reserve offsets must line up.
fn computeReserveSize(func: *il::Fn) -> i32 {
    let mut offset: i32 = 0;
    for b in 0..func.blocks.len {
        let block = &func.blocks[b];
        for instr in block.instrs {
            match instr {
                case il::Instr::Reserve { size, alignment, .. } => {
                    // Only constant-sized reserves get fixed frame slots;
                    // dynamic reserves adjust SP at runtime instead.
                    if let case il::Val::Imm(sz) = size {
                        offset = mem::alignUpI32(offset, alignment as i32);
                        offset += (sz as i32);
                    }
                },
                else => {},
            }
        }
    }
    return offset;
}
250
251
/// Select instructions for a function.
///
/// Pipeline per function: reset label state, pre-scan reserves, compute the
/// frame layout, emit prologue, move incoming parameters into their assigned
/// locations, lower each block, emit epilogue, then patch local branches.
pub fn selectFn(
    e: *mut emit::Emitter,
    dataSyms: *[super::DataSym],
    ralloc: *regalloc::AllocResult,
    func: *il::Fn
) {
    // Reset block offsets for this function.
    labels::resetBlocks(&mut e.labels);
    // Pre-scan for constant-sized reserves to promote to fixed frame slots.
    let reserveSize = computeReserveSize(func);
    // Compute frame layout from spill slots, reserve slots, and used callee-saved registers.
    let frame = emit::computeFrame(
        ralloc.spill.frameSize + reserveSize,
        ralloc.usedCalleeSaved,
        func.blocks.len
    );
    // Synthetic block indices start after real blocks and the epilogue block.
    let mut s = Selector {
        e, ralloc, dataSyms, frameSize: frame.totalSize,
        reserveOffset: 0, pendingSpill: nil,
        nextSynthBlock: func.blocks.len + 1,
    };
    // Record function name for printing.
    emit::recordFunc(s.e, func.name);
    // Record function code offset for call patching.
    emit::recordFuncOffset(s.e, func.name);
    // Emit prologue.
    emit::emitPrologue(s.e, &frame);

    // Move function params from arg registers to assigned registers.
    // Cross-call params may have been assigned to callee-saved registers
    // instead of their natural arg registers. Spilled params are stored
    // directly to their spill slots.
    //
    // NOTE(review): these moves are emitted sequentially, unlike call
    // argument setup which uses emitParallelMoves. This is only safe if
    // the allocator never assigns one parameter to another parameter's
    // arg register — TODO confirm against regalloc.
    //
    // NOTE(review): params with i >= ARG_REGS.len are silently skipped;
    // presumably stack-passed parameters are handled elsewhere or
    // unsupported — verify.
    for funcParam, i in func.params {
        if i < super::ARG_REGS.len {
            let param = funcParam.value;
            let argReg = super::ARG_REGS[i];

            if let slot = regalloc::spill::spillSlot(&ralloc.spill, param) {
                // Spilled parameter: store arg register to spill slot.
                emit::emitSd(s.e, argReg, super::FP, spillOffset(&s, slot));
            } else if let assigned = ralloc.assignments[param.n] {
                emitMv(&mut s, super::Reg { n: assigned }, argReg);
            }
        }
    }

    // Emit each block.
    for i in 0..func.blocks.len {
        selectBlock(&mut s, i, &func.blocks[i], &frame, func);
    }
    // Emit epilogue.
    emit::emitEpilogue(s.e, &frame);
    // Patch local branches now that all blocks are emitted.
    emit::patchLocalBranches(s.e);
}
308
309
/// Select instructions for a block.
///
/// Clears `pendingSpill` before each instruction, lowers it, then flushes
/// any spill store that [`getDstReg`] recorded during lowering, so a
/// spilled destination is written back exactly once per instruction.
fn selectBlock(s: *mut Selector, blockIdx: u32, block: *il::Block, frame: *emit::Frame, func: *il::Fn) {
    // Record block address for branch patching.
    emit::recordBlock(s.e, blockIdx);

    // Block parameters are handled at jump sites (in `Jmp`/`Br`).
    // By the time we enter the block, the arguments have already been
    // moved to the parameter registers by the predecessor's terminator.

    // Process each instruction, auto-committing any pending spill after each.
    let hasLocs = block.locs.len > 0;
    for instr, i in block.instrs {
        // Record debug location before emitting machine instructions.
        if hasLocs {
            emit::recordSrcLoc(s.e, block.locs[i]);
        }
        s.pendingSpill = nil;
        selectInstr(s, blockIdx, instr, frame, func);

        // Flush the pending spill store, if any.
        if let p = s.pendingSpill {
            if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, p.ssa) {
                emit::emitSd(s.e, p.rd, super::FP, spillOffset(s, slot));
            }
            s.pendingSpill = nil;
        }
    }
}
337
338
/// Select instructions for a single IL instruction.
///
/// Scratch register convention throughout this dispatcher: SCRATCH1 holds
/// destination/spilled-operand/first-operand material, SCRATCH2 holds base
/// addresses and second operands. `rd` may alias `rs1` when the destination
/// is spilled and an operand is also materialized into SCRATCH1; the emitted
/// patterns read their sources before writing `rd`, so this is benign.
fn selectInstr(s: *mut Selector, blockIdx: u32, instr: il::Instr, frame: *emit::Frame, func: *il::Fn) {
    match instr {
        case il::Instr::BinOp { op, typ, dst, a, b } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs1 = resolveVal(s, super::SCRATCH1, a);
            // `b` is resolved lazily inside selectAluBinOp so immediate
            // forms (addi/andi/...) can be used when it fits.
            selectAluBinOp(s, op, typ, rd, rs1, b);
        },
        case il::Instr::UnOp { op, typ, dst, a } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, a);
            selectAluUnOp(s, op, typ, rd, rs);
        },
        case il::Instr::Load { typ, dst, src, offset } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let base = getSrcReg(s, src, super::SCRATCH2);
            emit::emitLoad(s.e, rd, base, offset, typ);
        },
        case il::Instr::Sload { typ, dst, src, offset } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let base = getSrcReg(s, src, super::SCRATCH2);
            emit::emitSload(s.e, rd, base, offset, typ);
        },
        case il::Instr::Store { typ, src, dst, offset } => {
            // Base and value use distinct scratch registers so both may be
            // spilled simultaneously.
            let base = getSrcReg(s, dst, super::SCRATCH2);
            let rs = resolveVal(s, super::SCRATCH1, src);
            emit::emitStore(s.e, rs, base, offset, typ);
        },
        case il::Instr::Copy { dst, val } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, val);
            emitMv(s, rd, rs);
        },
        case il::Instr::Reserve { dst, size, alignment } => {
            match size {
                case il::Val::Imm(sz) => {
                    // Constant-sized reserve: use pre-allocated frame slot.
                    // Replays the exact align-then-advance computation of
                    // computeReserveSize so offsets agree with the frame layout.
                    let rd = getDstReg(s, dst, super::SCRATCH1);
                    let aligned: i32 = mem::alignUpI32(s.reserveOffset, alignment as i32);
                    let fpOffset: i32 = s.ralloc.spill.frameSize + aligned - s.frameSize;

                    emit::emitAddImm(s.e, rd, super::FP, fpOffset);
                    s.reserveOffset = aligned + (sz as i32);
                },
                case il::Val::Reg(r) => {
                    // Dynamic-sized reserve: runtime SP adjustment.
                    let rd = getDstReg(s, dst, super::SCRATCH1);
                    let rs = getSrcReg(s, r, super::SCRATCH2);

                    emit::emit(s.e, encode::sub(super::SP, super::SP, rs));

                    if alignment > 1 {
                        // Round SP down to the alignment by masking with
                        // -alignment (two's complement).
                        let mask = 0 - alignment as i32;
                        assert encode::isSmallImm(mask);

                        emit::emit(s.e, encode::andi(super::SP, super::SP, mask));
                    }
                    emit::emit(s.e, encode::mv(rd, super::SP));
                },
                else =>
                    panic "selectInstr: invalid reserve operand",
            }
        },
        case il::Instr::Blit { dst, src, size } => {
            let case il::Val::Imm(staticSize) = size
                else panic "selectInstr: blit requires immediate size";

            let bothSpilled = regalloc::spill::isSpilled(&s.ralloc.spill, dst)
                and regalloc::spill::isSpilled(&s.ralloc.spill, src);

            // When both are spilled, offsets must fit 12-bit immediates
            // since we can't advance base registers (they live in spill
            // slots, not real registers we can mutate).
            if bothSpilled and staticSize as i32 > super::MAX_IMM {
                panic "selectInstr: blit both-spilled with large size";
            }

            // Resolve dst/src base registers.
            let mut rdst = super::SCRATCH2;
            let mut rsrc = super::SCRATCH1;
            let mut srcReload: ?i32 = nil;

            if bothSpilled {
                // dst base stays resident in SCRATCH2; src base is reloaded
                // from its spill slot before every transfer because SCRATCH1
                // doubles as the data register.
                let dstSlot = regalloc::spill::spillSlot(&s.ralloc.spill, dst) else {
                    panic "selectInstr: blit dst not spilled";
                };
                let srcSlot = regalloc::spill::spillSlot(&s.ralloc.spill, src) else {
                    panic "selectInstr: blit src not spilled";
                };
                emit::emitLd(s.e, super::SCRATCH2, super::FP, spillOffset(s, dstSlot));
                srcReload = spillOffset(s, srcSlot);
            } else {
                // At most one of dst/src is spilled here, so sharing
                // SCRATCH2 as the spill scratch for both calls is safe.
                rdst = getSrcReg(s, dst, super::SCRATCH2);
                rsrc = getSrcReg(s, src, super::SCRATCH2);
            }
            let mut offset: i32 = 0;
            let mut remaining = staticSize as i32;

            // Copy loop: 8 bytes, then 4 bytes, then 1 byte at a time.
            // Before each load/store pair, check whether the offset is
            // about to exceed the 12-bit signed immediate range. When
            // it does, advance the base registers by the accumulated
            // offset and reset to zero.
            // Invariant: base advancement + `offset` == bytes copied so far.
            while remaining >= super::DWORD_SIZE {
                if offset > super::MAX_IMM - super::DWORD_SIZE {
                    emit::emitAddImm(s.e, rsrc, rsrc, offset);
                    if rdst.n != rsrc.n {
                        emit::emitAddImm(s.e, rdst, rdst, offset);
                    }
                    offset = 0;
                }
                if let off = srcReload {
                    emit::emitLd(s.e, super::SCRATCH1, super::FP, off);
                    emit::emitLd(s.e, super::SCRATCH1, super::SCRATCH1, offset);
                } else {
                    emit::emitLd(s.e, super::SCRATCH1, rsrc, offset);
                }
                emit::emitSd(s.e, super::SCRATCH1, rdst, offset);
                offset += super::DWORD_SIZE;
                remaining -= super::DWORD_SIZE;
            }
            // After the 8-byte loop, remaining < 8, so at most one 4-byte chunk.
            if remaining >= super::WORD_SIZE {
                if offset > super::MAX_IMM - super::WORD_SIZE {
                    emit::emitAddImm(s.e, rsrc, rsrc, offset);
                    if rdst.n != rsrc.n {
                        emit::emitAddImm(s.e, rdst, rdst, offset);
                    }
                    offset = 0;
                }
                if let off = srcReload {
                    emit::emitLd(s.e, super::SCRATCH1, super::FP, off);
                    emit::emitLw(s.e, super::SCRATCH1, super::SCRATCH1, offset);
                } else {
                    emit::emitLw(s.e, super::SCRATCH1, rsrc, offset);
                }
                emit::emitSw(s.e, super::SCRATCH1, rdst, offset);
                offset += super::WORD_SIZE;
                remaining -= super::WORD_SIZE;
            }
            while remaining > 0 {
                if offset > super::MAX_IMM - 1 {
                    emit::emitAddImm(s.e, rsrc, rsrc, offset);
                    if rdst.n != rsrc.n {
                        emit::emitAddImm(s.e, rdst, rdst, offset);
                    }
                    offset = 0;
                }
                if let off = srcReload {
                    emit::emitLd(s.e, super::SCRATCH1, super::FP, off);
                    emit::emitLb(s.e, super::SCRATCH1, super::SCRATCH1, offset);
                } else {
                    emit::emitLb(s.e, super::SCRATCH1, rsrc, offset);
                }
                emit::emitSb(s.e, super::SCRATCH1, rdst, offset);
                offset += 1;
                remaining -= 1;
            }
            // Restore base registers if they were advanced (never happens
            // in the both-spilled case since size <= MAX_IMM). By the
            // invariant above, total advancement == staticSize - offset.
            if not bothSpilled {
                let advanced = staticSize as i32 - offset;
                if advanced != 0 {
                    emit::emitAddImm(s.e, rsrc, rsrc, 0 - advanced);
                    if rdst.n != rsrc.n {
                        emit::emitAddImm(s.e, rdst, rdst, 0 - advanced);
                    }
                }
            }
        },
        case il::Instr::Zext { typ, dst, val } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, val);
            emitZext(s.e, rd, rs, typ);
        },
        case il::Instr::Sext { typ, dst, val } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, val);
            emitSext(s.e, rd, rs, typ);
        },
        case il::Instr::Ret { val } => {
            if let v = val {
                let rs = resolveVal(s, super::SCRATCH1, v);
                emitMv(s, super::A0, rs);
            }
            emit::emitReturn(s.e, frame);
        },
        case il::Instr::Jmp { target, args } => {
            // Move arguments to target block's parameter registers.
            emitBlockArgs(s, func, target, args);
            // Skip branch if target is the next block (fallthrough).
            if target != blockIdx + 1 {
                emit::recordBranch(s.e, target, emit::BranchKind::Jump);
            }
        },
        case il::Instr::Br { op, typ, a, b, thenTarget, thenArgs, elseTarget, elseArgs } => {
            let rs1 = resolveVal(s, super::SCRATCH1, a);
            let rs2 = resolveVal(s, super::SCRATCH2, b);

            // Normalize sub-word operands so that both registers have the same
            // canonical representation. Without this, eg. `-1 : i8 ` loaded as
            // `0xFFFFFFFFFFFFFFFF` and `255 : i8` loaded as `0xFF` would compare
            // unequal even though they are the same 8-bit pattern.
            //
            // NOTE(review): this extends rs1/rs2 in place; when an operand is a
            // non-spilled SSA register this mutates its live physical register.
            // Presumably the IL guarantees sub-word registers tolerate
            // re-canonicalization — TODO confirm.
            if let case il::CmpOp::Slt = op {
                emitSext(s.e, rs1, rs1, typ);
                emitSext(s.e, rs2, rs2, typ);
            } else {
                emitZext(s.e, rs1, rs1, typ);
                emitZext(s.e, rs2, rs2, typ);
            }
            // Block-argument moves must only execute on the taken path.
            // When `thenArgs` is non-empty, invert the branch so that the
            // then-moves land on the fall-through (taken) side.
            //
            // When one target is the next block in layout order, we can
            // eliminate the trailing unconditional jump by arranging the
            // conditional branch to skip to the *other* target and letting
            // execution fall through.
            if thenArgs.len > 0 and elseArgs.len > 0 {
                panic "selectInstr: both `then` and `else` have block arguments";
            } else if thenArgs.len > 0 {
                emit::recordBranch(s.e, elseTarget, emit::BranchKind::InvertedCond { op, rs1, rs2 });
                emitBlockArgs(s, func, thenTarget, thenArgs);
                // Skip trailing jump if then is the next block (fallthrough).
                if thenTarget != blockIdx + 1 {
                    emit::recordBranch(s.e, thenTarget, emit::BranchKind::Jump);
                }
            } else if thenTarget == blockIdx + 1 and elseArgs.len == 0 {
                // Then is the next block and no else args: invert the
                // condition to branch to else and fall through to then.
                emit::recordBranch(s.e, elseTarget, emit::BranchKind::InvertedCond { op, rs1, rs2 });
            } else {
                emit::recordBranch(s.e, thenTarget, emit::BranchKind::Cond { op, rs1, rs2 });
                emitBlockArgs(s, func, elseTarget, elseArgs);
                // Skip trailing jump if else is the next block (fallthrough).
                if elseTarget != blockIdx + 1 {
                    emit::recordBranch(s.e, elseTarget, emit::BranchKind::Jump);
                }
            }
        },
        case il::Instr::Switch { val, defaultTarget, defaultArgs, cases } => {
            let rs1 = resolveVal(s, super::SCRATCH1, val);
            // When a case has block args, invert the branch to skip past
            // the arg moves. The arg moves only execute on the taken
            // (equal) path, which then jumps away, so rs1/SCRATCH2 stay
            // intact for the remaining case comparisons.
            for c in cases {
                emit::loadImm(s.e, super::SCRATCH2, c.value);

                if c.args.len > 0 {
                    // Allocate a synthetic block label for the skip target.
                    let skip = s.nextSynthBlock;
                    s.nextSynthBlock = skip + 1;

                    emit::recordBranch(s.e, skip, emit::BranchKind::InvertedCond {
                        op: il::CmpOp::Eq, rs1, rs2: super::SCRATCH2,
                    });
                    emitBlockArgs(s, func, c.target, c.args);
                    emit::recordBranch(s.e, c.target, emit::BranchKind::Jump);
                    emit::recordBlock(s.e, skip);
                } else {
                    emit::recordBranch(s.e, c.target, emit::BranchKind::Cond {
                        op: il::CmpOp::Eq, rs1, rs2: super::SCRATCH2,
                    });
                }
            }
            // Fall through to default.
            // NOTE(review): unlike Jmp/Br, the default jump is emitted even
            // when defaultTarget == blockIdx + 1 — a missed (but harmless)
            // fallthrough optimization.
            emitBlockArgs(s, func, defaultTarget, defaultArgs);
            emit::recordBranch(s.e, defaultTarget, emit::BranchKind::Jump);
        },
        case il::Instr::Unreachable => {
            emit::emit(s.e, encode::ebreak());
        },
        case il::Instr::Call { retTy, dst, func, args } => {
            // For indirect calls, save target to scratch register before arg
            // setup can clobber it.
            if let case il::Val::Reg(r) = func {
                let target = getSrcReg(s, r, super::SCRATCH2);
                emitMv(s, super::SCRATCH2, target);
            }
            // Move arguments to A0-A7 using parallel move resolution.
            if args.len > super::ARG_REGS.len {
                panic "selectInstr: too many call arguments";
            }
            // NOTE(review): the indirect-call target lives in SCRATCH2 across
            // this call; assumes emitParallelMoves does not use SCRATCH2 —
            // TODO confirm.
            emitParallelMoves(s, &super::ARG_REGS[..], args);

            // Emit call.
            match func {
                case il::Val::FnAddr(name) => {
                    emit::recordCall(s.e, name);
                },
                case il::Val::Reg(_) => {
                    emit::emit(s.e, encode::jalr(super::RA, super::SCRATCH2, 0));
                },
                else => {
                    panic "selectInstr: invalid call target";
                }
            }
            // Move result from A0.
            if let d = dst {
                let rd = getDstReg(s, d, super::SCRATCH1);
                emitMv(s, rd, super::A0);
            }
        },
        case il::Instr::Ecall { dst, num, a0, a1, a2, a3 } => {
            // Move arguments using parallel move.
            // TODO: Can't use slice literals here because the lowerer doesn't
            // support const-evaluating struct/union values in them.
            let ecallDsts: [super::Reg; 5] = [super::A7, super::A0, super::A1, super::A2, super::A3];
            let ecallArgs: [il::Val; 5] = [num, a0, a1, a2, a3];

            emitParallelMoves(s, &ecallDsts[..], &ecallArgs[..]);
            emit::emit(s.e, encode::ecall());

            // Result in A0.
            let ecallRd = getDstReg(s, dst, super::SCRATCH1);
            emitMv(s, ecallRd, super::A0);
        },
        case il::Instr::Ebreak => {
            emit::emit(s.e, encode::ebreak());
        },
    }
}
657
658
/// Emit runtime trap for division/modulo by zero.
/// Branches over the `ebreak` (one instruction ahead) when `rs` is nonzero,
/// so execution only hits the trap when the divisor is zero.
fn emitTrapIfZero(s: *mut Selector, rs: super::Reg) {
    emit::emit(s.e, encode::bne(rs, super::ZERO, super::INSTR_SIZE * 2));
    emit::emit(s.e, encode::ebreak());
}
663
664
/// Select a binary ALU operation, dispatching to the appropriate
/// instruction pattern based on the operation kind and type.
///
/// `rs1` is already resolved by the caller; `b` is resolved here so that
/// the helpers ([`selectBinOp`]/[`selectBinOpW`]/shift/compare) can pick an
/// immediate encoding when `b` is a small constant. W32 operations use the
/// `*w` instruction forms, which operate on the low 32 bits and
/// sign-extend the result. Division and remainder trap on a zero divisor
/// via [`emitTrapIfZero`].
fn selectAluBinOp(s: *mut Selector, op: il::BinOp, typ: il::Type, rd: super::Reg, rs1: super::Reg, b: il::Val) {
    match op {
        case il::BinOp::Add => {
            if typ == il::Type::W32 {
                selectBinOpW(s, rd, rs1, b, super::SCRATCH2);
            } else {
                selectBinOp(s, rd, rs1, b, BinOp::Add, super::SCRATCH2);
            }
        }
        case il::BinOp::Sub => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emit::emit(s.e,
                encode::subw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::sub(rd, rs1, rs2));
        }
        case il::BinOp::Mul => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emit::emit(s.e,
                encode::mulw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::mul(rd, rs1, rs2));
        }
        case il::BinOp::Sdiv => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emitTrapIfZero(s, rs2);
            emit::emit(s.e,
                encode::divw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::div(rd, rs1, rs2));
        }
        case il::BinOp::Udiv => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emitTrapIfZero(s, rs2);
            emit::emit(s.e,
                encode::divuw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::divu(rd, rs1, rs2));
        }
        case il::BinOp::Srem => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emitTrapIfZero(s, rs2);
            emit::emit(s.e,
                encode::remw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::rem(rd, rs1, rs2));
        }
        case il::BinOp::Urem => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emitTrapIfZero(s, rs2);
            emit::emit(s.e,
                encode::remuw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::remu(rd, rs1, rs2));
        }
        case il::BinOp::And =>
            selectBinOp(s, rd, rs1, b, BinOp::And, super::SCRATCH2),
        case il::BinOp::Or =>
            selectBinOp(s, rd, rs1, b, BinOp::Or, super::SCRATCH2),
        case il::BinOp::Xor =>
            selectBinOp(s, rd, rs1, b, BinOp::Xor, super::SCRATCH2),
        case il::BinOp::Shl =>
            selectShift(s, rd, rs1, b, ShiftOp::Sll, typ, super::SCRATCH2),
        case il::BinOp::Sshr =>
            selectShift(s, rd, rs1, b, ShiftOp::Sra, typ, super::SCRATCH2),
        case il::BinOp::Ushr =>
            selectShift(s, rd, rs1, b, ShiftOp::Srl, typ, super::SCRATCH2),
        case il::BinOp::Eq => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            // Canonicalize both operands to the declared width
            // so high-bit junk doesn't affect the result.
            // NOTE(review): extends rs1/rs2 in place — see the matching
            // note on the Br case in selectInstr.
            emitZext(s.e, rs1, rs1, typ);
            emitZext(s.e, rs2, rs2, typ);
            // rd = (rs1 ^ rs2 == 0), i.e. rd = (rs1 xor rs2) <u 1.
            emit::emit(s.e, encode::xor(rd, rs1, rs2));
            emit::emit(s.e, encode::sltiu(rd, rd, 1));
        }
        case il::BinOp::Ne => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            // Canonicalize both operands to the declared width
            // so high-bit junk doesn't affect the result.
            emitZext(s.e, rs1, rs1, typ);
            emitZext(s.e, rs2, rs2, typ);
            // rd = (rs1 ^ rs2 != 0), i.e. rd = 0 <u (rs1 xor rs2).
            emit::emit(s.e, encode::xor(rd, rs1, rs2));
            emit::emit(s.e, encode::sltu(rd, super::ZERO, rd));
        }
        case il::BinOp::Slt =>
            selectCmp(s, rd, rs1, b, CmpOp::Slt, super::SCRATCH2),
        case il::BinOp::Ult =>
            selectCmp(s, rd, rs1, b, CmpOp::Ult, super::SCRATCH2),
        case il::BinOp::Sge => {
            // a >= b  ==  !(a < b)
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emit::emit(s.e, encode::slt(rd, rs1, rs2));
            emit::emit(s.e, encode::xori(rd, rd, 1)); // flip.
        }
        case il::BinOp::Uge => {
            // a >= b (unsigned)  ==  !(a <u b)
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emit::emit(s.e, encode::sltu(rd, rs1, rs2));
            emit::emit(s.e, encode::xori(rd, rd, 1)); // flip.
        }
    }
}
767
768
/// Select a unary ALU operation.
///
/// `Not` always operates on the full 64-bit register. `Neg` uses `subw` for
/// W32 so the result is sign-extended, and the `neg` pseudo-op otherwise.
fn selectAluUnOp(s: *mut Selector, op: il::UnOp, typ: il::Type, rd: super::Reg, rs: super::Reg) {
    match op {
        case il::UnOp::Not =>
            emit::emit(s.e, encode::not_(rd, rs)),
        case il::UnOp::Neg => {
            if typ != il::Type::W32 {
                emit::emit(s.e, encode::neg(rd, rs));
            } else {
                // 32-bit negate: subtract from zero with `subw`, which
                // sign-extends the 32-bit result into the full register.
                emit::emit(s.e, encode::subw(rd, super::ZERO, rs));
            }
        }
    }
}
782
783
/// Select 32-bit addition with immediate optimization.
/// Uses `addiw`/`addw` to operate on 32 bits and sign-extend the result.
fn selectBinOpW(s: *mut Selector, rd: super::Reg, rs1: super::Reg, b: il::Val, scratch: super::Reg) {
    // Prefer the I-type form when the operand is a constant that fits
    // in a 12-bit signed immediate.
    if let case il::Val::Imm(imm) = b {
        if encode::isSmallImm64(imm) {
            emit::emit(s.e, encode::addiw(rd, rs1, imm as i32));
            return;
        }
    }
    // Operand is not an encodable constant: materialize it into `scratch`
    // (or use its existing register) and emit the R-type form.
    let other = resolveVal(s, scratch, b);
    emit::emit(s.e, encode::addw(rd, rs1, other));
}
795
796
/// Select binary operation with immediate optimization.
///
/// Emits the immediate encoding (`addi`/`andi`/`ori`/`xori`) when `b` is a
/// constant fitting a 12-bit signed immediate; otherwise resolves `b` into
/// a register (possibly `scratch`) and emits the register encoding.
fn selectBinOp(s: *mut Selector, rd: super::Reg, rs1: super::Reg, b: il::Val, op: BinOp, scratch: super::Reg) {
    if let case il::Val::Imm(imm) = b {
        if encode::isSmallImm64(imm) {
            // Constant operand fits: use the I-type encoding directly.
            match op {
                case BinOp::Xor => emit::emit(s.e, encode::xori(rd, rs1, imm as i32)),
                case BinOp::Or  => emit::emit(s.e, encode::ori(rd, rs1, imm as i32)),
                case BinOp::And => emit::emit(s.e, encode::andi(rd, rs1, imm as i32)),
                case BinOp::Add => emit::emit(s.e, encode::addi(rd, rs1, imm as i32)),
            }
            return;
        }
    }
    // Constant too wide, or not a constant at all: go through a register.
    let other = resolveVal(s, scratch, b);
    match op {
        case BinOp::Xor => emit::emit(s.e, encode::xor(rd, rs1, other)),
        case BinOp::Or  => emit::emit(s.e, encode::or_(rd, rs1, other)),
        case BinOp::And => emit::emit(s.e, encode::and_(rd, rs1, other)),
        case BinOp::Add => emit::emit(s.e, encode::add(rd, rs1, other)),
    }
}
820
821
/// Select shift operation with immediate optimization.
/// For 32-bit operations, uses the `*w` variants that operate on the lower 32 bits
/// and sign-extend the result.
///
/// `rd`      — destination register.
/// `rs1`     — register holding the value to shift.
/// `b`       — shift amount (constant or register value).
/// `op`      — shift kind (logical left/right, arithmetic right).
/// `typ`     — declared width; W32 selects the `*w` instruction forms.
/// `scratch` — scratch register used to materialize `b` when needed.
fn selectShift(s: *mut Selector, rd: super::Reg, rs1: super::Reg, b: il::Val, op: ShiftOp, typ: il::Type, scratch: super::Reg) {
    let isW32: bool = typ == il::Type::W32;

    // Try immediate optimization first.
    if let case il::Val::Imm(shamt) = b {
        // Keep immediate forms only for encodable shift amounts:
        // the shamt field holds 0..31 for the `*w` forms and 0..63 for
        // the 64-bit forms. Otherwise fall back to register shifts,
        // which naturally mask the count in hardware.
        if shamt >= 0 and ((isW32 and shamt < 32) or (not isW32 and shamt < 64)) {
            let sa = shamt as i32;
            if isW32 {
                // 32-bit immediate shifts: operate on the low 32 bits and
                // sign-extend the result.
                match op {
                    case ShiftOp::Sll => emit::emit(s.e, encode::slliw(rd, rs1, sa)),
                    case ShiftOp::Srl => emit::emit(s.e, encode::srliw(rd, rs1, sa)),
                    case ShiftOp::Sra => emit::emit(s.e, encode::sraiw(rd, rs1, sa)),
                }
            } else {
                // 64-bit immediate shifts.
                match op {
                    case ShiftOp::Sll => emit::emit(s.e, encode::slli(rd, rs1, sa)),
                    case ShiftOp::Srl => emit::emit(s.e, encode::srli(rd, rs1, sa)),
                    case ShiftOp::Sra => emit::emit(s.e, encode::srai(rd, rs1, sa)),
                }
            }
            return;
        }
    }
    // Fallback: load the count into a register. Register shifts use only
    // the low bits of the count, so no explicit masking is emitted here.
    let rs2 = resolveVal(s, scratch, b);
    if isW32 {
        match op {
            case ShiftOp::Sll => emit::emit(s.e, encode::sllw(rd, rs1, rs2)),
            case ShiftOp::Srl => emit::emit(s.e, encode::srlw(rd, rs1, rs2)),
            case ShiftOp::Sra => emit::emit(s.e, encode::sraw(rd, rs1, rs2)),
        }
    } else {
        match op {
            case ShiftOp::Sll => emit::emit(s.e, encode::sll(rd, rs1, rs2)),
            case ShiftOp::Srl => emit::emit(s.e, encode::srl(rd, rs1, rs2)),
            case ShiftOp::Sra => emit::emit(s.e, encode::sra(rd, rs1, rs2)),
        }
    }
}
865
866
/// Resolve parallel moves from IL values to physical destination registers.
///
/// The parallel move problem arises when moving values between registers where
/// there may be dependencies (e.g. moving A0 to A1 and A1 to A0 simultaneously).
///
/// This algorithm:
/// 1. Identifies "ready" moves (those whose destination is not read by any
///    other pending register move).
/// 2. Executes ready moves.
/// 3. Breaks cycles using scratch register.
///
/// Non-register moves (immediates, symbols, spilled loads) also wait until
/// their destination is no longer read, so they never clobber a source that
/// a pending register move still needs.
///
/// Entries with `ZERO` destination are skipped, as they are handled by caller.
///
/// NOTE(review): the cycle breaker writes SCRATCH1; this assumes SCRATCH1 is
/// never one of `dsts` and never holds a live source here — confirm against
/// the register allocator's reserved-register set.
fn emitParallelMoves(s: *mut Selector, dsts: *[super::Reg], args: *[il::Val]) {
    let n: u32 = args.len;
    if n == 0 {
        return;
    }
    if n > MAX_BLOCK_ARGS {
        panic "emitParallelMoves: too many arguments";
    }
    // Source registers for each arg.
    let mut srcRegs: [super::Reg; MAX_BLOCK_ARGS] = [super::ZERO; MAX_BLOCK_ARGS];
    // If this is a register-to-register move.
    let mut isRegMove: [bool; MAX_BLOCK_ARGS] = [false; MAX_BLOCK_ARGS];
    // If this move still needs to be executed.
    let mut pending: [bool; MAX_BLOCK_ARGS] = [false; MAX_BLOCK_ARGS];
    // Number of pending moves.
    let mut numPending: u32 = 0;

    // Classify each entry: register-to-register move, deferred load
    // (immediate/symbol/spilled), or no-op.
    for i in 0..n {
        let dst = dsts[i];
        if dst != super::ZERO { // Skip entries with no destination.
            match args[i] {
                case il::Val::Reg(r) => {
                    if let _ = regalloc::spill::spillSlot(&s.ralloc.spill, r) {
                        // Spilled value needs load, not a register move.
                        pending[i] = true;
                        numPending += 1;
                    } else {
                        let src = getReg(s, r);
                        if src != dst {
                            // Register-to-register move needed.
                            srcRegs[i] = src;
                            isRegMove[i] = true;
                            pending[i] = true;
                            numPending += 1;
                        } else {
                            // No move needed.
                        }
                    }
                },
                case il::Val::Imm(_), il::Val::DataSym(_), il::Val::FnAddr(_) => {
                    pending[i] = true;
                    numPending += 1;
                },
                case il::Val::Undef => {
                    // Undefined values don't need any move.
                }
            }
        } else {
            // Nothing to do.
        }
    }

    // Execute parallel move algorithm. Each outer iteration either retires
    // one pending move or redirects a cycle through SCRATCH1, so the loop
    // terminates.
    while numPending > 0 {
        let mut found = false;

        // Find a ready move: one whose destination is not a source of any
        // pending register move.
        for i in 0..n {
            if pending[i] {
                let dst = dsts[i];
                let mut isReady = true;

                // Check if `dst` is used as source by any other pending register move.
                for j in 0..n {
                    if j != i and pending[j] and isRegMove[j] and srcRegs[j] == dst {
                        isReady = false;
                        break;
                    }
                }
                if isReady {
                    // Execute this move.
                    if isRegMove[i] {
                        emitMv(s, dst, srcRegs[i]);
                    } else {
                        // Load immediate, symbol, or spilled value.
                        loadVal(s, dst, args[i]);
                    }
                    found = true;
                    pending[i] = false;
                    numPending -= 1;

                    break;
                }
            }
        }

        if not found {
            // No ready move, we have a cycle among register moves.
            // Break it by saving one source to scratch; the move writing
            // into that source's register then becomes ready.
            for i in 0..n {
                if pending[i] and isRegMove[i] {
                    let src = srcRegs[i];
                    // Save this source to scratch.
                    emitMv(s, super::SCRATCH1, src);
                    // Update all pending moves that use this source so they
                    // read the saved copy instead of the soon-clobbered register.
                    for j in 0..n {
                        if pending[j] and isRegMove[j] and srcRegs[j] == src {
                            srcRegs[j] = super::SCRATCH1;
                        }
                    }
                    break;
                }
            }
        }
    }
}
984
985
/// Emit moves from block arguments to target block's parameter registers.
///
/// Handles spilled destinations directly, then delegates to [`emitParallelMoves`]
/// for the remaining register-to-register parallel move resolution.
///
/// Spilled destinations are stored first: those stores only read source
/// registers and write memory (plus SCRATCH1), and no parameter register has
/// been clobbered yet at that point, so ordering them before the register
/// shuffle is safe.
///
/// Panics if the argument count does not match the target block's parameter
/// count, or exceeds MAX_BLOCK_ARGS.
fn emitBlockArgs(s: *mut Selector, func: *il::Fn, target: u32, args: *mut [il::Val]) {
    if args.len == 0 {
        return;
    }
    let block = &func.blocks[target];
    if args.len != block.params.len {
        panic "emitBlockArgs: argument/parameter count mismatch";
    }
    if args.len > MAX_BLOCK_ARGS {
        panic "emitBlockArgs: too many block arguments";
    }

    // Destination registers for each arg.
    // Zero means the destination is spilled or skipped.
    let mut dsts: [super::Reg; MAX_BLOCK_ARGS] = [super::ZERO; MAX_BLOCK_ARGS];

    for arg, i in args {
        let param = block.params[i].value;

        // Spilled destinations: store directly to spill slot.
        // These don't participate in the parallel move algorithm.
        if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, param) {
            if let case il::Val::Undef = arg {
                // Undefined values don't need any move.
            } else {
                // Resolve the argument (materializing into SCRATCH1 if
                // needed) and store it to the parameter's FP-relative slot.
                let rs = resolveVal(s, super::SCRATCH1, arg);
                emit::emitSd(s.e, rs, super::FP, spillOffset(s, slot));
            }
        } else {
            dsts[i] = getReg(s, param);
        }
    }
    // Remaining (register) destinations are shuffled as one parallel move;
    // entries left as ZERO in `dsts` are skipped by emitParallelMoves.
    emitParallelMoves(s, &dsts[..], args);
}
1023
1024
/// Select comparison with immediate optimization.
///
/// Emits `slti`/`sltiu` when `b` is a constant fitting a 12-bit signed
/// immediate; otherwise resolves `b` into a register and emits `slt`/`sltu`.
/// `rd` receives 1 if the comparison holds, 0 otherwise.
fn selectCmp(s: *mut Selector, rd: super::Reg, rs1: super::Reg, b: il::Val, op: CmpOp, scratch: super::Reg) {
    if let case il::Val::Imm(imm) = b {
        if encode::isSmallImm64(imm) {
            // Constant operand fits: use the immediate compare.
            match op {
                case CmpOp::Ult => emit::emit(s.e, encode::sltiu(rd, rs1, imm as i32)),
                case CmpOp::Slt => emit::emit(s.e, encode::slti(rd, rs1, imm as i32)),
            }
            return;
        }
    }
    // Constant too wide, or not a constant: compare against a register.
    let other = resolveVal(s, scratch, b);
    match op {
        case CmpOp::Ult => emit::emit(s.e, encode::sltu(rd, rs1, other)),
        case CmpOp::Slt => emit::emit(s.e, encode::slt(rd, rs1, other)),
    }
}
}