lib/std/arch/rv64/isel.rad 47.4 KiB raw
1
//! RV64 instruction selection.
2
//!
3
//! Walks IL and selects RV64 instructions for each operation.
4
//!
5
//! *Register resolution hierarchy*
6
//!
7
//!   getReg(ssa) -> Reg
8
//!     Primitive physical register lookup. Panics if the register is spilled.
9
//!     Used as a building block by the functions below.
10
//!
11
//!   getSrcReg(ssa, scratch) -> Reg
12
//!     Source register for an [`il::Reg`] operand. Returns the physical register,
13
//!     or loads a spilled value into `scratch`. Used for instruction fields
14
//!     typed as [`il::Reg`] (e.g. base addresses in Load/Store/Blit).
15
//!
16
//!   getDstReg(ssa, scratch) -> Reg
17
//!     Destination register for an instruction result. Returns the physical
18
//!     register, or records a pending spill and returns `scratch`. The pending
19
//!     spill is flushed by [`selectBlock`] after each instruction.
20
//!
21
//!   resolveVal(scratch, val) -> Reg
22
//!     Resolve an [`il::Val`] to whatever register holds it. Delegates to [`getSrcReg`]
23
//!     for register values; materializes immediates and symbols into `scratch`.
24
//!     Used for operands that can be consumed from any register.
25
//!
26
//!   loadVal(rd, val) -> Reg
27
//!     Force an [`il::Val`] into a specific register `rd`. Built on [`resolveVal`] + [`emitMv`].
28
//!     Used when the instruction requires the value in `rd` (e.g. `sub rd, rd, rs2`).
29
30
use std::mem;
31
use std::lang::il;
32
use std::lang::gen;
33
use std::lang::gen::regalloc;
34
use std::lang::gen::labels;
35
use std::lang::gen::data;
36
37
use super::encode;
38
use super::emit;
39
40
///////////////
// Constants //
///////////////

/// Shift amount for byte sign/zero extension.
/// Shifting left then right by 56 isolates and extends the low 8 bits.
const SHIFT_W8: i32 = 64 - 8;
/// Shift amount for halfword sign/zero extension.
const SHIFT_W16: i32 = 64 - 16;
/// Shift amount for word sign/zero extension.
const SHIFT_W32: i32 = 64 - 32;
/// Mask for extracting byte value; fits the 12-bit `andi` immediate, so
/// byte zero-extension needs only one instruction (see [`emitZext`]).
const MASK_W8: i32 = 0xFF;
/// Maximum number of block arguments supported.
/// NOTE(review): not referenced in this chunk — presumably enforced by the
/// block-argument move code elsewhere in the file; confirm.
const MAX_BLOCK_ARGS: u32 = 16;

/// Signed integer range limits.
/// Used by [`isExtendedImm`] to decide whether a 64-bit immediate already
/// equals its sign-extended sub-word form.
const I8_MIN: i64 = -128;
const I8_MAX: i64 = 127;
const I16_MIN: i64 = -32768;
const I16_MAX: i64 = 32767;
const I32_MIN: i64 = -2147483648;
const I32_MAX: i64 = 2147483647;

/// Unsigned integer range limits.
/// Used by [`isExtendedImm`] for the zero-extension check.
const U8_MAX: i64 = 255;
const U16_MAX: i64 = 65535;
const U32_MAX: i64 = 4294967295;
67
68
/// Binary operation.
/// NOTE(review): these selection-local op kinds are distinct from
/// [`il::BinOp`]/[`il::CmpOp`]; they are not referenced in this chunk —
/// presumably used by the ALU helpers further down the file; confirm.
union BinOp { Add, And, Or, Xor }
/// Shift operation.
union ShiftOp { Sll, Srl, Sra }
/// Compare operation.
union CmpOp { Slt, Ult }

/// Selector errors.
pub union SelectorError {
    // Internal invariant violation (e.g. a data symbol missing from the map).
    Internal,
}

/// A pending spill store to be flushed after instruction selection.
/// Created by [`getDstReg`] when an instruction's result register is spilled;
/// committed (stored to the spill slot) by [`selectBlock`] right after the
/// instruction has been selected, so the scratch register is safe to reuse.
record PendingSpill {
    /// The SSA register that was spilled.
    ssa: il::Reg,
    /// The physical register holding the value to store.
    rd: gen::Reg,
}
87
88
////////////////////
// Selector State //
////////////////////

/// Instruction selector state.
pub record Selector {
    /// Emitter for outputting instructions.
    e: *mut emit::Emitter,
    /// Register allocation result.
    ralloc: *regalloc::AllocResult,
    /// Hash-indexed data symbol map.
    dataSymMap: *data::DataSymMap,
    /// Total stack frame size.
    frameSize: i32,
    /// Running offset into the reserve region of the frame.
    /// Tracks current position within the pre-allocated reserve slots;
    /// advanced by each constant-sized `Reserve` in [`selectInstr`] using the
    /// same alignUp/advance walk as [`computeReserveInfo`].
    reserveOffset: i32,
    /// Pending spill store, auto-committed after each instruction.
    pendingSpill: ?PendingSpill,
    /// Next synthetic block index for skip-branch targets (used by `Switch`
    /// lowering to branch over block-argument moves).
    nextSynthBlock: u32,
    /// Whether dynamic allocations exist. When true, spill slots are
    /// addressed via FP instead of SP (SP moves at runtime).
    isDynamic: bool,
}
112
113
/////////////////////////
114
// Register Allocation //
115
/////////////////////////
116
117
/// Get the physical register for an already-allocated SSA register.
/// Panics when `ssa` has no physical assignment (i.e. it was spilled);
/// spilled values must go through [`getSrcReg`]/[`getDstReg`] instead.
fn getReg(s: *Selector, ssa: il::Reg) -> gen::Reg {
    if let phys = s.ralloc.assignments[ssa.n] {
        return phys;
    }
    panic "getReg: spilled register has no physical assignment";
}
124
125
/// Compute the offset for a spill slot.
/// When using FP (dynamic): offset from FP = `slot - totalSize`.
/// When using SP: offset from SP = `slot`.
fn spillOffset(s: *Selector, slot: i32) -> i32 {
    return (slot - s.frameSize) if s.isDynamic else slot;
}
134
135
/// Get the base register for spill slot addressing.
/// Dynamic frames use FP (SP moves at runtime); static frames use SP.
fn spillBase(s: *Selector) -> gen::Reg {
    return super::FP if s.isDynamic else super::SP;
}
142
143
/// Get the destination register for an SSA register.
/// If the register is spilled, records a pending spill and returns the scratch
/// register. The pending spill is auto-committed by [`selectBlock`] after each
/// instruction. If not spilled, returns the physical register.
fn getDstReg(s: *mut Selector, ssa: il::Reg, scratch: gen::Reg) -> gen::Reg {
    // Only spill *presence* matters here; the actual slot is looked up again
    // by selectBlock when it flushes the pending spill.
    if let _ = regalloc::spill::spillSlot(&s.ralloc.spill, ssa) {
        s.pendingSpill = PendingSpill { ssa, rd: scratch };
        return scratch;
    }
    return getReg(s, ssa);
}
154
155
/// Get the source register for an SSA register.
/// Spilled values are reloaded from their stack slot into `scratch`;
/// otherwise the assigned physical register is returned directly.
fn getSrcReg(s: *mut Selector, ssa: il::Reg, scratch: gen::Reg) -> gen::Reg {
    let slot = regalloc::spill::spillSlot(&s.ralloc.spill, ssa) else {
        // Not spilled: the value lives in its assigned physical register.
        return getReg(s, ssa);
    };
    emit::emitLd(s.e, scratch, spillBase(s), spillOffset(s, slot));
    return scratch;
}
165
166
/// Look up a data symbol's address in the data symbol map.
/// Throws [`SelectorError::Internal`] when the symbol is unknown.
fn lookupDataSym(s: *Selector, name: *[u8]) -> u32 throws (SelectorError) {
    if let addr = data::lookupAddr(s.dataSymMap, name) {
        return addr;
    }
    throw SelectorError::Internal;
}
173
174
/// Resolve an IL value to the physical register holding it.
/// For non-spilled register values, returns the physical register directly.
/// For immediates, symbols, and spilled registers, materializes into `scratch`.
fn resolveVal(s: *mut Selector, scratch: gen::Reg, val: il::Val) -> gen::Reg {
    match val {
        case il::Val::Reg(r) => {
            return getSrcReg(s, r, scratch);
        },
        case il::Val::Imm(imm) => {
            // Zero has a dedicated architectural register; no load needed.
            if imm == 0 {
                return super::ZERO;
            }
            emit::loadImm(s.e, scratch, imm);
            return scratch;
        },
        case il::Val::DataSym(name) => {
            // Symbols must have been laid out before selection; an unknown
            // symbol here is a compiler bug, not a user error.
            let addr = try lookupDataSym(s, name) catch {
                panic "resolveVal: data symbol not found";
            };
            emit::loadImm(s.e, scratch, addr as i64);
            return scratch;
        },
        case il::Val::FnAddr(name) => {
            // Function addresses are not known yet; record a pending address
            // load targeting `scratch` to be patched later.
            emit::recordAddrLoad(s.e, name, scratch);
            return scratch;
        },
        case il::Val::Undef => {
            // Undefined value: any register contents are acceptable, so
            // return `scratch` without emitting anything.
            return scratch;
        }
    }
}
205
206
/// Load an IL value into a specific physical register.
/// Resolves `val` using `rd` itself as the scratch target, then moves the
/// result into `rd` when it landed elsewhere (e.g. ZERO or a physical
/// register); [`emitMv`] elides the move when it already lives in `rd`.
fn loadVal(s: *mut Selector, rd: gen::Reg, val: il::Val) -> gen::Reg {
    emitMv(s, rd, resolveVal(s, rd, val));
    return rd;
}
213
214
/// Emit a register move, eliding it when source and destination coincide.
fn emitMv(s: *mut Selector, rd: gen::Reg, rs: gen::Reg) {
    if *rd == *rs {
        return;
    }
    emit::emit(s.e, encode::mv(rd, rs));
}
220
221
/// Emit zero-extension from a sub-word type to the full register width.
/// W8 needs a single `andi` (0xFF fits the 12-bit immediate); W16/W32 use a
/// logical shift-left/shift-right pair; W64 is already full width (no-op).
fn emitZext(e: *mut emit::Emitter, rd: gen::Reg, rs: gen::Reg, typ: il::Type) {
    match typ {
        case il::Type::W8 => emit::emit(e, encode::andi(rd, rs, MASK_W8)),
        case il::Type::W16 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W16));
            emit::emit(e, encode::srli(rd, rd, SHIFT_W16));
        },
        case il::Type::W32 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W32));
            emit::emit(e, encode::srli(rd, rd, SHIFT_W32));
        },
        case il::Type::W64 => {}
    }
}
236
237
/// Emit sign-extension from a sub-word type to the full register width.
/// W8/W16 use a shift-left/arithmetic-shift-right pair; W32 uses the
/// single-instruction `addiw rd, rs, 0` idiom (RV64 `sext.w`); W64 is a no-op.
fn emitSext(e: *mut emit::Emitter, rd: gen::Reg, rs: gen::Reg, typ: il::Type) {
    match typ {
        case il::Type::W8 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W8));
            emit::emit(e, encode::srai(rd, rd, SHIFT_W8));
        },
        case il::Type::W16 => {
            emit::emit(e, encode::slli(rd, rs, SHIFT_W16));
            emit::emit(e, encode::srai(rd, rd, SHIFT_W16));
        },
        case il::Type::W32 => {
            // addiw writes the sign-extended 32-bit sum; adding 0 makes it a
            // pure word sign-extension.
            emit::emit(e, encode::addiw(rd, rs, 0));
        },
        case il::Type::W64 => {}
    }
}
254
255
/// Resolve a value, trap if zero (unless known non-zero), and return the register.
/// NOTE(review): callers are outside this chunk — presumably used to guard
/// division/remainder by zero; confirm at the call sites.
fn resolveAndTrapIfZero(s: *mut Selector, b: il::Val) -> gen::Reg {
    let rs2 = resolveVal(s, super::SCRATCH2, b);
    let mut knownNonZero = false;
    // A constant non-zero immediate never needs the runtime check.
    if let case il::Val::Imm(imm) = b {
        knownNonZero = imm != 0;
    }
    if not knownNonZero {
        // bne +2 instructions: skip the ebreak when the value is non-zero,
        // so zero falls through into the trap.
        emit::emit(s.e, encode::bne(rs2, super::ZERO, super::INSTR_SIZE * 2));
        emit::emit(s.e, encode::ebreak());
    }
    return rs2;
}
268
269
////////////////////////
// Instruction Select //
////////////////////////

/// Pre-scan result for reserve analysis, produced by [`computeReserveInfo`]
/// and consumed by [`selectFn`] when computing the frame layout.
record ReserveInfo {
    /// Total size needed for constant-sized reserves.
    size: i32,
    /// Whether any dynamic-sized reserves exist.
    isDynamic: bool,
}
280
281
/// Pre-scan all blocks for constant-sized reserve instructions.
/// Returns the total size needed for all static reserves, respecting alignment.
/// The alignUp/advance walk here must mirror the one in [`selectInstr`]'s
/// Reserve lowering, which recomputes the same offsets via `reserveOffset`.
fn computeReserveInfo(func: *il::Fn) -> ReserveInfo {
    let mut offset: i32 = 0;
    let mut isDynamic = false;

    for b in 0..func.blocks.len {
        let block = &func.blocks[b];
        for instr in block.instrs {
            match instr {
                case il::Instr::Reserve { size, alignment, .. } => {
                    if let case il::Val::Imm(sz) = size {
                        // Static reserve: claim an aligned slot in the
                        // reserve region.
                        offset = mem::alignUpI32(offset, alignment as i32);
                        offset += sz as i32;
                    } else {
                        // Runtime-sized reserve: lowered to an SP adjustment.
                        isDynamic = true;
                    }
                },
                else => {},
            }
        }
    }
    // NOTE(review): the final size is not rounded up to any alignment here —
    // presumably emit::computeFrame aligns the whole frame; confirm.
    return ReserveInfo { size: offset, isDynamic };
}
305
306
/// Select instructions for a function.
///
/// Emission order: prologue, parameter shuffles, each block in layout order,
/// epilogue, then local branch patching (targets are only known once all
/// blocks have offsets).
pub fn selectFn(
    e: *mut emit::Emitter,
    dataSymMap: *data::DataSymMap,
    ralloc: *regalloc::AllocResult,
    func: *il::Fn
) {
    // Reset block offsets for this function.
    labels::resetBlocks(&mut e.labels);
    // Pre-scan for constant-sized reserves to promote to fixed frame slots.
    let reserveInfo = computeReserveInfo(func);
    let isLeaf = func.isLeaf;
    // Compute frame layout from spill slots, reserve slots, and used callee-saved registers.
    let frame = emit::computeFrame(
        ralloc.spill.frameSize + reserveInfo.size,
        ralloc.usedCalleeSaved,
        func.blocks.len,
        isLeaf,
        reserveInfo.isDynamic
    );
    // Synthetic block indices start after real blocks and the epilogue block.
    let mut s = Selector {
        e, ralloc, dataSymMap, frameSize: frame.totalSize,
        reserveOffset: 0, pendingSpill: nil,
        nextSynthBlock: func.blocks.len + 1,
        isDynamic: frame.isDynamic,
    };
    // Record function name for printing.
    emit::recordFunc(s.e, func.name);
    // Record function code offset for call patching.
    emit::recordFuncOffset(s.e, func.name);
    // Emit prologue.
    emit::emitPrologue(s.e, &frame);

    // Move function params from arg registers to assigned registers.
    // Cross-call params may have been assigned to callee-saved registers
    // instead of their natural arg registers. Spilled params are stored
    // directly to their spill slots.
    // NOTE(review): params at index >= ARG_REGS.len are skipped here —
    // presumably stack-passed arguments are handled elsewhere; confirm.
    for funcParam, i in func.params {
        if i < super::ARG_REGS.len {
            let param = funcParam.value;
            let argReg = super::ARG_REGS[i];

            if let slot = regalloc::spill::spillSlot(&ralloc.spill, param) {
                // Spilled parameter: store arg register to spill slot.
                emit::emitSd(s.e, argReg, spillBase(&s), spillOffset(&s, slot));
            } else if let assigned = ralloc.assignments[param.n] {
                // Allocated parameter: move into its assigned register
                // (emitMv elides the move when it already lives there).
                emitMv(&mut s, assigned, argReg);
            }
        }
    }

    // Emit each block.
    for i in 0..func.blocks.len {
        selectBlock(&mut s, i, &func.blocks[i], &frame, func);
    }
    // Emit epilogue.
    emit::emitEpilogue(s.e, &frame);
    // Patch local branches now that all blocks are emitted.
    emit::patchLocalBranches(s.e);
}
367
368
/// Select instructions for a block.
///
/// Also owns the pending-spill protocol: any spill recorded by [`getDstReg`]
/// during an instruction is flushed to its slot immediately afterwards.
fn selectBlock(s: *mut Selector, blockIdx: u32, block: *il::Block, frame: *emit::Frame, func: *il::Fn) {
    // Record block address for branch patching.
    emit::recordBlock(s.e, blockIdx);

    // Block parameters are handled at jump sites (in `Jmp`/`Br`).
    // By the time we enter the block, the arguments have already been
    // moved to the parameter registers by the predecessor's terminator.

    // Process each instruction, auto-committing any pending spill after each.
    let hasLocs = block.locs.len > 0;
    for instr, i in block.instrs {
        // Record debug location before emitting machine instructions.
        if hasLocs {
            emit::recordSrcLoc(s.e, block.locs[i]);
        }
        // Clear any stale pending spill so the flush below only sees spills
        // recorded by this instruction.
        s.pendingSpill = nil;
        selectInstr(s, blockIdx, instr, frame, func);

        // Flush the pending spill store, if any: the instruction's result is
        // sitting in the recorded scratch register and must hit its slot
        // before the scratch is reused.
        if let p = s.pendingSpill {
            if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, p.ssa) {
                emit::emitSd(s.e, p.rd, spillBase(s), spillOffset(s, slot));
            }
            s.pendingSpill = nil;
        }
    }
}
396
397
/// Select instructions for a single IL instruction.
///
/// Scratch register conventions within this function: SCRATCH1 typically
/// holds destinations/first operands and blit data, SCRATCH2 holds base
/// addresses/second operands and indirect call targets.
fn selectInstr(s: *mut Selector, blockIdx: u32, instr: il::Instr, frame: *emit::Frame, func: *il::Fn) {
    match instr {
        case il::Instr::BinOp { op, typ, dst, a, b } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs1 = resolveVal(s, super::SCRATCH1, a);
            selectAluBinOp(s, op, typ, rd, rs1, b);
        },
        case il::Instr::UnOp { op, typ, dst, a } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, a);
            selectAluUnOp(s, op, typ, rd, rs);
        },
        case il::Instr::Load { typ, dst, src, offset } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let base = getSrcReg(s, src, super::SCRATCH2);
            emit::emitLoad(s.e, rd, base, offset, typ);
        },
        case il::Instr::Sload { typ, dst, src, offset } => {
            // Sign-extending load variant of Load.
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let base = getSrcReg(s, src, super::SCRATCH2);
            emit::emitSload(s.e, rd, base, offset, typ);
        },
        case il::Instr::Store { typ, src, dst, offset } => {
            // Base in SCRATCH2, data in SCRATCH1, so a spilled base and a
            // materialized source cannot collide.
            let base = getSrcReg(s, dst, super::SCRATCH2);
            let rs = resolveVal(s, super::SCRATCH1, src);
            emit::emitStore(s.e, rs, base, offset, typ);
        },
        case il::Instr::Copy { dst, val } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, val);
            emitMv(s, rd, rs);
        },
        case il::Instr::Reserve { dst, size, alignment } => {
            match size {
                case il::Val::Imm(sz) => {
                    // Constant-sized reserve: use pre-allocated frame slot.
                    // The reserve region sits above the spill area; this walk
                    // of reserveOffset mirrors computeReserveInfo exactly.
                    let rd = getDstReg(s, dst, super::SCRATCH1);
                    let aligned: i32 = mem::alignUpI32(s.reserveOffset, alignment as i32);
                    let base = spillBase(s);
                    let offset = s.ralloc.spill.frameSize + aligned
                        - (s.frameSize if s.isDynamic else 0);

                    emit::emitAddImm(s.e, rd, base, offset);
                    s.reserveOffset = aligned + (sz as i32);
                },
                case il::Val::Reg(r) => {
                    // Dynamic-sized reserve: runtime SP adjustment.
                    let rd = getDstReg(s, dst, super::SCRATCH1);
                    let rs = getSrcReg(s, r, super::SCRATCH2);

                    emit::emit(s.e, encode::sub(super::SP, super::SP, rs));

                    if alignment > 1 {
                        // Round SP down to the alignment; the mask must fit
                        // the 12-bit andi immediate.
                        let mask = 0 - alignment as i32;
                        assert encode::isSmallImm(mask);

                        emit::emit(s.e, encode::andi(super::SP, super::SP, mask));
                    }
                    emit::emit(s.e, encode::mv(rd, super::SP));
                },
                else =>
                    panic "selectInstr: invalid reserve operand",
            }
        },
        case il::Instr::Blit { dst, src, size } => {
            let case il::Val::Imm(staticSize) = size
                else panic "selectInstr: blit requires immediate size";

            let bothSpilled = regalloc::spill::isSpilled(&s.ralloc.spill, dst)
                and regalloc::spill::isSpilled(&s.ralloc.spill, src);

            // When both are spilled, offsets must fit 12-bit immediates
            // since we can't advance base registers (they live in spill
            // slots, not real registers we can mutate).
            assert not (bothSpilled and staticSize as i32 > super::MAX_IMM), "selectInstr: blit both-spilled with large size";

            // Resolve dst/src base registers. At most one of dst/src is
            // spilled past this point, so both getSrcReg calls may share
            // SCRATCH2 without colliding.
            let mut rdst = super::SCRATCH2;
            let mut rsrc = super::SCRATCH1;
            let mut srcReload: ?i32 = nil;

            if bothSpilled {
                let dstSlot = regalloc::spill::spillSlot(&s.ralloc.spill, dst) else {
                    panic "selectInstr: blit dst not spilled";
                };
                let srcSlot = regalloc::spill::spillSlot(&s.ralloc.spill, src) else {
                    panic "selectInstr: blit src not spilled";
                };
                // dst pointer stays resident in SCRATCH2; the src pointer is
                // re-loaded into SCRATCH1 before every copy (srcReload).
                emit::emitLd(s.e, super::SCRATCH2, spillBase(s), spillOffset(s, dstSlot));
                srcReload = spillOffset(s, srcSlot);
            } else {
                rdst = getSrcReg(s, dst, super::SCRATCH2);
                rsrc = getSrcReg(s, src, super::SCRATCH2);
            }
            let mut offset: i32 = 0;
            let mut remaining = staticSize as i32;

            // For large blits where both pointers are in real registers,
            // use an inline loop instead of unrolled LD/SD pairs.
            let dwordBytes = remaining & ~(super::DWORD_SIZE - 1);
            let canLoop = not bothSpilled
                and *rsrc != *super::SCRATCH1 and *rsrc != *super::SCRATCH2
                and *rdst != *super::SCRATCH1 and *rdst != *super::SCRATCH2;

            if canLoop and dwordBytes >= super::BLIT_LOOP_THRESHOLD {
                // SCRATCH1 = end-of-source sentinel; SCRATCH2 = data.
                emit::emitAddImm(s.e, super::SCRATCH1, rsrc, dwordBytes);

                let loopStart = s.e.codeLen;

                emit::emitLd(s.e, super::SCRATCH2, rsrc, 0);
                emit::emitSd(s.e, super::SCRATCH2, rdst, 0);
                emit::emit(s.e, encode::addi(rsrc, rsrc, super::DWORD_SIZE));

                if *rdst != *rsrc {
                    emit::emit(s.e, encode::addi(rdst, rdst, super::DWORD_SIZE));
                }
                // codeLen is in instructions, so scale by INSTR_SIZE to get
                // the backward byte offset for the branch.
                let brOff = (loopStart as i32 - s.e.codeLen as i32) * super::INSTR_SIZE;

                emit::emit(s.e, encode::bne(rsrc, super::SCRATCH1, brOff));
                remaining -= dwordBytes;
            }

            // Copy remaining: 8 bytes, then 4 bytes, then 1 byte at a time.
            // Before each load/store pair, check whether the offset is
            // about to exceed the 12-bit signed immediate range. When
            // it does, advance the base registers by the accumulated
            // offset and reset to zero.
            while remaining >= super::DWORD_SIZE {
                if offset > super::MAX_IMM - super::DWORD_SIZE {
                    emit::emitAddImm(s.e, rsrc, rsrc, offset);
                    if *rdst != *rsrc {
                        emit::emitAddImm(s.e, rdst, rdst, offset);
                    }
                    offset = 0;
                }
                if let off = srcReload {
                    // Both-spilled path: re-load the src pointer, then the data.
                    emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off);
                    emit::emitLd(s.e, super::SCRATCH1, super::SCRATCH1, offset);
                } else {
                    emit::emitLd(s.e, super::SCRATCH1, rsrc, offset);
                }
                emit::emitSd(s.e, super::SCRATCH1, rdst, offset);
                offset += super::DWORD_SIZE;
                remaining -= super::DWORD_SIZE;
            }
            if remaining >= super::WORD_SIZE {
                if offset > super::MAX_IMM - super::WORD_SIZE {
                    emit::emitAddImm(s.e, rsrc, rsrc, offset);
                    if *rdst != *rsrc {
                        emit::emitAddImm(s.e, rdst, rdst, offset);
                    }
                    offset = 0;
                }
                if let off = srcReload {
                    emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off);
                    emit::emitLw(s.e, super::SCRATCH1, super::SCRATCH1, offset);
                } else {
                    emit::emitLw(s.e, super::SCRATCH1, rsrc, offset);
                }
                emit::emitSw(s.e, super::SCRATCH1, rdst, offset);
                offset += super::WORD_SIZE;
                remaining -= super::WORD_SIZE;
            }
            while remaining > 0 {
                if offset > super::MAX_IMM - 1 {
                    emit::emitAddImm(s.e, rsrc, rsrc, offset);
                    if *rdst != *rsrc {
                        emit::emitAddImm(s.e, rdst, rdst, offset);
                    }
                    offset = 0;
                }
                if let off = srcReload {
                    emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off);
                    emit::emitLb(s.e, super::SCRATCH1, super::SCRATCH1, offset);
                } else {
                    emit::emitLb(s.e, super::SCRATCH1, rsrc, offset);
                }
                emit::emitSb(s.e, super::SCRATCH1, rdst, offset);
                offset += 1;
                remaining -= 1;
            }
            // Restore base registers if they were advanced (never happens
            // in the both-spilled case since size <= MAX_IMM).
            if not bothSpilled {
                let advanced = staticSize as i32 - offset;
                if advanced != 0 {
                    emit::emitAddImm(s.e, rsrc, rsrc, 0 - advanced);
                    if *rdst != *rsrc {
                        emit::emitAddImm(s.e, rdst, rdst, 0 - advanced);
                    }
                }
            }
        },
        case il::Instr::Zext { typ, dst, val } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, val);
            emitZext(s.e, rd, rs, typ);
        },
        case il::Instr::Sext { typ, dst, val } => {
            let rd = getDstReg(s, dst, super::SCRATCH1);
            let rs = resolveVal(s, super::SCRATCH1, val);
            emitSext(s.e, rd, rs, typ);
        },
        case il::Instr::Ret { val } => {
            if let v = val {
                let rs = resolveVal(s, super::SCRATCH1, v);
                emitMv(s, super::A0, rs);
            }
            // Skip the jump to epilogue if this RET is in the last block,
            // since the epilogue immediately follows.
            if frame.totalSize != 0 and blockIdx + 1 == frame.epilogueBlock {
                // Epilogue is the next block; fallthrough is sufficient.
            } else {
                emit::emitReturn(s.e, frame);
            }
        },
        case il::Instr::Jmp { target, args } => {
            // Move arguments to target block's parameter registers.
            emitBlockArgs(s, func, target, args);
            // Skip branch if target is the next block (fallthrough).
            if target != blockIdx + 1 {
                emit::recordBranch(s.e, target, emit::BranchKind::Jump);
            }
        },
        case il::Instr::Br { op, typ, a, b, thenTarget, thenArgs, elseTarget, elseArgs } => {
            // Use zero register directly for immediate `0` operands.
            let aIsZero = isZeroImm(a);
            let bIsZero = isZeroImm(b);

            let rs1 = super::ZERO if aIsZero else resolveVal(s, super::SCRATCH1, a);
            let rs2 = super::ZERO if bIsZero else resolveVal(s, super::SCRATCH2, b);

            // Normalize sub-word operands so that both registers have the same
            // canonical representation. Without this, eg. `-1 : i8 ` loaded as
            // `0xFFFFFFFFFFFFFFFF` and `255 : i8` loaded as `0xFF` would compare
            // unequal even though they are the same 8-bit pattern.
            //
            // For SLT: sign-extension needed (signed comparison).
            // For ULT: zero-extension needed (unsigned magnitude comparison).
            // For EQ/NE with W32: sign-extension is cheaper.
            // For EQ/NE with W8/W16: keep zero-extension.
            // Skip extension for zero register.
            // Determine extension mode: sign-extend for W32 or SLT,
            // zero-extend otherwise.
            let mut useSext: bool = undefined;
            if let case il::CmpOp::Slt = op {
                useSext = true;
            } else {
                useSext = typ == il::Type::W32;
            }
            if useSext {
                // isExtendedImm skips the extension when the materialized
                // immediate already has the canonical form.
                if not aIsZero and not isExtendedImm(a, typ, true) {
                    emitSext(s.e, rs1, rs1, typ);
                }
                if not bIsZero and not isExtendedImm(b, typ, true) {
                    emitSext(s.e, rs2, rs2, typ);
                }
            } else {
                if not aIsZero and not isExtendedImm(a, typ, false) {
                    emitZext(s.e, rs1, rs1, typ);
                }
                if not bIsZero and not isExtendedImm(b, typ, false) {
                    emitZext(s.e, rs2, rs2, typ);
                }
            }
            // Block-argument moves must only execute on the taken path.
            // When `thenArgs` is non-empty, invert the branch so that the
            // then-moves land on the fall-through (taken) side.
            //
            // When one target is the next block in layout order, we can
            // eliminate the trailing unconditional jump by arranging the
            // conditional branch to skip to the *other* target and letting
            // execution fall through.
            if thenArgs.len > 0 and elseArgs.len > 0 {
                panic "selectInstr: both `then` and `else` have block arguments";
            } else if thenArgs.len > 0 {
                emit::recordBranch(s.e, elseTarget, emit::BranchKind::InvertedCond { op, rs1, rs2 });
                emitBlockArgs(s, func, thenTarget, thenArgs);
                // Skip trailing jump if then is the next block (fallthrough).
                if thenTarget != blockIdx + 1 {
                    emit::recordBranch(s.e, thenTarget, emit::BranchKind::Jump);
                }
            } else if thenTarget == blockIdx + 1 and elseArgs.len == 0 {
                // Then is the next block and no else args: invert the
                // condition to branch to else and fall through to then.
                emit::recordBranch(s.e, elseTarget, emit::BranchKind::InvertedCond { op, rs1, rs2 });
            } else {
                emit::recordBranch(s.e, thenTarget, emit::BranchKind::Cond { op, rs1, rs2 });
                emitBlockArgs(s, func, elseTarget, elseArgs);
                // Skip trailing jump if else is the next block (fallthrough).
                if elseTarget != blockIdx + 1 {
                    emit::recordBranch(s.e, elseTarget, emit::BranchKind::Jump);
                }
            }
        },
        case il::Instr::Switch { val, defaultTarget, defaultArgs, cases } => {
            let rs1 = resolveVal(s, super::SCRATCH1, val);
            // When a case has block args, invert the branch to skip past
            // the arg moves. The synthetic `skip` label lets the not-taken
            // path jump over both the moves and the case's jump.
            for c in cases {
                emit::loadImm(s.e, super::SCRATCH2, c.value);

                if c.args.len > 0 {
                    let skip = s.nextSynthBlock;
                    s.nextSynthBlock = skip + 1;

                    emit::recordBranch(s.e, skip, emit::BranchKind::InvertedCond {
                        op: il::CmpOp::Eq, rs1, rs2: super::SCRATCH2,
                    });
                    emitBlockArgs(s, func, c.target, c.args);
                    emit::recordBranch(s.e, c.target, emit::BranchKind::Jump);
                    emit::recordBlock(s.e, skip);
                } else {
                    emit::recordBranch(s.e, c.target, emit::BranchKind::Cond {
                        op: il::CmpOp::Eq, rs1, rs2: super::SCRATCH2,
                    });
                }
            }
            // Fall through to default.
            emitBlockArgs(s, func, defaultTarget, defaultArgs);
            emit::recordBranch(s.e, defaultTarget, emit::BranchKind::Jump);
        },
        case il::Instr::Unreachable => {
            emit::emit(s.e, encode::ebreak());
        },
        case il::Instr::Call { retTy, dst, func, args } => {
            // For indirect calls, save target to scratch register before arg
            // setup can clobber it.
            // NOTE(review): assumes emitParallelMoves never uses SCRATCH2 as
            // its cycle-breaking temporary — confirm in its definition.
            if let case il::Val::Reg(r) = func {
                let target = getSrcReg(s, r, super::SCRATCH2);
                emitMv(s, super::SCRATCH2, target);
            }
            // Move arguments to A0-A7 using parallel move resolution.
            assert args.len <= super::ARG_REGS.len, "selectInstr: too many call arguments";
            emitParallelMoves(s, &super::ARG_REGS[..], args);

            // Emit call.
            match func {
                case il::Val::FnAddr(name) => {
                    emit::recordCall(s.e, name);
                },
                case il::Val::Reg(_) => {
                    emit::emit(s.e, encode::jalr(super::RA, super::SCRATCH2, 0));
                },
                else => {
                    panic "selectInstr: invalid call target";
                }
            }
            // Move result from A0.
            if let d = dst {
                let rd = getDstReg(s, d, super::SCRATCH1);
                emitMv(s, rd, super::A0);
            }
        },
        case il::Instr::Ecall { dst, num, a0, a1, a2, a3 } => {
            // Move arguments using parallel move.
            // TODO: Can't use slice literals here because the lowerer doesn't
            // support const-evaluating struct/union values in them.
            let ecallDsts: [gen::Reg; 5] = [super::A7, super::A0, super::A1, super::A2, super::A3];
            let ecallArgs: [il::Val; 5] = [num, a0, a1, a2, a3];

            emitParallelMoves(s, &ecallDsts[..], &ecallArgs[..]);
            emit::emit(s.e, encode::ecall());

            // Result in A0.
            let ecallRd = getDstReg(s, dst, super::SCRATCH1);
            emitMv(s, ecallRd, super::A0);
        },
        case il::Instr::Ebreak => {
            emit::emit(s.e, encode::ebreak());
        },
    }
}
771
772
/// Check if a value is an immediate that's already correctly extended.
/// `loadImm` produces the exact 64-bit value; this checks whether that value
/// already matches what sign/zero-extension to the given type would produce.
fn isExtendedImm(val: il::Val, typ: il::Type, signed: bool) -> bool {
    if let case il::Val::Imm(imm) = val {
        // A W64 immediate is trivially its own extension. For narrower
        // types the 64-bit value must already lie within the type's range:
        // the signed range for sign-extension, the unsigned range (and
        // non-negative) for zero-extension.
        match typ {
            case il::Type::W8 => {
                if signed {
                    return imm >= I8_MIN and imm <= I8_MAX;
                }
                return imm >= 0 and imm <= U8_MAX;
            },
            case il::Type::W16 => {
                if signed {
                    return imm >= I16_MIN and imm <= I16_MAX;
                }
                return imm >= 0 and imm <= U16_MAX;
            },
            case il::Type::W32 => {
                if signed {
                    return imm >= I32_MIN and imm <= I32_MAX;
                }
                return imm >= 0 and imm <= U32_MAX;
            },
            case il::Type::W64 => return true,
        }
    }
    // Non-immediate values are never known-extended.
    return false;
}
799
800
/// Check if a value is an immediate zero.
fn isZeroImm(val: il::Val) -> bool {
    match val {
        case il::Val::Imm(imm) => return imm == 0,
        else => return false,
    }
}
807
808
/// Select a binary ALU operation, dispatching to the appropriate
/// instruction pattern based on the operation kind and type.
///
/// W32 operations use the RV64 `*w` instruction forms (`addw`, `subw`,
/// `mulw`, ...) so results stay sign-extended in the register, consistent
/// with the rest of this selector.
fn selectAluBinOp(s: *mut Selector, op: il::BinOp, typ: il::Type, rd: gen::Reg, rs1: gen::Reg, b: il::Val) {
    match op {
        case il::BinOp::Add => {
            if typ == il::Type::W32 {
                // Inline W32 ADD with immediate optimization.
                if let case il::Val::Imm(imm) = b {
                    if encode::isSmallImm64(imm) {
                        emit::emit(s.e, encode::addiw(rd, rs1, imm as i32));
                        return;
                    }
                }
                let rs2 = resolveVal(s, super::SCRATCH2, b);
                emit::emit(s.e, encode::addw(rd, rs1, rs2));
            } else {
                // W64/W8/W16 share the generic ADD/ADDI path.
                selectBinOp(s, rd, rs1, b, BinOp::Add, super::SCRATCH2);
            }
        }
        case il::BinOp::Sub => {
            // Optimize subtraction by small immediate: use ADDI with negated value.
            if let case il::Val::Imm(imm) = b {
                let neg = -imm;
                if neg >= super::MIN_IMM as i64 and neg <= super::MAX_IMM as i64 {
                    emit::emit(s.e,
                        encode::addiw(rd, rs1, neg as i32)
                            if typ == il::Type::W32 else
                        encode::addi(rd, rs1, neg as i32));
                    return;
                }
            }
            let rs2 = resolveVal(s, super::SCRATCH2, b);

            emit::emit(s.e,
                encode::subw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::sub(rd, rs1, rs2));
        }
        case il::BinOp::Mul => {
            // Strength-reduce multiplication by known constants.
            // Power-of-two cases use SLLIW for W32 so the result stays
            // sign-extended exactly as MULW would leave it; a plain SLLI
            // could carry into bit 31 and leave a non-canonical W32 value.
            if let case il::Val::Imm(imm) = b {
                if imm == 0 {
                    emit::emit(s.e, encode::mv(rd, super::ZERO));
                    return;
                } else if imm == 1 {
                    emitMv(s, rd, rs1);
                    return;
                } else if imm == 2 {
                    emit::emit(s.e,
                        encode::slliw(rd, rs1, 1)
                            if typ == il::Type::W32 else
                        encode::slli(rd, rs1, 1));
                    return;
                } else if imm == 4 {
                    emit::emit(s.e,
                        encode::slliw(rd, rs1, 2)
                            if typ == il::Type::W32 else
                        encode::slli(rd, rs1, 2));
                    return;
                } else if imm == 8 {
                    emit::emit(s.e,
                        encode::slliw(rd, rs1, 3)
                            if typ == il::Type::W32 else
                        encode::slli(rd, rs1, 3));
                    return;
                }
            }
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            emit::emit(s.e,
                encode::mulw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::mul(rd, rs1, rs2));
        }
        case il::BinOp::Sdiv => {
            // Division/remainder: guard against a zero divisor before emitting.
            let rs2 = resolveAndTrapIfZero(s, b);
            emit::emit(s.e,
                encode::divw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::div(rd, rs1, rs2));
        }
        case il::BinOp::Udiv => {
            let rs2 = resolveAndTrapIfZero(s, b);
            emit::emit(s.e,
                encode::divuw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::divu(rd, rs1, rs2));
        }
        case il::BinOp::Srem => {
            let rs2 = resolveAndTrapIfZero(s, b);
            emit::emit(s.e,
                encode::remw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::rem(rd, rs1, rs2));
        }
        case il::BinOp::Urem => {
            let rs2 = resolveAndTrapIfZero(s, b);
            emit::emit(s.e,
                encode::remuw(rd, rs1, rs2)
                    if typ == il::Type::W32 else
                encode::remu(rd, rs1, rs2));
        }
        case il::BinOp::And =>
            selectBinOp(s, rd, rs1, b, BinOp::And, super::SCRATCH2),
        case il::BinOp::Or =>
            selectBinOp(s, rd, rs1, b, BinOp::Or, super::SCRATCH2),
        case il::BinOp::Xor =>
            selectBinOp(s, rd, rs1, b, BinOp::Xor, super::SCRATCH2),
        case il::BinOp::Shl =>
            selectShift(s, rd, rs1, b, ShiftOp::Sll, typ, super::SCRATCH2),
        case il::BinOp::Sshr =>
            selectShift(s, rd, rs1, b, ShiftOp::Sra, typ, super::SCRATCH2),
        case il::BinOp::Ushr =>
            selectShift(s, rd, rs1, b, ShiftOp::Srl, typ, super::SCRATCH2),
        case il::BinOp::Eq, il::BinOp::Ne => {
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            // Canonicalize both operands to the declared width so the XOR
            // below compares only the meaningful bits. Immediates that
            // `loadImm` already produced in extended form are skipped.
            // NOTE(review): the extensions rewrite rs1/rs2 in place; this
            // assumes re-extending an already-canonical value is a no-op
            // value-wise — confirm against the register invariants.
            if typ == il::Type::W32 {
                emitSext(s.e, rs1, rs1, typ);
                if not isExtendedImm(b, typ, true) {
                    emitSext(s.e, rs2, rs2, typ);
                }
            } else {
                emitZext(s.e, rs1, rs1, typ);
                if not isExtendedImm(b, typ, false) {
                    emitZext(s.e, rs2, rs2, typ);
                }
            }
            // rd = rs1 ^ rs2; Eq: rd = (rd < 1), Ne: rd = (0 < rd).
            emit::emit(s.e, encode::xor(rd, rs1, rs2));
            if let case il::BinOp::Eq = op {
                emit::emit(s.e, encode::sltiu(rd, rd, 1));
            } else {
                emit::emit(s.e, encode::sltu(rd, super::ZERO, rd));
            }
        }
        case il::BinOp::Slt =>
            selectCmp(s, rd, rs1, b, CmpOp::Slt, super::SCRATCH2),
        case il::BinOp::Ult =>
            selectCmp(s, rd, rs1, b, CmpOp::Ult, super::SCRATCH2),
        case il::BinOp::Sge, il::BinOp::Uge => {
            // x >= y  ==  !(x < y): compare, then flip the low bit.
            let rs2 = resolveVal(s, super::SCRATCH2, b);
            if let case il::BinOp::Sge = op {
                emit::emit(s.e, encode::slt(rd, rs1, rs2));
            } else {
                emit::emit(s.e, encode::sltu(rd, rs1, rs2));
            }
            emit::emit(s.e, encode::xori(rd, rd, 1));
        }
    }
}
948
949
/// Select a unary ALU operation.
fn selectAluUnOp(s: *mut Selector, op: il::UnOp, typ: il::Type, rd: gen::Reg, rs: gen::Reg) {
    match op {
        case il::UnOp::Not =>
            emit::emit(s.e, encode::not_(rd, rs)),
        case il::UnOp::Neg =>
            // W32 negation uses SUBW from zero so the result stays
            // sign-extended; other widths use the NEG pseudo-instruction.
            emit::emit(s.e,
                encode::subw(rd, super::ZERO, rs)
                    if typ == il::Type::W32 else
                encode::neg(rd, rs)),
    }
}
963
964
/// Select binary operation with immediate optimization.
fn selectBinOp(s: *mut Selector, rd: gen::Reg, rs1: gen::Reg, b: il::Val, op: BinOp, scratch: gen::Reg) {
    // Immediate form: these ops all have an I-type encoding that takes
    // a 12-bit immediate directly.
    if let case il::Val::Imm(imm) = b {
        if encode::isSmallImm64(imm) {
            let i12 = imm as i32;
            match op {
                case BinOp::Xor => emit::emit(s.e, encode::xori(rd, rs1, i12)),
                case BinOp::Or  => emit::emit(s.e, encode::ori(rd, rs1, i12)),
                case BinOp::And => emit::emit(s.e, encode::andi(rd, rs1, i12)),
                case BinOp::Add => emit::emit(s.e, encode::addi(rd, rs1, i12)),
            }
            return;
        }
    }
    // General case: materialize the operand into `scratch` (or use its
    // existing register) and emit the R-type form.
    let rhs = resolveVal(s, scratch, b);
    match op {
        case BinOp::Xor => emit::emit(s.e, encode::xor(rd, rs1, rhs)),
        case BinOp::Or  => emit::emit(s.e, encode::or_(rd, rs1, rhs)),
        case BinOp::And => emit::emit(s.e, encode::and_(rd, rs1, rhs)),
        case BinOp::Add => emit::emit(s.e, encode::add(rd, rs1, rhs)),
    }
}
988
989
/// Select shift operation with immediate optimization.
/// For 32-bit operations, uses the `*w` variants that operate on the lower 32 bits
/// and sign-extend the result.
fn selectShift(s: *mut Selector, rd: gen::Reg, rs1: gen::Reg, b: il::Val, op: ShiftOp, typ: il::Type, scratch: gen::Reg) {
    let w32: bool = typ == il::Type::W32;

    // Immediate shift amounts that fit the encoding (0..32 for W32,
    // 0..64 otherwise) get the I-type forms. Anything else — negative or
    // oversized — falls through to a register shift, which masks the
    // count in hardware.
    if let case il::Val::Imm(shamt) = b {
        if shamt >= 0 and ((w32 and shamt < 32) or (not w32 and shamt < 64)) {
            let amt = shamt as i32;
            match op {
                case ShiftOp::Sll => emit::emit(s.e,
                    encode::slliw(rd, rs1, amt) if w32 else encode::slli(rd, rs1, amt)),
                case ShiftOp::Srl => emit::emit(s.e,
                    encode::srliw(rd, rs1, amt) if w32 else encode::srli(rd, rs1, amt)),
                case ShiftOp::Sra => emit::emit(s.e,
                    encode::sraiw(rd, rs1, amt) if w32 else encode::srai(rd, rs1, amt)),
            }
            return;
        }
    }
    // Register form: materialize the count, then shift.
    let count = resolveVal(s, scratch, b);
    match op {
        case ShiftOp::Sll => emit::emit(s.e,
            encode::sllw(rd, rs1, count) if w32 else encode::sll(rd, rs1, count)),
        case ShiftOp::Srl => emit::emit(s.e,
            encode::srlw(rd, rs1, count) if w32 else encode::srl(rd, rs1, count)),
        case ShiftOp::Sra => emit::emit(s.e,
            encode::sraw(rd, rs1, count) if w32 else encode::sra(rd, rs1, count)),
    }
}
1033
1034
/// Resolve parallel moves from IL values to physical destination registers.
///
/// The parallel move problem arises when moving values between registers where
/// there may be dependencies (e.g. moving A0 to A1 and A1 to A0 simultaneously).
///
/// This algorithm:
/// 1. Identifies "ready" moves.
/// 2. Executes ready moves.
/// 3. Breaks cycles using scratch register.
///
/// Clobbers `SCRATCH1` when breaking a cycle, so callers must not keep a
/// live value there across this call.
///
/// Entries with `ZERO` destination are skipped, as they are handled by caller.
fn emitParallelMoves(s: *mut Selector, dsts: *[gen::Reg], args: *[il::Val]) {
    let n: u32 = args.len;
    if n == 0 {
        return;
    }
    assert n <= MAX_BLOCK_ARGS, "emitParallelMoves: too many arguments";
    // Source registers for each arg.
    let mut srcRegs: [gen::Reg; MAX_BLOCK_ARGS] = [super::ZERO; MAX_BLOCK_ARGS];
    // If this is a register-to-register move.
    let mut isRegMove: [bool; MAX_BLOCK_ARGS] = [false; MAX_BLOCK_ARGS];
    // If this move still needs to be executed.
    let mut pending: [bool; MAX_BLOCK_ARGS] = [false; MAX_BLOCK_ARGS];
    // Number of pending moves.
    let mut numPending: u32 = 0;

    // Classification pass: decide for each arg whether it needs a
    // register move, a load (immediate/symbol/spilled), or nothing.
    for i in 0..n {
        let dst = dsts[i];
        if dst != super::ZERO { // Skip entries with no destination.
            match args[i] {
                case il::Val::Reg(r) => {
                    if let _ = regalloc::spill::spillSlot(&s.ralloc.spill, r) {
                        // Spilled value needs load, not a register move.
                        // Loads can't create register cycles, so they are
                        // left as non-reg moves (isRegMove stays false).
                        pending[i] = true;
                        numPending += 1;
                    } else {
                        let src = getReg(s, r);
                        if src != dst {
                            // Register-to-register move needed.
                            srcRegs[i] = src;
                            isRegMove[i] = true;
                            pending[i] = true;
                            numPending += 1;
                        } else {
                            // No move needed.
                        }
                    }
                },
                case il::Val::Imm(_), il::Val::DataSym(_), il::Val::FnAddr(_) => {
                    // Materialized by loadVal directly into dst below.
                    pending[i] = true;
                    numPending += 1;
                },
                case il::Val::Undef => {
                    // Undefined values don't need any move.
                }
            }
        } else {
            // Nothing to do.
        }
    }

    // Execute parallel move algorithm.
    // Worst case O(n^2) per pass, bounded by MAX_BLOCK_ARGS, so cost is
    // negligible in practice.
    while numPending > 0 {
        let mut found = false;

        // Find a ready move: one whose destination is not a source of any
        // pending register move.
        for i in 0..n {
            if pending[i] {
                let dst = dsts[i];
                let mut isReady = true;

                // Check if `dst` is used as source by any other pending register move.
                for j in 0..n {
                    if j != i and pending[j] and isRegMove[j] and srcRegs[j] == dst {
                        isReady = false;
                        break;
                    }
                }
                if isReady {
                    // Execute this move.
                    if isRegMove[i] {
                        emitMv(s, dst, srcRegs[i]);
                    } else {
                        // Load immediate, symbol, or spilled value.
                        loadVal(s, dst, args[i]);
                    }
                    found = true;
                    pending[i] = false;
                    numPending -= 1;

                    // Restart the scan: executing a move may have made
                    // previously-blocked moves ready.
                    break;
                }
            }
        }

        if not found {
            // No ready move, we have a cycle among register moves.
            // Break it by saving one source to scratch.
            for i in 0..n {
                if pending[i] and isRegMove[i] {
                    let src = srcRegs[i];
                    // Save this source to scratch.
                    emitMv(s, super::SCRATCH1, src);
                    // Update all pending moves that use this source.
                    // `src`'s register is now free to be overwritten, so at
                    // least one move becomes ready on the next iteration.
                    for j in 0..n {
                        if pending[j] and isRegMove[j] and srcRegs[j] == src {
                            srcRegs[j] = super::SCRATCH1;
                        }
                    }
                    break;
                }
            }
        }
    }
}
1150
1151
/// Emit moves from block arguments to target block's parameter registers.
///
/// Handles spilled destinations directly, then delegates to [`emitParallelMoves`]
/// for the remaining register-to-register parallel move resolution.
///
/// `func` is the function containing the blocks; `target` indexes the
/// destination block whose parameters receive `args`.
fn emitBlockArgs(s: *mut Selector, func: *il::Fn, target: u32, args: *mut [il::Val]) {
    if args.len == 0 {
        return;
    }
    let block = &func.blocks[target];
    assert args.len == block.params.len, "emitBlockArgs: argument/parameter count mismatch";
    assert args.len <= MAX_BLOCK_ARGS, "emitBlockArgs: too many block arguments";

    // Destination registers for each arg.
    // Zero means the destination is spilled or skipped.
    let mut dsts: [gen::Reg; MAX_BLOCK_ARGS] = [super::ZERO; MAX_BLOCK_ARGS];

    for arg, i in args {
        let param = block.params[i].value;

        // Spilled destinations: store directly to spill slot.
        // These don't participate in the parallel move algorithm.
        // NOTE(review): these stores use SCRATCH1 and run before the
        // parallel move below — assumes no block argument's source
        // register is SCRATCH1; confirm against the allocator.
        if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, param) {
            if let case il::Val::Undef = arg {
                // Undefined values don't need any move.
            } else {
                let rs = resolveVal(s, super::SCRATCH1, arg);
                emit::emitSd(s.e, rs, spillBase(s), spillOffset(s, slot));
            }
        } else {
            // In-register destination: recorded for the parallel move.
            dsts[i] = getReg(s, param);
        }
    }
    emitParallelMoves(s, &dsts[..], args);
}
1185
1186
/// Select comparison with immediate optimization.
fn selectCmp(s: *mut Selector, rd: gen::Reg, rs1: gen::Reg, b: il::Val, op: CmpOp, scratch: gen::Reg) {
    // SLTI/SLTIU accept a 12-bit immediate directly.
    if let case il::Val::Imm(imm) = b {
        if encode::isSmallImm64(imm) {
            let i12 = imm as i32;
            match op {
                case CmpOp::Ult => emit::emit(s.e, encode::sltiu(rd, rs1, i12)),
                case CmpOp::Slt => emit::emit(s.e, encode::slti(rd, rs1, i12)),
            }
            return;
        }
    }
    // Otherwise materialize the operand and compare register-register.
    let rhs = resolveVal(s, scratch, b);
    match op {
        case CmpOp::Ult => emit::emit(s.e, encode::sltu(rd, rs1, rhs)),
        case CmpOp::Slt => emit::emit(s.e, encode::slt(rd, rs1, rhs)),
    }
}