lib/std/arch/rv64/emit.rad 24.2 KiB raw
1
//! RV64 binary emission.
2
//!
3
//! Emits RV64 machine code as a list of `u32` instruction words.
4
5
use std::lang::il;
6
use std::lang::alloc;
7
use std::lang::gen;
8
use std::lang::gen::labels;
9
use std::lang::gen::types;
10
use std::collections::dict;
11
use std::mem;
12
13
use super::encode;
14
15
/// Maximum number of instructions in code buffer.
const MAX_INSTRS: u32 = 2097152;
/// Maximum code length before byte offset overflows signed 32-bits.
/// Byte offsets are stored as `i32`, so `codeLen * INSTR_SIZE` must stay below 2^31.
const MAX_CODE_LEN: u32 = 0x7FFFFFFF / super::INSTR_SIZE as u32;
/// Maximum number of pending branches awaiting patching.
/// Shared capacity for the pending branch, call, and address-load tables.
const MAX_PENDING: u32 = 65536;
/// Maximum number of function entries.
const MAX_FUNCS: u32 = 4096;
/// Maximum number of debug entries.
const MAX_DEBUG_ENTRIES: u32 = 524288;
25
26
//////////////////////
27
// Emission Context //
28
//////////////////////
29
30
/// Branch/jump that needs offset patching after all blocks are emitted.
/// Resolved by `patchLocalBranches` once every block offset is known.
pub record PendingBranch {
    /// Index into code buffer where the branch instruction is.
    index: u32,
    /// Target block index.
    target: u32,
    /// Type of branch for re-encoding.
    kind: BranchKind,
}
39
40
/// Type of branch instruction.
/// Conditional kinds reserve two code slots so an out-of-range B-type
/// branch can be rewritten as an inverted branch plus `jal`.
pub union BranchKind {
    /// Conditional branch (B-type encoding).
    Cond { op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg },
    /// Inverted conditional branch (B-type encoding with negated condition).
    InvertedCond { op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg },
    /// Unconditional jump (J-type encoding).
    Jump,
}
49
50
/// Function call that needs offset patching.
/// Resolved by `patchCalls` after all functions have been emitted.
pub record PendingCall {
    /// Index in code buffer where the call was emitted.
    index: u32,
    /// Target function name.
    target: *[u8],
}
57
58
/// Function address load that needs offset patching.
/// Used when taking a function's address as a value.
/// Resolved by `patchAddrLoads` after all functions have been emitted.
pub record PendingAddrLoad {
    /// Index in code buffer where the load was emitted.
    index: u32,
    /// Target function name.
    target: *[u8],
    /// Destination register.
    rd: gen::Reg,
}
68
69
/// Adjusted base register and offset for addressing.
/// Produced by `adjustOffset`; the offset always fits a 12-bit signed
/// immediate after adjustment.
pub record AdjustedOffset {
    /// Base register.
    base: gen::Reg,
    /// Byte offset from register.
    offset: i32,
}
76
77
/// Callee-saved register with its stack offset.
pub record SavedReg {
    /// Register to save/restore.
    reg: gen::Reg,
    /// Offset from SP (post-prologue SP; see `computeFrame`).
    offset: i32,
}
84
85
/// Emission context. Tracks state during code generation.
/// All buffer capacities are fixed at creation time (see `emitter`);
/// each `*Len` counter tracks how much of its buffer is in use.
pub record Emitter {
    /// Emitted instructions storage.
    code: *mut [u32],
    /// Current number of emitted instructions.
    codeLen: u32,
    /// Local branches needing offset patching.
    pendingBranches: *mut [PendingBranch],
    /// Number of pending local branches.
    pendingBranchesLen: u32,
    /// Function calls needing offset patching.
    pendingCalls: *mut [PendingCall],
    /// Number of pending calls.
    pendingCallsLen: u32,
    /// Function address loads needing offset patching.
    pendingAddrLoads: *mut [PendingAddrLoad],
    /// Number of pending address loads.
    pendingAddrLoadsLen: u32,
    /// Block label tracking.
    labels: labels::Labels,
    /// Function start positions for printing.
    funcs: *mut [types::FuncAddr],
    /// Number of recorded functions.
    funcsLen: u32,
    /// Debug entries mapping PCs to source locations.
    /// Empty slice when debug info is disabled.
    debugEntries: *mut [types::DebugEntry],
    /// Number of debug entries recorded.
    debugEntriesLen: u32,
}
114
115
/// Computed stack frame layout for a function.
/// Built by `computeFrame`; consumed by `emitPrologue`/`emitEpilogue`.
pub record Frame {
    /// Total frame size in bytes (aligned).
    /// Zero means "no frame": leaf with no locals and no saved registers.
    totalSize: i32,
    /// Callee-saved registers and their offsets.
    // TODO: Use constant length when language supports it.
    savedRegs: [SavedReg; super::NUM_SAVED_REGISTERS],
    /// Number of saved registers.
    savedRegsLen: u32,
    /// Epilogue block index for return jumps.
    epilogueBlock: u32,
    /// Whether this is a leaf function. Leaf functions
    /// skip saving/restoring RA since it is never clobbered.
    isLeaf: bool,
    /// Whether the function has dynamic stack allocations.
    /// When false, SP never changes after the prologue.
    isDynamic: bool,
}
133
134
/// Compute frame layout from local size and used callee-saved registers.
///
/// `usedCalleeSaved` is a bitmask indexed like `super::CALLEE_SAVED`.
/// Slot layout (offsets from post-prologue SP, highest first): RA at
/// `totalSize - DWORD_SIZE`, FP at `totalSize - 2*DWORD_SIZE` (see
/// `emitPrologue`), then callee-saved registers growing downward.
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32, isLeaf: bool, isDynamic: bool) -> Frame {
    let mut frame = Frame {
        totalSize: 0,
        savedRegs: undefined,
        savedRegsLen: 0,
        epilogueBlock,
        isLeaf,
        isDynamic,
    };
    // Skip frame allocation for leaf functions with no locals and no
    // callee-saved registers. Leaf functions don't call other functions,
    // so RA is never clobbered and doesn't need saving.
    if isLeaf and localSize == 0 and usedCalleeSaved == 0 {
        return frame;
    }
    // Compute total frame size. Includes RA and FP registers.
    let savedRegs = mem::popCount(usedCalleeSaved) + 2;
    let totalSize = mem::alignUpI32(
        localSize + savedRegs * super::DWORD_SIZE,
        super::STACK_ALIGNMENT
    );
    frame.totalSize = totalSize;

    // Build list of callee-saved registers with offsets.
    // RA and FP occupy the two topmost dwords, so the first
    // callee-saved slot starts one dword below FP.
    let mut offset = totalSize - (super::DWORD_SIZE * 3);
    for reg, i in super::CALLEE_SAVED {
        // Check if this register is in use.
        if (usedCalleeSaved & (1 << i)) != 0 {
            frame.savedRegs[frame.savedRegsLen] = SavedReg {
                reg,
                offset,
            };
            frame.savedRegsLen += 1;
            offset -= super::DWORD_SIZE;
        }
    }
    return frame;
}
173
174
/// Create a new emitter.
///
/// Allocates all fixed-capacity buffers from `arena` up front.
/// When `debug` is false, the debug-entry buffer is left as an empty
/// slice so no arena space is spent on it.
///
/// Throws `alloc::AllocError` if any buffer allocation fails.
pub fn emitter(arena: *mut alloc::Arena, debug: bool) -> Emitter throws (alloc::AllocError) {
    let code = try alloc::allocSlice(arena, @sizeOf(u32), @alignOf(u32), MAX_INSTRS);
    let pendingBranches = try alloc::allocSlice(arena, @sizeOf(PendingBranch), @alignOf(PendingBranch), MAX_PENDING);
    let pendingCalls = try alloc::allocSlice(arena, @sizeOf(PendingCall), @alignOf(PendingCall), MAX_PENDING);
    let pendingAddrLoads = try alloc::allocSlice(arena, @sizeOf(PendingAddrLoad), @alignOf(PendingAddrLoad), MAX_PENDING);
    let blockOffsets = try alloc::allocSlice(arena, @sizeOf(i32), @alignOf(i32), labels::MAX_BLOCKS_PER_FN);
    let funcEntries = try alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), labels::FUNC_TABLE_SIZE);
    let funcs = try alloc::allocSlice(arena, @sizeOf(types::FuncAddr), @alignOf(types::FuncAddr), MAX_FUNCS);

    // Debug entries are only allocated on demand; the empty-slice default
    // makes `debugEntries.len == 0` the "debug disabled" signal.
    let mut debugEntries: *mut [types::DebugEntry] = &mut [];
    if debug {
        debugEntries = try alloc::allocSlice(
            arena, @sizeOf(types::DebugEntry), @alignOf(types::DebugEntry), MAX_DEBUG_ENTRIES
        ) as *mut [types::DebugEntry];
    }
    return Emitter {
        code: code as *mut [u32],
        codeLen: 0,
        pendingBranches: pendingBranches as *mut [PendingBranch],
        pendingBranchesLen: 0,
        pendingCalls: pendingCalls as *mut [PendingCall],
        pendingCallsLen: 0,
        pendingAddrLoads: pendingAddrLoads as *mut [PendingAddrLoad],
        pendingAddrLoadsLen: 0,
        labels: labels::init(blockOffsets as *mut [i32], funcEntries as *mut [dict::Entry]),
        funcs: funcs as *mut [types::FuncAddr],
        funcsLen: 0,
        debugEntries,
        debugEntriesLen: 0,
    };
}
206
207
///////////////////////
208
// Emission Helpers  //
209
///////////////////////
210
211
/// Emit a single instruction, appending it to the code buffer.
/// Panics via assert if the buffer is already full.
pub fn emit(e: *mut Emitter, instr: u32) {
    assert e.codeLen < e.code.len, "emit: code buffer full";
    let slot = e.codeLen;
    e.codeLen = slot + 1;
    e.code[slot] = instr;
}
217
218
/// Compute the byte offset of a branch from instruction `srcIndex`
/// to the function named `name`.
pub fn branchOffsetToFunc(e: *Emitter, srcIndex: u32, name: *[u8]) -> i32 {
    let byteOffset = labels::branchToFunc(&e.labels, srcIndex, name, super::INSTR_SIZE);
    return byteOffset;
}
222
223
/// Patch an instruction at a given index.
///
/// The index must refer to a slot that was already emitted; writing past
/// `codeLen` would silently corrupt unemitted buffer space. The assert
/// mirrors the capacity check in `emit`.
pub fn patch(e: *mut Emitter, index: u32, instr: u32) {
    assert index < e.codeLen, "patch: index out of range";
    e.code[index] = instr;
}
227
228
/// Record a block's address for branch resolution.
/// Stores the current code position (in bytes) under `blockIdx`.
pub fn recordBlock(e: *mut Emitter, blockIdx: u32) {
    // Guard against the byte offset overflowing a signed 32-bit value.
    assert e.codeLen <= MAX_CODE_LEN;
    let byteOff = e.codeLen as i32 * super::INSTR_SIZE;
    labels::recordBlock(&mut e.labels, blockIdx, byteOff);
}
233
234
/// Record a function's code offset for call resolution.
/// Stores the current code position (in bytes) under `name`.
pub fn recordFuncOffset(e: *mut Emitter, name: *[u8]) {
    // Guard against the byte offset overflowing a signed 32-bit value.
    assert e.codeLen <= MAX_CODE_LEN;
    let byteOff = e.codeLen as i32 * super::INSTR_SIZE;
    dict::insert(&mut e.labels.funcs, name, byteOff);
}
239
240
/// Record a function's start position for printing.
pub fn recordFunc(e: *mut Emitter, name: *[u8]) {
    assert e.funcsLen < e.funcs.len, "recordFunc: funcs buffer full";
    let slot = e.funcsLen;
    e.funcsLen = slot + 1;
    e.funcs[slot] = types::FuncAddr { name, index: e.codeLen };
}
246
247
/// Record a local branch needing later patching.
/// Unconditional jumps use a single slot (J-type, +-1MB range).
/// Conditional branches use two slots (B-type has only +-4KB range,
/// so large functions may need the inverted-branch + JAL fallback).
pub fn recordBranch(e: *mut Emitter, targetBlock: u32, kind: BranchKind) {
    assert e.pendingBranchesLen < e.pendingBranches.len, "recordBranch: buffer full";
    let entry = PendingBranch {
        index: e.codeLen,
        target: targetBlock,
        kind,
    };
    e.pendingBranches[e.pendingBranchesLen] = entry;
    e.pendingBranchesLen += 1;

    // Every branch kind reserves at least one placeholder slot.
    emit(e, encode::nop());

    // Conditional kinds reserve a second slot for the long-range fallback.
    match kind {
        case BranchKind::Jump => {},
        else => emit(e, encode::nop()),
    }
}
267
268
/// Record a function call needing later patching.
/// Emits placeholder instructions that will be patched later.
/// Uses two slots to support long-distance calls.
pub fn recordCall(e: *mut Emitter, target: *[u8]) {
    assert e.pendingCallsLen < e.pendingCalls.len, "recordCall: buffer full";
    let entry = PendingCall {
        index: e.codeLen,
        target,
    };
    e.pendingCalls[e.pendingCallsLen] = entry;
    e.pendingCallsLen += 1;

    // Reserve two slots: AUIPC then JALR (patched in `patchCalls`).
    emit(e, encode::nop());
    emit(e, encode::nop());
}
282
283
/// Record a function address load needing later patching.
/// Emits placeholder instructions that will be patched to load the function's address.
/// Uses two slots to compute long-distance addresses.
pub fn recordAddrLoad(e: *mut Emitter, target: *[u8], rd: gen::Reg) {
    assert e.pendingAddrLoadsLen < e.pendingAddrLoads.len, "recordAddrLoad: buffer full";
    let entry = PendingAddrLoad {
        index: e.codeLen,
        target,
        rd,
    };
    e.pendingAddrLoads[e.pendingAddrLoadsLen] = entry;
    e.pendingAddrLoadsLen += 1;

    // Reserve two slots: AUIPC then ADDI (patched in `patchAddrLoads`).
    emit(e, encode::nop());
    emit(e, encode::nop());
}
298
299
/// Patch local branches and clear the pending list.
///
/// Called after each function.
///
/// Uses two-instruction sequences: short branches use `branch` and `nop`,
/// long branches use an inverted branch followed by `jal`.
pub fn patchLocalBranches(e: *mut Emitter) {
    for i in 0..e.pendingBranchesLen {
        let p = e.pendingBranches[i];
        let offset = labels::branchToBlock(&e.labels, p.index, p.target, super::INSTR_SIZE);
        match p.kind {
            case BranchKind::Cond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    // Short form: B-type branch plus a padding nop.
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long form: invert the condition so a not-taken branch
                    // skips two instructions (past the JAL), and let the JAL
                    // cover the +-1MB range. The JAL sits one slot later
                    // than the recorded index, so its offset shrinks by one
                    // instruction.
                    let adj = offset - super::INSTR_SIZE;
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::InvertedCond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long form mirrors Cond, but un-inverts the condition
                    // since the skip-over branch negates it again.
                    let adj = offset - super::INSTR_SIZE;
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::Jump => {
                // Single-slot jump (J-type, +-1MB range).
                assert encode::isJumpImm(offset), "patchLocalBranches: jump offset too large";
                patch(e, p.index, encode::jal(super::ZERO, offset));
            },
        }
    }
    e.pendingBranchesLen = 0;
}
339
340
/// Encode a conditional branch instruction.
/// Maps each IL comparison directly onto its B-type RISC-V opcode.
fn encodeCondBranch(op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Eq => return encode::beq(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::bne(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::blt(rs1, rs2, offset),
        case il::CmpOp::Ult => return encode::bltu(rs1, rs2, offset),
    }
}
349
350
/// Encode an inverted conditional branch instruction.
/// Each comparison maps to the opcode for its logical negation
/// (Eq -> bne, Ne -> beq, Slt -> bge, Ult -> bgeu).
fn encodeInvertedBranch(op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Eq => return encode::bne(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::beq(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::bge(rs1, rs2, offset),
        case il::CmpOp::Ult => return encode::bgeu(rs1, rs2, offset),
    }
}
359
360
/// Patch all pending function calls.
/// Called after all functions have been generated.
/// Each call becomes an `AUIPC` + `JALR` pair, giving PC-relative
/// reach across the whole code buffer.
pub fn patchCalls(e: *mut Emitter) {
    for j in 0..e.pendingCallsLen {
        let call = e.pendingCalls[j];
        let offset = branchOffsetToFunc(e, call.index, call.target);
        let parts = splitImm(offset);

        // `AUIPC scratch, hi(offset)` followed by
        // `JALR ra, scratch, lo(offset)`.
        patch(e, call.index, encode::auipc(super::SCRATCH1, parts.hi));
        patch(e, call.index + 1, encode::jalr(super::RA, super::SCRATCH1, parts.lo));
    }
}
374
375
/// Patch all pending function address loads.
/// Called after all functions have been generated.
/// Uses PC-relative addresses up to 2GB away.
pub fn patchAddrLoads(e: *mut Emitter) {
    for j in 0..e.pendingAddrLoadsLen {
        let load = e.pendingAddrLoads[j];
        let offset = branchOffsetToFunc(e, load.index, load.target);
        let parts = splitImm(offset);

        // `AUIPC rd, hi(offset)` followed by `ADDI rd, rd, lo(offset)`.
        patch(e, load.index, encode::auipc(load.rd, parts.hi));
        patch(e, load.index + 1, encode::addi(load.rd, load.rd, parts.lo));
    }
}
390
391
/////////////////////////
392
// Immediate Handling  //
393
/////////////////////////
394
395
/// Split immediate into `hi` and `lo` bits.
/// Invariant: `(hi << 12) + lo` reconstructs the original immediate,
/// accounting for `lo` being sign-extended by the consuming instruction.
pub record SplitImm {
    /// Upper 20 bits.
    hi: i32,
    /// Lower 12 bits (sign-extended when its bit 11 is set).
    lo: i32,
}
402
403
/// Split a 32-bit immediate for `AUIPC, ADDI` / `JALR` sequences.
/// Handles sign extension: if *lo* is negative, increment *hi*.
pub fn splitImm(imm: i32) -> SplitImm {
    // Low 12 bits feed ADDI/JALR; upper 20 bits feed LUI/AUIPC.
    let lo = imm & 0xFFF;
    let mut hi = (imm >> 12) & 0xFFFFF;
    // If `lo`'s sign bit is set, it will be sign-extended to negative.
    // Compensate by incrementing `hi` and returning `lo` already
    // sign-extended, so (hi << 12) + lo == imm.
    if (lo & 0x800) != 0 {
        hi += 1;
        return SplitImm { hi, lo: lo | 0xFFFFF000 as i32 };
    }
    return SplitImm { hi, lo };
}
416
417
/// Adjust a large offset by loading *hi* bits into [`super::ADDR_SCRATCH`].
/// Returns adjusted base register and remaining offset.
///
/// When the offset fits a 12-bit signed immediate, returns it unchanged.
/// Otherwise uses [`super::ADDR_SCRATCH`] for the LUI+ADD decomposition.
/// NOTE: clobbers ADDR_SCRATCH in that case; callers must not hold a
/// live value there across this call.
fn adjustOffset(e: *mut Emitter, base: gen::Reg, offset: i32) -> AdjustedOffset {
    if offset >= super::MIN_IMM and offset <= super::MAX_IMM {
        return AdjustedOffset { base, offset };
    }
    // ADDR_SCRATCH = (hi << 12) + base; the remaining lo offset is
    // guaranteed to fit the instruction's 12-bit immediate field.
    let s = splitImm(offset);
    emit(e, encode::lui(super::ADDR_SCRATCH, s.hi));
    emit(e, encode::add(super::ADDR_SCRATCH, super::ADDR_SCRATCH, base));

    return AdjustedOffset { base: super::ADDR_SCRATCH, offset: s.lo };
}
432
433
/// Load an immediate value into a register.
/// Handles the full range of 64-bit immediates.
/// For values fitting in 12 bits, uses a single `ADDI`.
/// For values fitting in 32 bits, uses `LUI` + `ADDIW`.
/// For wider values, loads upper and lower halves then combines with shift and add.
/// Uses only `rd`; never clobbers scratch registers.
pub fn loadImm(e: *mut Emitter, rd: gen::Reg, imm: i64) {
    let immMin = super::MIN_IMM as i64;
    let immMax = super::MAX_IMM as i64;

    // 12-bit path: ADDI rd, zero, imm.
    if imm >= immMin and imm <= immMax {
        emit(e, encode::addi(rd, super::ZERO, imm as i32));
        return;
    }
    // Check if the value fits in 32 bits (sign-extended).
    let lo32 = imm as i32;
    if lo32 as i64 == imm {
        let s = splitImm(lo32);
        emit(e, encode::lui(rd, s.hi));
        if s.lo != 0 {
            emit(e, encode::addiw(rd, rd, s.lo));
        }
        return;
    }
    // Full 64-bit immediate: use only rd, no scratch registers.
    // Load upper 32 bits first via the 32-bit path (LUI+ADDIW),
    // then shift and add lower bits in 11-bit groups to avoid
    // sign-extension issues with ADDI's 12-bit signed immediate.
    let hi32 = (imm >> 32) as i32;
    let lower = imm as i32;

    // Load upper 32 bits (recursive call takes the 32-bit path above).
    loadImm(e, rd, hi32 as i64);
    // Shift left by 11, add bits [31:21]. 11-bit chunks keep the ADDI
    // immediate non-negative, so no carry compensation is needed.
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 21) & 0x7FF));
    // Shift left by 11, add bits [20:10].
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 10) & 0x7FF));
    // Shift left by 10, add bits [9:0]. (11 + 11 + 10 = 32 bits total.)
    emit(e, encode::slli(rd, rd, 10));
    emit(e, encode::addi(rd, rd, lower & 0x3FF));
}
475
476
/// Emit add-immediate, handling large immediates.
/// Small values use a single `ADDI`; larger ones are materialized
/// into SCRATCH1 first and added register-to-register.
pub fn emitAddImm(e: *mut Emitter, rd: gen::Reg, rs: gen::Reg, imm: i32) {
    if imm >= super::MIN_IMM and imm <= super::MAX_IMM {
        emit(e, encode::addi(rd, rs, imm));
        return;
    }
    // Immediate exceeds ADDI's 12-bit signed range.
    loadImm(e, super::SCRATCH1, imm as i64);
    emit(e, encode::add(rd, rs, super::SCRATCH1));
}
485
486
////////////////////////
487
// Load/Store Helpers //
488
////////////////////////
489
490
/// Emit unsigned load with automatic offset adjustment.
/// Selects the zero-extending load matching the IL width.
pub fn emitLoad(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32, typ: il::Type) {
    // Fold out-of-range offsets into ADDR_SCRATCH first.
    let a = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::lbu(rd, a.base, a.offset)),
        case il::Type::W16 => emit(e, encode::lhu(rd, a.base, a.offset)),
        case il::Type::W32 => emit(e, encode::lwu(rd, a.base, a.offset)),
        case il::Type::W64 => emit(e, encode::ld(rd, a.base, a.offset)),
    }
}
500
501
/// Emit signed load with automatic offset adjustment.
/// Selects the sign-extending load matching the IL width.
pub fn emitSload(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32, typ: il::Type) {
    // Fold out-of-range offsets into ADDR_SCRATCH first.
    let a = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::lb(rd, a.base, a.offset)),
        case il::Type::W16 => emit(e, encode::lh(rd, a.base, a.offset)),
        case il::Type::W32 => emit(e, encode::lw(rd, a.base, a.offset)),
        case il::Type::W64 => emit(e, encode::ld(rd, a.base, a.offset)),
    }
}
511
512
/// Emit store with automatic offset adjustment.
/// Selects the store matching the IL width.
pub fn emitStore(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32, typ: il::Type) {
    // Fold out-of-range offsets into ADDR_SCRATCH first.
    let a = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::sb(rs, a.base, a.offset)),
        case il::Type::W16 => emit(e, encode::sh(rs, a.base, a.offset)),
        case il::Type::W32 => emit(e, encode::sw(rs, a.base, a.offset)),
        case il::Type::W64 => emit(e, encode::sd(rs, a.base, a.offset)),
    }
}
522
523
/// Emit 64-bit load with automatic offset adjustment.
pub fn emitLd(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::ld(rd, a.base, a.offset));
}
528
529
/// Emit 64-bit store with automatic offset adjustment.
pub fn emitSd(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::sd(rs, a.base, a.offset));
}
534
535
/// Emit 32-bit load with automatic offset adjustment.
pub fn emitLw(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::lw(rd, a.base, a.offset));
}
540
541
/// Emit 32-bit store with automatic offset adjustment.
pub fn emitSw(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::sw(rs, a.base, a.offset));
}
546
547
/// Emit 8-bit load with automatic offset adjustment.
pub fn emitLb(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::lb(rd, a.base, a.offset));
}
552
553
/// Emit 8-bit store with automatic offset adjustment.
pub fn emitSb(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::sb(rs, a.base, a.offset));
}
558
559
//////////////////////////
560
// Prologue / Epilogue  //
561
//////////////////////////
562
563
/// Emit function prologue.
/// Allocates stack frame, saves RA/FP, saves callee-saved registers.
/// All save offsets are relative to the already-decremented SP, matching
/// the layout produced by `computeFrame`.
pub fn emitPrologue(e: *mut Emitter, frame: *Frame) {
    // Fast path: leaf function with no locals.
    if frame.totalSize == 0 {
        return;
    }
    let totalSize = frame.totalSize;

    // Allocate stack frame.
    let negFrame = 0 - totalSize;
    if negFrame >= super::MIN_IMM {
        emit(e, encode::addi(super::SP, super::SP, negFrame));
    } else {
        // Frame too large for ADDI's immediate: materialize the size
        // into SCRATCH1 and subtract.
        loadImm(e, super::SCRATCH1, totalSize as i64);
        emit(e, encode::sub(super::SP, super::SP, super::SCRATCH1));
    }
    // Save return address. Leaf functions never clobber RA, so they skip it.
    if not frame.isLeaf {
        emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
    }
    // Save frame pointer.
    emitSd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);

    // Set up frame pointer, only needed when dynamic allocs may move SP.
    // FP then points at the pre-prologue SP (frame top).
    if frame.isDynamic {
        emitAddImm(e, super::FP, super::SP, totalSize);
    }
    // Save callee-saved registers.
    for i in 0..frame.savedRegsLen {
        let sr = frame.savedRegs[i];
        emitSd(e, sr.reg, super::SP, sr.offset);
    }
}
597
598
/// Emit a return: jump to epilogue, or emit `ret` directly for leaf functions.
pub fn emitReturn(e: *mut Emitter, frame: *Frame) {
    // Functions with a frame funnel every return through the shared
    // epilogue block, which tears the frame down exactly once.
    if frame.totalSize != 0 {
        recordBranch(e, frame.epilogueBlock, BranchKind::Jump);
        return;
    }
    // Frameless leaf: nothing to tear down, return directly.
    emit(e, encode::ret());
}
607
608
/// Emit function epilogue.
/// Restores callee-saved registers, `RA/FP`, deallocates frame, returns.
/// Mirrors `emitPrologue` in reverse order.
pub fn emitEpilogue(e: *mut Emitter, frame: *Frame) {
    // Record epilogue block address for return jumps.
    recordBlock(e, frame.epilogueBlock);

    // Fast path: leaf function with no locals.
    if frame.totalSize == 0 {
        emit(e, encode::ret());
        return;
    }
    let totalSize = frame.totalSize;

    // Restore SP to post-prologue value. Only needed when dynamic stack
    // allocation may have moved SP. FP points at the frame top, so
    // SP = FP - totalSize.
    if frame.isDynamic {
        emitAddImm(e, super::SP, super::FP, 0 - totalSize);
    }
    // Restore callee-saved registers.
    for i in 0..frame.savedRegsLen {
        let sr = frame.savedRegs[i];
        emitLd(e, sr.reg, super::SP, sr.offset);
    }
    // Restore frame pointer.
    emitLd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);
    // Restore return address. Skipped for leaves, matching the prologue.
    if not frame.isLeaf {
        emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
    }
    // Deallocate stack frame.
    emitAddImm(e, super::SP, super::SP, totalSize);
    emit(e, encode::ret());
}
641
642
//////////////////
643
// Code Access  //
644
//////////////////
645
646
/// Get emitted code as a slice covering only the instructions
/// emitted so far.
pub fn getCode(e: *Emitter) -> *[u32] {
    let n = e.codeLen;
    return &e.code[..n];
}
650
651
/// Get function addresses for printing, covering only the
/// functions recorded so far.
pub fn getFuncs(e: *Emitter) -> *[types::FuncAddr] {
    let n = e.funcsLen;
    return &e.funcs[..n];
}
655
656
/// Record a debug entry mapping the current PC to a source location.
/// Deduplicates consecutive entries with the same location.
/// No-op when the emitter was created without debug info.
pub fn recordSrcLoc(e: *mut Emitter, loc: il::SrcLoc) {
    // When debug info is disabled, `debugEntries` is the empty slice
    // (see `emitter`); without this guard the capacity assert below
    // would fire on the very first call.
    if e.debugEntries.len == 0 {
        return;
    }
    let pc = e.codeLen * super::INSTR_SIZE as u32;

    // Skip if this is the same location as the previous entry.
    if e.debugEntriesLen > 0 {
        let prev = &e.debugEntries[e.debugEntriesLen - 1];
        if prev.offset == loc.offset and prev.moduleId == loc.moduleId {
            return;
        }
    }
    assert e.debugEntriesLen < e.debugEntries.len, "recordSrcLoc: debug entry buffer full";
    e.debugEntries[e.debugEntriesLen] = types::DebugEntry {
        pc,
        moduleId: loc.moduleId,
        offset: loc.offset,
    };
    e.debugEntriesLen += 1;
}
676
677
/// Get debug entries as a slice covering only the entries
/// recorded so far. Empty when debug info is disabled.
pub fn getDebugEntries(e: *Emitter) -> *[types::DebugEntry] {
    let n = e.debugEntriesLen;
    return &e.debugEntries[..n];
}