lib/std/arch/rv64/emit.rad 24.4 KiB raw
1
//! RV64 binary emission.
2
//!
3
//! Emits RV64 machine code as `u32` list.
4
5
use std::lang::il;
6
use std::lang::alloc;
7
use std::lang::gen::labels;
8
use std::collections::dict;
9
use std::mem;
10
11
use super::encode;
12
13
/// Maximum number of instructions in code buffer.
const MAX_INSTRS: u32 = 2097152;
/// Maximum code length before byte offset overflows 32-bits.
/// Computed as `0x7FFFFFFF` / [`super::INSTR_SIZE`].
/// NOTE(review): 0x3FFFFFFE is 0x7FFFFFFF / 2; if `INSTR_SIZE` is 4
/// (code is stored as `u32` words), the limit should be 0x1FFFFFFF —
/// confirm against `super::INSTR_SIZE`.
const MAX_CODE_LEN: u32 = 0x3FFFFFFE;
/// Maximum number of pending branches awaiting patching.
const MAX_PENDING: u32 = 65536;
/// Maximum number of function entries.
const MAX_FUNCS: u32 = 4096;
/// Maximum number of debug entries.
const MAX_DEBUG_ENTRIES: u32 = 524288;
24
25
//////////////////////
26
// Emission Context //
27
//////////////////////
28
29
/// Branch/jump that needs offset patching after all blocks are emitted.
/// Created by [`recordBranch`]; resolved by [`patchLocalBranches`].
pub record PendingBranch {
    /// Index into code buffer where the branch instruction is.
    index: u32,
    /// Target block index.
    target: u32,
    /// Type of branch for re-encoding.
    kind: BranchKind,
}

/// Type of branch instruction.
pub union BranchKind {
    /// Conditional branch (B-type encoding).
    Cond { op: il::CmpOp, rs1: super::Reg, rs2: super::Reg },
    /// Inverted conditional branch (B-type encoding with negated condition).
    InvertedCond { op: il::CmpOp, rs1: super::Reg, rs2: super::Reg },
    /// Unconditional jump (J-type encoding).
    Jump,
}

/// Function call that needs offset patching.
/// Created by [`recordCall`]; resolved by [`patchCalls`].
pub record PendingCall {
    /// Index in code buffer where the call was emitted.
    index: u32,
    /// Target function name.
    target: *[u8],
}

/// Function address load that needs offset patching.
/// Used when taking a function's address as a value.
/// Created by [`recordAddrLoad`]; resolved by [`patchAddrLoads`].
pub record PendingAddrLoad {
    /// Index in code buffer where the load was emitted.
    index: u32,
    /// Target function name.
    target: *[u8],
    /// Destination register.
    rd: super::Reg,
}

/// Function address entry for printing.
pub record FuncAddr {
    /// Function name.
    name: *[u8],
    /// Instruction index where this function starts.
    index: u32,
}

/// Debug entry mapping an instruction to a source location.
pub record DebugEntry {
    /// Byte offset of the instruction from the start of the program.
    pc: u32,
    /// Module identifier.
    moduleId: u16,
    /// Byte offset into the module's source file.
    offset: u32,
}

/// Adjusted base register and offset for addressing.
/// Produced by `adjustOffset` so the offset fits a 12-bit immediate.
pub record AdjustedOffset {
    /// Base register.
    base: super::Reg,
    /// Byte offset from register.
    offset: i32,
}

/// Callee-saved register with its stack offset.
pub record SavedReg {
    /// Register to save/restore.
    reg: super::Reg,
    /// Offset from SP.
    offset: i32,
}
101
102
/// Emission context. Tracks state during code generation.
/// Create with [`emitter`]; all buffers are arena-allocated up front
/// to their fixed maximum sizes.
pub record Emitter {
    /// Emitted instructions storage.
    code: *mut [u32],
    /// Current number of emitted instructions.
    codeLen: u32,
    /// Local branches needing offset patching.
    pendingBranches: *mut [PendingBranch],
    /// Number of pending local branches.
    pendingBranchesLen: u32,
    /// Function calls needing offset patching.
    pendingCalls: *mut [PendingCall],
    /// Number of pending calls.
    pendingCallsLen: u32,
    /// Function address loads needing offset patching.
    pendingAddrLoads: *mut [PendingAddrLoad],
    /// Number of pending address loads.
    pendingAddrLoadsLen: u32,
    /// Block label tracking.
    labels: labels::Labels,
    /// Function start positions for printing.
    funcs: *mut [FuncAddr],
    /// Number of recorded functions.
    funcsLen: u32,
    /// Debug entries mapping PCs to source locations.
    /// Zero-length when the emitter was created with `debug == false`.
    debugEntries: *mut [DebugEntry],
    /// Number of debug entries recorded.
    debugEntriesLen: u32,
}
131
132
/// Computed stack frame layout for a function.
/// Built by [`computeFrame`]; consumed by [`emitPrologue`] /
/// [`emitEpilogue`] / [`emitReturn`].
pub record Frame {
    /// Total frame size in bytes (aligned).
    totalSize: i32,
    /// Callee-saved registers and their offsets.
    // TODO: Use constant length when language supports it.
    savedRegs: [SavedReg; 11],
    /// Number of saved registers.
    savedRegsLen: u32,
    /// Epilogue block index for return jumps.
    epilogueBlock: u32,
}
144
145
/// Compute frame layout from local size and used callee-saved registers.
///
/// `usedCalleeSaved` is a bitmask indexed like [`super::CALLEE_SAVED`]:
/// bit `i` set means the i-th callee-saved register is in use.
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32) -> Frame {
    let mut frame = Frame {
        totalSize: 0,
        savedRegs: undefined,
        savedRegsLen: 0,
        epilogueBlock,
    };
    // TODO: Skip frame allocation for leaf functions with no locals.
    // Compute total frame size. Includes RA and FP registers
    // (saved by the prologue in the two topmost slots).
    let savedRegs = mem::popCount(usedCalleeSaved) + 2;
    let totalSize = mem::alignUpI32(
        localSize + savedRegs * super::DWORD_SIZE,
        super::STACK_ALIGNMENT
    );
    frame.totalSize = totalSize;

    // Build list of callee-saved registers with offsets.
    // First slot sits just below the RA/FP slots at
    // `totalSize - DWORD_SIZE` and `totalSize - DWORD_SIZE * 2`.
    let mut offset = totalSize - (super::DWORD_SIZE * 3);
    for reg, i in super::CALLEE_SAVED {
        // Check if this register is in use.
        if (usedCalleeSaved & (1 << i)) != 0 {
            frame.savedRegs[frame.savedRegsLen] = SavedReg {
                reg,
                offset,
            };
            frame.savedRegsLen += 1;
            offset -= super::DWORD_SIZE;
        }
    }
    return frame;
}
177
178
/// Create a new emitter.
///
/// All buffers are allocated from `arena` at their maximum fixed sizes.
/// When `debug` is false, the debug-entry buffer is left as an empty
/// slice and no debug allocation is made.
///
/// Throws [`alloc::AllocError`] when the arena cannot satisfy an
/// allocation.
pub fn emitter(arena: *mut alloc::Arena, debug: bool) -> Emitter throws (alloc::AllocError) {
    let code = try alloc::allocSlice(arena, @sizeOf(u32), @alignOf(u32), MAX_INSTRS);
    let pendingBranches = try alloc::allocSlice(arena, @sizeOf(PendingBranch), @alignOf(PendingBranch), MAX_PENDING);
    let pendingCalls = try alloc::allocSlice(arena, @sizeOf(PendingCall), @alignOf(PendingCall), MAX_PENDING);
    let pendingAddrLoads = try alloc::allocSlice(arena, @sizeOf(PendingAddrLoad), @alignOf(PendingAddrLoad), MAX_PENDING);
    let blockOffsets = try alloc::allocSlice(arena, @sizeOf(i32), @alignOf(i32), labels::MAX_BLOCKS_PER_FN);
    let funcEntries = try alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), labels::FUNC_TABLE_SIZE);
    let funcs = try alloc::allocSlice(arena, @sizeOf(FuncAddr), @alignOf(FuncAddr), MAX_FUNCS);

    // Debug entries are only allocated when debug info is requested.
    let mut debugEntries: *mut [DebugEntry] = &mut [];
    if debug {
        debugEntries = try alloc::allocSlice(
            arena, @sizeOf(DebugEntry), @alignOf(DebugEntry), MAX_DEBUG_ENTRIES
        ) as *mut [DebugEntry];
    }
    return Emitter {
        code: code as *mut [u32],
        codeLen: 0,
        pendingBranches: pendingBranches as *mut [PendingBranch],
        pendingBranchesLen: 0,
        pendingCalls: pendingCalls as *mut [PendingCall],
        pendingCallsLen: 0,
        pendingAddrLoads: pendingAddrLoads as *mut [PendingAddrLoad],
        pendingAddrLoadsLen: 0,
        labels: labels::init(blockOffsets as *mut [i32], funcEntries as *mut [dict::Entry]),
        funcs: funcs as *mut [FuncAddr],
        funcsLen: 0,
        debugEntries,
        debugEntriesLen: 0,
    };
}
210
211
///////////////////////
212
// Emission Helpers  //
213
///////////////////////
214
215
/// Emit a single instruction.
///
/// Appends `instr` to the code buffer and advances `codeLen`.
/// Panics when the fixed-size code buffer is full.
pub fn emit(e: *mut Emitter, instr: u32) {
    if e.codeLen >= e.code.len {
        panic "emit: code buffer full";
    }
    e.code[e.codeLen] = instr;
    e.codeLen += 1;
}
223
224
/// Compute branch offset to a function by name.
///
/// Returns the byte offset from instruction `srcIndex` to the recorded
/// start of function `name` (see [`recordFuncOffset`]).
pub fn branchOffsetToFunc(e: *Emitter, srcIndex: u32, name: *[u8]) -> i32 {
    return labels::branchToFunc(&e.labels, srcIndex, name, super::INSTR_SIZE);
}
228
229
/// Patch an instruction at a given index.
///
/// Overwrites a previously emitted (usually placeholder) instruction.
pub fn patch(e: *mut Emitter, index: u32, instr: u32) {
    e.code[index] = instr;
}
233
234
/// Record a block's address for branch resolution.
///
/// Stores the current code position (as a byte offset) under `blockIdx`
/// so pending branches to this block can be patched later.
pub fn recordBlock(e: *mut Emitter, blockIdx: u32) {
    // Guard the i32 multiply below against overflow.
    assert e.codeLen <= MAX_CODE_LEN;
    labels::recordBlock(&mut e.labels, blockIdx, e.codeLen as i32 * super::INSTR_SIZE);
}
239
240
/// Record a function's code offset for call resolution.
///
/// Stores the current code position (as a byte offset) under `name`
/// so pending calls/address loads can be patched later.
pub fn recordFuncOffset(e: *mut Emitter, name: *[u8]) {
    // Guard the i32 multiply below against overflow.
    assert e.codeLen <= MAX_CODE_LEN;
    dict::insert(&mut e.labels.funcs, name, e.codeLen as i32 * super::INSTR_SIZE);
}
245
246
/// Record a function's start position for printing.
///
/// Unlike [`recordFuncOffset`], this stores the instruction index (not a
/// byte offset) and is used only for human-readable output.
/// Panics when the funcs buffer is full.
pub fn recordFunc(e: *mut Emitter, name: *[u8]) {
    if e.funcsLen >= e.funcs.len {
        panic "recordFunc: funcs buffer full";
    }
    e.funcs[e.funcsLen] = FuncAddr { name, index: e.codeLen };
    e.funcsLen += 1;
}
254
255
/// Record a local branch needing later patching.
/// Emits two placeholder instructions that will be patched later.
/// Two slots are reserved so [`patchLocalBranches`] can choose between
/// short (branch + nop) and long (branch + jal / auipc + jalr) forms.
/// Panics when the pending-branch buffer is full.
pub fn recordBranch(e: *mut Emitter, targetBlock: u32, kind: BranchKind) {
    if e.pendingBranchesLen >= e.pendingBranches.len {
        panic "recordBranch: buffer full";
    }
    e.pendingBranches[e.pendingBranchesLen] = PendingBranch {
        index: e.codeLen,
        target: targetBlock,
        kind: kind,
    };
    e.pendingBranchesLen += 1;

    emit(e, encode::nop()); // Placeholder for branch/auipc.
    emit(e, encode::nop()); // Placeholder for nop/jal/jalr.
}
271
272
/// Record a function call needing later patching.
/// Emits placeholder instructions that will be patched later.
/// Uses two slots to support long-distance calls
/// (`AUIPC` + `JALR`, see [`patchCalls`]).
/// Panics when the pending-call buffer is full.
pub fn recordCall(e: *mut Emitter, target: *[u8]) {
    if e.pendingCallsLen >= e.pendingCalls.len {
        panic "recordCall: buffer full";
    }
    e.pendingCalls[e.pendingCallsLen] = PendingCall {
        index: e.codeLen,
        target,
    };
    e.pendingCallsLen += 1;

    emit(e, encode::nop()); // Placeholder for AUIPC.
    emit(e, encode::nop()); // Placeholder for JALR.
}
288
289
/// Record a function address load needing later patching.
/// Emits placeholder instructions that will be patched to load the function's address.
/// Uses two slots to compute long-distance addresses
/// (`AUIPC` + `ADDI`, see [`patchAddrLoads`]).
/// Panics when the pending-address-load buffer is full.
pub fn recordAddrLoad(e: *mut Emitter, target: *[u8], rd: super::Reg) {
    if e.pendingAddrLoadsLen >= e.pendingAddrLoads.len {
        panic "recordAddrLoad: buffer full";
    }
    e.pendingAddrLoads[e.pendingAddrLoadsLen] = PendingAddrLoad {
        index: e.codeLen,
        target,
        rd: rd,
    };
    e.pendingAddrLoadsLen += 1;

    emit(e, encode::nop()); // Placeholder for AUIPC.
    emit(e, encode::nop()); // Placeholder for ADDI.
}
306
307
/// Patch local branches and clear the pending list.
///
/// Called after each function.
///
/// Uses two-instruction sequences: short branches use `branch` and `nop`,
/// long branches use inverted branch and `jal` or `auipc` and `jalr`.
pub fn patchLocalBranches(e: *mut Emitter) {
    for i in 0..e.pendingBranchesLen {
        let p = e.pendingBranches[i];
        // Byte offset from the first placeholder slot to the target block.
        let offset = labels::branchToBlock(&e.labels, p.index, p.target, super::INSTR_SIZE);
        match p.kind {
            case BranchKind::Cond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    // Short: direct branch.
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long: inverted branch skips `jal`; `jal` goes to target.
                    // `jal` sits one instruction later, so adjust its offset.
                    let adj = offset - super::INSTR_SIZE;
                    if not encode::isJumpImm(adj) {
                        panic "patchLocalBranches: branch offset too large";
                    }
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::InvertedCond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    // Short: direct inverted branch.
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long: non-inverted branch skips `jal`, jal goes to target.
                    let adj = offset - super::INSTR_SIZE;
                    if not encode::isJumpImm(adj) {
                        panic "patchLocalBranches: branch offset too large";
                    }
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::Jump => {
                if encode::isJumpImm(offset) {
                    patch(e, p.index, encode::jal(super::ZERO, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Beyond JAL range: PC-relative AUIPC + JALR pair.
                    let s = splitImm(offset);
                    patch(e, p.index, encode::auipc(super::SCRATCH1, s.hi));
                    patch(e, p.index + 1, encode::jalr(super::ZERO, super::SCRATCH1, s.lo));
                }
            },
        }
    }
    e.pendingBranchesLen = 0;
}
362
363
/// Encode a conditional branch instruction.
///
/// Maps an IL comparison to the corresponding RV64 B-type instruction:
/// Eq → BEQ, Ne → BNE, Slt → BLT, Ult → BLTU.
fn encodeCondBranch(op: il::CmpOp, rs1: super::Reg, rs2: super::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Eq => return encode::beq(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::bne(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::blt(rs1, rs2, offset),
        case il::CmpOp::Ult => return encode::bltu(rs1, rs2, offset),
    }
}
372
373
/// Encode an inverted conditional branch instruction.
///
/// Maps an IL comparison to the RV64 B-type instruction for its
/// negation: Eq → BNE, Ne → BEQ, Slt → BGE, Ult → BGEU.
fn encodeInvertedBranch(op: il::CmpOp, rs1: super::Reg, rs2: super::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Eq => return encode::bne(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::beq(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::bge(rs1, rs2, offset),
        case il::CmpOp::Ult => return encode::bgeu(rs1, rs2, offset),
    }
}
382
383
/// Patch all pending function calls.
/// Called after all functions have been generated.
/// Each call becomes a PC-relative `AUIPC` + `JALR` pair
/// (range ±2GB), with the return address written to RA.
pub fn patchCalls(e: *mut Emitter) {
    for i in 0..e.pendingCallsLen {
        let p = e.pendingCalls[i];
        let offset = branchOffsetToFunc(e, p.index, p.target);
        let s = splitImm(offset);

        // `AUIPC scratch, hi(offset)`.
        patch(e, p.index, encode::auipc(super::SCRATCH1, s.hi));
        // `JALR ra, scratch, lo(offset)`.
        patch(e, p.index + 1, encode::jalr(super::RA, super::SCRATCH1, s.lo));
    }
}
397
398
/// Patch all pending function address loads.
/// Called after all functions have been generated.
/// Uses PC-relative addresses up to 2GB away.
/// Each load becomes `AUIPC rd` + `ADDI rd`, materializing the
/// function's absolute address in `rd`.
pub fn patchAddrLoads(e: *mut Emitter) {
    for i in 0..e.pendingAddrLoadsLen {
        let p = e.pendingAddrLoads[i];
        let offset = branchOffsetToFunc(e, p.index, p.target);
        let s = splitImm(offset);

        // `AUIPC rd, hi(offset)`.
        patch(e, p.index, encode::auipc(p.rd, s.hi));
        // `ADDI rd, rd, lo(offset)`.
        patch(e, p.index + 1, encode::addi(p.rd, p.rd, s.lo));
    }
}
413
414
/////////////////////////
415
// Immediate Handling  //
416
/////////////////////////
417
418
/// Split immediate into `hi` and `lo` bits.
/// Produced by [`splitImm`]; `hi + lo` reconstructs the original value
/// once `hi` is shifted into the upper 20 bits (LUI/AUIPC) and `lo` is
/// applied as a sign-extended 12-bit immediate (ADDI/JALR).
pub record SplitImm {
    /// Upper 20 bits.
    hi: i32,
    /// Lower 12 bits.
    lo: i32,
}
425
426
/// Split a 32-bit immediate for `AUIPC, ADDI` / `JALR` sequences.
/// Handles sign extension: if *lo* is negative, increment *hi*.
pub fn splitImm(imm: i32) -> SplitImm {
    let lo = imm & 0xFFF;
    let mut hi = (imm >> 12) & 0xFFFFF;
    // If `lo`'s sign bit is set, it will be sign-extended to negative.
    // Compensate by incrementing `hi` (which adds 0x1000), and return
    // `lo` already sign-extended so `hi << 12` + `lo` == `imm`.
    if (lo & 0x800) != 0 {
        hi += 1;
        return SplitImm { hi, lo: lo | 0xFFFFF000 as i32 };
    }
    return SplitImm { hi, lo };
}
439
440
/// Adjust a large offset by loading *hi* bits into [`super::ADDR_SCRATCH`].
/// Returns adjusted base register and remaining offset.
///
/// When the offset fits a 12-bit signed immediate, returns it unchanged.
/// Otherwise uses [`super::ADDR_SCRATCH`] for the LUI+ADD decomposition.
fn adjustOffset(e: *mut Emitter, base: super::Reg, offset: i32) -> AdjustedOffset {
    if offset >= super::MIN_IMM and offset <= super::MAX_IMM {
        return AdjustedOffset { base, offset };
    }
    // ADDR_SCRATCH = hi(offset) + base; remaining lo fits in 12 bits.
    let s = splitImm(offset);
    emit(e, encode::lui(super::ADDR_SCRATCH, s.hi));
    emit(e, encode::add(super::ADDR_SCRATCH, super::ADDR_SCRATCH, base));

    return AdjustedOffset { base: super::ADDR_SCRATCH, offset: s.lo };
}
455
456
/// Load an immediate value into a register.
/// Handles the full range of 64-bit immediates.
/// For values fitting in 12 bits, uses a single `ADDI`.
/// For values fitting in 32 bits, uses `LUI` + `ADDIW`.
/// For wider values, loads upper and lower halves then combines with shift and add.
pub fn loadImm(e: *mut Emitter, rd: super::Reg, imm: i64) {
    let immMin = super::MIN_IMM as i64;
    let immMax = super::MAX_IMM as i64;

    // 12-bit case: `ADDI rd, zero, imm`.
    if imm >= immMin and imm <= immMax {
        emit(e, encode::addi(rd, super::ZERO, imm as i32));
        return;
    }
    // Check if the value fits in 32 bits (sign-extended).
    let lo32 = imm as i32;
    if lo32 as i64 == imm {
        let s = splitImm(lo32);
        emit(e, encode::lui(rd, s.hi));
        // ADDIW is skipped when lo is zero; LUI alone suffices.
        if s.lo != 0 {
            emit(e, encode::addiw(rd, rd, s.lo));
        }
        return;
    }
    // Full 64-bit immediate: use only rd, no scratch registers.
    // Load upper 32 bits first via the 32-bit path (LUI+ADDIW),
    // then shift and add lower bits in 11-bit groups to avoid
    // sign-extension issues with ADDI's 12-bit signed immediate.
    let hi32 = (imm >> 32) as i32;
    let lower = imm as i32;

    // Load upper 32 bits.
    loadImm(e, rd, hi32 as i64);
    // Shift left by 11, add bits [31:21].
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 21) & 0x7FF));
    // Shift left by 11, add bits [20:10].
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 10) & 0x7FF));
    // Shift left by 10, add bits [9:0].
    emit(e, encode::slli(rd, rd, 10));
    emit(e, encode::addi(rd, rd, lower & 0x3FF));
}
498
499
/// Emit add-immediate, handling large immediates.
///
/// Small immediates use a single `ADDI`; larger ones are materialized
/// into [`super::SCRATCH1`] first, then added (so `rs` must not be
/// SCRATCH1 in the large case).
pub fn emitAddImm(e: *mut Emitter, rd: super::Reg, rs: super::Reg, imm: i32) {
    if imm >= super::MIN_IMM and imm <= super::MAX_IMM {
        emit(e, encode::addi(rd, rs, imm));
    } else {
        loadImm(e, super::SCRATCH1, imm as i64);
        emit(e, encode::add(rd, rs, super::SCRATCH1));
    }
}
508
509
////////////////////////
510
// Load/Store Helpers //
511
////////////////////////
512
513
/// Emit unsigned load with automatic offset adjustment.
/// Width is chosen from `typ`: LBU/LHU/LW/LD.
/// Note W32 uses LW (sign-extending); W64 has no unsigned variant.
pub fn emitLoad(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32, typ: il::Type) {
    let adj = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::lbu(rd, adj.base, adj.offset)),
        case il::Type::W16 => emit(e, encode::lhu(rd, adj.base, adj.offset)),
        case il::Type::W32 => emit(e, encode::lw(rd, adj.base, adj.offset)),
        case il::Type::W64 => emit(e, encode::ld(rd, adj.base, adj.offset)),
    }
}

/// Emit signed load with automatic offset adjustment.
/// Width is chosen from `typ`: LB/LH/LW/LD.
pub fn emitSload(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32, typ: il::Type) {
    let adj = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::lb(rd, adj.base, adj.offset)),
        case il::Type::W16 => emit(e, encode::lh(rd, adj.base, adj.offset)),
        case il::Type::W32 => emit(e, encode::lw(rd, adj.base, adj.offset)),
        case il::Type::W64 => emit(e, encode::ld(rd, adj.base, adj.offset)),
    }
}

/// Emit store with automatic offset adjustment.
/// Width is chosen from `typ`: SB/SH/SW/SD.
pub fn emitStore(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32, typ: il::Type) {
    let adj = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::sb(rs, adj.base, adj.offset)),
        case il::Type::W16 => emit(e, encode::sh(rs, adj.base, adj.offset)),
        case il::Type::W32 => emit(e, encode::sw(rs, adj.base, adj.offset)),
        case il::Type::W64 => emit(e, encode::sd(rs, adj.base, adj.offset)),
    }
}
545
546
/// Emit 64-bit load with automatic offset adjustment.
pub fn emitLd(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::ld(rd, adj.base, adj.offset));
}

/// Emit 64-bit store with automatic offset adjustment.
pub fn emitSd(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::sd(rs, adj.base, adj.offset));
}

/// Emit 32-bit load with automatic offset adjustment.
pub fn emitLw(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::lw(rd, adj.base, adj.offset));
}

/// Emit 32-bit store with automatic offset adjustment.
pub fn emitSw(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::sw(rs, adj.base, adj.offset));
}

/// Emit 8-bit load with automatic offset adjustment.
pub fn emitLb(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::lb(rd, adj.base, adj.offset));
}

/// Emit 8-bit store with automatic offset adjustment.
pub fn emitSb(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::sb(rs, adj.base, adj.offset));
}
581
582
//////////////////////////
583
// Prologue / Epilogue  //
584
//////////////////////////
585
586
/// Emit function prologue.
/// Allocates stack frame, saves RA/FP, saves callee-saved registers.
/// Layout (offsets from post-prologue SP):
///   `totalSize - DWORD`     : RA
///   `totalSize - DWORD * 2` : FP
///   below that              : callee-saved regs (see [`computeFrame`]).
/// FP is set to `SP + totalSize` (the caller's SP).
pub fn emitPrologue(e: *mut Emitter, frame: *Frame) {
    // Fast path: leaf function with no locals.
    if frame.totalSize == 0 {
        return;
    }
    let totalSize = frame.totalSize;

    // Allocate stack frame.
    let negFrame = 0 - totalSize;
    if negFrame >= super::MIN_IMM {
        emit(e, encode::addi(super::SP, super::SP, negFrame));
    } else {
        // Frame too large for a 12-bit immediate: subtract via scratch.
        loadImm(e, super::SCRATCH1, totalSize as i64);
        emit(e, encode::sub(super::SP, super::SP, super::SCRATCH1));
    }
    // Save return address and frame pointer.
    emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
    emitSd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);

    // Set up frame pointer.
    emitAddImm(e, super::FP, super::SP, totalSize);

    // Save callee-saved registers.
    for i in 0..frame.savedRegsLen {
        let sr = frame.savedRegs[i];
        emitSd(e, sr.reg, super::SP, sr.offset);
    }
}
616
617
/// Emit a return: jump to epilogue.
/// All returns funnel through the shared epilogue block so the
/// restore/deallocate sequence is emitted exactly once.
pub fn emitReturn(e: *mut Emitter, frame: *Frame) {
    recordBranch(e, frame.epilogueBlock, BranchKind::Jump);
}
621
622
/// Emit function epilogue.
/// Restores callee-saved registers, `RA/FP`, deallocates frame, returns.
/// Mirrors the layout established by [`emitPrologue`].
pub fn emitEpilogue(e: *mut Emitter, frame: *Frame) {
    // Record epilogue block address for return jumps.
    recordBlock(e, frame.epilogueBlock);

    // Fast path: leaf function with no locals.
    if frame.totalSize == 0 {
        emit(e, encode::ret());
        return;
    }
    let totalSize = frame.totalSize;

    // Restore SP to post-prologue value. Required if we performed dynamic
    // stack allocation, as SP may have moved.
    //
    // Since we set FP to `SP + totalSize`, we now set SP to `FP - totalSize`.
    emitAddImm(e, super::SP, super::FP, 0 - totalSize);

    // Restore callee-saved registers.
    for i in 0..frame.savedRegsLen {
        let sr = frame.savedRegs[i];
        emitLd(e, sr.reg, super::SP, sr.offset);
    }
    // Restore frame pointer and return address.
    emitLd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);
    emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);

    // Deallocate stack frame.
    emitAddImm(e, super::SP, super::SP, totalSize);
    emit(e, encode::ret());
}
654
655
//////////////////
656
// Code Access  //
657
//////////////////
658
659
/// Get emitted code as a slice.
/// The slice covers only the instructions emitted so far.
pub fn getCode(e: *Emitter) -> *[u32] {
    return &e.code[..e.codeLen];
}

/// Get function addresses for printing.
pub fn getFuncs(e: *Emitter) -> *[FuncAddr] {
    return &e.funcs[..e.funcsLen];
}
668
669
/// Record a debug entry mapping the current PC to a source location.
/// Deduplicates consecutive entries with the same location.
///
/// NOTE(review): when the emitter was created with `debug == false`,
/// `debugEntries` is a zero-length slice, so the first call here hits the
/// "buffer full" panic — presumably callers only invoke this in debug
/// builds; confirm at call sites.
pub fn recordSrcLoc(e: *mut Emitter, loc: il::SrcLoc) {
    // Byte offset of the next instruction to be emitted.
    let pc = e.codeLen * super::INSTR_SIZE as u32;

    // Skip if this is the same location as the previous entry.
    if e.debugEntriesLen > 0 {
        let prev = &e.debugEntries[e.debugEntriesLen - 1];
        if prev.offset == loc.offset and prev.moduleId == loc.moduleId {
            return;
        }
    }
    if e.debugEntriesLen >= e.debugEntries.len {
        panic "recordSrcLoc: debug entry buffer full";
    }
    e.debugEntries[e.debugEntriesLen] = DebugEntry {
        pc,
        moduleId: loc.moduleId,
        offset: loc.offset,
    };
    e.debugEntriesLen += 1;
}
691
692
/// Get debug entries as a slice.
/// Empty when the emitter was created with `debug == false`.
pub fn getDebugEntries(e: *Emitter) -> *[DebugEntry] {
    return &e.debugEntries[..e.debugEntriesLen];
}