compiler/
lib/
examples/
std/
arch/
rv64/
decode.rad
14.3 KiB
emit.rad
24.2 KiB
encode.rad
19.1 KiB
isel.rad
47.4 KiB
printer.rad
12.6 KiB
tests.rad
15.7 KiB
rv64.rad
8.8 KiB
collections/
lang/
sys/
arch.rad
65 B
collections.rad
36 B
fmt.rad
3.8 KiB
intrinsics.rad
399 B
io.rad
1.2 KiB
lang.rad
222 B
mem.rad
2.1 KiB
sys.rad
167 B
testing.rad
2.3 KiB
tests.rad
11.6 KiB
vec.rad
3.1 KiB
std.rad
231 B
scripts/
seed/
test/
vim/
.gitignore
353 B
.gitsigners
112 B
LICENSE
1.1 KiB
Makefile
3.0 KiB
README
2.5 KiB
std.lib
1.0 KiB
std.lib.test
252 B
lib/std/arch/rv64/emit.rad
raw
| 1 | //! RV64 binary emission. |
| 2 | //! |
| 3 | //! Emits RV64 machine code as `u32` list. |
| 4 | |
| 5 | use std::lang::il; |
| 6 | use std::lang::alloc; |
| 7 | use std::lang::gen; |
| 8 | use std::lang::gen::labels; |
| 9 | use std::lang::gen::types; |
| 10 | use std::collections::dict; |
| 11 | use std::mem; |
| 12 | |
| 13 | use super::encode; |
| 14 | |
/// Maximum number of instructions in code buffer (2^21 slots).
const MAX_INSTRS: u32 = 2097152;
/// Maximum code length before byte offset overflows signed 32-bits.
/// Byte offsets are computed as `codeLen * INSTR_SIZE` in `i32`.
const MAX_CODE_LEN: u32 = 0x7FFFFFFF / super::INSTR_SIZE as u32;
/// Maximum number of pending branches awaiting patching.
/// Shared capacity bound for pending branches, calls, and address loads.
const MAX_PENDING: u32 = 65536;
/// Maximum number of function entries.
const MAX_FUNCS: u32 = 4096;
/// Maximum number of debug entries.
const MAX_DEBUG_ENTRIES: u32 = 524288;
| 25 | |
| 26 | ////////////////////// |
| 27 | // Emission Context // |
| 28 | ////////////////////// |
| 29 | |
/// Branch/jump that needs offset patching after all blocks are emitted.
pub record PendingBranch {
    /// Index into code buffer where the branch instruction is.
    /// Measured in instruction slots, not bytes.
    index: u32,
    /// Target block index.
    target: u32,
    /// Type of branch for re-encoding.
    kind: BranchKind,
}
| 39 | |
/// Type of branch instruction.
/// Conditional kinds reserve two instruction slots when recorded
/// (see `recordBranch`); `Jump` reserves one.
pub union BranchKind {
    /// Conditional branch (B-type encoding).
    Cond { op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg },
    /// Inverted conditional branch (B-type encoding with negated condition).
    InvertedCond { op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg },
    /// Unconditional jump (J-type encoding).
    Jump,
}
| 49 | |
/// Function call that needs offset patching.
pub record PendingCall {
    /// Index in code buffer where the call was emitted.
    /// Points at the first of two placeholder slots (AUIPC + JALR).
    index: u32,
    /// Target function name.
    target: *[u8],
}
| 57 | |
/// Function address load that needs offset patching.
/// Used when taking a function's address as a value.
pub record PendingAddrLoad {
    /// Index in code buffer where the load was emitted.
    /// Points at the first of two placeholder slots (AUIPC + ADDI).
    index: u32,
    /// Target function name.
    target: *[u8],
    /// Destination register.
    rd: gen::Reg,
}
| 68 | |
/// Adjusted base register and offset for addressing.
/// Produced by `adjustOffset`; the offset always fits a 12-bit
/// signed immediate.
pub record AdjustedOffset {
    /// Base register.
    base: gen::Reg,
    /// Byte offset from register.
    offset: i32,
}
| 76 | |
/// Callee-saved register with its stack offset.
pub record SavedReg {
    /// Register to save/restore.
    reg: gen::Reg,
    /// Offset from SP (post-prologue SP; see `computeFrame`).
    offset: i32,
}
| 84 | |
/// Emission context. Tracks state during code generation.
pub record Emitter {
    /// Emitted instructions storage. Fixed capacity of `MAX_INSTRS`.
    code: *mut [u32],
    /// Current number of emitted instructions.
    codeLen: u32,
    /// Local branches needing offset patching.
    /// Patched and cleared per function by `patchLocalBranches`.
    pendingBranches: *mut [PendingBranch],
    /// Number of pending local branches.
    pendingBranchesLen: u32,
    /// Function calls needing offset patching.
    pendingCalls: *mut [PendingCall],
    /// Number of pending calls.
    pendingCallsLen: u32,
    /// Function address loads needing offset patching.
    pendingAddrLoads: *mut [PendingAddrLoad],
    /// Number of pending address loads.
    pendingAddrLoadsLen: u32,
    /// Block label tracking.
    labels: labels::Labels,
    /// Function start positions for printing.
    funcs: *mut [types::FuncAddr],
    /// Number of recorded functions.
    funcsLen: u32,
    /// Debug entries mapping PCs to source locations.
    /// Empty slice when debug info was disabled at creation.
    debugEntries: *mut [types::DebugEntry],
    /// Number of debug entries recorded.
    debugEntriesLen: u32,
}
| 114 | |
/// Computed stack frame layout for a function.
pub record Frame {
    /// Total frame size in bytes (aligned). Zero for frameless leaf functions.
    totalSize: i32,
    /// Callee-saved registers and their offsets.
    /// Only the first `savedRegsLen` entries are initialized.
    // TODO: Use constant length when language supports it.
    savedRegs: [SavedReg; super::NUM_SAVED_REGISTERS],
    /// Number of saved registers.
    savedRegsLen: u32,
    /// Epilogue block index for return jumps.
    epilogueBlock: u32,
    /// Whether this is a leaf function. Leaf functions
    /// skip saving/restoring RA since it is never clobbered.
    isLeaf: bool,
    /// Whether the function has dynamic stack allocations.
    /// When false, SP never changes after the prologue.
    isDynamic: bool,
}
| 133 | |
/// Compute frame layout from local size and used callee-saved registers.
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32, isLeaf: bool, isDynamic: bool) -> Frame {
    let mut frame = Frame {
        totalSize: 0,
        savedRegs: undefined,
        savedRegsLen: 0,
        epilogueBlock,
        isLeaf,
        isDynamic,
    };
    // Leaf functions with no locals and no callee-saved registers need no
    // frame at all: they never call out, so RA is never clobbered.
    if isLeaf and localSize == 0 and usedCalleeSaved == 0 {
        return frame;
    }
    // Total size covers locals plus one slot per saved register,
    // including the always-saved RA/FP pair.
    let numSaved = mem::popCount(usedCalleeSaved) + 2;
    frame.totalSize = mem::alignUpI32(
        localSize + numSaved * super::DWORD_SIZE,
        super::STACK_ALIGNMENT
    );

    // Assign a slot to each in-use callee-saved register, growing
    // downward below the RA and FP slots at the top of the frame.
    let mut slot = frame.totalSize - (super::DWORD_SIZE * 3);
    for reg, i in super::CALLEE_SAVED {
        if (usedCalleeSaved & (1 << i)) != 0 {
            frame.savedRegs[frame.savedRegsLen] = SavedReg {
                reg,
                offset: slot,
            };
            frame.savedRegsLen += 1;
            slot -= super::DWORD_SIZE;
        }
    }
    return frame;
}
| 173 | |
/// Create a new emitter.
///
/// Allocates every fixed-capacity buffer up front from `arena`. The
/// debug entry buffer is only allocated when `debug` is set; otherwise
/// `debugEntries` stays an empty slice.
pub fn emitter(arena: *mut alloc::Arena, debug: bool) -> Emitter throws (alloc::AllocError) {
    let code = try alloc::allocSlice(arena, @sizeOf(u32), @alignOf(u32), MAX_INSTRS);
    let pendingBranches = try alloc::allocSlice(arena, @sizeOf(PendingBranch), @alignOf(PendingBranch), MAX_PENDING);
    let pendingCalls = try alloc::allocSlice(arena, @sizeOf(PendingCall), @alignOf(PendingCall), MAX_PENDING);
    let pendingAddrLoads = try alloc::allocSlice(arena, @sizeOf(PendingAddrLoad), @alignOf(PendingAddrLoad), MAX_PENDING);
    let blockOffsets = try alloc::allocSlice(arena, @sizeOf(i32), @alignOf(i32), labels::MAX_BLOCKS_PER_FN);
    let funcEntries = try alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), labels::FUNC_TABLE_SIZE);
    let funcs = try alloc::allocSlice(arena, @sizeOf(types::FuncAddr), @alignOf(types::FuncAddr), MAX_FUNCS);

    // Debug entries are by far the largest buffer; skip unless requested.
    let mut debugEntries: *mut [types::DebugEntry] = &mut [];
    if debug {
        debugEntries = try alloc::allocSlice(
            arena, @sizeOf(types::DebugEntry), @alignOf(types::DebugEntry), MAX_DEBUG_ENTRIES
        ) as *mut [types::DebugEntry];
    }
    return Emitter {
        code: code as *mut [u32],
        codeLen: 0,
        pendingBranches: pendingBranches as *mut [PendingBranch],
        pendingBranchesLen: 0,
        pendingCalls: pendingCalls as *mut [PendingCall],
        pendingCallsLen: 0,
        pendingAddrLoads: pendingAddrLoads as *mut [PendingAddrLoad],
        pendingAddrLoadsLen: 0,
        labels: labels::init(blockOffsets as *mut [i32], funcEntries as *mut [dict::Entry]),
        funcs: funcs as *mut [types::FuncAddr],
        funcsLen: 0,
        debugEntries,
        debugEntriesLen: 0,
    };
}
| 206 | |
| 207 | /////////////////////// |
| 208 | // Emission Helpers // |
| 209 | /////////////////////// |
| 210 | |
/// Append a single instruction to the code buffer.
pub fn emit(e: *mut Emitter, instr: u32) {
    assert e.codeLen < e.code.len, "emit: code buffer full";
    let at = e.codeLen;
    e.code[at] = instr;
    e.codeLen = at + 1;
}
| 217 | |
/// Compute the byte offset from instruction slot `srcIndex` to the
/// function recorded under `name`.
pub fn branchOffsetToFunc(e: *Emitter, srcIndex: u32, name: *[u8]) -> i32 {
    let offset = labels::branchToFunc(&e.labels, srcIndex, name, super::INSTR_SIZE);
    return offset;
}
| 222 | |
/// Patch an instruction at a given index.
/// The slot must already have been emitted; patching never extends the code.
pub fn patch(e: *mut Emitter, index: u32, instr: u32) {
    // Guard against rewriting a slot that was never emitted, consistent
    // with the bounds assertion in `emit`. All callers patch placeholder
    // nops recorded before `codeLen` advanced past them.
    assert index < e.codeLen, "patch: index out of range";
    e.code[index] = instr;
}
| 227 | |
/// Record a block's address for branch resolution.
pub fn recordBlock(e: *mut Emitter, blockIdx: u32) {
    // Byte offsets are computed in i32; guard against overflow.
    // Message added for consistency with the file's other assertions.
    assert e.codeLen <= MAX_CODE_LEN, "recordBlock: code offset overflow";
    labels::recordBlock(&mut e.labels, blockIdx, e.codeLen as i32 * super::INSTR_SIZE);
}
| 233 | |
/// Record a function's code offset for call resolution.
pub fn recordFuncOffset(e: *mut Emitter, name: *[u8]) {
    // Byte offsets are computed in i32; guard against overflow.
    // Message added for consistency with the file's other assertions.
    assert e.codeLen <= MAX_CODE_LEN, "recordFuncOffset: code offset overflow";
    dict::insert(&mut e.labels.funcs, name, e.codeLen as i32 * super::INSTR_SIZE);
}
| 239 | |
/// Record a function's start position for printing.
pub fn recordFunc(e: *mut Emitter, name: *[u8]) {
    assert e.funcsLen < e.funcs.len, "recordFunc: funcs buffer full";
    let at = e.funcsLen;
    e.funcs[at] = types::FuncAddr { name, index: e.codeLen };
    e.funcsLen = at + 1;
}
| 246 | |
/// Record a local branch needing later patching.
/// Unconditional jumps use a single slot (J-type, +-1MB range).
/// Conditional branches use two slots (B-type has only +-4KB range,
/// so large functions may need the inverted-branch + JAL fallback).
pub fn recordBranch(e: *mut Emitter, targetBlock: u32, kind: BranchKind) {
    assert e.pendingBranchesLen < e.pendingBranches.len, "recordBranch: buffer full";
    // Capture the slot index before the placeholders are emitted.
    e.pendingBranches[e.pendingBranchesLen] = PendingBranch {
        index: e.codeLen,
        target: targetBlock,
        kind: kind,
    };
    e.pendingBranchesLen += 1;

    emit(e, encode::nop()); // First slot, always needed.

    match kind {
        case BranchKind::Jump => {},
        else => emit(e, encode::nop()), // Second slot for conditional branches.
    }
}
| 267 | |
/// Record a function call needing later patching.
/// Reserves a two-slot placeholder (patched to AUIPC + JALR) so even
/// long-distance calls can be rewritten in place afterwards.
pub fn recordCall(e: *mut Emitter, target: *[u8]) {
    assert e.pendingCallsLen < e.pendingCalls.len, "recordCall: buffer full";
    // Capture the slot index before the placeholders are emitted.
    let at = e.codeLen;
    e.pendingCalls[e.pendingCallsLen] = PendingCall { index: at, target };
    e.pendingCallsLen += 1;

    // Two placeholder slots: AUIPC, then JALR.
    emit(e, encode::nop());
    emit(e, encode::nop());
}
| 282 | |
/// Record a function address load needing later patching.
/// Reserves a two-slot placeholder (patched to AUIPC + ADDI) so the
/// function's address can be materialized from anywhere in the image.
pub fn recordAddrLoad(e: *mut Emitter, target: *[u8], rd: gen::Reg) {
    assert e.pendingAddrLoadsLen < e.pendingAddrLoads.len, "recordAddrLoad: buffer full";
    // Capture the slot index before the placeholders are emitted.
    let at = e.codeLen;
    e.pendingAddrLoads[e.pendingAddrLoadsLen] = PendingAddrLoad {
        index: at,
        target,
        rd,
    };
    e.pendingAddrLoadsLen += 1;

    // Two placeholder slots: AUIPC, then ADDI.
    emit(e, encode::nop());
    emit(e, encode::nop());
}
| 298 | |
/// Patch local branches and clear the pending list.
///
/// Called after each function.
///
/// Uses two-instruction sequences: short branches use `branch` and `nop`,
/// long branches use an inverted branch that skips over a `jal`.
pub fn patchLocalBranches(e: *mut Emitter) {
    for i in 0..e.pendingBranchesLen {
        let p = e.pendingBranches[i];
        let offset = labels::branchToBlock(&e.labels, p.index, p.target, super::INSTR_SIZE);
        match p.kind {
            case BranchKind::Cond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    // Short form: single B-type branch, pad the second slot.
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long form: invert the condition to hop over a JAL that
                    // carries the full offset. The JAL sits one slot later,
                    // so its offset shrinks by one instruction.
                    let adj = offset - super::INSTR_SIZE;
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::InvertedCond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Mirror of the Cond long form: un-invert to skip the JAL.
                    let adj = offset - super::INSTR_SIZE;
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::Jump => {
                // Single-slot jump (J-type, +-1MB range).
                assert encode::isJumpImm(offset), "patchLocalBranches: jump offset too large";
                patch(e, p.index, encode::jal(super::ZERO, offset));
            },
        }
    }
    e.pendingBranchesLen = 0;
}
| 339 | |
/// Encode a conditional branch instruction selecting the B-type
/// opcode that matches `op`.
fn encodeCondBranch(op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Ult => return encode::bltu(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::blt(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::bne(rs1, rs2, offset),
        case il::CmpOp::Eq => return encode::beq(rs1, rs2, offset),
    }
}
| 349 | |
/// Encode a conditional branch with the condition of `op` negated:
/// Eq<->Ne, Slt->Sge, Ult->Uge.
fn encodeInvertedBranch(op: il::CmpOp, rs1: gen::Reg, rs2: gen::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Ult => return encode::bgeu(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::bge(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::beq(rs1, rs2, offset),
        case il::CmpOp::Eq => return encode::bne(rs1, rs2, offset),
    }
}
| 359 | |
/// Patch all pending function calls and clear the pending list.
/// Called after all functions have been generated.
/// Each call site's two placeholder slots become an `AUIPC`/`JALR`
/// pair, giving a PC-relative call range of +-2GB.
pub fn patchCalls(e: *mut Emitter) {
    for i in 0..e.pendingCallsLen {
        let p = e.pendingCalls[i];
        let offset = branchOffsetToFunc(e, p.index, p.target);
        let s = splitImm(offset);

        // `AUIPC scratch, hi(offset)`.
        patch(e, p.index, encode::auipc(super::SCRATCH1, s.hi));
        // `JALR ra, scratch, lo(offset)`.
        patch(e, p.index + 1, encode::jalr(super::RA, super::SCRATCH1, s.lo));
    }
    // Clear the list so a repeated patching pass cannot re-apply stale
    // entries; mirrors `patchLocalBranches`.
    e.pendingCallsLen = 0;
}
| 374 | |
/// Patch all pending function address loads and clear the pending list.
/// Called after all functions have been generated.
/// Uses PC-relative addresses up to 2GB away.
pub fn patchAddrLoads(e: *mut Emitter) {
    for i in 0..e.pendingAddrLoadsLen {
        let p = e.pendingAddrLoads[i];
        let offset = branchOffsetToFunc(e, p.index, p.target);
        let s = splitImm(offset);

        // `AUIPC rd, hi(offset)`.
        patch(e, p.index, encode::auipc(p.rd, s.hi));
        // `ADDI rd, rd, lo(offset)`.
        patch(e, p.index + 1, encode::addi(p.rd, p.rd, s.lo));
    }
    // Clear the list so a repeated patching pass cannot re-apply stale
    // entries; mirrors `patchLocalBranches`.
    e.pendingAddrLoadsLen = 0;
}
| 390 | |
| 391 | ///////////////////////// |
| 392 | // Immediate Handling // |
| 393 | ///////////////////////// |
| 394 | |
/// Split immediate into `hi` and `lo` bits.
pub record SplitImm {
    /// Upper 20 bits. Carries a +1 compensation when `lo` is negative
    /// (see `splitImm`).
    hi: i32,
    /// Lower 12 bits, sign-extended: negative when bit 11 of the
    /// original immediate is set.
    lo: i32,
}
| 402 | |
/// Split a 32-bit immediate for `AUIPC, ADDI` / `JALR` sequences.
/// Handles sign extension: if *lo* is negative, increment *hi*.
pub fn splitImm(imm: i32) -> SplitImm {
    let lo = imm & 0xFFF;
    let mut hi = (imm >> 12) & 0xFFFFF;
    // If `lo`'s sign bit is set, it will be sign-extended to negative.
    // Compensate by incrementing `hi`.
    // NOTE(review): the increment can push `hi` to 0x100000 (21 bits);
    // this relies on the LUI/AUIPC encoders truncating `hi` to 20 bits
    // so the value wraps correctly -- confirm in `encode`.
    if (lo & 0x800) != 0 {
        hi += 1;
        // OR the sign into the upper bits so `lo` reads as a negative
        // 12-bit immediate.
        return SplitImm { hi, lo: lo | 0xFFFFF000 as i32 };
    }
    return SplitImm { hi, lo };
}
| 416 | |
/// Adjust a large offset by loading *hi* bits into [`super::ADDR_SCRATCH`].
/// Returns adjusted base register and remaining offset.
///
/// When the offset fits a 12-bit signed immediate, returns it unchanged.
/// Otherwise uses [`super::ADDR_SCRATCH`] for the LUI+ADD decomposition.
fn adjustOffset(e: *mut Emitter, base: gen::Reg, offset: i32) -> AdjustedOffset {
    // Fast path: offset is already encodable directly in the load/store.
    if offset >= super::MIN_IMM and offset <= super::MAX_IMM {
        return AdjustedOffset { base, offset };
    }
    // LUI materializes the (sign-compensated) upper 20 bits, ADD folds
    // in the original base; the 12-bit `lo` remainder is returned for
    // the caller's memory instruction.
    let s = splitImm(offset);
    emit(e, encode::lui(super::ADDR_SCRATCH, s.hi));
    emit(e, encode::add(super::ADDR_SCRATCH, super::ADDR_SCRATCH, base));

    return AdjustedOffset { base: super::ADDR_SCRATCH, offset: s.lo };
}
| 432 | |
/// Load an immediate value into a register.
/// Handles the full range of 64-bit immediates.
/// For values fitting in 12 bits, uses a single `ADDI`.
/// For values fitting in 32 bits, uses `LUI` + `ADDIW`.
/// For wider values, loads upper and lower halves then combines with shift and add.
pub fn loadImm(e: *mut Emitter, rd: gen::Reg, imm: i64) {
    let immMin = super::MIN_IMM as i64;
    let immMax = super::MAX_IMM as i64;

    // 12-bit path: ADDI against the zero register.
    if imm >= immMin and imm <= immMax {
        emit(e, encode::addi(rd, super::ZERO, imm as i32));
        return;
    }
    // Check if the value fits in 32 bits (sign-extended).
    let lo32 = imm as i32;
    if lo32 as i64 == imm {
        let s = splitImm(lo32);
        emit(e, encode::lui(rd, s.hi));
        if s.lo != 0 {
            emit(e, encode::addiw(rd, rd, s.lo));
        }
        return;
    }
    // Full 64-bit immediate: use only rd, no scratch registers.
    // Load upper 32 bits first via the 32-bit path (LUI+ADDIW),
    // then shift and add lower bits in 11-bit groups -- 11-bit chunks
    // are always non-negative, which sidesteps sign-extension issues
    // with ADDI's 12-bit signed immediate.
    let hi32 = (imm >> 32) as i32;
    let lower = imm as i32;

    // Load upper 32 bits (recursive call takes one of the paths above).
    loadImm(e, rd, hi32 as i64);
    // Shift left by 11, add bits [31:21].
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 21) & 0x7FF));
    // Shift left by 11, add bits [20:10].
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 10) & 0x7FF));
    // Shift left by 10, add bits [9:0]. Total shift: 11+11+10 = 32.
    emit(e, encode::slli(rd, rd, 10));
    emit(e, encode::addi(rd, rd, lower & 0x3FF));
}
| 475 | |
/// Emit add-immediate, handling large immediates.
/// Immediates outside the 12-bit range are materialized in
/// [`super::SCRATCH1`] and added with a register-register ADD.
pub fn emitAddImm(e: *mut Emitter, rd: gen::Reg, rs: gen::Reg, imm: i32) {
    if imm >= super::MIN_IMM and imm <= super::MAX_IMM {
        emit(e, encode::addi(rd, rs, imm));
        return;
    }
    loadImm(e, super::SCRATCH1, imm as i64);
    emit(e, encode::add(rd, rs, super::SCRATCH1));
}
| 485 | |
| 486 | //////////////////////// |
| 487 | // Load/Store Helpers // |
| 488 | //////////////////////// |
| 489 | |
/// Emit unsigned load with automatic offset adjustment.
pub fn emitLoad(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32, typ: il::Type) {
    let addr = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W64 => emit(e, encode::ld(rd, addr.base, addr.offset)),
        case il::Type::W32 => emit(e, encode::lwu(rd, addr.base, addr.offset)),
        case il::Type::W16 => emit(e, encode::lhu(rd, addr.base, addr.offset)),
        case il::Type::W8 => emit(e, encode::lbu(rd, addr.base, addr.offset)),
    }
}
| 500 | |
/// Emit signed load with automatic offset adjustment.
pub fn emitSload(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32, typ: il::Type) {
    let addr = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W64 => emit(e, encode::ld(rd, addr.base, addr.offset)),
        case il::Type::W32 => emit(e, encode::lw(rd, addr.base, addr.offset)),
        case il::Type::W16 => emit(e, encode::lh(rd, addr.base, addr.offset)),
        case il::Type::W8 => emit(e, encode::lb(rd, addr.base, addr.offset)),
    }
}
| 511 | |
/// Emit store with automatic offset adjustment.
pub fn emitStore(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32, typ: il::Type) {
    let addr = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W64 => emit(e, encode::sd(rs, addr.base, addr.offset)),
        case il::Type::W32 => emit(e, encode::sw(rs, addr.base, addr.offset)),
        case il::Type::W16 => emit(e, encode::sh(rs, addr.base, addr.offset)),
        case il::Type::W8 => emit(e, encode::sb(rs, addr.base, addr.offset)),
    }
}
| 522 | |
/// Emit 64-bit load with automatic offset adjustment.
pub fn emitLd(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::ld(rd, a.base, a.offset));
}
| 528 | |
/// Emit 64-bit store with automatic offset adjustment.
pub fn emitSd(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::sd(rs, a.base, a.offset));
}
| 534 | |
/// Emit 32-bit load with automatic offset adjustment.
pub fn emitLw(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::lw(rd, a.base, a.offset));
}
| 540 | |
/// Emit 32-bit store with automatic offset adjustment.
pub fn emitSw(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::sw(rs, a.base, a.offset));
}
| 546 | |
/// Emit 8-bit load with automatic offset adjustment.
pub fn emitLb(e: *mut Emitter, rd: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::lb(rd, a.base, a.offset));
}
| 552 | |
/// Emit 8-bit store with automatic offset adjustment.
pub fn emitSb(e: *mut Emitter, rs: gen::Reg, base: gen::Reg, offset: i32) {
    let a = adjustOffset(e, base, offset);
    emit(e, encode::sb(rs, a.base, a.offset));
}
| 558 | |
| 559 | ////////////////////////// |
| 560 | // Prologue / Epilogue // |
| 561 | ////////////////////////// |
| 562 | |
/// Emit function prologue.
/// Allocates stack frame, saves RA/FP, saves callee-saved registers.
pub fn emitPrologue(e: *mut Emitter, frame: *Frame) {
    // Fast path: leaf function with no locals needs no frame at all.
    if frame.totalSize == 0 {
        return;
    }
    let totalSize = frame.totalSize;

    // Allocate stack frame. `negFrame` is always negative here, so only
    // the lower bound of the 12-bit immediate range needs checking.
    let negFrame = 0 - totalSize;
    if negFrame >= super::MIN_IMM {
        emit(e, encode::addi(super::SP, super::SP, negFrame));
    } else {
        // Frame too large for one ADDI: materialize the size and subtract.
        loadImm(e, super::SCRATCH1, totalSize as i64);
        emit(e, encode::sub(super::SP, super::SP, super::SCRATCH1));
    }
    // Save return address at the top slot of the frame.
    // Leaf functions never clobber RA and skip this.
    if not frame.isLeaf {
        emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
    }
    // Save frame pointer in the second slot from the top.
    emitSd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);

    // Set up frame pointer, only needed when dynamic allocs may move SP.
    if frame.isDynamic {
        emitAddImm(e, super::FP, super::SP, totalSize);
    }
    // Save callee-saved registers at the offsets computed by `computeFrame`.
    for i in 0..frame.savedRegsLen {
        let sr = frame.savedRegs[i];
        emitSd(e, sr.reg, super::SP, sr.offset);
    }
}
| 597 | |
/// Emit a return: jump to epilogue, or emit `ret` directly for leaf functions.
pub fn emitReturn(e: *mut Emitter, frame: *Frame) {
    if frame.totalSize != 0 {
        // Frame teardown is shared: route all returns through the epilogue.
        recordBranch(e, frame.epilogueBlock, BranchKind::Jump);
        return;
    }
    // Frameless leaf function: nothing to tear down, return directly.
    emit(e, encode::ret());
}
| 607 | |
/// Emit function epilogue.
/// Restores callee-saved registers, `RA/FP`, deallocates frame, returns.
/// Mirror image of `emitPrologue`.
pub fn emitEpilogue(e: *mut Emitter, frame: *Frame) {
    // Record epilogue block address for return jumps.
    recordBlock(e, frame.epilogueBlock);

    // Fast path: leaf function with no locals has no frame to tear down.
    if frame.totalSize == 0 {
        emit(e, encode::ret());
        return;
    }
    let totalSize = frame.totalSize;

    // Restore SP to post-prologue value. Only needed when dynamic stack
    // allocation may have moved SP; FP was set to SP + totalSize in the
    // prologue for exactly this case.
    if frame.isDynamic {
        emitAddImm(e, super::SP, super::FP, 0 - totalSize);
    }
    // Restore callee-saved registers.
    for i in 0..frame.savedRegsLen {
        let sr = frame.savedRegs[i];
        emitLd(e, sr.reg, super::SP, sr.offset);
    }
    // Restore frame pointer.
    emitLd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);
    // Restore return address (skipped for leaf functions, matching the prologue).
    if not frame.isLeaf {
        emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
    }
    // Deallocate stack frame.
    emitAddImm(e, super::SP, super::SP, totalSize);
    emit(e, encode::ret());
}
| 641 | |
| 642 | ////////////////// |
| 643 | // Code Access // |
| 644 | ////////////////// |
| 645 | |
/// Get emitted code as a slice covering only the instructions written so far.
pub fn getCode(e: *Emitter) -> *[u32] {
    let len = e.codeLen;
    return &e.code[..len];
}
| 650 | |
/// Get function addresses for printing, covering only recorded entries.
pub fn getFuncs(e: *Emitter) -> *[types::FuncAddr] {
    let len = e.funcsLen;
    return &e.funcs[..len];
}
| 655 | |
/// Record a debug entry mapping the current PC to a source location.
/// Deduplicates consecutive entries with the same location.
///
/// NOTE(review): when the emitter was created with `debug == false`,
/// `debugEntries` has zero capacity and the assert below fires on the
/// first call -- presumably callers only invoke this in debug builds;
/// confirm at the call sites.
pub fn recordSrcLoc(e: *mut Emitter, loc: il::SrcLoc) {
    // Byte PC of the next instruction to be emitted.
    let pc = e.codeLen * super::INSTR_SIZE as u32;

    // Skip if this is the same location as the previous entry.
    if e.debugEntriesLen > 0 {
        let prev = &e.debugEntries[e.debugEntriesLen - 1];
        if prev.offset == loc.offset and prev.moduleId == loc.moduleId {
            return;
        }
    }
    assert e.debugEntriesLen < e.debugEntries.len, "recordSrcLoc: debug entry buffer full";
    e.debugEntries[e.debugEntriesLen] = types::DebugEntry {
        pc,
        moduleId: loc.moduleId,
        offset: loc.offset,
    };
    e.debugEntriesLen += 1;
}
| 676 | |
/// Get debug entries as a slice covering only recorded entries.
pub fn getDebugEntries(e: *Emitter) -> *[types::DebugEntry] {
    let len = e.debugEntriesLen;
    return &e.debugEntries[..len];
}