compiler/
lib/
examples/
std/
arch/
rv64/
tests/
decode.rad
14.6 KiB
emit.rad
24.4 KiB
encode.rad
19.9 KiB
isel.rad
41.1 KiB
printer.rad
13.0 KiB
tests.rad
15.7 KiB
rv64.rad
13.0 KiB
collections/
lang/
sys/
arch.rad
65 B
collections.rad
36 B
fmt.rad
3.8 KiB
intrinsics.rad
206 B
io.rad
1.2 KiB
lang.rad
222 B
mem.rad
2.2 KiB
sys.rad
167 B
testing.rad
2.4 KiB
tests.rad
11.6 KiB
vec.rad
3.1 KiB
std.rad
231 B
scripts/
seed/
test/
vim/
.gitignore
353 B
.gitsigners
112 B
LICENSE
1.1 KiB
Makefile
3.1 KiB
README
2.5 KiB
std.lib
987 B
std.lib.test
252 B
lib/std/arch/rv64/emit.rad
raw
| 1 | //! RV64 binary emission. |
| 2 | //! |
| 3 | //! Emits RV64 machine code as `u32` list. |
| 4 | |
| 5 | use std::lang::il; |
| 6 | use std::lang::alloc; |
| 7 | use std::lang::gen::labels; |
| 8 | use std::collections::dict; |
| 9 | use std::mem; |
| 10 | |
| 11 | use super::encode; |
| 12 | |
/// Maximum number of instructions in code buffer.
const MAX_INSTRS: u32 = 2097152;
/// Maximum code length before byte offset overflows 32-bits.
/// Computed as `0x7FFFFFFF` / [`super::INSTR_SIZE`].
/// NOTE(review): with a 4-byte `INSTR_SIZE` that quotient would be
/// `0x1FFFFFFF`, not `0x3FFFFFFE` — confirm the intended divisor.
/// In practice `emit` already caps `codeLen` at `MAX_INSTRS`, so this
/// bound only acts as a secondary guard in the `record*` asserts.
const MAX_CODE_LEN: u32 = 0x3FFFFFFE;
/// Maximum number of pending branches awaiting patching.
/// Shared by branch, call, and address-load pending buffers.
const MAX_PENDING: u32 = 65536;
/// Maximum number of function entries.
const MAX_FUNCS: u32 = 4096;
/// Maximum number of debug entries.
const MAX_DEBUG_ENTRIES: u32 = 524288;
| 24 | |
| 25 | ////////////////////// |
| 26 | // Emission Context // |
| 27 | ////////////////////// |
| 28 | |
/// Branch/jump that needs offset patching after all blocks are emitted.
/// Each pending branch reserves two instruction slots in the code buffer
/// (see `recordBranch`), patched by `patchLocalBranches`.
pub record PendingBranch {
    /// Index into code buffer where the branch instruction is.
    index: u32,
    /// Target block index.
    target: u32,
    /// Type of branch for re-encoding.
    kind: BranchKind,
}
| 38 | |
/// Type of branch instruction.
pub union BranchKind {
    /// Conditional branch (B-type encoding): taken when `op` holds for
    /// `rs1`, `rs2`.
    Cond { op: il::CmpOp, rs1: super::Reg, rs2: super::Reg },
    /// Inverted conditional branch (B-type encoding with negated
    /// condition): taken when `op` does NOT hold for `rs1`, `rs2`.
    InvertedCond { op: il::CmpOp, rs1: super::Reg, rs2: super::Reg },
    /// Unconditional jump (J-type encoding).
    Jump,
}
| 48 | |
/// Function call that needs offset patching.
/// Reserves two instruction slots (AUIPC + JALR); patched by `patchCalls`.
pub record PendingCall {
    /// Index in code buffer where the call was emitted.
    index: u32,
    /// Target function name.
    target: *[u8],
}
| 56 | |
/// Function address load that needs offset patching.
/// Used when taking a function's address as a value.
/// Reserves two instruction slots (AUIPC + ADDI); patched by
/// `patchAddrLoads`.
pub record PendingAddrLoad {
    /// Index in code buffer where the load was emitted.
    index: u32,
    /// Target function name.
    target: *[u8],
    /// Destination register.
    rd: super::Reg,
}
| 67 | |
/// Function address entry for printing.
pub record FuncAddr {
    /// Function name.
    name: *[u8],
    /// Instruction index (not byte offset) where this function starts.
    index: u32,
}
| 75 | |
/// Debug entry mapping an instruction to a source location.
/// Recorded by `recordSrcLoc`; consecutive duplicates are elided.
pub record DebugEntry {
    /// Byte offset of the instruction from the start of the program.
    pc: u32,
    /// Module identifier.
    moduleId: u16,
    /// Byte offset into the module's source file.
    offset: u32,
}
| 85 | |
/// Adjusted base register and offset for addressing.
/// Produced by `adjustOffset`; the resulting `offset` always fits a
/// 12-bit signed load/store immediate.
pub record AdjustedOffset {
    /// Base register (may be `super::ADDR_SCRATCH` after adjustment).
    base: super::Reg,
    /// Byte offset from register.
    offset: i32,
}
| 93 | |
/// Callee-saved register with its stack offset.
pub record SavedReg {
    /// Register to save/restore.
    reg: super::Reg,
    /// Offset from SP (positive, within the allocated frame).
    offset: i32,
}
| 101 | |
/// Emission context. Tracks state during code generation.
///
/// All buffers are arena-allocated up front by `emitter` with fixed
/// capacities (`MAX_INSTRS`, `MAX_PENDING`, ...); the `*Len` fields track
/// how much of each buffer is in use.
pub record Emitter {
    /// Emitted instructions storage.
    code: *mut [u32],
    /// Current number of emitted instructions.
    codeLen: u32,
    /// Local branches needing offset patching.
    pendingBranches: *mut [PendingBranch],
    /// Number of pending local branches.
    pendingBranchesLen: u32,
    /// Function calls needing offset patching.
    pendingCalls: *mut [PendingCall],
    /// Number of pending calls.
    pendingCallsLen: u32,
    /// Function address loads needing offset patching.
    pendingAddrLoads: *mut [PendingAddrLoad],
    /// Number of pending address loads.
    pendingAddrLoadsLen: u32,
    /// Block label tracking.
    labels: labels::Labels,
    /// Function start positions for printing.
    funcs: *mut [FuncAddr],
    /// Number of recorded functions.
    funcsLen: u32,
    /// Debug entries mapping PCs to source locations.
    /// Empty slice when the emitter was created with `debug = false`.
    debugEntries: *mut [DebugEntry],
    /// Number of debug entries recorded.
    debugEntriesLen: u32,
}
| 131 | |
/// Computed stack frame layout for a function.
/// Built by `computeFrame`; consumed by `emitPrologue`/`emitEpilogue`.
pub record Frame {
    /// Total frame size in bytes (aligned to `super::STACK_ALIGNMENT`).
    totalSize: i32,
    /// Callee-saved registers and their offsets.
    // TODO: Use constant length when language supports it.
    savedRegs: [SavedReg; 11],
    /// Number of saved registers.
    savedRegsLen: u32,
    /// Epilogue block index for return jumps.
    epilogueBlock: u32,
}
| 144 | |
/// Compute frame layout from local size and used callee-saved registers.
///
/// Frame layout (byte offsets from SP after allocation), matching
/// `emitPrologue`:
///   `totalSize - DWORD`     : saved RA
///   `totalSize - DWORD * 2` : saved FP
///   `totalSize - DWORD * 3` : first used callee-saved register,
///                             descending one DWORD per register.
/// `usedCalleeSaved` is a bitmask indexed like `super::CALLEE_SAVED`.
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32) -> Frame {
    let mut frame = Frame {
        totalSize: 0,
        savedRegs: undefined,
        savedRegsLen: 0,
        epilogueBlock,
    };
    // TODO: Skip frame allocation for leaf functions with no locals.
    // Compute total frame size. The `+ 2` reserves slots for RA and FP,
    // which the prologue saves in addition to the callee-saved set.
    let savedRegs = mem::popCount(usedCalleeSaved) + 2;
    let totalSize = mem::alignUpI32(
        localSize + savedRegs * super::DWORD_SIZE,
        super::STACK_ALIGNMENT
    );
    frame.totalSize = totalSize;

    // Build list of callee-saved registers with offsets, starting just
    // below the RA/FP slots and walking down the frame.
    let mut offset = totalSize - (super::DWORD_SIZE * 3);
    for reg, i in super::CALLEE_SAVED {
        // Check if this register is in use.
        if (usedCalleeSaved & (1 << i)) != 0 {
            frame.savedRegs[frame.savedRegsLen] = SavedReg {
                reg,
                offset,
            };
            frame.savedRegsLen += 1;
            offset -= super::DWORD_SIZE;
        }
    }
    return frame;
}
| 177 | |
/// Create a new emitter.
///
/// Allocates all fixed-capacity buffers from `arena` up front. When
/// `debug` is false, the debug-entry buffer is left as an empty slice
/// and no debug memory is allocated.
///
/// Throws `alloc::AllocError` if any arena allocation fails.
pub fn emitter(arena: *mut alloc::Arena, debug: bool) -> Emitter throws (alloc::AllocError) {
    let code = try alloc::allocSlice(arena, @sizeOf(u32), @alignOf(u32), MAX_INSTRS);
    let pendingBranches = try alloc::allocSlice(arena, @sizeOf(PendingBranch), @alignOf(PendingBranch), MAX_PENDING);
    let pendingCalls = try alloc::allocSlice(arena, @sizeOf(PendingCall), @alignOf(PendingCall), MAX_PENDING);
    let pendingAddrLoads = try alloc::allocSlice(arena, @sizeOf(PendingAddrLoad), @alignOf(PendingAddrLoad), MAX_PENDING);
    let blockOffsets = try alloc::allocSlice(arena, @sizeOf(i32), @alignOf(i32), labels::MAX_BLOCKS_PER_FN);
    let funcEntries = try alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), labels::FUNC_TABLE_SIZE);
    let funcs = try alloc::allocSlice(arena, @sizeOf(FuncAddr), @alignOf(FuncAddr), MAX_FUNCS);

    // Debug entries are only allocated when debug info is requested.
    let mut debugEntries: *mut [DebugEntry] = &mut [];
    if debug {
        debugEntries = try alloc::allocSlice(
            arena, @sizeOf(DebugEntry), @alignOf(DebugEntry), MAX_DEBUG_ENTRIES
        ) as *mut [DebugEntry];
    }
    return Emitter {
        code: code as *mut [u32],
        codeLen: 0,
        pendingBranches: pendingBranches as *mut [PendingBranch],
        pendingBranchesLen: 0,
        pendingCalls: pendingCalls as *mut [PendingCall],
        pendingCallsLen: 0,
        pendingAddrLoads: pendingAddrLoads as *mut [PendingAddrLoad],
        pendingAddrLoadsLen: 0,
        labels: labels::init(blockOffsets as *mut [i32], funcEntries as *mut [dict::Entry]),
        funcs: funcs as *mut [FuncAddr],
        funcsLen: 0,
        debugEntries,
        debugEntriesLen: 0,
    };
}
| 210 | |
| 211 | /////////////////////// |
| 212 | // Emission Helpers // |
| 213 | /////////////////////// |
| 214 | |
/// Emit a single instruction, appending it to the code buffer.
///
/// Panics when the code buffer (`MAX_INSTRS` entries) is full.
pub fn emit(e: *mut Emitter, instr: u32) {
    if e.codeLen >= e.code.len {
        panic "emit: code buffer full";
    }
    e.code[e.codeLen] = instr;
    e.codeLen += 1;
}
| 223 | |
/// Compute the byte offset from instruction `srcIndex` to the recorded
/// start of function `name` (see `recordFuncOffset`).
pub fn branchOffsetToFunc(e: *Emitter, srcIndex: u32, name: *[u8]) -> i32 {
    return labels::branchToFunc(&e.labels, srcIndex, name, super::INSTR_SIZE);
}
| 228 | |
/// Overwrite the instruction at `index` with `instr`.
/// `index` must refer to an already-emitted slot.
pub fn patch(e: *mut Emitter, index: u32, instr: u32) {
    e.code[index] = instr;
}
| 233 | |
/// Record a block's address (current code position, in bytes) for
/// branch resolution.
pub fn recordBlock(e: *mut Emitter, blockIdx: u32) {
    // Guard the i32 multiply below against byte-offset overflow.
    assert e.codeLen <= MAX_CODE_LEN;
    labels::recordBlock(&mut e.labels, blockIdx, e.codeLen as i32 * super::INSTR_SIZE);
}
| 239 | |
/// Record a function's code offset (current code position, in bytes)
/// for call resolution by `patchCalls`/`patchAddrLoads`.
pub fn recordFuncOffset(e: *mut Emitter, name: *[u8]) {
    // Guard the i32 multiply below against byte-offset overflow.
    assert e.codeLen <= MAX_CODE_LEN;
    dict::insert(&mut e.labels.funcs, name, e.codeLen as i32 * super::INSTR_SIZE);
}
| 245 | |
/// Record a function's start position (instruction index) for printing.
///
/// Panics when the function table (`MAX_FUNCS` entries) is full.
pub fn recordFunc(e: *mut Emitter, name: *[u8]) {
    if e.funcsLen >= e.funcs.len {
        panic "recordFunc: funcs buffer full";
    }
    e.funcs[e.funcsLen] = FuncAddr { name, index: e.codeLen };
    e.funcsLen += 1;
}
| 254 | |
/// Record a local branch needing later patching.
/// Emits two placeholder instructions that will be patched later by
/// `patchLocalBranches`: two slots allow a long-distance form
/// (inverted-branch + `jal`, or `auipc` + `jalr`).
///
/// Panics when the pending-branch buffer is full.
pub fn recordBranch(e: *mut Emitter, targetBlock: u32, kind: BranchKind) {
    if e.pendingBranchesLen >= e.pendingBranches.len {
        panic "recordBranch: buffer full";
    }
    e.pendingBranches[e.pendingBranchesLen] = PendingBranch {
        index: e.codeLen,
        target: targetBlock,
        kind: kind,
    };
    e.pendingBranchesLen += 1;

    emit(e, encode::nop()); // Placeholder for branch/auipc.
    emit(e, encode::nop()); // Placeholder for nop/jal/jalr.
}
| 271 | |
/// Record a function call needing later patching.
/// Emits placeholder instructions that will be patched later by
/// `patchCalls`. Uses two slots (AUIPC + JALR) to support
/// long-distance calls.
///
/// Panics when the pending-call buffer is full.
pub fn recordCall(e: *mut Emitter, target: *[u8]) {
    if e.pendingCallsLen >= e.pendingCalls.len {
        panic "recordCall: buffer full";
    }
    e.pendingCalls[e.pendingCallsLen] = PendingCall {
        index: e.codeLen,
        target,
    };
    e.pendingCallsLen += 1;

    emit(e, encode::nop()); // Placeholder for AUIPC.
    emit(e, encode::nop()); // Placeholder for JALR.
}
| 288 | |
/// Record a function address load needing later patching.
/// Emits placeholder instructions that will be patched by
/// `patchAddrLoads` to load the function's address into `rd`.
/// Uses two slots (AUIPC + ADDI) to compute long-distance addresses.
///
/// Panics when the pending-address-load buffer is full.
pub fn recordAddrLoad(e: *mut Emitter, target: *[u8], rd: super::Reg) {
    if e.pendingAddrLoadsLen >= e.pendingAddrLoads.len {
        panic "recordAddrLoad: buffer full";
    }
    e.pendingAddrLoads[e.pendingAddrLoadsLen] = PendingAddrLoad {
        index: e.codeLen,
        target,
        rd: rd,
    };
    e.pendingAddrLoadsLen += 1;

    emit(e, encode::nop()); // Placeholder for AUIPC.
    emit(e, encode::nop()); // Placeholder for ADDI.
}
| 306 | |
/// Patch local branches and clear the pending list.
///
/// Called after each function.
///
/// Uses two-instruction sequences: short branches use `branch` and `nop`,
/// long branches use inverted branch and `jal` or `auipc` and `jalr`.
/// Offsets are computed relative to the first of the two slots
/// (`p.index`); the second slot's instruction compensates by
/// `INSTR_SIZE` where needed.
pub fn patchLocalBranches(e: *mut Emitter) {
    for i in 0..e.pendingBranchesLen {
        let p = e.pendingBranches[i];
        let offset = labels::branchToBlock(&e.labels, p.index, p.target, super::INSTR_SIZE);
        match p.kind {
            case BranchKind::Cond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    // Short: direct branch; second slot stays a nop.
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long: inverted branch skips `jal`; `jal` goes to target.
                    // `jal` sits one slot later, so shrink its offset by
                    // one instruction.
                    let adj = offset - super::INSTR_SIZE;
                    if not encode::isJumpImm(adj) {
                        panic "patchLocalBranches: branch offset too large";
                    }
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::InvertedCond { op, rs1, rs2 } => {
                if encode::isBranchImm(offset) {
                    // Short: direct inverted branch; second slot stays a nop.
                    patch(e, p.index, encodeInvertedBranch(op, rs1, rs2, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long: non-inverted branch skips `jal`, jal goes to target.
                    let adj = offset - super::INSTR_SIZE;
                    if not encode::isJumpImm(adj) {
                        panic "patchLocalBranches: branch offset too large";
                    }
                    patch(e, p.index, encodeCondBranch(op, rs1, rs2, super::INSTR_SIZE * 2));
                    patch(e, p.index + 1, encode::jal(super::ZERO, adj));
                }
            },
            case BranchKind::Jump => {
                if encode::isJumpImm(offset) {
                    // Short: single `jal`; second slot stays a nop.
                    patch(e, p.index, encode::jal(super::ZERO, offset));
                    patch(e, p.index + 1, encode::nop());
                } else {
                    // Long: `auipc` + `jalr` pair (±2 GiB range); `auipc`
                    // is at `p.index`, so the unadjusted offset is correct.
                    let s = splitImm(offset);
                    patch(e, p.index, encode::auipc(super::SCRATCH1, s.hi));
                    patch(e, p.index + 1, encode::jalr(super::ZERO, super::SCRATCH1, s.lo));
                }
            },
        }
    }
    e.pendingBranchesLen = 0;
}
| 362 | |
/// Encode a conditional branch instruction: branch taken when `op`
/// holds for `rs1`, `rs2`.
fn encodeCondBranch(op: il::CmpOp, rs1: super::Reg, rs2: super::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Eq => return encode::beq(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::bne(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::blt(rs1, rs2, offset),
        case il::CmpOp::Ult => return encode::bltu(rs1, rs2, offset),
    }
}
| 372 | |
/// Encode an inverted conditional branch instruction: branch taken when
/// `op` does NOT hold (each case is the negation of `encodeCondBranch`).
fn encodeInvertedBranch(op: il::CmpOp, rs1: super::Reg, rs2: super::Reg, offset: i32) -> u32 {
    match op {
        case il::CmpOp::Eq => return encode::bne(rs1, rs2, offset),
        case il::CmpOp::Ne => return encode::beq(rs1, rs2, offset),
        case il::CmpOp::Slt => return encode::bge(rs1, rs2, offset),
        case il::CmpOp::Ult => return encode::bgeu(rs1, rs2, offset),
    }
}
| 382 | |
/// Patch all pending function calls.
/// Called after all functions have been generated, so every call
/// target's offset is known (see `recordFuncOffset`).
pub fn patchCalls(e: *mut Emitter) {
    for i in 0..e.pendingCallsLen {
        let p = e.pendingCalls[i];
        let offset = branchOffsetToFunc(e, p.index, p.target);
        let s = splitImm(offset);

        // `AUIPC scratch, hi(offset)`.
        patch(e, p.index, encode::auipc(super::SCRATCH1, s.hi));
        // `JALR ra, scratch, lo(offset)` — links the return address in RA.
        patch(e, p.index + 1, encode::jalr(super::RA, super::SCRATCH1, s.lo));
    }
}
| 397 | |
/// Patch all pending function address loads.
/// Called after all functions have been generated.
/// Uses PC-relative addresses up to 2GB away.
pub fn patchAddrLoads(e: *mut Emitter) {
    for i in 0..e.pendingAddrLoadsLen {
        let p = e.pendingAddrLoads[i];
        let offset = branchOffsetToFunc(e, p.index, p.target);
        let s = splitImm(offset);

        // `AUIPC rd, hi(offset)`.
        patch(e, p.index, encode::auipc(p.rd, s.hi));
        // `ADDI rd, rd, lo(offset)`.
        patch(e, p.index + 1, encode::addi(p.rd, p.rd, s.lo));
    }
}
| 413 | |
| 414 | ///////////////////////// |
| 415 | // Immediate Handling // |
| 416 | ///////////////////////// |
| 417 | |
/// Split immediate into `hi` and `lo` bits (see `splitImm`).
pub record SplitImm {
    /// Upper 20 bits, pre-adjusted so `(hi << 12) + lo` equals the
    /// original immediate after `lo` is sign-extended.
    hi: i32,
    /// Lower 12 bits, sign-extended to `i32` when bit 11 is set.
    lo: i32,
}
| 425 | |
/// Split a 32-bit immediate for `AUIPC, ADDI` / `JALR` sequences.
///
/// The lower 12 bits are sign-extended by the consuming instruction, so
/// when bit 11 of `lo` is set the upper part is incremented by one to
/// compensate, and `lo` is returned already sign-extended.
/// (At `hi == 0xFFFFF` the increment wraps past 20 bits; correctness
/// then relies on the encoder truncating `hi` to its 20-bit field.)
pub fn splitImm(imm: i32) -> SplitImm {
    let lo = imm & 0xFFF;
    let hi = (imm >> 12) & 0xFFFFF;
    // No sign-extension hazard: return the parts unchanged.
    if (lo & 0x800) == 0 {
        return SplitImm { hi, lo };
    }
    // `lo` will read back as negative; bump `hi` and hand back the
    // sign-extended low part so the pieces still sum to `imm`.
    return SplitImm { hi: hi + 1, lo: lo | 0xFFFFF000 as i32 };
}
| 439 | |
/// Adjust a large offset by loading *hi* bits into [`super::ADDR_SCRATCH`].
/// Returns adjusted base register and remaining offset.
///
/// When the offset fits a 12-bit signed immediate, returns it unchanged.
/// Otherwise uses [`super::ADDR_SCRATCH`] for the LUI+ADD decomposition,
/// emitting two instructions and clobbering `ADDR_SCRATCH`.
fn adjustOffset(e: *mut Emitter, base: super::Reg, offset: i32) -> AdjustedOffset {
    // Fast path: offset already fits the 12-bit load/store immediate.
    if offset >= super::MIN_IMM and offset <= super::MAX_IMM {
        return AdjustedOffset { base, offset };
    }
    // Fold the high part into ADDR_SCRATCH; the caller addresses via
    // `ADDR_SCRATCH + s.lo`, which is guaranteed to fit 12 bits.
    let s = splitImm(offset);
    emit(e, encode::lui(super::ADDR_SCRATCH, s.hi));
    emit(e, encode::add(super::ADDR_SCRATCH, super::ADDR_SCRATCH, base));

    return AdjustedOffset { base: super::ADDR_SCRATCH, offset: s.lo };
}
| 455 | |
/// Load an immediate value into a register.
/// Handles the full range of 64-bit immediates.
/// For values fitting in 12 bits, uses a single `ADDI`.
/// For values fitting in 32 bits, uses `LUI` + `ADDIW`.
/// For wider values, loads upper and lower halves then combines with
/// shift and add. Uses only `rd`; no scratch registers are clobbered.
pub fn loadImm(e: *mut Emitter, rd: super::Reg, imm: i64) {
    let immMin = super::MIN_IMM as i64;
    let immMax = super::MAX_IMM as i64;

    // 12-bit path: single ADDI from the zero register.
    if imm >= immMin and imm <= immMax {
        emit(e, encode::addi(rd, super::ZERO, imm as i32));
        return;
    }
    // Check if the value fits in 32 bits (sign-extended).
    let lo32 = imm as i32;
    if lo32 as i64 == imm {
        let s = splitImm(lo32);
        emit(e, encode::lui(rd, s.hi));
        if s.lo != 0 {
            emit(e, encode::addiw(rd, rd, s.lo));
        }
        return;
    }
    // Full 64-bit immediate: use only rd, no scratch registers.
    // Load upper 32 bits first via the 32-bit path (LUI+ADDIW),
    // then shift and add lower bits in 11-bit groups to avoid
    // sign-extension issues with ADDI's 12-bit signed immediate.
    // The masked chunks are always non-negative, so ADDI never
    // sign-extends; shifts total 11 + 11 + 10 = 32 bits.
    let hi32 = (imm >> 32) as i32;
    let lower = imm as i32;

    // Load upper 32 bits (recursive call takes one of the paths above).
    loadImm(e, rd, hi32 as i64);
    // Shift left by 11, add bits [31:21].
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 21) & 0x7FF));
    // Shift left by 11, add bits [20:10].
    emit(e, encode::slli(rd, rd, 11));
    emit(e, encode::addi(rd, rd, (lower >> 10) & 0x7FF));
    // Shift left by 10, add bits [9:0].
    emit(e, encode::slli(rd, rd, 10));
    emit(e, encode::addi(rd, rd, lower & 0x3FF));
}
| 498 | |
/// Emit add-immediate, handling large immediates.
/// For immediates outside the 12-bit signed range, clobbers
/// [`super::SCRATCH1`] to stage the value.
pub fn emitAddImm(e: *mut Emitter, rd: super::Reg, rs: super::Reg, imm: i32) {
    if imm >= super::MIN_IMM and imm <= super::MAX_IMM {
        emit(e, encode::addi(rd, rs, imm));
    } else {
        loadImm(e, super::SCRATCH1, imm as i64);
        emit(e, encode::add(rd, rs, super::SCRATCH1));
    }
}
| 508 | |
| 509 | //////////////////////// |
| 510 | // Load/Store Helpers // |
| 511 | //////////////////////// |
| 512 | |
/// Emit unsigned load with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets (see `adjustOffset`).
/// NOTE(review): W32 uses `lw`, which sign-extends on RV64; a
/// zero-extending unsigned load would be `lwu`. This may be deliberate
/// if the IL keeps 32-bit values sign-extended in registers (RISC-V
/// convention) — confirm against isel's expectations.
pub fn emitLoad(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32, typ: il::Type) {
    let adj = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::lbu(rd, adj.base, adj.offset)),
        case il::Type::W16 => emit(e, encode::lhu(rd, adj.base, adj.offset)),
        case il::Type::W32 => emit(e, encode::lw(rd, adj.base, adj.offset)),
        case il::Type::W64 => emit(e, encode::ld(rd, adj.base, adj.offset)),
    }
}
| 523 | |
/// Emit signed (sign-extending) load with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets (see `adjustOffset`).
pub fn emitSload(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32, typ: il::Type) {
    let adj = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::lb(rd, adj.base, adj.offset)),
        case il::Type::W16 => emit(e, encode::lh(rd, adj.base, adj.offset)),
        case il::Type::W32 => emit(e, encode::lw(rd, adj.base, adj.offset)),
        case il::Type::W64 => emit(e, encode::ld(rd, adj.base, adj.offset)),
    }
}
| 534 | |
/// Emit store with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets (see `adjustOffset`).
pub fn emitStore(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32, typ: il::Type) {
    let adj = adjustOffset(e, base, offset);
    match typ {
        case il::Type::W8 => emit(e, encode::sb(rs, adj.base, adj.offset)),
        case il::Type::W16 => emit(e, encode::sh(rs, adj.base, adj.offset)),
        case il::Type::W32 => emit(e, encode::sw(rs, adj.base, adj.offset)),
        case il::Type::W64 => emit(e, encode::sd(rs, adj.base, adj.offset)),
    }
}
| 545 | |
/// Emit 64-bit load with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets.
pub fn emitLd(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::ld(rd, adj.base, adj.offset));
}
| 551 | |
/// Emit 64-bit store with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets.
pub fn emitSd(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::sd(rs, adj.base, adj.offset));
}
| 557 | |
/// Emit 32-bit load with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets.
pub fn emitLw(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::lw(rd, adj.base, adj.offset));
}
| 563 | |
/// Emit 32-bit store with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets.
pub fn emitSw(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::sw(rs, adj.base, adj.offset));
}
| 569 | |
/// Emit 8-bit (sign-extending) load with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets.
pub fn emitLb(e: *mut Emitter, rd: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::lb(rd, adj.base, adj.offset));
}
| 575 | |
/// Emit 8-bit store with automatic offset adjustment.
/// May clobber `ADDR_SCRATCH` for out-of-range offsets.
pub fn emitSb(e: *mut Emitter, rs: super::Reg, base: super::Reg, offset: i32) {
    let adj = adjustOffset(e, base, offset);
    emit(e, encode::sb(rs, adj.base, adj.offset));
}
| 581 | |
| 582 | ////////////////////////// |
| 583 | // Prologue / Epilogue // |
| 584 | ////////////////////////// |
| 585 | |
| 586 | /// Emit function prologue. |
| 587 | /// Allocates stack frame, saves RA/FP, saves callee-saved registers. |
| 588 | pub fn emitPrologue(e: *mut Emitter, frame: *Frame) { |
| 589 | // Fast path: leaf function with no locals. |
| 590 | if frame.totalSize == 0 { |
| 591 | return; |
| 592 | } |
| 593 | let totalSize = frame.totalSize; |
| 594 | |
| 595 | // Allocate stack frame. |
| 596 | let negFrame = 0 - totalSize; |
| 597 | if negFrame >= super::MIN_IMM { |
| 598 | emit(e, encode::addi(super::SP, super::SP, negFrame)); |
| 599 | } else { |
| 600 | loadImm(e, super::SCRATCH1, totalSize as i64); |
| 601 | emit(e, encode::sub(super::SP, super::SP, super::SCRATCH1)); |
| 602 | } |
| 603 | // Save return address and frame pointer. |
| 604 | emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE); |
| 605 | emitSd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2); |
| 606 | |
| 607 | // Set up frame pointer. |
| 608 | emitAddImm(e, super::FP, super::SP, totalSize); |
| 609 | |
| 610 | // Save callee-saved registers. |
| 611 | for i in 0..frame.savedRegsLen { |
| 612 | let sr = frame.savedRegs[i]; |
| 613 | emitSd(e, sr.reg, super::SP, sr.offset); |
| 614 | } |
| 615 | } |
| 616 | |
| 617 | /// Emit a return: jump to epilogue. |
| 618 | pub fn emitReturn(e: *mut Emitter, frame: *Frame) { |
| 619 | recordBranch(e, frame.epilogueBlock, BranchKind::Jump); |
| 620 | } |
| 621 | |
| 622 | /// Emit function epilogue. |
| 623 | /// Restores callee-saved registers, `RA/FP`, deallocates frame, returns. |
| 624 | pub fn emitEpilogue(e: *mut Emitter, frame: *Frame) { |
| 625 | // Record epilogue block address for return jumps. |
| 626 | recordBlock(e, frame.epilogueBlock); |
| 627 | |
| 628 | // Fast path: leaf function with no locals. |
| 629 | if frame.totalSize == 0 { |
| 630 | emit(e, encode::ret()); |
| 631 | return; |
| 632 | } |
| 633 | let totalSize = frame.totalSize; |
| 634 | |
| 635 | // Restore SP to post-prologue value. Required if we performed dynamic |
| 636 | // stack allocation, as SP may have moved. |
| 637 | // |
| 638 | // Since we set FP to `SP + totalSize`, we now set SP to `FP - totalSize`. |
| 639 | emitAddImm(e, super::SP, super::FP, 0 - totalSize); |
| 640 | |
| 641 | // Restore callee-saved registers. |
| 642 | for i in 0..frame.savedRegsLen { |
| 643 | let sr = frame.savedRegs[i]; |
| 644 | emitLd(e, sr.reg, super::SP, sr.offset); |
| 645 | } |
| 646 | // Restore frame pointer and return address. |
| 647 | emitLd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2); |
| 648 | emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE); |
| 649 | |
| 650 | // Deallocate stack frame. |
| 651 | emitAddImm(e, super::SP, super::SP, totalSize); |
| 652 | emit(e, encode::ret()); |
| 653 | } |
| 654 | |
| 655 | ////////////////// |
| 656 | // Code Access // |
| 657 | ////////////////// |
| 658 | |
| 659 | /// Get emitted code as a slice. |
| 660 | pub fn getCode(e: *Emitter) -> *[u32] { |
| 661 | return &e.code[..e.codeLen]; |
| 662 | } |
| 663 | |
| 664 | /// Get function addresses for printing. |
| 665 | pub fn getFuncs(e: *Emitter) -> *[FuncAddr] { |
| 666 | return &e.funcs[..e.funcsLen]; |
| 667 | } |
| 668 | |
| 669 | /// Record a debug entry mapping the current PC to a source location. |
| 670 | /// Deduplicates consecutive entries with the same location. |
| 671 | pub fn recordSrcLoc(e: *mut Emitter, loc: il::SrcLoc) { |
| 672 | let pc = e.codeLen * super::INSTR_SIZE as u32; |
| 673 | |
| 674 | // Skip if this is the same location as the previous entry. |
| 675 | if e.debugEntriesLen > 0 { |
| 676 | let prev = &e.debugEntries[e.debugEntriesLen - 1]; |
| 677 | if prev.offset == loc.offset and prev.moduleId == loc.moduleId { |
| 678 | return; |
| 679 | } |
| 680 | } |
| 681 | if e.debugEntriesLen >= e.debugEntries.len { |
| 682 | panic "recordSrcLoc: debug entry buffer full"; |
| 683 | } |
| 684 | e.debugEntries[e.debugEntriesLen] = DebugEntry { |
| 685 | pc, |
| 686 | moduleId: loc.moduleId, |
| 687 | offset: loc.offset, |
| 688 | }; |
| 689 | e.debugEntriesLen += 1; |
| 690 | } |
| 691 | |
| 692 | /// Get debug entries as a slice. |
| 693 | pub fn getDebugEntries(e: *Emitter) -> *[DebugEntry] { |
| 694 | return &e.debugEntries[..e.debugEntriesLen]; |
| 695 | } |