Optimize code generation for non-dynamic functions
d84d2c73da2a6583bc6d27963b1f6b75f661d4a6fb283b232f12f82e2a560524
Skip FP save/restore for non-allocating functions.
1 parent
57e40912
lib/std/arch/rv64/emit.rad
+28 -15
| 121 | 121 | savedRegs: [SavedReg; 11], |
|
| 122 | 122 | /// Number of saved registers. |
|
| 123 | 123 | savedRegsLen: u32, |
|
| 124 | 124 | /// Epilogue block index for return jumps. |
|
| 125 | 125 | epilogueBlock: u32, |
|
| 126 | + | /// Whether this is a leaf function. Leaf functions |
|
| 127 | + | /// skip saving/restoring RA since it is never clobbered. |
|
| 128 | + | isLeaf: bool, |
|
| 129 | + | /// Whether the function has dynamic stack allocations. |
|
| 130 | + | /// When false, SP never changes after the prologue. |
|
| 131 | + | isDynamic: bool, |
|
| 126 | 132 | } |
|
| 127 | 133 | ||
| 128 | 134 | /// Compute frame layout from local size and used callee-saved registers. |
|
| 129 | - | pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32, isLeaf: bool) -> Frame { |
|
| 135 | + | pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32, isLeaf: bool, isDynamic: bool) -> Frame { |
|
| 130 | 136 | let mut frame = Frame { |
|
| 131 | 137 | totalSize: 0, |
|
| 132 | 138 | savedRegs: undefined, |
|
| 133 | 139 | savedRegsLen: 0, |
|
| 134 | 140 | epilogueBlock, |
|
| 141 | + | isLeaf, |
|
| 142 | + | isDynamic, |
|
| 135 | 143 | }; |
|
| 136 | 144 | // Skip frame allocation for leaf functions with no locals and no |
|
| 137 | 145 | // callee-saved registers. Leaf functions don't call other functions, |
|
| 138 | 146 | // so RA is never clobbered and doesn't need saving. |
|
| 139 | 147 | if isLeaf and localSize == 0 and usedCalleeSaved == 0 { |
| 586 | 594 | emit(e, encode::addi(super::SP, super::SP, negFrame)); |
|
| 587 | 595 | } else { |
|
| 588 | 596 | loadImm(e, super::SCRATCH1, totalSize as i64); |
|
| 589 | 597 | emit(e, encode::sub(super::SP, super::SP, super::SCRATCH1)); |
|
| 590 | 598 | } |
|
| 591 | - | // Save return address and frame pointer. |
|
| 592 | - | emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE); |
|
| 599 | + | // Save return address. |
|
| 600 | + | if not frame.isLeaf { |
|
| 601 | + | emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE); |
|
| 602 | + | } |
|
| 603 | + | // Save frame pointer. |
|
| 593 | 604 | emitSd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2); |
|
| 594 | 605 | ||
| 595 | - | // Set up frame pointer. |
|
| 596 | - | emitAddImm(e, super::FP, super::SP, totalSize); |
|
| 597 | - | ||
| 606 | + | // Set up frame pointer, only needed when dynamic allocs may move SP. |
|
| 607 | + | if frame.isDynamic { |
|
| 608 | + | emitAddImm(e, super::FP, super::SP, totalSize); |
|
| 609 | + | } |
|
| 598 | 610 | // Save callee-saved registers. |
|
| 599 | 611 | for i in 0..frame.savedRegsLen { |
|
| 600 | 612 | let sr = frame.savedRegs[i]; |
|
| 601 | 613 | emitSd(e, sr.reg, super::SP, sr.offset); |
|
| 602 | 614 | } |
| 618 | 630 | emit(e, encode::ret()); |
|
| 619 | 631 | return; |
|
| 620 | 632 | } |
|
| 621 | 633 | let totalSize = frame.totalSize; |
|
| 622 | 634 | ||
| 623 | - | // Restore SP to post-prologue value. Required if we performed dynamic |
|
| 624 | - | // stack allocation, as SP may have moved. |
|
| 625 | - | // |
|
| 626 | - | // Since we set FP to `SP + totalSize`, we now set SP to `FP - totalSize`. |
|
| 627 | - | emitAddImm(e, super::SP, super::FP, 0 - totalSize); |
|
| 628 | - | ||
| 635 | + | // Restore SP to post-prologue value. Only needed when dynamic stack |
|
| 636 | + | // allocation may have moved SP. |
|
| 637 | + | if frame.isDynamic { |
|
| 638 | + | emitAddImm(e, super::SP, super::FP, 0 - totalSize); |
|
| 639 | + | } |
|
| 629 | 640 | // Restore callee-saved registers. |
|
| 630 | 641 | for i in 0..frame.savedRegsLen { |
|
| 631 | 642 | let sr = frame.savedRegs[i]; |
|
| 632 | 643 | emitLd(e, sr.reg, super::SP, sr.offset); |
|
| 633 | 644 | } |
|
| 634 | - | // Restore frame pointer and return address. |
|
| 645 | + | // Restore frame pointer. |
|
| 635 | 646 | emitLd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2); |
|
| 636 | - | emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE); |
|
| 637 | - | ||
| 647 | + | // Restore return address. |
|
| 648 | + | if not frame.isLeaf { |
|
| 649 | + | emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE); |
|
| 650 | + | } |
|
| 638 | 651 | // Deallocate stack frame. |
|
| 639 | 652 | emitAddImm(e, super::SP, super::SP, totalSize); |
|
| 640 | 653 | emit(e, encode::ret()); |
|
| 641 | 654 | } |
|
| 642 | 655 |
lib/std/arch/rv64/isel.rad
+49 -20
| 90 | 90 | reserveOffset: i32, |
|
| 91 | 91 | /// Pending spill store, auto-committed after each instruction. |
|
| 92 | 92 | pendingSpill: ?PendingSpill, |
|
| 93 | 93 | /// Next synthetic block index for skip-branch targets. |
|
| 94 | 94 | nextSynthBlock: u32, |
|
| 95 | + | /// Whether dynamic allocations exist. |
|
| 96 | + | isDynamic: bool, |
|
| 95 | 97 | } |
|
| 96 | 98 | ||
| 97 | 99 | ///////////////////////// |
|
| 98 | 100 | // Register Allocation // |
|
| 99 | 101 | ///////////////////////// |
| 104 | 106 | panic "getReg: spilled register has no physical assignment"; |
|
| 105 | 107 | }; |
|
| 106 | 108 | return super::Reg { n: phys }; |
|
| 107 | 109 | } |
|
| 108 | 110 | ||
| 109 | - | /// Compute the FP-relative offset for a spill slot. |
|
| 110 | - | /// Spill slots live at the bottom of the frame. |
|
| 111 | - | /// Since `FP = SP + totalFrameSize`, the offset from FP is `slot - totalSize`. |
|
| 111 | + | /// Compute the offset for a spill slot. |
|
| 112 | + | /// When using FP (dynamic): offset from FP = `slot - totalSize`. |
|
| 113 | + | /// When using SP: offset from SP = `slot`. |
|
| 112 | 114 | fn spillOffset(s: *Selector, slot: i32) -> i32 { |
|
| 113 | - | return slot - s.frameSize; |
|
| 115 | + | if s.isDynamic { |
|
| 116 | + | return slot - s.frameSize; |
|
| 117 | + | } |
|
| 118 | + | return slot; |
|
| 119 | + | } |
|
| 120 | + | ||
| 121 | + | /// Get the base register for spill slot addressing (FP or SP). |
|
| 122 | + | fn spillBase(s: *Selector) -> super::Reg { |
|
| 123 | + | if s.isDynamic { |
|
| 124 | + | return super::FP; |
|
| 125 | + | } |
|
| 126 | + | return super::SP; |
|
| 114 | 127 | } |
|
| 115 | 128 | ||
| 116 | 129 | /// Get the destination register for an SSA register. |
|
| 117 | 130 | /// If the register is spilled, records a pending spill and returns the scratch |
|
| 118 | 131 | /// register. The pending spill is auto-committed by [`selectBlock`] after each |
| 128 | 141 | /// Get the source register for an SSA register. |
|
| 129 | 142 | /// If the register is spilled, loads the value from the spill slot into the |
|
| 130 | 143 | /// scratch register and returns it. Otherwise returns the physical register. |
|
| 131 | 144 | fn getSrcReg(s: *mut Selector, ssa: il::Reg, scratch: super::Reg) -> super::Reg { |
|
| 132 | 145 | if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, ssa) { |
|
| 133 | - | emit::emitLd(s.e, scratch, super::FP, spillOffset(s, slot)); |
|
| 146 | + | emit::emitLd(s.e, scratch, spillBase(s), spillOffset(s, slot)); |
|
| 134 | 147 | return scratch; |
|
| 135 | 148 | } |
|
| 136 | 149 | return getReg(s, ssa); |
|
| 137 | 150 | } |
|
| 138 | 151 |
| 227 | 240 | ||
| 228 | 241 | //////////////////////// |
|
| 229 | 242 | // Instruction Select // |
|
| 230 | 243 | //////////////////////// |
|
| 231 | 244 | ||
| 245 | + | /// Pre-scan result for reserve analysis. |
|
| 246 | + | record ReserveInfo { |
|
| 247 | + | /// Total size needed for constant-sized reserves. |
|
| 248 | + | size: i32, |
|
| 249 | + | /// Whether any dynamic-sized reserves exist. |
|
| 250 | + | isDynamic: bool, |
|
| 251 | + | } |
|
| 252 | + | ||
| 232 | 253 | /// Pre-scan all blocks for constant-sized reserve instructions. |
|
| 233 | 254 | /// Returns the total size needed for all static reserves, respecting alignment. |
|
| 234 | - | fn computeReserveSize(func: *il::Fn) -> i32 { |
|
| 255 | + | fn computeReserveInfo(func: *il::Fn) -> ReserveInfo { |
|
| 235 | 256 | let mut offset: i32 = 0; |
|
| 257 | + | let mut isDynamic = false; |
|
| 258 | + | ||
| 236 | 259 | for b in 0..func.blocks.len { |
|
| 237 | 260 | let block = &func.blocks[b]; |
|
| 238 | 261 | for instr in block.instrs { |
|
| 239 | 262 | match instr { |
|
| 240 | 263 | case il::Instr::Reserve { size, alignment, .. } => { |
|
| 241 | 264 | if let case il::Val::Imm(sz) = size { |
|
| 242 | 265 | offset = mem::alignUpI32(offset, alignment as i32); |
|
| 243 | - | offset += (sz as i32); |
|
| 266 | + | offset += sz as i32; |
|
| 267 | + | } else { |
|
| 268 | + | isDynamic = true; |
|
| 244 | 269 | } |
|
| 245 | 270 | }, |
|
| 246 | 271 | else => {}, |
|
| 247 | 272 | } |
|
| 248 | 273 | } |
|
| 249 | 274 | } |
|
| 250 | - | return offset; |
|
| 275 | + | return ReserveInfo { size: offset, isDynamic }; |
|
| 251 | 276 | } |
|
| 252 | 277 | ||
| 253 | 278 | /// Select instructions for a function. |
|
| 254 | 279 | pub fn selectFn( |
|
| 255 | 280 | e: *mut emit::Emitter, |
| 258 | 283 | func: *il::Fn |
|
| 259 | 284 | ) { |
|
| 260 | 285 | // Reset block offsets for this function. |
|
| 261 | 286 | labels::resetBlocks(&mut e.labels); |
|
| 262 | 287 | // Pre-scan for constant-sized reserves to promote to fixed frame slots. |
|
| 263 | - | let reserveSize = computeReserveSize(func); |
|
| 288 | + | let reserveInfo = computeReserveInfo(func); |
|
| 264 | 289 | let isLeaf = func.isLeaf; |
|
| 265 | 290 | // Compute frame layout from spill slots, reserve slots, and used callee-saved registers. |
|
| 266 | 291 | let frame = emit::computeFrame( |
|
| 267 | - | ralloc.spill.frameSize + reserveSize, |
|
| 292 | + | ralloc.spill.frameSize + reserveInfo.size, |
|
| 268 | 293 | ralloc.usedCalleeSaved, |
|
| 269 | 294 | func.blocks.len, |
|
| 270 | - | isLeaf |
|
| 295 | + | isLeaf, |
|
| 296 | + | reserveInfo.isDynamic |
|
| 271 | 297 | ); |
|
| 272 | 298 | // Synthetic block indices start after real blocks and the epilogue block. |
|
| 273 | 299 | let mut s = Selector { |
|
| 274 | 300 | e, ralloc, dataSymMap, frameSize: frame.totalSize, |
|
| 275 | 301 | reserveOffset: 0, pendingSpill: nil, |
|
| 276 | 302 | nextSynthBlock: func.blocks.len + 1, |
|
| 303 | + | isDynamic: frame.isDynamic, |
|
| 277 | 304 | }; |
|
| 278 | 305 | // Record function name for printing. |
|
| 279 | 306 | emit::recordFunc(s.e, func.name); |
|
| 280 | 307 | // Record function code offset for call patching. |
|
| 281 | 308 | emit::recordFuncOffset(s.e, func.name); |
| 291 | 318 | let param = funcParam.value; |
|
| 292 | 319 | let argReg = super::ARG_REGS[i]; |
|
| 293 | 320 | ||
| 294 | 321 | if let slot = regalloc::spill::spillSlot(&ralloc.spill, param) { |
|
| 295 | 322 | // Spilled parameter: store arg register to spill slot. |
|
| 296 | - | emit::emitSd(s.e, argReg, super::FP, spillOffset(&s, slot)); |
|
| 323 | + | emit::emitSd(s.e, argReg, spillBase(&s), spillOffset(&s, slot)); |
|
| 297 | 324 | } else if let assigned = ralloc.assignments[param.n] { |
|
| 298 | 325 | emitMv(&mut s, super::Reg { n: assigned }, argReg); |
|
| 299 | 326 | } |
|
| 300 | 327 | } |
|
| 301 | 328 | } |
| 330 | 357 | selectInstr(s, blockIdx, instr, frame, func); |
|
| 331 | 358 | ||
| 332 | 359 | // Flush the pending spill store, if any. |
|
| 333 | 360 | if let p = s.pendingSpill { |
|
| 334 | 361 | if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, p.ssa) { |
|
| 335 | - | emit::emitSd(s.e, p.rd, super::FP, spillOffset(s, slot)); |
|
| 362 | + | emit::emitSd(s.e, p.rd, spillBase(s), spillOffset(s, slot)); |
|
| 336 | 363 | } |
|
| 337 | 364 | s.pendingSpill = nil; |
|
| 338 | 365 | } |
|
| 339 | 366 | } |
|
| 340 | 367 | } |
| 376 | 403 | match size { |
|
| 377 | 404 | case il::Val::Imm(sz) => { |
|
| 378 | 405 | // Constant-sized reserve: use pre-allocated frame slot. |
|
| 379 | 406 | let rd = getDstReg(s, dst, super::SCRATCH1); |
|
| 380 | 407 | let aligned: i32 = mem::alignUpI32(s.reserveOffset, alignment as i32); |
|
| 381 | - | let fpOffset: i32 = s.ralloc.spill.frameSize + aligned - s.frameSize; |
|
| 408 | + | let base = spillBase(s); |
|
| 409 | + | let offset = s.ralloc.spill.frameSize + aligned |
|
| 410 | + | - (s.frameSize if s.isDynamic else 0); |
|
| 382 | 411 | ||
| 383 | - | emit::emitAddImm(s.e, rd, super::FP, fpOffset); |
|
| 412 | + | emit::emitAddImm(s.e, rd, base, offset); |
|
| 384 | 413 | s.reserveOffset = aligned + (sz as i32); |
|
| 385 | 414 | }, |
|
| 386 | 415 | case il::Val::Reg(r) => { |
|
| 387 | 416 | // Dynamic-sized reserve: runtime SP adjustment. |
|
| 388 | 417 | let rd = getDstReg(s, dst, super::SCRATCH1); |
| 426 | 455 | panic "selectInstr: blit dst not spilled"; |
|
| 427 | 456 | }; |
|
| 428 | 457 | let srcSlot = regalloc::spill::spillSlot(&s.ralloc.spill, src) else { |
|
| 429 | 458 | panic "selectInstr: blit src not spilled"; |
|
| 430 | 459 | }; |
|
| 431 | - | emit::emitLd(s.e, super::SCRATCH2, super::FP, spillOffset(s, dstSlot)); |
|
| 460 | + | emit::emitLd(s.e, super::SCRATCH2, spillBase(s), spillOffset(s, dstSlot)); |
|
| 432 | 461 | srcReload = spillOffset(s, srcSlot); |
|
| 433 | 462 | } else { |
|
| 434 | 463 | rdst = getSrcReg(s, dst, super::SCRATCH2); |
|
| 435 | 464 | rsrc = getSrcReg(s, src, super::SCRATCH2); |
|
| 436 | 465 | } |
| 449 | 478 | emit::emitAddImm(s.e, rdst, rdst, offset); |
|
| 450 | 479 | } |
|
| 451 | 480 | offset = 0; |
|
| 452 | 481 | } |
|
| 453 | 482 | if let off = srcReload { |
|
| 454 | - | emit::emitLd(s.e, super::SCRATCH1, super::FP, off); |
|
| 483 | + | emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off); |
|
| 455 | 484 | emit::emitLd(s.e, super::SCRATCH1, super::SCRATCH1, offset); |
|
| 456 | 485 | } else { |
|
| 457 | 486 | emit::emitLd(s.e, super::SCRATCH1, rsrc, offset); |
|
| 458 | 487 | } |
|
| 459 | 488 | emit::emitSd(s.e, super::SCRATCH1, rdst, offset); |
| 467 | 496 | emit::emitAddImm(s.e, rdst, rdst, offset); |
|
| 468 | 497 | } |
|
| 469 | 498 | offset = 0; |
|
| 470 | 499 | } |
|
| 471 | 500 | if let off = srcReload { |
|
| 472 | - | emit::emitLd(s.e, super::SCRATCH1, super::FP, off); |
|
| 501 | + | emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off); |
|
| 473 | 502 | emit::emitLw(s.e, super::SCRATCH1, super::SCRATCH1, offset); |
|
| 474 | 503 | } else { |
|
| 475 | 504 | emit::emitLw(s.e, super::SCRATCH1, rsrc, offset); |
|
| 476 | 505 | } |
|
| 477 | 506 | emit::emitSw(s.e, super::SCRATCH1, rdst, offset); |
| 485 | 514 | emit::emitAddImm(s.e, rdst, rdst, offset); |
|
| 486 | 515 | } |
|
| 487 | 516 | offset = 0; |
|
| 488 | 517 | } |
|
| 489 | 518 | if let off = srcReload { |
|
| 490 | - | emit::emitLd(s.e, super::SCRATCH1, super::FP, off); |
|
| 519 | + | emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off); |
|
| 491 | 520 | emit::emitLb(s.e, super::SCRATCH1, super::SCRATCH1, offset); |
|
| 492 | 521 | } else { |
|
| 493 | 522 | emit::emitLb(s.e, super::SCRATCH1, rsrc, offset); |
|
| 494 | 523 | } |
|
| 495 | 524 | emit::emitSb(s.e, super::SCRATCH1, rdst, offset); |
| 1099 | 1128 | if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, param) { |
|
| 1100 | 1129 | if let case il::Val::Undef = arg { |
|
| 1101 | 1130 | // Undefined values don't need any move. |
|
| 1102 | 1131 | } else { |
|
| 1103 | 1132 | let rs = resolveVal(s, super::SCRATCH1, arg); |
|
| 1104 | - | emit::emitSd(s.e, rs, super::FP, spillOffset(s, slot)); |
|
| 1133 | + | emit::emitSd(s.e, rs, spillBase(s), spillOffset(s, slot)); |
|
| 1105 | 1134 | } |
|
| 1106 | 1135 | } else { |
|
| 1107 | 1136 | dsts[i] = getReg(s, param); |
|
| 1108 | 1137 | } |
|
| 1109 | 1138 | } |