Optimize code generation for non-dynamic functions

d84d2c73da2a6583bc6d27963b1f6b75f661d4a6fb283b232f12f82e2a560524
Skip FP save/restore for non-allocating functions.
Alexis Sellier committed ago 1 parent 57e40912
lib/std/arch/rv64/emit.rad +28 -15
121 121
    savedRegs: [SavedReg; 11],
122 122
    /// Number of saved registers.
123 123
    savedRegsLen: u32,
124 124
    /// Epilogue block index for return jumps.
125 125
    epilogueBlock: u32,
126 +
    /// Whether this is a leaf function. Leaf functions
127 +
    /// skip saving/restoring RA since it is never clobbered.
128 +
    isLeaf: bool,
129 +
    /// Whether the function has dynamic stack allocations.
130 +
    /// When false, SP never changes after the prologue.
131 +
    isDynamic: bool,
126 132
}
127 133
128 134
/// Compute frame layout from local size and used callee-saved registers.
129 -
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32, isLeaf: bool) -> Frame {
135 +
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32, isLeaf: bool, isDynamic: bool) -> Frame {
130 136
    let mut frame = Frame {
131 137
        totalSize: 0,
132 138
        savedRegs: undefined,
133 139
        savedRegsLen: 0,
134 140
        epilogueBlock,
141 +
        isLeaf,
142 +
        isDynamic,
135 143
    };
136 144
    // Skip frame allocation for leaf functions with no locals and no
137 145
    // callee-saved registers. Leaf functions don't call other functions,
138 146
    // so RA is never clobbered and doesn't need saving.
139 147
    if isLeaf and localSize == 0 and usedCalleeSaved == 0 {
586 594
        emit(e, encode::addi(super::SP, super::SP, negFrame));
587 595
    } else {
588 596
        loadImm(e, super::SCRATCH1, totalSize as i64);
589 597
        emit(e, encode::sub(super::SP, super::SP, super::SCRATCH1));
590 598
    }
591 -
    // Save return address and frame pointer.
592 -
    emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
599 +
    // Save return address.
600 +
    if not frame.isLeaf {
601 +
        emitSd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
602 +
    }
603 +
    // Save frame pointer.
593 604
    emitSd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);
594 605
595 -
    // Set up frame pointer.
596 -
    emitAddImm(e, super::FP, super::SP, totalSize);
597 -
606 +
    // Set up frame pointer, only needed when dynamic allocs may move SP.
607 +
    if frame.isDynamic {
608 +
        emitAddImm(e, super::FP, super::SP, totalSize);
609 +
    }
598 610
    // Save callee-saved registers.
599 611
    for i in 0..frame.savedRegsLen {
600 612
        let sr = frame.savedRegs[i];
601 613
        emitSd(e, sr.reg, super::SP, sr.offset);
602 614
    }
618 630
        emit(e, encode::ret());
619 631
        return;
620 632
    }
621 633
    let totalSize = frame.totalSize;
622 634
623 -
    // Restore SP to post-prologue value. Required if we performed dynamic
624 -
    // stack allocation, as SP may have moved.
625 -
    //
626 -
    // Since we set FP to `SP + totalSize`, we now set SP to `FP - totalSize`.
627 -
    emitAddImm(e, super::SP, super::FP, 0 - totalSize);
628 -
635 +
    // Restore SP to post-prologue value. Only needed when dynamic stack
636 +
    // allocation may have moved SP.
637 +
    if frame.isDynamic {
638 +
        emitAddImm(e, super::SP, super::FP, 0 - totalSize);
639 +
    }
629 640
    // Restore callee-saved registers.
630 641
    for i in 0..frame.savedRegsLen {
631 642
        let sr = frame.savedRegs[i];
632 643
        emitLd(e, sr.reg, super::SP, sr.offset);
633 644
    }
634 -
    // Restore frame pointer and return address.
645 +
    // Restore frame pointer.
635 646
    emitLd(e, super::FP, super::SP, totalSize - super::DWORD_SIZE * 2);
636 -
    emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
637 -
647 +
    // Restore return address.
648 +
    if not frame.isLeaf {
649 +
        emitLd(e, super::RA, super::SP, totalSize - super::DWORD_SIZE);
650 +
    }
638 651
    // Deallocate stack frame.
639 652
    emitAddImm(e, super::SP, super::SP, totalSize);
640 653
    emit(e, encode::ret());
641 654
}
642 655
lib/std/arch/rv64/isel.rad +49 -20
90 90
    reserveOffset: i32,
91 91
    /// Pending spill store, auto-committed after each instruction.
92 92
    pendingSpill: ?PendingSpill,
93 93
    /// Next synthetic block index for skip-branch targets.
94 94
    nextSynthBlock: u32,
95 +
    /// Whether dynamic allocations exist.
96 +
    isDynamic: bool,
95 97
}
96 98
97 99
/////////////////////////
98 100
// Register Allocation //
99 101
/////////////////////////
104 106
        panic "getReg: spilled register has no physical assignment";
105 107
    };
106 108
    return super::Reg { n: phys };
107 109
}
108 110
109 -
/// Compute the FP-relative offset for a spill slot.
110 -
/// Spill slots live at the bottom of the frame.
111 -
/// Since `FP = SP + totalFrameSize`, the offset from FP is `slot - totalSize`.
111 +
/// Compute the offset for a spill slot.
112 +
/// When using FP (dynamic): offset from FP = `slot - totalSize`.
113 +
/// When using SP: offset from SP = `slot`.
112 114
fn spillOffset(s: *Selector, slot: i32) -> i32 {
113 -
    return slot - s.frameSize;
115 +
    if s.isDynamic {
116 +
        return slot - s.frameSize;
117 +
    }
118 +
    return slot;
119 +
}
120 +
121 +
/// Get the base register for spill slot addressing (FP or SP).
122 +
fn spillBase(s: *Selector) -> super::Reg {
123 +
    if s.isDynamic {
124 +
        return super::FP;
125 +
    }
126 +
    return super::SP;
114 127
}
115 128
116 129
/// Get the destination register for an SSA register.
117 130
/// If the register is spilled, records a pending spill and returns the scratch
118 131
/// register. The pending spill is auto-committed by [`selectBlock`] after each
128 141
/// Get the source register for an SSA register.
129 142
/// If the register is spilled, loads the value from the spill slot into the
130 143
/// scratch register and returns it. Otherwise returns the physical register.
131 144
fn getSrcReg(s: *mut Selector, ssa: il::Reg, scratch: super::Reg) -> super::Reg {
132 145
    if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, ssa) {
133 -
        emit::emitLd(s.e, scratch, super::FP, spillOffset(s, slot));
146 +
        emit::emitLd(s.e, scratch, spillBase(s), spillOffset(s, slot));
134 147
        return scratch;
135 148
    }
136 149
    return getReg(s, ssa);
137 150
}
138 151
227 240
228 241
////////////////////////
229 242
// Instruction Select //
230 243
////////////////////////
231 244
245 +
/// Pre-scan result for reserve analysis.
246 +
record ReserveInfo {
247 +
    /// Total size needed for constant-sized reserves.
248 +
    size: i32,
249 +
    /// Whether any dynamic-sized reserves exist.
250 +
    isDynamic: bool,
251 +
}
252 +
232 253
/// Pre-scan all blocks for constant-sized reserve instructions.
233 254
/// Returns the total size needed for all static reserves, respecting alignment.
234 -
fn computeReserveSize(func: *il::Fn) -> i32 {
255 +
fn computeReserveInfo(func: *il::Fn) -> ReserveInfo {
235 256
    let mut offset: i32 = 0;
257 +
    let mut isDynamic = false;
258 +
236 259
    for b in 0..func.blocks.len {
237 260
        let block = &func.blocks[b];
238 261
        for instr in block.instrs {
239 262
            match instr {
240 263
                case il::Instr::Reserve { size, alignment, .. } => {
241 264
                    if let case il::Val::Imm(sz) = size {
242 265
                        offset = mem::alignUpI32(offset, alignment as i32);
243 -
                        offset += (sz as i32);
266 +
                        offset += sz as i32;
267 +
                    } else {
268 +
                        isDynamic = true;
244 269
                    }
245 270
                },
246 271
                else => {},
247 272
            }
248 273
        }
249 274
    }
250 -
    return offset;
275 +
    return ReserveInfo { size: offset, isDynamic };
251 276
}
252 277
253 278
/// Select instructions for a function.
254 279
pub fn selectFn(
255 280
    e: *mut emit::Emitter,
258 283
    func: *il::Fn
259 284
) {
260 285
    // Reset block offsets for this function.
261 286
    labels::resetBlocks(&mut e.labels);
262 287
    // Pre-scan for constant-sized reserves to promote to fixed frame slots.
263 -
    let reserveSize = computeReserveSize(func);
288 +
    let reserveInfo = computeReserveInfo(func);
264 289
    let isLeaf = func.isLeaf;
265 290
    // Compute frame layout from spill slots, reserve slots, and used callee-saved registers.
266 291
    let frame = emit::computeFrame(
267 -
        ralloc.spill.frameSize + reserveSize,
292 +
        ralloc.spill.frameSize + reserveInfo.size,
268 293
        ralloc.usedCalleeSaved,
269 294
        func.blocks.len,
270 -
        isLeaf
295 +
        isLeaf,
296 +
        reserveInfo.isDynamic
271 297
    );
272 298
    // Synthetic block indices start after real blocks and the epilogue block.
273 299
    let mut s = Selector {
274 300
        e, ralloc, dataSymMap, frameSize: frame.totalSize,
275 301
        reserveOffset: 0, pendingSpill: nil,
276 302
        nextSynthBlock: func.blocks.len + 1,
303 +
        isDynamic: frame.isDynamic,
277 304
    };
278 305
    // Record function name for printing.
279 306
    emit::recordFunc(s.e, func.name);
280 307
    // Record function code offset for call patching.
281 308
    emit::recordFuncOffset(s.e, func.name);
291 318
            let param = funcParam.value;
292 319
            let argReg = super::ARG_REGS[i];
293 320
294 321
            if let slot = regalloc::spill::spillSlot(&ralloc.spill, param) {
295 322
                // Spilled parameter: store arg register to spill slot.
296 -
                emit::emitSd(s.e, argReg, super::FP, spillOffset(&s, slot));
323 +
                emit::emitSd(s.e, argReg, spillBase(&s), spillOffset(&s, slot));
297 324
            } else if let assigned = ralloc.assignments[param.n] {
298 325
                emitMv(&mut s, super::Reg { n: assigned }, argReg);
299 326
            }
300 327
        }
301 328
    }
330 357
        selectInstr(s, blockIdx, instr, frame, func);
331 358
332 359
        // Flush the pending spill store, if any.
333 360
        if let p = s.pendingSpill {
334 361
            if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, p.ssa) {
335 -
                emit::emitSd(s.e, p.rd, super::FP, spillOffset(s, slot));
362 +
                emit::emitSd(s.e, p.rd, spillBase(s), spillOffset(s, slot));
336 363
            }
337 364
            s.pendingSpill = nil;
338 365
        }
339 366
    }
340 367
}
376 403
            match size {
377 404
                case il::Val::Imm(sz) => {
378 405
                    // Constant-sized reserve: use pre-allocated frame slot.
379 406
                    let rd = getDstReg(s, dst, super::SCRATCH1);
380 407
                    let aligned: i32 = mem::alignUpI32(s.reserveOffset, alignment as i32);
381 -
                    let fpOffset: i32 = s.ralloc.spill.frameSize + aligned - s.frameSize;
408 +
                    let base = spillBase(s);
409 +
                    let offset = s.ralloc.spill.frameSize + aligned
410 +
                        - (s.frameSize if s.isDynamic else 0);
382 411
383 -
                    emit::emitAddImm(s.e, rd, super::FP, fpOffset);
412 +
                    emit::emitAddImm(s.e, rd, base, offset);
384 413
                    s.reserveOffset = aligned + (sz as i32);
385 414
                },
386 415
                case il::Val::Reg(r) => {
387 416
                    // Dynamic-sized reserve: runtime SP adjustment.
388 417
                    let rd = getDstReg(s, dst, super::SCRATCH1);
426 455
                    panic "selectInstr: blit dst not spilled";
427 456
                };
428 457
                let srcSlot = regalloc::spill::spillSlot(&s.ralloc.spill, src) else {
429 458
                    panic "selectInstr: blit src not spilled";
430 459
                };
431 -
                emit::emitLd(s.e, super::SCRATCH2, super::FP, spillOffset(s, dstSlot));
460 +
                emit::emitLd(s.e, super::SCRATCH2, spillBase(s), spillOffset(s, dstSlot));
432 461
                srcReload = spillOffset(s, srcSlot);
433 462
            } else {
434 463
                rdst = getSrcReg(s, dst, super::SCRATCH2);
435 464
                rsrc = getSrcReg(s, src, super::SCRATCH2);
436 465
            }
449 478
                        emit::emitAddImm(s.e, rdst, rdst, offset);
450 479
                    }
451 480
                    offset = 0;
452 481
                }
453 482
                if let off = srcReload {
454 -
                    emit::emitLd(s.e, super::SCRATCH1, super::FP, off);
483 +
                    emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off);
455 484
                    emit::emitLd(s.e, super::SCRATCH1, super::SCRATCH1, offset);
456 485
                } else {
457 486
                    emit::emitLd(s.e, super::SCRATCH1, rsrc, offset);
458 487
                }
459 488
                emit::emitSd(s.e, super::SCRATCH1, rdst, offset);
467 496
                        emit::emitAddImm(s.e, rdst, rdst, offset);
468 497
                    }
469 498
                    offset = 0;
470 499
                }
471 500
                if let off = srcReload {
472 -
                    emit::emitLd(s.e, super::SCRATCH1, super::FP, off);
501 +
                    emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off);
473 502
                    emit::emitLw(s.e, super::SCRATCH1, super::SCRATCH1, offset);
474 503
                } else {
475 504
                    emit::emitLw(s.e, super::SCRATCH1, rsrc, offset);
476 505
                }
477 506
                emit::emitSw(s.e, super::SCRATCH1, rdst, offset);
485 514
                        emit::emitAddImm(s.e, rdst, rdst, offset);
486 515
                    }
487 516
                    offset = 0;
488 517
                }
489 518
                if let off = srcReload {
490 -
                    emit::emitLd(s.e, super::SCRATCH1, super::FP, off);
519 +
                    emit::emitLd(s.e, super::SCRATCH1, spillBase(s), off);
491 520
                    emit::emitLb(s.e, super::SCRATCH1, super::SCRATCH1, offset);
492 521
                } else {
493 522
                    emit::emitLb(s.e, super::SCRATCH1, rsrc, offset);
494 523
                }
495 524
                emit::emitSb(s.e, super::SCRATCH1, rdst, offset);
1099 1128
        if let slot = regalloc::spill::spillSlot(&s.ralloc.spill, param) {
1100 1129
            if let case il::Val::Undef = arg {
1101 1130
                // Undefined values don't need any move.
1102 1131
            } else {
1103 1132
                let rs = resolveVal(s, super::SCRATCH1, arg);
1104 -
                emit::emitSd(s.e, rs, super::FP, spillOffset(s, slot));
1133 +
                emit::emitSd(s.e, rs, spillBase(s), spillOffset(s, slot));
1105 1134
            }
1106 1135
        } else {
1107 1136
            dsts[i] = getReg(s, param);
1108 1137
        }
1109 1138
    }