Skip frame allocation for leaf functions

6d184f2022f4d784538ec4d84d98c019d48d96e5efb508bfd9a1b771dd208ac6
Alexis Sellier committed ago 1 parent d46af834
lib/std/arch/rv64/emit.rad +8 -3
124 124
    /// Epilogue block index for return jumps.
125 125
    epilogueBlock: u32,
126 126
}
127 127
128 128
/// Compute frame layout from local size and used callee-saved registers.
129 -
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32) -> Frame {
129 +
pub fn computeFrame(localSize: i32, usedCalleeSaved: u32, epilogueBlock: u32, isLeaf: bool) -> Frame {
130 130
    let mut frame = Frame {
131 131
        totalSize: 0,
132 132
        savedRegs: undefined,
133 133
        savedRegsLen: 0,
134 134
        epilogueBlock,
135 135
    };
136 -
    // TODO: Skip frame allocation for leaf functions with no locals.
137 -
    // Compute total frame size. Includes RA and SP registers.
136 +
    // Skip frame allocation for leaf functions with no locals and no
137 +
    // callee-saved registers. Leaf functions don't call other functions,
138 +
    // so RA is never clobbered and doesn't need saving.
139 +
    if isLeaf and localSize == 0 and usedCalleeSaved == 0 {
140 +
        return frame;
141 +
    }
142 +
    // Compute total frame size. Includes RA and FP registers.
138 143
    let savedRegs = mem::popCount(usedCalleeSaved) + 2;
139 144
    let totalSize = mem::alignUpI32(
140 145
        localSize + savedRegs * super::DWORD_SIZE,
141 146
        super::STACK_ALIGNMENT
142 147
    );
lib/std/arch/rv64/isel.rad +3 -1
259 259
) {
260 260
    // Reset block offsets for this function.
261 261
    labels::resetBlocks(&mut e.labels);
262 262
    // Pre-scan for constant-sized reserves to promote to fixed frame slots.
263 263
    let reserveSize = computeReserveSize(func);
264 +
    let isLeaf = func.isLeaf;
264 265
    // Compute frame layout from spill slots, reserve slots, and used callee-saved registers.
265 266
    let frame = emit::computeFrame(
266 267
        ralloc.spill.frameSize + reserveSize,
267 268
        ralloc.usedCalleeSaved,
268 -
        func.blocks.len
269 +
        func.blocks.len,
270 +
        isLeaf
269 271
    );
270 272
    // Synthetic block indices start after real blocks and the epilogue block.
271 273
    let mut s = Selector {
272 274
        e, ralloc, dataSymMap, frameSize: frame.totalSize,
273 275
        reserveOffset: 0, pendingSpill: nil,
lib/std/lang/il.rad +2 -0
332 332
    params: *[Param],
333 333
    /// Return type.
334 334
    returnType: Type,
335 335
    /// Whether the function is extern (no body).
336 336
    isExtern: bool,
337 +
    /// Whether the function is a leaf (contains no call or ecall instructions).
338 +
    isLeaf: bool,
337 339
    /// Basic blocks. Empty for extern functions.
338 340
    blocks: *[Block],
339 341
}
340 342
341 343
/////////////
lib/std/lang/lower.rad +15 -0
658 658
    regCounter: u32,
659 659
    /// When the function returns an aggregate type, the caller passes a hidden
660 660
    /// pointer as the first parameter. The callee writes the return value into
661 661
    /// this buffer and returns the pointer.
662 662
    returnReg: ?il::Reg,
663 +
    /// Whether the function is a leaf.
664 +
    isLeaf: bool,
663 665
664 666
    // ~ Debug info ~ //
665 667
666 668
    /// Current debug source location, set when processing AST nodes.
667 669
    srcLoc: il::SrcLoc,
876 878
        loopDepth: 0,
877 879
        labelCounter: 0,
878 880
        dataCounter: 0,
879 881
        regCounter: 0,
880 882
        returnReg: nil,
883 +
        isLeaf: true,
881 884
        srcLoc: undefined,
882 885
    };
883 886
    if self.options.debug {
884 887
        let modId = self.currentMod else {
885 888
            panic "fnLowerer: debug enabled but no current module";
933 936
    *func = il::Fn {
934 937
        name: qualName,
935 938
        params: lowParams,
936 939
        returnType: undefined,
937 940
        isExtern,
941 +
        isLeaf: true,
938 942
        blocks: &[],
939 943
    };
940 944
    // Throwing functions return a result aggregate (word-sized pointer).
941 945
    // TODO: The resolver should set an appropriate type that takes into account
942 946
    //       the throws list. It shouldn't set the return type to the "success"
950 954
        // Extern functions have no body.
951 955
        assert isExtern;
952 956
        return func;
953 957
    };
954 958
    func.blocks = try lowerFnBody(&mut fnLow, body);
959 +
    func.isLeaf = fnLow.isLeaf;
955 960
956 961
    return func;
957 962
}
958 963
959 964
/// Build a qualified name of the form "Type::method".
1053 1058
        *func = il::Fn {
1054 1059
            name: qualName,
1055 1060
            params: lowParams,
1056 1061
            returnType: ilType(self, *fnType.returnType),
1057 1062
            isExtern: false,
1063 +
            isLeaf: true,
1058 1064
            blocks: &[],
1059 1065
        };
1060 1066
        if fnType.throwList.len > 0 {
1061 1067
            func.returnType = il::Type::W64;
1062 1068
        }
1063 1069
        func.blocks = try lowerFnBody(&mut fnLow, body);
1070 +
        func.isLeaf = fnLow.isLeaf;
1064 1071
        self.fns.append(func, self.allocator);
1065 1072
1066 1073
        let method = resolver::findTraitMethod(traitInfo, mName)
1067 1074
            else panic "lowerInstanceDecl: method not found in trait";
1068 1075
1886 1893
/// Emit an instruction to the current block.
1887 1894
fn emit(self: *mut FnLowerer, instr: il::Instr) {
1888 1895
    let blk = self.currentBlock else panic;
1889 1896
    let mut block = getBlockMut(self, blk);
1890 1897
1898 +
    // Track whether this function is a leaf.
1899 +
    if self.isLeaf {
1900 +
        match instr {
1901 +
            case il::Instr::Call { .. },
1902 +
                 il::Instr::Ecall { .. } => self.isLeaf = false,
1903 +
            else => {},
1904 +
        }
1905 +
    }
1891 1906
    // Record source location alongside instruction when enabled.
1892 1907
    if self.low.options.debug {
1893 1908
        block.locs.append(self.srcLoc, self.allocator);
1894 1909
    }
1895 1910
    block.instrs.append(instr, self.allocator);