lib/std/lang/gen/regalloc/spill.rad 10.8 KiB raw
1
//! Spilling pass - determines which SSA values need stack slots.
2
//!
3
//! This pass analyzes register pressure and determines which values must be
4
//! spilled to memory. It does not modify the IL.
5
//!
6
//! The instruction selector uses this information to emit load/store
7
//! instructions for spilled values.
8
//!
9
//! # Algorithm
10
//!
11
//! 1. Cost calculation: for each value, compute spill cost:
12
//!
13
//!        cost = (defs + uses) * 2^min(loopDepth, 10)
14
//!
15
//!    Values used in inner loops are expensive to spill.
16
//!
17
//! 2. Pressure analysis: walk instructions backwards, tracking live set.
18
//!
19
//!    When `|live| > numRegs`:
20
//!
21
//!    * Sort live values by cost (ascending).
22
//!    * Mark lowest-cost values as spilled until pressure is acceptable.
23
//!
24
//! 3. Slot assignment: assign stack offsets to spilled values.
25
//!
26
//! # Output
27
//!
28
//! Spill info containing:
29
//!
30
//! * Spill slot assignments: SSA reg -> stack offset, `-1` if not spilled.
31
//! * Total frame size needed for spills.
32
33
use std::lang::il;
34
use std::lang::alloc;
35
use std::lang::gen::bitset;
36
use std::lang::gen::regalloc::liveness;
37
38
/// Maximum number of candidates for spill sorting.
39
const MAX_CANDIDATES: u32 = 256;
40
/// Maximum loop depth for cost weighting (2^10 = 1024).
41
const MAX_LOOP_WEIGHT: u32 = 10;
42
43
/// Spill cost for a single SSA register.
44
record SpillCost {
45
    /// Number of definitions (weighted by loop depth).
46
    defs: u32,
47
    /// Number of uses (weighted by loop depth).
48
    uses: u32,
49
}
50
51
/// Spill decision for a function.
52
pub record SpillInfo {
53
    /// SSA register mapped to stack slot offset. `-1` means not spilled.
54
    slots: *mut [i32],
55
    /// Total spill frame size needed in bytes.
56
    frameSize: i32,
57
    /// Values that must be allocated in callee-saved registers.
58
    calleeClass: bitset::Bitset,
59
    /// Maximum SSA register number.
60
    maxReg: u32,
61
}
62
63
/// Candidate buffer for spill decisions.
64
record Candidates {
65
    entries: [CostEntry; 256],
66
    n: u32,
67
}
68
69
/// Entry for cost sorting.
70
record CostEntry {
71
    reg: u32,
72
    cost: u32,
73
}
74
75
/// Context for counting register uses.
76
record CountCtx {
77
    costs: *mut [SpillCost],
78
    weight: u32,
79
}
80
81
/// Analyze a function and determine which values need spill slots.
82
pub fn analyze(
83
    func: *il::Fn,
84
    live: *liveness::LiveInfo,
85
    numRegs: u32,
86
    numCalleeSaved: u32,
87
    slotSize: u32,
88
    arena: *mut alloc::Arena
89
) -> SpillInfo throws (alloc::AllocError) {
90
    let maxReg = live.maxReg;
91
    if maxReg == 0 {
92
        let calleeClass = try bitset::allocate(arena, 0);
93
        return SpillInfo {
94
            slots: &mut [],
95
            frameSize: 0,
96
            calleeClass,
97
            maxReg: 0,
98
        };
99
    }
100
    // Allocate spill slots array.
101
    let slots = try alloc::allocSlice(arena, @sizeOf(i32), @alignOf(i32), maxReg) as *mut [i32];
102
    for i in 0..maxReg {
103
        slots[i] = -1;
104
    }
105
    // Allocate cost array.
106
    let costs = try alloc::allocSlice(arena, @sizeOf(SpillCost), @alignOf(SpillCost), maxReg) as *mut [SpillCost];
107
    for i in 0..maxReg {
108
        costs[i] = SpillCost { defs: 0, uses: 0 };
109
    }
110
    // Phase 1: Calculate spill costs.
111
    fillCosts(func, costs);
112
113
    // Phase 2: Find values that exceed register pressure.
114
    let mut spilled = try bitset::allocate(arena, maxReg);
115
    let mut calleeClass = try bitset::allocate(arena, maxReg);
116
    let mut scratch = try bitset::allocate(arena, maxReg);
117
118
    for b in 0..func.blocks.len {
119
        let block = &func.blocks[b];
120
121
        // Start with live-out set.
122
        bitset::copy(&mut scratch, &live.liveOut[b]);
123
124
        // Walk instructions backwards.
125
        let mut i = block.instrs.len;
126
        while i > 0 {
127
            i -= 1;
128
            let instr = block.instrs[i];
129
130
            // Limit register pressure before processing this instruction.
131
            limitPressure(&mut scratch, &mut spilled, costs, numRegs);
132
133
            // Enforce cross-call pressure at call sites.
134
            if il::isCall(instr) {
135
                limitCrossCallPressure(
136
                    &mut scratch, &mut spilled, costs,
137
                    &mut calleeClass, numCalleeSaved, il::instrDst(instr)
138
                );
139
            }
140
            // Remove definition from live set.
141
            if let dst = il::instrDst(instr) {
142
                bitset::clear(&mut scratch, dst.n);
143
            }
144
            // Add uses to live set.
145
            il::forEachReg(instr, addRegToSetCallback, &mut scratch as *mut opaque);
146
        }
147
        // Also limit pressure at block entry.
148
        limitPressure(&mut scratch, &mut spilled, costs, numRegs);
149
    }
150
151
    // Phase 3: Enforce global callee-class limit.
152
    // The per-call-site limit may leave the callee-class set larger than
153
    // `numCalleeSaved` when different call sites keep different subsets.
154
    // Spill excess values to guarantee the assignment phase always finds
155
    // a callee-saved register for cross-call values.
156
    while bitset::count(&calleeClass) > numCalleeSaved {
157
        let mut it = bitset::iter(&calleeClass);
158
        if let reg = bitset::iterNext(&mut it) {
159
            bitset::clear(&mut calleeClass, reg);
160
            bitset::set(&mut spilled, reg);
161
        } else {
162
            panic "spill: count > 0 but no set bits found";
163
        }
164
    }
165
166
    // Phase 4: Assign stack slots to spilled values.
167
    let mut frameSize: i32 = 0;
168
    let mut it = bitset::iter(&spilled);
169
170
    while let n = bitset::iterNext(&mut it) {
171
        slots[n] = frameSize;
172
        frameSize += slotSize as i32;
173
    }
174
    return SpillInfo { slots, frameSize, calleeClass, maxReg };
175
}
176
177
/// Calculate spill costs for all registers, weighted by loop depth.
178
fn fillCosts(func: *il::Fn, costs: *mut [SpillCost]) {
179
    for b in 0..func.blocks.len {
180
        let block = &func.blocks[b];
181
182
        // Exponential weight for loop depth, capped to avoid overflow.
183
        let depth = MAX_LOOP_WEIGHT if block.loopDepth > MAX_LOOP_WEIGHT else block.loopDepth;
184
        let weight: u32 = 1 << depth;
185
186
        // Count block parameter definitions.
187
        for p in block.params {
188
            if p.value.n < costs.len {
189
                costs[p.value.n].defs = costs[p.value.n].defs + weight;
190
            }
191
        }
192
        // Count instruction defs and uses.
193
        for i in 0..block.instrs.len {
194
            let instr = block.instrs[i];
195
196
            // Count definition.
197
            if let dst = il::instrDst(instr) {
198
                if dst.n < costs.len {
199
                    costs[dst.n].defs = costs[dst.n].defs + weight;
200
                }
201
            }
202
            // Count uses.
203
            let mut ctx = CountCtx { costs, weight };
204
            il::forEachReg(instr, countRegUseCallback, &mut ctx as *mut opaque);
205
        }
206
    }
207
}
208
209
/// Sort candidates by cost (ascending) using insertion sort, then spill
210
/// the cheapest `excess` values: set in `spilled`, clear in `source`.
211
fn spillCheapest(
212
    c: *mut Candidates,
213
    excess: u32,
214
    source: *mut bitset::Bitset,
215
    spilled: *mut bitset::Bitset
216
) {
217
    // Insertion sort ascending by cost.
218
    for i in 1..c.n {
219
        let key = c.entries[i];
220
        let mut j: u32 = i;
221
        while j > 0 and c.entries[j - 1].cost > key.cost {
222
            c.entries[j] = c.entries[j - 1];
223
            j -= 1;
224
        }
225
        c.entries[j] = key;
226
    }
227
    let toSpill = c.n if excess > c.n else excess;
228
    for i in 0..toSpill {
229
        bitset::set(spilled, c.entries[i].reg);
230
        bitset::clear(source, c.entries[i].reg);
231
    }
232
}
233
234
/// Collect all values from a bitset into a candidates buffer with their costs.
235
fn collectCandidates(set: *bitset::Bitset, costs: *[SpillCost]) -> Candidates {
236
    let mut c = Candidates { entries: undefined, n: 0 };
237
    let mut it = bitset::iter(set);
238
    while let reg = bitset::iterNext(&mut it) {
239
        assert c.n < MAX_CANDIDATES, "collectCandidates: too many live values";
240
        if reg < costs.len {
241
            c.entries[c.n] = CostEntry { reg, cost: costs[reg].defs + costs[reg].uses };
242
            c.n += 1;
243
        }
244
    }
245
    return c;
246
}
247
248
/// Limit register pressure by marking low-cost values as spilled.
249
fn limitPressure(
250
    live: *mut bitset::Bitset,
251
    spilled: *mut bitset::Bitset,
252
    costs: *[SpillCost],
253
    numRegs: u32
254
) {
255
    let liveCount = bitset::count(live);
256
    if liveCount <= numRegs {
257
        return;
258
    }
259
    let mut c = collectCandidates(live, costs);
260
    spillCheapest(&mut c, liveCount - numRegs, live, spilled);
261
}
262
263
/// Check whether an SSA register is the destination of a call instruction.
264
fn isCallDst(callDst: ?il::Reg, n: u32) -> bool {
265
    if let d = callDst {
266
        return d.n == n;
267
    }
268
    return false;
269
}
270
271
/// Limit cross-call pressure by spilling values that exceed callee-saved capacity.
272
///
273
/// At a call site, every live value must survive the call in a callee-saved
274
/// register. If the count exceeds `numCalleeSaved`, spill the cheapest
275
/// crossing values.
276
fn limitCrossCallPressure(
277
    live: *mut bitset::Bitset,
278
    spilled: *mut bitset::Bitset,
279
    costs: *[SpillCost],
280
    calleeClass: *mut bitset::Bitset,
281
    numCalleeSaved: u32,
282
    callDst: ?il::Reg
283
) {
284
    // Collect crossing candidates: live values excluding the call destination.
285
    let mut candidates: [CostEntry; 256] = undefined;
286
    let mut numCandidates: u32 = 0;
287
    let mut it = bitset::iter(live);
288
    while let n = bitset::iterNext(&mut it) {
289
        if not isCallDst(callDst, n) and n < costs.len {
290
            candidates[numCandidates] = CostEntry {
291
                reg: n,
292
                cost: costs[n].defs + costs[n].uses,
293
            };
294
            numCandidates += 1;
295
        }
296
    }
297
    // Spill cheapest candidates if crossing count exceeds callee-saved capacity.
298
    if numCandidates > numCalleeSaved {
299
        let mut c = Candidates { entries: candidates, n: numCandidates };
300
        spillCheapest(&mut c, numCandidates - numCalleeSaved, live, spilled);
301
    }
302
    // Mark crossing values that remain after spilling as callee-saved class.
303
    let mut it2 = bitset::iter(live);
304
    while let n = bitset::iterNext(&mut it2) {
305
        if not isCallDst(callDst, n) {
306
            bitset::set(calleeClass, n);
307
        }
308
    }
309
}
310
311
/// Callback for [`il::forEachReg`]: increments use count for register.
312
fn countRegUseCallback(reg: il::Reg, ctxPtr: *mut opaque) {
313
    let ctx = ctxPtr as *mut CountCtx;
314
    assert reg.n < ctx.costs.len, "countRegUseCallback: register out of bounds";
315
    ctx.costs[reg.n].uses = ctx.costs[reg.n].uses + ctx.weight;
316
}
317
318
/// Callback for [`il::forEachReg`]: adds register to live set.
319
fn addRegToSetCallback(reg: il::Reg, ctx: *mut opaque) {
320
    bitset::set(ctx as *mut bitset::Bitset, reg.n);
321
}
322
323
/// Check if a register is spilled.
324
pub fn isSpilled(info: *SpillInfo, reg: il::Reg) -> bool {
325
    if reg.n >= info.maxReg {
326
        return false;
327
    }
328
    return info.slots[reg.n] >= 0;
329
}
330
331
/// Get spill slot offset for a register, or `nil` if not spilled.
332
pub fn spillSlot(info: *SpillInfo, reg: il::Reg) -> ?i32 {
333
    if isSpilled(info, reg) {
334
        return info.slots[reg.n];
335
    }
336
    return nil;
337
}