lib/std/lang/gen/regalloc/spill.rad 10.8 KiB raw
1
//! Spilling pass - determines which SSA values need stack slots.
2
//!
3
//! This pass analyzes register pressure and determines which values must be
4
//! spilled to memory. It does not modify the IL.
5
//!
6
//! The instruction selector uses this information to emit load/store
7
//! instructions for spilled values.
8
//!
9
//! # Algorithm
10
//!
11
//! 1. Cost calculation: for each value, compute spill cost:
12
//!
13
//!        cost = (defs + uses) * 2^min(loopDepth, 10)
14
//!
15
//!    Values used in inner loops are expensive to spill.
16
//!
17
//! 2. Pressure analysis: walk instructions backwards, tracking live set.
18
//!
19
//!    When `|live| > numRegs`:
20
//!
21
//!    * Sort live values by cost (ascending).
22
//!    * Mark lowest-cost values as spilled until pressure is acceptable.
23
//!
24
//! 3. Slot assignment: assign stack offsets to spilled values.
25
//!
26
//! # Output
27
//!
28
//! Spill info containing:
29
//!
30
//! * Spill slot assignments: SSA reg -> stack offset, `-1` if not spilled.
31
//! * Total frame size needed for spills.
32
33
use std::lang::il;
34
use std::lang::alloc;
35
use std::lang::gen::bitset;
36
use std::lang::gen::regalloc::liveness;
37
38
/// Maximum number of candidates for spill sorting.
39
const MAX_CANDIDATES: u32 = 256;
40
/// Maximum loop depth for cost weighting (2^10 = 1024).
41
const MAX_LOOP_WEIGHT: u32 = 10;
42
43
/// Spill cost for a single SSA register.
44
record SpillCost {
45
    /// Number of definitions (weighted by loop depth).
46
    defs: u32,
47
    /// Number of uses (weighted by loop depth).
48
    uses: u32,
49
}
50
51
/// Spill decision for a function.
52
pub record SpillInfo {
53
    /// SSA register mapped to stack slot offset. `-1` means not spilled.
54
    slots: *mut [i32],
55
    /// Total spill frame size needed in bytes.
56
    frameSize: i32,
57
    /// Values that must be allocated in callee-saved registers.
58
    calleeClass: bitset::Bitset,
59
    /// Maximum SSA register number.
60
    maxReg: u32,
61
}
62
63
/// Candidate buffer for spill decisions.
64
record Candidates {
65
    entries: [CostEntry; 256],
66
    n: u32,
67
}
68
69
/// Entry for cost sorting.
70
record CostEntry {
71
    reg: u32,
72
    cost: u32,
73
}
74
75
/// Context for counting register uses.
76
record CountCtx {
77
    costs: *mut [SpillCost],
78
    weight: u32,
79
}
80
81
/// Analyze a function and determine which values need spill slots.
82
pub fn analyze(
83
    func: *il::Fn,
84
    live: *liveness::LiveInfo,
85
    numRegs: u32,
86
    numCalleeSaved: u32,
87
    slotSize: u32,
88
    arena: *mut alloc::Arena
89
) -> SpillInfo throws (alloc::AllocError) {
90
    let maxReg = live.maxReg;
91
    if maxReg == 0 {
92
        let calleeClass = try bitset::allocate(arena, 0);
93
        return SpillInfo {
94
            slots: &mut [],
95
            frameSize: 0,
96
            calleeClass,
97
            maxReg: 0,
98
        };
99
    }
100
    // Allocate spill slots array.
101
    let slots = try alloc::allocSlice(arena, @sizeOf(i32), @alignOf(i32), maxReg) as *mut [i32];
102
    for i in 0..maxReg {
103
        slots[i] = -1;
104
    }
105
    // Allocate cost array.
106
    let costs = try alloc::allocSlice(arena, @sizeOf(SpillCost), @alignOf(SpillCost), maxReg) as *mut [SpillCost];
107
    for i in 0..maxReg {
108
        costs[i] = SpillCost { defs: 0, uses: 0 };
109
    }
110
    // Phase 1: Calculate spill costs.
111
    fillCosts(func, costs);
112
113
    // Phase 2: Find values that exceed register pressure.
114
    let mut spilled = try bitset::allocate(arena, maxReg);
115
    let mut calleeClass = try bitset::allocate(arena, maxReg);
116
    let mut scratch = try bitset::allocate(arena, maxReg);
117
118
    for b in 0..func.blocks.len {
119
        let block = &func.blocks[b];
120
121
        // Start with live-out set.
122
        bitset::copy(&mut scratch, &live.liveOut[b]);
123
124
        // Walk instructions backwards.
125
        let mut i = block.instrs.len;
126
        while i > 0 {
127
            i -= 1;
128
            let instr = block.instrs[i];
129
130
            // Limit register pressure before processing this instruction.
131
            limitPressure(&mut scratch, &mut spilled, costs, numRegs);
132
133
            // Enforce cross-call pressure at call sites.
134
            if il::isCall(instr) {
135
                limitCrossCallPressure(
136
                    &mut scratch, &mut spilled, costs,
137
                    &mut calleeClass, numCalleeSaved, il::instrDst(instr)
138
                );
139
            }
140
            // Remove definition from live set.
141
            if let dst = il::instrDst(instr) {
142
                bitset::clear(&mut scratch, dst.n);
143
            }
144
            // Add uses to live set.
145
            il::forEachReg(instr, addRegToSetCallback, &mut scratch as *mut opaque);
146
        }
147
        // Also limit pressure at block entry.
148
        limitPressure(&mut scratch, &mut spilled, costs, numRegs);
149
    }
150
151
    // Phase 3: Enforce global callee-class limit.
152
    // The per-call-site limit may leave the callee-class set larger than
153
    // `numCalleeSaved` when different call sites keep different subsets.
154
    // Spill excess values to guarantee the assignment phase always finds
155
    // a callee-saved register for cross-call values.
156
    while bitset::count(&calleeClass) > numCalleeSaved {
157
        let mut it = bitset::iter(&calleeClass);
158
        if let reg = bitset::iterNext(&mut it) {
159
            bitset::clear(&mut calleeClass, reg);
160
            bitset::set(&mut spilled, reg);
161
        } else {
162
            panic "spill: count > 0 but no set bits found";
163
        }
164
    }
165
166
    // Phase 4: Assign stack slots to spilled values.
167
    let mut frameSize: i32 = 0;
168
    let mut it = bitset::iter(&spilled);
169
170
    while let n = bitset::iterNext(&mut it) {
171
        slots[n] = frameSize;
172
        frameSize += slotSize as i32;
173
    }
174
    return SpillInfo { slots, frameSize, calleeClass, maxReg };
175
}
176
177
/// Calculate spill costs for all registers, weighted by loop depth.
178
fn fillCosts(func: *il::Fn, costs: *mut [SpillCost]) {
179
    for b in 0..func.blocks.len {
180
        let block = &func.blocks[b];
181
182
        // Exponential weight for loop depth, capped to avoid overflow.
183
        let depth = MAX_LOOP_WEIGHT if block.loopDepth > MAX_LOOP_WEIGHT else block.loopDepth;
184
        let weight: u32 = 1 << depth;
185
186
        // Count block parameter definitions.
187
        for p in block.params {
188
            if p.value.n < costs.len {
189
                costs[p.value.n].defs = costs[p.value.n].defs + weight;
190
            }
191
        }
192
        // Count instruction defs and uses.
193
        for i in 0..block.instrs.len {
194
            let instr = block.instrs[i];
195
196
            // Count definition.
197
            if let dst = il::instrDst(instr) {
198
                if dst.n < costs.len {
199
                    costs[dst.n].defs = costs[dst.n].defs + weight;
200
                }
201
            }
202
            // Count uses.
203
            let mut ctx = CountCtx { costs, weight };
204
            il::forEachReg(instr, countRegUseCallback, &mut ctx as *mut opaque);
205
        }
206
    }
207
}
208
209
/// Sort candidates by cost (ascending) using insertion sort, then spill
210
/// the cheapest `excess` values: set in `spilled`, clear in `source`.
211
fn spillCheapest(
212
    c: *mut Candidates,
213
    excess: u32,
214
    source: *mut bitset::Bitset,
215
    spilled: *mut bitset::Bitset
216
) {
217
    // Insertion sort ascending by cost.
218
    for i in 1..c.n {
219
        let key = c.entries[i];
220
        let mut j: u32 = i;
221
        while j > 0 and c.entries[j - 1].cost > key.cost {
222
            c.entries[j] = c.entries[j - 1];
223
            j -= 1;
224
        }
225
        c.entries[j] = key;
226
    }
227
    let toSpill = c.n if excess > c.n else excess;
228
    for i in 0..toSpill {
229
        bitset::set(spilled, c.entries[i].reg);
230
        bitset::clear(source, c.entries[i].reg);
231
    }
232
}
233
234
/// Collect all values from a bitset into a candidates buffer with their costs.
235
fn collectCandidates(set: *bitset::Bitset, costs: *[SpillCost]) -> Candidates {
236
    let mut c = Candidates { entries: undefined, n: 0 };
237
    let mut it = bitset::iter(set);
238
    while let reg = bitset::iterNext(&mut it) {
239
        if c.n >= MAX_CANDIDATES {
240
            panic "collectCandidates: too many live values";
241
        }
242
        if reg < costs.len {
243
            c.entries[c.n] = CostEntry { reg, cost: costs[reg].defs + costs[reg].uses };
244
            c.n += 1;
245
        }
246
    }
247
    return c;
248
}
249
250
/// Limit register pressure by marking low-cost values as spilled.
251
fn limitPressure(
252
    live: *mut bitset::Bitset,
253
    spilled: *mut bitset::Bitset,
254
    costs: *[SpillCost],
255
    numRegs: u32
256
) {
257
    let liveCount = bitset::count(live);
258
    if liveCount <= numRegs {
259
        return;
260
    }
261
    let mut c = collectCandidates(live, costs);
262
    spillCheapest(&mut c, liveCount - numRegs, live, spilled);
263
}
264
265
/// Check whether an SSA register is the destination of a call instruction.
266
fn isCallDst(callDst: ?il::Reg, n: u32) -> bool {
267
    if let d = callDst {
268
        return d.n == n;
269
    }
270
    return false;
271
}
272
273
/// Limit cross-call pressure by spilling values that exceed callee-saved capacity.
274
///
275
/// At a call site, every live value must survive the call in a callee-saved
276
/// register. If the count exceeds `numCalleeSaved`, spill the cheapest
277
/// crossing values.
278
fn limitCrossCallPressure(
279
    live: *mut bitset::Bitset,
280
    spilled: *mut bitset::Bitset,
281
    costs: *[SpillCost],
282
    calleeClass: *mut bitset::Bitset,
283
    numCalleeSaved: u32,
284
    callDst: ?il::Reg
285
) {
286
    // Collect crossing candidates: live values excluding the call destination.
287
    let mut candidates: [CostEntry; 256] = undefined;
288
    let mut numCandidates: u32 = 0;
289
    let mut it = bitset::iter(live);
290
    while let n = bitset::iterNext(&mut it) {
291
        if not isCallDst(callDst, n) and n < costs.len {
292
            candidates[numCandidates] = CostEntry {
293
                reg: n,
294
                cost: costs[n].defs + costs[n].uses,
295
            };
296
            numCandidates += 1;
297
        }
298
    }
299
    // Spill cheapest candidates if crossing count exceeds callee-saved capacity.
300
    if numCandidates > numCalleeSaved {
301
        let mut c = Candidates { entries: candidates, n: numCandidates };
302
        spillCheapest(&mut c, numCandidates - numCalleeSaved, live, spilled);
303
    }
304
    // Mark crossing values that remain after spilling as callee-saved class.
305
    let mut it2 = bitset::iter(live);
306
    while let n = bitset::iterNext(&mut it2) {
307
        if not isCallDst(callDst, n) {
308
            bitset::set(calleeClass, n);
309
        }
310
    }
311
}
312
313
/// Callback for [`il::forEachReg`]: increments use count for register.
314
fn countRegUseCallback(reg: il::Reg, ctxPtr: *mut opaque) {
315
    let ctx = ctxPtr as *mut CountCtx;
316
    if reg.n >= ctx.costs.len {
317
        panic "countRegUseCallback: register out of bounds";
318
    }
319
    ctx.costs[reg.n].uses = ctx.costs[reg.n].uses + ctx.weight;
320
}
321
322
/// Callback for [`il::forEachReg`]: adds register to live set.
323
fn addRegToSetCallback(reg: il::Reg, ctx: *mut opaque) {
324
    bitset::set(ctx as *mut bitset::Bitset, reg.n);
325
}
326
327
/// Check if a register is spilled.
328
pub fn isSpilled(info: *SpillInfo, reg: il::Reg) -> bool {
329
    if reg.n >= info.maxReg {
330
        return false;
331
    }
332
    return info.slots[reg.n] >= 0;
333
}
334
335
/// Get spill slot offset for a register, or `nil` if not spilled.
336
pub fn spillSlot(info: *SpillInfo, reg: il::Reg) -> ?i32 {
337
    if isSpilled(info, reg) {
338
        return info.slots[reg.n];
339
    }
340
    return nil;
341
}