lib/std/arch/rv64.rad 13.0 KiB raw
1
//! RV64 code generation backend.
2
//!
3
//! Generates RISC-V 64-bit machine code from IL (intermediate language).
4
//!
5
//! # Submodules
6
//!
7
//! * encode: Instruction encoding functions
8
//! * decode: Instruction decoding (for disassembly/printing)
9
//! * emit: Binary emission context and branch patching
10
//! * isel: Instruction selection (IL to RV64 instructions)
11
//! * printer: Assembly text output
12
13
pub mod encode;
14
pub mod decode;
15
pub mod emit;
16
pub mod isel;
17
pub mod printer;
18
19
@test mod tests;
20
21
use std::mem;
22
use std::lang::il;
23
use std::lang::alloc;
24
use std::lang::gen::labels;
25
use std::lang::gen::regalloc;
26
27
////////////////
28
// Registers  //
29
////////////////
30
31
/// RISC-V general-purpose register, identified by its register number `n`.
/// The named constants in this file use numbers 0 through 31, and `reg`
/// asserts `n < 32` when building one from a raw number.
31
pub record Reg { n: u8 }
33
34
// Named constants for the 32 general-purpose registers. Each constant's `n`
// is the register number; the trailing comment gives the register's role.
// Note that `S0` and `FP` are two names for the same register (n = 8).
pub const ZERO: Reg = { n: 0 };   /// Hard-wired zero.
35
pub const RA:   Reg = { n: 1 };   /// Return address.
36
pub const SP:   Reg = { n: 2 };   /// Stack pointer.
37
pub const GP:   Reg = { n: 3 };   /// Global pointer.
38
pub const TP:   Reg = { n: 4 };   /// Thread pointer.
39
pub const T0:   Reg = { n: 5 };   /// Temporary/alternate link register.
40
pub const T1:   Reg = { n: 6 };   /// Temporary.
41
pub const T2:   Reg = { n: 7 };   /// Temporary.
42
pub const S0:   Reg = { n: 8 };   /// Saved register/frame pointer.
43
pub const FP:   Reg = { n: 8 };   /// Frame pointer (alias for S0).
44
pub const S1:   Reg = { n: 9 };   /// Saved register.
45
pub const A0:   Reg = { n: 10 };  /// Function argument/return.
46
pub const A1:   Reg = { n: 11 };  /// Function argument/return.
47
pub const A2:   Reg = { n: 12 };  /// Function argument.
48
pub const A3:   Reg = { n: 13 };  /// Function argument.
49
pub const A4:   Reg = { n: 14 };  /// Function argument.
50
pub const A5:   Reg = { n: 15 };  /// Function argument.
51
pub const A6:   Reg = { n: 16 };  /// Function argument.
52
pub const A7:   Reg = { n: 17 };  /// Function argument.
53
pub const S2:   Reg = { n: 18 };  /// Saved register.
54
pub const S3:   Reg = { n: 19 };  /// Saved register.
55
pub const S4:   Reg = { n: 20 };  /// Saved register.
56
pub const S5:   Reg = { n: 21 };  /// Saved register.
57
pub const S6:   Reg = { n: 22 };  /// Saved register.
58
pub const S7:   Reg = { n: 23 };  /// Saved register.
59
pub const S8:   Reg = { n: 24 };  /// Saved register.
60
pub const S9:   Reg = { n: 25 };  /// Saved register.
61
pub const S10:  Reg = { n: 26 };  /// Saved register.
62
pub const S11:  Reg = { n: 27 };  /// Saved register.
63
pub const T3:   Reg = { n: 28 };  /// Temporary.
64
pub const T4:   Reg = { n: 29 };  /// Temporary.
65
pub const T5:   Reg = { n: 30 };  /// Temporary.
66
pub const T6:   Reg = { n: 31 };  /// Temporary.
67
68
/// Build a `Reg` from a raw register number.
/// Panics (via assertion) unless `n` is in the range 0 through 31.
pub fn reg(n: u8) -> Reg {
    assert n <= 31;
    return Reg { n: n };
}
73
74
////////////////////////////
75
// Architecture constants //
76
////////////////////////////
77
78
/// Total number of general-purpose registers (register numbers 0 through 31).
79
pub const NUM_REGISTERS: u8 = 32;
80
/// Word size in bytes (32-bit).
81
pub const WORD_SIZE: i32 = 4;
82
/// Doubleword size in bytes (64-bit). Also used in this file as the register
/// allocator slot size and as the alignment of the code base address.
83
pub const DWORD_SIZE: i32 = 8;
84
/// Instruction size in bytes. Emitted code is stored as `u32` words.
85
pub const INSTR_SIZE: i32 = 4;
86
/// Stack alignment requirement in bytes.
87
pub const STACK_ALIGNMENT: i32 = 16;
88
89
/////////////////////////
90
// Codegen Allocation  //
91
/////////////////////////
92
93
/// Argument registers for function calls, in order A0 through A7.
94
pub const ARG_REGS: [Reg; 8] = [A0, A1, A2, A3, A4, A5, A6, A7];
95
96
/// Scratch register for code gen. Never allocated to user values.
/// Same register number as `T5`, which is absent from `ALLOCATABLE_REGS`.
97
pub const SCRATCH1: Reg = { n: 30 };
98
99
/// Second scratch register for operations needing two temporaries.
/// Same register number as `T6`, which is absent from `ALLOCATABLE_REGS`.
100
pub const SCRATCH2: Reg = { n: 31 };
101
102
/// Dedicated scratch for address offset adjustment. Never allocated to user
103
/// values and never used for operand materialization, so it can never
104
/// conflict with `rd`, `rs`, or `base` in load/store helpers.
/// Same register number as `T4`, which is absent from `ALLOCATABLE_REGS`.
105
pub const ADDR_SCRATCH: Reg = { n: 29 };
106
107
/// Callee-saved registers that need save/restore if used.
/// `S0`/`FP` is deliberately not part of this list.
108
pub const CALLEE_SAVED: [Reg; 11] = [S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11];
109
110
/// Maximum 12-bit signed immediate value.
111
pub const MAX_IMM: i32 = 2047;
112
113
/// Minimum 12-bit signed immediate value.
114
pub const MIN_IMM: i32 = -2048;
115
116
/// Allocatable register numbers for register allocation.
/// Excludes register numbers 29, 30 and 31, which are the scratch registers
/// above, as well as `ZERO`, `RA`, `SP`, `GP`, `TP` and `S0`/`FP`.
117
const ALLOCATABLE_REGS: [u8; 23] = [
118
    5, 6, 7, 28,                                // T0-T3
119
    10, 11, 12, 13, 14, 15, 16, 17,             // A0-A7
120
    9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,  // S1-S11
121
];
122
123
/// Argument register numbers for register allocation.
/// Same registers as `ARG_REGS`, as raw numbers.
124
const ARG_REG_NUMS: [u8; 8] = [10, 11, 12, 13, 14, 15, 16, 17]; // A0-A7
125
126
/// Callee-saved register numbers for frame layout.
/// Same registers as `CALLEE_SAVED`, as raw numbers.
127
const CALLEE_SAVED_NUMS: [u8; 11] = [9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]; // S1-S11
128
129
/// Build the register-allocator configuration for this target: the
/// allocatable, argument and callee-saved register numbers, plus a slot
/// size of one doubleword.
// TODO: This should be a constant variable.
pub fn targetConfig() -> regalloc::TargetConfig {
    // TODO: Use inline slice when we move to self-hosted compiler.
    let allocatable = &ALLOCATABLE_REGS[..];
    let argRegs = &ARG_REG_NUMS[..];
    let calleeSaved = &CALLEE_SAVED_NUMS[..];

    return regalloc::TargetConfig {
        allocatable,
        argRegs,
        calleeSaved,
        slotSize: DWORD_SIZE,
    };
}
140
141
///////////////////////
142
// Codegen Constants //
143
///////////////////////
144
145
/// Maximum number of data symbols.
146
pub const MAX_DATA_SYMS: u32 = 8192;
147
148
/// Base address where read-only data is loaded.
/// `generate` also derives the code base address from it: code follows the
/// read-only data, aligned to a doubleword boundary.
149
pub const RO_DATA_BASE: u32 = 0x10000;
150
151
/// Base address where read-write data is loaded.
152
pub const RW_DATA_BASE: u32 = 0xFFFFF0;
153
154
/// Data symbol entry mapping name to address.
/// Entries are created by `layoutSection` and searched by name in
/// `lookupDataSymAddr`.
155
pub record DataSym {
156
    /// Symbol name.
157
    name: *[u8],
158
    /// Absolute address, including data base address
    /// (`layoutSection` stores `base + offset` here).
159
    addr: u32,
160
}
161
162
/// Storage buffers passed from driver for code generation.
163
pub record Storage {
164
    /// Buffer for data symbols. `generate` fills it from the start through
    /// `layoutSection` and uses the filled prefix as the symbol table.
165
    dataSyms: *mut [DataSym],
166
}
167
168
/// Result of code generation, as returned by `generate`.
169
pub record Program {
170
    /// Slice of emitted code, as `u32` words.
171
    code: *[u32],
172
    /// Slice of function addresses (name + start index).
173
    funcs: *[emit::FuncAddr],
174
    /// Number of read-only data bytes emitted
    /// (the value `emitSection` returned for the read-only section).
175
    roDataSize: u32,
176
    /// Number of read-write data bytes emitted
    /// (the value `emitSection` returned for the read-write section).
177
    rwDataSize: u32,
178
    /// Debug entries mapping PCs to source locations. Empty when debug is off.
179
    debugEntries: *[emit::DebugEntry],
180
}
181
182
/// Lay out data symbols for a single section (read-only or read-write).
///
/// Symbols are placed in two passes over `program.data`: initialized data
/// first, then uninitialized, so that only initialized data needs to be
/// written to the output file. Each placed symbol is aligned to its own
/// `alignment`, recorded in `syms` at index `*count` with the address
/// `base + offset`, and `*count` is then incremented.
///
/// Panics if `syms` has no room left for another symbol.
/// Returns the updated offset past all placed symbols, relative to `base`.
fn layoutSection(
    program: *il::Program,
    syms: *mut [DataSym],
    count: *mut u32,
    base: u32,
    readOnly: bool
) -> u32 {
    let mut offset: u32 = 0;

    // Phase 0 places initialized data, phase 1 places uninitialized data.
    for phase in 0..2 {
        let wantUndefined = phase == 1;

        for i in 0..program.data.len {
            let data = &program.data[i];
            if data.readOnly == readOnly and data.isUndefined == wantUndefined {
                // Fail with a descriptive message rather than indexing past
                // the end of the caller-provided symbol buffer.
                if *count >= syms.len {
                    panic "layoutSection: too many data symbols";
                }
                offset = mem::alignUp(offset, data.alignment);
                syms[*count] = DataSym { name: data.name, addr: base + offset };
                *count += 1;
                offset += data.size;
            }
        }
    }
    return offset;
}
217
218
/// Store `addr` at `buf[offset..]` as an 8-byte pointer: the low 4 bytes are
/// the address, the high 4 bytes are zero. Returns the offset just past it.
// TODO: Use `u64` once it's supported.
fn writePointer(buf: *mut [u8], offset: u32, addr: u32) -> u32 {
    let lo: u32 = addr;
    let hi: u32 = 0;
    let loPtr = &lo as *u8;
    let hiPtr = &hi as *u8;

    try! mem::copy(&mut buf[offset..], @sliceOf(loPtr, 4));
    try! mem::copy(&mut buf[(offset + 4)..], @sliceOf(hiPtr, 4));

    return offset + 8;
}

/// Serialize one section's initialized data (read-only or read-write,
/// selected by `readOnly`) into `buf`.
///
/// Walks `program.data` in order, skipping items of the other section and
/// items marked undefined. Each item is aligned to its `alignment` and its
/// values are written back to back. Data symbol references are resolved
/// through `dataSyms`; function references are resolved as `codeBase` plus
/// the function's offset in `fnLabels`.
///
/// Panics if an item does not fit in `buf` or a data symbol is unknown.
/// Returns the total number of bytes written.
pub fn emitSection(
    program: *il::Program,
    dataSyms: *[DataSym],
    fnLabels: *labels::Labels,
    codeBase: u32,
    buf: *mut [u8],
    readOnly: bool
) -> u32 {
    let mut pos: u32 = 0;

    for i in 0..program.data.len {
        let data = &program.data[i];
        if data.readOnly == readOnly and not data.isUndefined {
            pos = mem::alignUp(pos, data.alignment);
            if pos + data.size > buf.len {
                panic "emitSection: buffer overflow";
            }
            for j in 0..data.values.len {
                let entry = &data.values[j];
                // Each entry's item is emitted `count` times in a row.
                for _ in 0..entry.count {
                    match entry.item {
                        case il::DataItem::Val { typ, val } => {
                            // Copy the raw bytes of the value, sized by its type.
                            let size = il::typeSize(typ);
                            let valPtr = &val as *u8;
                            try! mem::copy(&mut buf[pos..], @sliceOf(valPtr, size));
                            pos += size;
                        },
                        case il::DataItem::Sym(name) => {
                            let addr = lookupDataSymAddr(dataSyms, name) else {
                                panic "emitSection: data symbol not found";
                            };
                            pos = writePointer(buf, pos, addr);
                        },
                        case il::DataItem::Fn(name) => {
                            let addr = codeBase + labels::funcOffset(fnLabels, name) as u32;
                            pos = writePointer(buf, pos, addr);
                        },
                        case il::DataItem::Str(s) => {
                            try! mem::copy(&mut buf[pos..], s);
                            pos += s.len;
                        },
                        case il::DataItem::Undef => {
                            // Undefined bytes inside initialized data are zero-filled.
                            buf[pos] = 0;
                            pos += 1;
                        },
                    }
                }
            }
        }
    }
    return pos;
}
292
293
/// Find the absolute address recorded for the data symbol called `name`.
/// Scans `dataSyms` in order and returns the address of the first entry
/// whose name matches, or `nil` when there is none.
fn lookupDataSymAddr(dataSyms: *[DataSym], name: *[u8]) -> ?u32 {
    for i in 0..dataSyms.len {
        let entry = &dataSyms[i];
        if mem::eq(entry.name, name) {
            return entry.addr;
        }
    }
    return nil;
}
303
304
/// Generate RV64 machine code and data sections for an IL program.
///
/// Data symbols are laid out into `storage.dataSyms`, every non-extern
/// function is register-allocated and run through instruction selection,
/// and initialized data is written into `roDataBuf` and `rwDataBuf`.
/// `arena` is handed to the emitter and to the register allocator; `debug`
/// is forwarded to the emitter.
pub fn generate(
    program: *il::Program,
    storage: Storage,
    roDataBuf: *mut [u8],
    rwDataBuf: *mut [u8],
    arena: *mut alloc::Arena,
    debug: bool
) -> Program {
    let regConfig = targetConfig();
    let mut em = try! emit::emitter(arena, debug);

    // Assign addresses to all data symbols: read-only section first, then
    // read-write. Both calls append to the same symbol buffer.
    let mut symCount: u32 = 0;

    let roLayoutSize = layoutSection(program, storage.dataSyms, &mut symCount, RO_DATA_BASE, true);
    layoutSection(program, storage.dataSyms, &mut symCount, RW_DATA_BASE, false);

    let dataSyms = &storage.dataSyms[..symCount];

    // Code follows the read-only data, aligned to a dword boundary.
    let codeBase = mem::alignUp(RO_DATA_BASE + roLayoutSize, DWORD_SIZE as u32);

    // When the program has a default function, reserve two instruction
    // slots at the very start for a jump to it. The real jump is patched
    // in below, once the function's location is known.
    let mut entryName: ?*[u8] = nil;
    if let idx = program.defaultFnIdx {
        entryName = program.fns[idx].name;
        emit::emit(&mut em, encode::nop()); // First half of the entry jump.
        emit::emit(&mut em, encode::nop()); // Second half of the entry jump.
    }

    // Emit code for every function that has a body.
    for i in 0..program.fns.len {
        let func = program.fns[i];
        if not func.isExtern {
            let mark = alloc::save(arena);
            let ralloc = try! regalloc::allocate(func, &regConfig, arena);
            isel::selectFn(&mut em, dataSyms, &ralloc, func);

            // Roll the arena back so the next function reuses this memory.
            alloc::restore(arena, mark);
        }
    }

    // Replace the reserved slots with an `auipc`/`jalr` pair through
    // `SCRATCH1`. Nb. the two-instruction form is used even when a single
    // instruction would reach.
    if let name = entryName {
        let rel = emit::branchOffsetToFunc(&em, 0, name);
        let imm = emit::splitImm(rel);

        emit::patch(&mut em, 0, encode::auipc(SCRATCH1, imm.hi));
        emit::patch(&mut em, 1, encode::jalr(ZERO, SCRATCH1, imm.lo));
    }

    // All functions are emitted: resolve pending calls and address loads.
    emit::patchCalls(&mut em);
    emit::patchAddrLoads(&mut em);

    // Serialize the data sections, read-only first.
    let roBytes = emitSection(program, dataSyms, &em.labels, codeBase, roDataBuf, true);
    let rwBytes = emitSection(program, dataSyms, &em.labels, codeBase, rwDataBuf, false);

    return Program {
        code: emit::getCode(&em),
        funcs: emit::getFuncs(&em),
        roDataSize: roBytes,
        rwDataSize: rwBytes,
        debugEntries: emit::getDebugEntries(&em),
    };
}