lib/std/lang/il.rad 15.1 KiB raw
1
//! Radiance Intermediate Language (RIL).
2
//!
3
//! A minimal, low-level, SSA-based intermediate language.
4
//!
5
//! > The single-assignment property simplifies reasoning about variables,
6
//! > since for every value instance its (single) defining assignment is known.
7
//! > Because every assignment creates a new value name it cannot kill
8
//! > (i.e., invalidate) expressions previously computed from other values.
9
//! > In particular, if two expressions are textually the same, they are sure
10
//! > to evaluate the same result.
11
//! >
12
//! >   -- "Single-Pass Generation of Static Single-Assignment Form for
13
//! >       Structured Languages", by Marc M. Brandis and Hanspeter Mossenbock
14
//!
15
//! # Type System
16
//!
17
//! Primitive types: `W8`, `W16`, `W32` (word sizes).
18
//! Signedness is encoded in operations (e.g., `Sdiv` vs `Udiv`), not types.
19
//!
20
//! Types on arithmetic operations indicate logical width but don't
21
//! change code generation. Arithmetic is always machine word sized,
22
//! no 8-bit or 16-bit add instruction.
23
//!
24
//! # Truncation and Narrowing
25
//!
26
//! There is no truncation instruction.
27
//! * Narrow values are stored in word-sized registers with undefined high bits.
28
//! * Truncation happens implicitly via narrow store instructions.
29
//! * To normalize a narrow value, mask explicitly via a bitwise `and`.
30
//!
31
//! # Widerning
32
//!
33
//! `Zext`/`Sext` widen values when loading from memory.
34
//!
35
//! # Aggregate Types
36
//!
37
//! The IL provides no aggregate types. Records, slices, optionals, and unions
38
//! are represented as bytes in memory with explicit layout computed during
39
//! lowering. Access is via explicit address arithmetic plus load/store/blit.
40
//!
41
//! # Lowering Strategy
42
//!
43
//! * Slices lower to a pointer+length layout in memory.
44
//! * Optionals lower to a tag+value layout in memory.
45
//! * Tagged unions lower to a tag+payload layout in memory.
46
//! * Results lower to a tag+payload layout in memory.
47
//!
48
//! Field access is expressed as address arithmetic plus load/store.
49
//!
50
//! Bounds check elimination and nullable pointer optimization happen during
51
//! this lowering phase.
52
//!
53
//! # Control Flow
54
//!
55
//! Uses block parameters instead of phi nodes (like MLIR/SIL). Each block
56
//! can declare parameters, and jumps/branches pass arguments to their targets.
57
58
// TODO: Labels should have their own type.
59
// TODO: Blocks should have an instruction in `Instr`.
60
61
pub mod printer;
62
63
use std::mem;
64
use std::lang::alloc;
65
66
/// Source location for debug info.
67
///
68
/// Associates an IL instruction with its originating source module and
69
/// byte offset.
70
pub record SrcLoc {
71
    /// Module identifier.
72
    moduleId: u16,
73
    /// Byte offset into the module's source file.
74
    offset: u32,
75
}
76
77
///////////////////////
78
// Name Formatting   //
79
///////////////////////
80
81
/// Separator for qualified symbol names.
82
pub const PATH_SEPARATOR: *[u8] = "::";
83
84
/// Format a qualified symbol name: `pkg::mod::path::name`.
85
pub fn formatQualifiedName(arena: *mut alloc::Arena, path: *[*[u8]], name: *[u8]) -> *[u8] {
86
    let mut totalLen: u32 = name.len;
87
    for segment in path {
88
        totalLen += segment.len + PATH_SEPARATOR.len;
89
    }
90
    let buf = try! alloc::allocSlice(arena, 1, 1, totalLen) as *mut [u8];
91
    let mut pos: u32 = 0;
92
93
    for segment in path {
94
        pos += try! mem::copy(&mut buf[pos..], segment);
95
        pos += try! mem::copy(&mut buf[pos..], PATH_SEPARATOR);
96
    }
97
    try! mem::copy(&mut buf[pos..], name);
98
99
    return &buf[..totalLen];
100
}
101
102
///////////
103
// Types //
104
///////////
105
106
/// IL type representation. Integer signedness is encoded in operations, not types.
107
pub union Type {
108
    /// 8-bit value.
109
    W8,
110
    /// 16-bit value.
111
    W16,
112
    /// 32-bit value.
113
    W32,
114
    /// 64-bit value (used for pointers on RV64).
115
    W64,
116
}
117
118
/// Get the size of a type in bytes.
119
pub fn typeSize(t: Type) -> u32 {
120
    match t {
121
        case Type::W8 => return 1,
122
        case Type::W16 => return 2,
123
        case Type::W32 => return 4,
124
        case Type::W64 => return 8,
125
    }
126
}
127
128
/// SSA register reference.
129
pub record Reg { n: u32 }
130
131
/// Instruction value.
132
pub union Val {
133
    /// Register reference.
134
    Reg(Reg),
135
    /// Immediate integer value.
136
    Imm(i64),
137
    /// Data symbol address (globals, constants, string literals).
138
    DataSym(*[u8]),
139
    /// Function address by symbol name. Used directly in call instructions
140
    /// for direct calls, or stored first for indirect calls.
141
    FnAddr(*[u8]),
142
    /// Undefined value. Used when the value is to be discarded,
143
    /// eg. in void returns.
144
    Undef,
145
}
146
147
/// Comparison operation for compare-and-branch.
148
pub union CmpOp { Eq, Ne, Slt, Ult }
149
150
/// Binary ALU operation kind.
151
pub union BinOp {
152
    // Arithmetic.
153
    Add, Sub, Mul, Sdiv, Udiv, Srem, Urem,
154
    // Comparison.
155
    Eq, Ne, Slt, Sge, Ult, Uge,
156
    // Bitwise.
157
    And, Or, Xor, Shl, Sshr, Ushr,
158
}
159
160
/// Unary ALU operation kind.
161
pub union UnOp {
162
    Neg, Not,
163
}
164
165
//////////////////
166
// Instructions //
167
//////////////////
168
169
/// Block parameter.
170
pub record Param {
171
    /// SSA register.
172
    value: Reg,
173
    /// Parameter type.
174
    type: Type,
175
}
176
177
/// Switch case mapping a constant value to a branch target.
178
pub record SwitchCase {
179
    /// The constant value to match against.
180
    value: i64,
181
    /// The target block index.
182
    target: u32,
183
    /// Arguments to pass to the target block.
184
    args: *mut [Val],
185
}
186
187
/// IL instruction.
188
/// SSA registers are represented as `Reg`, values as `Val`.
189
pub union Instr {
190
    ///////////////////////
191
    // Memory operations //
192
    ///////////////////////
193
194
    /// Allocate space on the stack: `reserve %dst <size> <alignment>;`
195
    /// Size can be a register for dynamic stack allocation (eg. VLAs).
196
    Reserve { dst: Reg, size: Val, alignment: u32 },
197
    /// Load a value from memory (zero-extending): `load <type> %dst %src <offset>;`
198
    Load { typ: Type, dst: Reg, src: Reg, offset: i32 },
199
    /// Signed load from memory (sign-extending): `sload <type> %dst %src <offset>;`
200
    Sload { typ: Type, dst: Reg, src: Reg, offset: i32 },
201
    /// Store a value to memory: `store <type> <src> %dst <offset>;`
202
    Store {
203
        /// Type being stored.
204
        typ: Type,
205
        /// Value being stored.
206
        src: Val,
207
        /// Destination address.
208
        dst: Reg,
209
        /// Byte offset from the base address.
210
        offset: i32
211
    },
212
    /// Copy memory region: `blit %dst %src <size>;`
213
    /// Size can be an immediate or a register (eg. for generics).
214
    Blit {
215
        /// Destination address.
216
        dst: Reg,
217
        /// Source address.
218
        src: Reg,
219
        /// Size to copy in bytes.
220
        size: Val
221
    },
222
    /// Copy a value into a register: `copy %dst <val>;`.
223
    Copy { dst: Reg, val: Val },
224
225
    /////////////////////
226
    // ALU operations  //
227
    /////////////////////
228
229
    /// Binary ALU operation: `<op> <type> %dst <a> <b>;`
230
    BinOp { op: BinOp, typ: Type, dst: Reg, a: Val, b: Val },
231
    /// Unary ALU operation: `<op> <type> %dst <a>;`
232
    UnOp { op: UnOp, typ: Type, dst: Reg, a: Val },
233
234
    /////////////////
235
    // Conversions //
236
    /////////////////
237
238
    /// Zero-extend from narrower type to word: `zext <type> %dst <val>;`
239
    Zext {
240
        /// Source type (I8 or I16).
241
        typ: Type,
242
        /// Destination register, always I32.
243
        dst: Reg,
244
        /// Source value.
245
        val: Val,
246
    },
247
    /// Sign-extend from narrower type to word: `sext <type> %dst <val>;`
248
    Sext {
249
        /// Source type (I8 or I16).
250
        typ: Type,
251
        /// Destination register, always I32.
252
        dst: Reg,
253
        /// Source value.
254
        val: Val,
255
    },
256
257
    ////////////////////
258
    // Function calls //
259
    ////////////////////
260
261
    /// Call a function: `call <type> %dst $func(<arg>...);`
262
    Call {
263
        /// Return type.
264
        retTy: Type,
265
        /// Holds return value, for non-void function.
266
        dst: ?Reg,
267
        /// Function value, either a symbol or an address in a register.
268
        func: Val,
269
        /// Function arguments.
270
        args: *[Val]
271
    },
272
273
    /////////////////
274
    // Terminators //
275
    /////////////////
276
277
    /// Return from function: `ret <val>;` or `ret;`
278
    Ret { val: ?Val },
279
    /// Unconditional jump: `jmp @block(<arg>...);`
280
    Jmp { target: u32, args: *mut [Val] },
281
    /// Compare-and-branch: `br.<op> <type> <a> <b> @then @else;`
282
    Br { op: CmpOp, typ: Type, a: Val, b: Val, thenTarget: u32, thenArgs: *mut [Val], elseTarget: u32, elseArgs: *mut [Val] },
283
    /// Multi-way branch: `switch <val> (<n> @block) ... @default;`
284
    Switch { val: Val, defaultTarget: u32, defaultArgs: *mut [Val], cases: *mut [SwitchCase] },
285
    /// Unreachable code marker: `unreachable;`
286
    /// Indicates control flow cannot reach this point to allow for optimizations.
287
    Unreachable,
288
289
    /////////////////
290
    // Intrinsics  //
291
    /////////////////
292
293
    /// Environment call: `ecall %dst <num> <a0> <a1> <a2> <a3>;`
294
    Ecall { dst: Reg, num: Val, a0: Val, a1: Val, a2: Val, a3: Val },
295
    /// Environment break: `ebreak;`.
296
    /// Triggers a breakpoint exception for debugging.
297
    Ebreak,
298
}
299
300
//////////////////////////
301
// Blocks and Functions //
302
//////////////////////////
303
304
/// A basic block in a function.
305
///
306
/// Basic blocks are instruction sequences with a single entry point and no branch
307
/// instruction, except possibly at the end of the sequence, where a terminator
308
/// may be found, ie. an instruction that terminates the sequence by jumping
309
/// to another sequence.
310
pub record Block {
311
    /// Block label.
312
    label: *[u8],
313
    /// Block parameters.
314
    params: *[Param],
315
    /// Instructions in the block. The last instruction must be a terminator.
316
    instrs: *mut [Instr],
317
    /// Source locations for debugging and error reporting.
318
    /// One entry per instruction when debug info is enabled, empty otherwise.
319
    locs: *[SrcLoc],
320
    /// Predecessor block indices. Used for control flow analysis.
321
    preds: *[u32],
322
    /// Loop nesting depth.
323
    /// Used for spill cost weighting in register allocation.
324
    loopDepth: u32,
325
}
326
327
/// An IL function.
328
pub record Fn {
329
    /// Qualified function name (e.g. `$mod$path$func`).
330
    name: *[u8],
331
    /// Function parameters.
332
    params: *[Param],
333
    /// Return type.
334
    returnType: Type,
335
    /// Whether the function is extern (no body).
336
    isExtern: bool,
337
    /// Basic blocks. Empty for extern functions.
338
    blocks: *[Block],
339
}
340
341
/////////////
342
// Program //
343
/////////////
344
345
/// Data initializer item.
346
pub union DataItem {
347
    /// Typed value: `w32 42;` or `w8 255;`
348
    Val { typ: Type, val: i64 },
349
    /// Symbol reference: `$symbol`
350
    Sym(*[u8]),
351
    /// Function reference: `$fnName`
352
    Fn(*[u8]),
353
    /// String literal bytes: `"hello"` (no auto null-terminator)
354
    Str(*[u8]),
355
    /// Undefined value. Used for padding and `void` returns.
356
    Undef,
357
}
358
359
/// Data initializer value with optional repeat count.
360
pub record DataValue {
361
    /// The item contained in the value.
362
    item: DataItem,
363
    /// The number of times the item should be repeated.
364
    count: u32,
365
}
366
367
/// Global data definition.
368
pub record Data {
369
    /// Data name.
370
    name: *[u8],
371
    /// Size in bytes.
372
    size: u32,
373
    /// Alignment requirement.
374
    alignment: u32,
375
    /// Whether this is read-only data.
376
    /// Typically, `const` declaration are read-only, while `static`
377
    /// declarations are not.
378
    readOnly: bool,
379
    /// Whether the data is entirely undefined.
380
    /// Undefined data doesn't need to be written since memory is zero-initialized.
381
    isUndefined: bool,
382
    /// Initializer values.
383
    values: *[DataValue],
384
}
385
386
/// An IL program (compilation unit).
387
pub record Program {
388
    /// Global data.
389
    data: *[Data],
390
    /// Functions.
391
    fns: *[*Fn],
392
    /// Index of entry point function, if any.
393
    defaultFnIdx: ?u32,
394
}
395
396
///////////////////////
397
// Utility Functions //
398
///////////////////////
399
400
/// Get the destination register of an instruction, if any.
401
pub fn instrDst(instr: Instr) -> ?Reg {
402
    match instr {
403
        case Instr::Reserve { dst, .. } => return dst,
404
        case Instr::Load { dst, .. } => return dst,
405
        case Instr::Sload { dst, .. } => return dst,
406
        case Instr::Copy { dst, .. } => return dst,
407
        case Instr::BinOp { dst, .. } => return dst,
408
        case Instr::UnOp { dst, .. } => return dst,
409
        case Instr::Zext { dst, .. } => return dst,
410
        case Instr::Sext { dst, .. } => return dst,
411
        case Instr::Call { dst, .. } => return dst,
412
        case Instr::Ecall { dst, .. } => return dst,
413
        else => return nil,
414
    }
415
}
416
417
/// Check if an instruction is a function call.
418
pub fn isCall(instr: Instr) -> bool {
419
    match instr {
420
        case Instr::Call { .. } => return true,
421
        case Instr::Ecall { .. } => return true,
422
        else => return false,
423
    }
424
}
425
426
/// Call a function for each register used by an instruction.
427
/// This is called by the register allocator to analyze register usage.
428
pub fn forEachReg(instr: Instr, f: fn(Reg, *mut opaque), ctx: *mut opaque) {
429
    match instr {
430
        case Instr::Reserve { size, .. } =>
431
            withReg(size, f, ctx),
432
        case Instr::Load { src, .. } => f(src, ctx),
433
        case Instr::Sload { src, .. } => f(src, ctx),
434
        case Instr::Store { src, dst, .. } => {
435
            withReg(src, f, ctx);
436
            f(dst, ctx);
437
        },
438
        case Instr::Blit { dst, src, size } => {
439
            f(dst, ctx);
440
            f(src, ctx);
441
            withReg(size, f, ctx);
442
        },
443
        case Instr::Copy { val, .. } =>
444
            withReg(val, f, ctx),
445
        case Instr::BinOp { a, b, .. } => {
446
            withReg(a, f, ctx);
447
            withReg(b, f, ctx);
448
        },
449
        case Instr::UnOp { a, .. } =>
450
            withReg(a, f, ctx),
451
        case Instr::Zext { val, .. } =>
452
            withReg(val, f, ctx),
453
        case Instr::Sext { val, .. } =>
454
            withReg(val, f, ctx),
455
        case Instr::Call { func, args, .. } => {
456
            withReg(func, f, ctx);
457
            for arg in args {
458
                withReg(arg, f, ctx);
459
            }
460
        },
461
        case Instr::Ret { val } => {
462
            if let v = val {
463
                withReg(v, f, ctx);
464
            }
465
        },
466
        case Instr::Jmp { args, .. } => {
467
            for arg in args {
468
                withReg(arg, f, ctx);
469
            }
470
        },
471
        case Instr::Br { a, b, thenArgs, elseArgs, .. } => {
472
            withReg(a, f, ctx);
473
            withReg(b, f, ctx);
474
            for arg in thenArgs {
475
                withReg(arg, f, ctx);
476
            }
477
            for arg in elseArgs {
478
                withReg(arg, f, ctx);
479
            }
480
        },
481
        case Instr::Switch { val, defaultArgs, cases, .. } => {
482
            withReg(val, f, ctx);
483
            for arg in defaultArgs {
484
                withReg(arg, f, ctx);
485
            }
486
            for c in cases {
487
                for arg in c.args {
488
                    withReg(arg, f, ctx);
489
                }
490
            }
491
        },
492
        case Instr::Ecall { num, a0, a1, a2, a3, .. } => {
493
            withReg(num, f, ctx);
494
            withReg(a0, f, ctx);
495
            withReg(a1, f, ctx);
496
            withReg(a2, f, ctx);
497
            withReg(a3, f, ctx);
498
        },
499
        case Instr::Unreachable => {},
500
        case Instr::Ebreak => {},
501
    }
502
}
503
504
/// Call callback if value is a register.
505
fn withReg(val: Val, callback: fn(Reg, *mut opaque), ctx: *mut opaque) {
506
    if let case Val::Reg(r) = val {
507
        callback(r, ctx);
508
    }
509
}