lib/std/lang/il.rad 15.1 KiB raw
1
//! Radiance Intermediate Language (RIL).
2
//!
3
//! A minimal, low-level, SSA-based intermediate language.
4
//!
5
//! > The single-assignment property simplifies reasoning about variables,
6
//! > since for every value instance its (single) defining assignment is known.
7
//! > Because every assignment creates a new value name it cannot kill
8
//! > (i.e., invalidate) expressions previously computed from other values.
9
//! > In particular, if two expressions are textually the same, they are sure
10
//! > to evaluate the same result.
11
//! >
12
//! >   -- "Single-Pass Generation of Static Single-Assignment Form for
13
//! >       Structured Languages", by Marc M. Brandis and Hanspeter Mossenbock
14
//!
15
//! # Type System
16
//!
17
//! Primitive types: `W8`, `W16`, `W32` (word sizes).
18
//! Signedness is encoded in operations (e.g., `Sdiv` vs `Udiv`), not types.
19
//!
20
//! Types on arithmetic operations indicate logical width but don't
21
//! change code generation. Arithmetic is always machine word sized,
22
//! no 8-bit or 16-bit add instruction.
23
//!
24
//! # Truncation and Narrowing
25
//!
26
//! There is no truncation instruction.
27
//! * Narrow values are stored in word-sized registers with undefined high bits.
28
//! * Truncation happens implicitly via narrow store instructions.
29
//! * To normalize a narrow value, mask explicitly via a bitwise `and`.
30
//!
31
//! # Widerning
32
//!
33
//! `Zext`/`Sext` widen values when loading from memory.
34
//!
35
//! # Aggregate Types
36
//!
37
//! The IL provides no aggregate types. Records, slices, optionals, and unions
38
//! are represented as bytes in memory with explicit layout computed during
39
//! lowering. Access is via explicit address arithmetic plus load/store/blit.
40
//!
41
//! # Lowering Strategy
42
//!
43
//! * Slices lower to a pointer+length layout in memory.
44
//! * Optionals lower to a tag+value layout in memory.
45
//! * Tagged unions lower to a tag+payload layout in memory.
46
//! * Results lower to a tag+payload layout in memory.
47
//!
48
//! Field access is expressed as address arithmetic plus load/store.
49
//!
50
//! Bounds check elimination and nullable pointer optimization happen during
51
//! this lowering phase.
52
//!
53
//! # Control Flow
54
//!
55
//! Uses block parameters instead of phi nodes (like MLIR/SIL). Each block
56
//! can declare parameters, and jumps/branches pass arguments to their targets.
57
58
// TODO: Labels should have their own type.
59
// TODO: Blocks should have an instruction in `Instr`.
60
61
pub mod printer;
62
63
use std::mem;
64
use std::lang::alloc;
65
66
/// Source location for debug info.
67
///
68
/// Associates an IL instruction with its originating source module and
69
/// byte offset.
70
pub record SrcLoc {
71
    /// Module identifier.
72
    moduleId: u16,
73
    /// Byte offset into the module's source file.
74
    offset: u32,
75
}
76
77
///////////////////////
78
// Name Formatting   //
79
///////////////////////
80
81
/// Separator for qualified symbol names.
82
pub const PATH_SEPARATOR: *[u8] = "::";
83
84
/// Format a qualified symbol name: `pkg::mod::path::name`.
85
pub fn formatQualifiedName(arena: *mut alloc::Arena, path: *[*[u8]], name: *[u8]) -> *[u8] {
86
    let mut totalLen: u32 = name.len;
87
    for segment in path {
88
        totalLen += segment.len + PATH_SEPARATOR.len;
89
    }
90
    let buf = try! alloc::allocSlice(arena, 1, 1, totalLen) as *mut [u8];
91
    let mut pos: u32 = 0;
92
93
    for segment in path {
94
        pos += try! mem::copy(&mut buf[pos..], segment);
95
        pos += try! mem::copy(&mut buf[pos..], PATH_SEPARATOR);
96
    }
97
    try! mem::copy(&mut buf[pos..], name);
98
99
    return &buf[..totalLen];
100
}
101
102
///////////
103
// Types //
104
///////////
105
106
/// IL type representation. Integer signedness is encoded in operations, not types.
107
pub union Type {
108
    /// 8-bit value.
109
    W8,
110
    /// 16-bit value.
111
    W16,
112
    /// 32-bit value.
113
    W32,
114
    /// 64-bit value (used for pointers on RV64).
115
    W64,
116
}
117
118
/// Get the size of a type in bytes.
119
pub fn typeSize(t: Type) -> u32 {
120
    match t {
121
        case Type::W8 => return 1,
122
        case Type::W16 => return 2,
123
        case Type::W32 => return 4,
124
        case Type::W64 => return 8,
125
    }
126
}
127
128
/// SSA register reference.
129
pub record Reg { n: u32 }
130
131
/// Instruction value.
132
pub union Val {
133
    /// Register reference.
134
    Reg(Reg),
135
    /// Immediate integer value.
136
    Imm(i64),
137
    /// Data symbol address (globals, constants, string literals).
138
    DataSym(*[u8]),
139
    /// Function address by symbol name. Used directly in call instructions
140
    /// for direct calls, or stored first for indirect calls.
141
    FnAddr(*[u8]),
142
    /// Undefined value. Used when the value is to be discarded,
143
    /// eg. in void returns.
144
    Undef,
145
}
146
147
/// Comparison operation for compare-and-branch.
148
pub union CmpOp { Eq, Ne, Slt, Ult }
149
150
/// Binary ALU operation kind.
151
pub union BinOp {
152
    // Arithmetic.
153
    Add, Sub, Mul, Sdiv, Udiv, Srem, Urem,
154
    // Comparison.
155
    Eq, Ne, Slt, Sge, Ult, Uge,
156
    // Bitwise.
157
    And, Or, Xor, Shl, Sshr, Ushr,
158
}
159
160
/// Unary ALU operation kind.
161
pub union UnOp {
162
    Neg, Not,
163
}
164
165
//////////////////
166
// Instructions //
167
//////////////////
168
169
/// Block parameter.
170
pub record Param {
171
    /// SSA register.
172
    value: Reg,
173
    /// Parameter type.
174
    type: Type,
175
}
176
177
/// Switch case mapping a constant value to a branch target.
178
pub record SwitchCase {
179
    /// The constant value to match against.
180
    value: i64,
181
    /// The target block index.
182
    target: u32,
183
    /// Arguments to pass to the target block.
184
    args: *mut [Val],
185
}
186
187
/// IL instruction.
188
/// SSA registers are represented as `Reg`, values as `Val`.
189
pub union Instr {
190
    ///////////////////////
191
    // Memory operations //
192
    ///////////////////////
193
194
    /// Allocate space on the stack: `reserve %dst <size> <alignment>;`
195
    /// Size can be a register for dynamic stack allocation (eg. VLAs).
196
    Reserve { dst: Reg, size: Val, alignment: u32 },
197
    /// Load a value from memory (zero-extending): `load <type> %dst %src <offset>;`
198
    Load { typ: Type, dst: Reg, src: Reg, offset: i32 },
199
    /// Signed load from memory (sign-extending): `sload <type> %dst %src <offset>;`
200
    Sload { typ: Type, dst: Reg, src: Reg, offset: i32 },
201
    /// Store a value to memory: `store <type> <src> %dst <offset>;`
202
    Store {
203
        /// Type being stored.
204
        typ: Type,
205
        /// Value being stored.
206
        src: Val,
207
        /// Destination address.
208
        dst: Reg,
209
        /// Byte offset from the base address.
210
        offset: i32
211
    },
212
    /// Copy memory region: `blit %dst %src <size>;`
213
    /// Size can be an immediate or a register (eg. for generics).
214
    Blit {
215
        /// Destination address.
216
        dst: Reg,
217
        /// Source address.
218
        src: Reg,
219
        /// Size to copy in bytes.
220
        size: Val
221
    },
222
    /// Copy a value into a register: `copy %dst <val>;`.
223
    Copy { dst: Reg, val: Val },
224
225
    /////////////////////
226
    // ALU operations  //
227
    /////////////////////
228
229
    /// Binary ALU operation: `<op> <type> %dst <a> <b>;`
230
    BinOp { op: BinOp, typ: Type, dst: Reg, a: Val, b: Val },
231
    /// Unary ALU operation: `<op> <type> %dst <a>;`
232
    UnOp { op: UnOp, typ: Type, dst: Reg, a: Val },
233
234
    /////////////////
235
    // Conversions //
236
    /////////////////
237
238
    /// Zero-extend from narrower type to word: `zext <type> %dst <val>;`
239
    Zext {
240
        /// Source type (I8 or I16).
241
        typ: Type,
242
        /// Destination register, always I32.
243
        dst: Reg,
244
        /// Source value.
245
        val: Val,
246
    },
247
    /// Sign-extend from narrower type to word: `sext <type> %dst <val>;`
248
    Sext {
249
        /// Source type (I8 or I16).
250
        typ: Type,
251
        /// Destination register, always I32.
252
        dst: Reg,
253
        /// Source value.
254
        val: Val,
255
    },
256
257
    ////////////////////
258
    // Function calls //
259
    ////////////////////
260
261
    /// Call a function: `call <type> %dst $func(<arg>...);`
262
    Call {
263
        /// Return type.
264
        retTy: Type,
265
        /// Holds return value, for non-void function.
266
        dst: ?Reg,
267
        /// Function value, either a symbol or an address in a register.
268
        func: Val,
269
        /// Function arguments.
270
        args: *[Val]
271
    },
272
273
    /////////////////
274
    // Terminators //
275
    /////////////////
276
277
    /// Return from function: `ret <val>;` or `ret;`
278
    Ret { val: ?Val },
279
    /// Unconditional jump: `jmp @block(<arg>...);`
280
    Jmp { target: u32, args: *mut [Val] },
281
    /// Compare-and-branch: `br.<op> <type> <a> <b> @then @else;`
282
    Br { op: CmpOp, typ: Type, a: Val, b: Val, thenTarget: u32, thenArgs: *mut [Val], elseTarget: u32, elseArgs: *mut [Val] },
283
    /// Multi-way branch: `switch <val> (<n> @block) ... @default;`
284
    Switch { val: Val, defaultTarget: u32, defaultArgs: *mut [Val], cases: *mut [SwitchCase] },
285
    /// Unreachable code marker: `unreachable;`
286
    /// Indicates control flow cannot reach this point to allow for optimizations.
287
    Unreachable,
288
289
    /////////////////
290
    // Intrinsics  //
291
    /////////////////
292
293
    /// Environment call: `ecall %dst <num> <a0> <a1> <a2> <a3>;`
294
    Ecall { dst: Reg, num: Val, a0: Val, a1: Val, a2: Val, a3: Val },
295
    /// Environment break: `ebreak;`.
296
    /// Triggers a breakpoint exception for debugging.
297
    Ebreak,
298
}
299
300
//////////////////////////
301
// Blocks and Functions //
302
//////////////////////////
303
304
/// A basic block in a function.
305
///
306
/// Basic blocks are instruction sequences with a single entry point and no branch
307
/// instruction, except possibly at the end of the sequence, where a terminator
308
/// may be found, ie. an instruction that terminates the sequence by jumping
309
/// to another sequence.
310
pub record Block {
311
    /// Block label.
312
    label: *[u8],
313
    /// Block parameters.
314
    params: *[Param],
315
    /// Instructions in the block. The last instruction must be a terminator.
316
    instrs: *mut [Instr],
317
    /// Source locations for debugging and error reporting.
318
    /// One entry per instruction when debug info is enabled, empty otherwise.
319
    locs: *[SrcLoc],
320
    /// Predecessor block indices. Used for control flow analysis.
321
    preds: *[u32],
322
    /// Loop nesting depth.
323
    /// Used for spill cost weighting in register allocation.
324
    loopDepth: u32,
325
}
326
327
/// An IL function.
328
pub record Fn {
329
    /// Qualified function name (e.g. `$mod$path$func`).
330
    name: *[u8],
331
    /// Function parameters.
332
    params: *[Param],
333
    /// Return type.
334
    returnType: Type,
335
    /// Whether the function is extern (no body).
336
    isExtern: bool,
337
    /// Whether the function is a leaf (contains no call or ecall instructions).
338
    isLeaf: bool,
339
    /// Basic blocks. Empty for extern functions.
340
    blocks: *[Block],
341
}
342
343
/////////////
344
// Program //
345
/////////////
346
347
/// Data initializer item.
348
pub union DataItem {
349
    /// Typed value: `w32 42;` or `w8 255;`
350
    Val { typ: Type, val: i64 },
351
    /// Symbol reference: `$symbol`
352
    Sym(*[u8]),
353
    /// Function reference: `$fnName`
354
    Fn(*[u8]),
355
    /// String literal bytes: `"hello"` (no auto null-terminator)
356
    Str(*[u8]),
357
    /// Undefined value. Used for padding and `void` returns.
358
    Undef,
359
}
360
361
/// Data initializer value with optional repeat count.
362
pub record DataValue {
363
    /// The item contained in the value.
364
    item: DataItem,
365
    /// The number of times the item should be repeated.
366
    count: u32,
367
}
368
369
/// Global data definition.
370
pub record Data {
371
    /// Data name.
372
    name: *[u8],
373
    /// Size in bytes.
374
    size: u32,
375
    /// Alignment requirement.
376
    alignment: u32,
377
    /// Whether this is read-only data.
378
    /// Typically, `const` declaration are read-only, while `static`
379
    /// declarations are not.
380
    readOnly: bool,
381
    /// Whether the data is entirely undefined.
382
    /// Undefined data doesn't need to be written since memory is zero-initialized.
383
    isUndefined: bool,
384
    /// Initializer values.
385
    values: *[DataValue],
386
}
387
388
/// An IL program (compilation unit).
389
pub record Program {
390
    /// Global data.
391
    data: *[Data],
392
    /// Functions.
393
    fns: *[*Fn],
394
    /// Index of entry point function, if any.
395
    defaultFnIdx: ?u32,
396
}
397
398
///////////////////////
399
// Utility Functions //
400
///////////////////////
401
402
/// Get the destination register of an instruction, if any.
403
pub fn instrDst(instr: Instr) -> ?Reg {
404
    match instr {
405
        case Instr::Reserve { dst, .. } => return dst,
406
        case Instr::Load { dst, .. } => return dst,
407
        case Instr::Sload { dst, .. } => return dst,
408
        case Instr::Copy { dst, .. } => return dst,
409
        case Instr::BinOp { dst, .. } => return dst,
410
        case Instr::UnOp { dst, .. } => return dst,
411
        case Instr::Zext { dst, .. } => return dst,
412
        case Instr::Sext { dst, .. } => return dst,
413
        case Instr::Call { dst, .. } => return dst,
414
        case Instr::Ecall { dst, .. } => return dst,
415
        else => return nil,
416
    }
417
}
418
419
/// Check if an instruction is a function call.
420
pub fn isCall(instr: Instr) -> bool {
421
    match instr {
422
        case Instr::Call { .. },
423
             Instr::Ecall { .. } => return true,
424
        else => return false,
425
    }
426
}
427
428
/// Call a function for each register used by an instruction.
429
/// This is called by the register allocator to analyze register usage.
430
pub fn forEachReg(instr: Instr, f: fn(Reg, *mut opaque), ctx: *mut opaque) {
431
    match instr {
432
        case Instr::Reserve { size, .. } =>
433
            withReg(size, f, ctx),
434
        case Instr::Load { src, .. } => f(src, ctx),
435
        case Instr::Sload { src, .. } => f(src, ctx),
436
        case Instr::Store { src, dst, .. } => {
437
            withReg(src, f, ctx);
438
            f(dst, ctx);
439
        },
440
        case Instr::Blit { dst, src, size } => {
441
            f(dst, ctx);
442
            f(src, ctx);
443
            withReg(size, f, ctx);
444
        },
445
        case Instr::Copy { val, .. } =>
446
            withReg(val, f, ctx),
447
        case Instr::BinOp { a, b, .. } => {
448
            withReg(a, f, ctx);
449
            withReg(b, f, ctx);
450
        },
451
        case Instr::UnOp { a, .. } =>
452
            withReg(a, f, ctx),
453
        case Instr::Zext { val, .. } =>
454
            withReg(val, f, ctx),
455
        case Instr::Sext { val, .. } =>
456
            withReg(val, f, ctx),
457
        case Instr::Call { func, args, .. } => {
458
            withReg(func, f, ctx);
459
            for arg in args {
460
                withReg(arg, f, ctx);
461
            }
462
        },
463
        case Instr::Ret { val } => {
464
            if let v = val {
465
                withReg(v, f, ctx);
466
            }
467
        },
468
        case Instr::Jmp { args, .. } => {
469
            for arg in args {
470
                withReg(arg, f, ctx);
471
            }
472
        },
473
        case Instr::Br { a, b, thenArgs, elseArgs, .. } => {
474
            withReg(a, f, ctx);
475
            withReg(b, f, ctx);
476
            for arg in thenArgs {
477
                withReg(arg, f, ctx);
478
            }
479
            for arg in elseArgs {
480
                withReg(arg, f, ctx);
481
            }
482
        },
483
        case Instr::Switch { val, defaultArgs, cases, .. } => {
484
            withReg(val, f, ctx);
485
            for arg in defaultArgs {
486
                withReg(arg, f, ctx);
487
            }
488
            for c in cases {
489
                for arg in c.args {
490
                    withReg(arg, f, ctx);
491
                }
492
            }
493
        },
494
        case Instr::Ecall { num, a0, a1, a2, a3, .. } => {
495
            withReg(num, f, ctx);
496
            withReg(a0, f, ctx);
497
            withReg(a1, f, ctx);
498
            withReg(a2, f, ctx);
499
            withReg(a3, f, ctx);
500
        },
501
        case Instr::Unreachable,
502
             Instr::Ebreak => {},
503
    }
504
}
505
506
/// Call callback if value is a register.
507
fn withReg(val: Val, callback: fn(Reg, *mut opaque), ctx: *mut opaque) {
508
    if let case Val::Reg(r) = val {
509
        callback(r, ctx);
510
    }
511
}