lib/std/arch/rv64/asm.rad 22.6 KiB raw
//! Minimal RV64 assembler.
//!
//! This module assembles `.ras` source files into RV64 text words plus a raw
//! data prefix that can be linked into a compiler-generated program. It exists
//! so the Radiance driver can mix hand-written RV64 assembly with generated IL
//! output without invoking an external assembler or linker.
//!
//! Assembly is intentionally direct and buffer-oriented. The caller provides a
//! text buffer, a data buffer, an arena, and the runtime base address where the
//! data buffer will be loaded. The parser writes encoded instructions into the
//! text buffer as it reads them and writes directive bytes into the data buffer
//! while in `.data`. The returned [`Program`] only contains slices into those
//! caller-provided buffers, so no ownership transfer or late copy is needed.
//!
//! The scanner is assembly-specific. It produces tokens for registers (`%a0`),
//! labels (`@name`), directives, strings, characters, numbers, and
//! punctuation. The parser consumes those tokens as a small line-oriented
//! language: *directives* declare sections or emit data, *labels* define
//! symbols at the current section offset, and *instructions* are validated
//! against RV64 operand forms before being encoded.
//!
//! Labels are defined at the current text instruction index or data byte
//! offset. The parser is single-pass because it keeps assembly cheap and lets
//! instructions and data be emitted immediately, but forward references mean
//! some operands cannot be encoded when first seen. Branches, jumps,
//! load-address operands, and data directives that reference labels therefore
//! record fixups. After parsing reaches EOF, the emitter resolves the final
//! symbol table and patches every recorded use with the correct PC-relative
//! offset, absolute data address, or encoded data value.
//!
//! Data labels are resolved relative to the data base address. The compiler
//! driver accumulates all assembly data in a RO data prefix, passes
//! [`RO_DATA_BASE`] + `currentPrefixLen` for each input, then appends the
//! input's emitted data to that prefix. Global text symbols are exported for
//! call resolution when the assembled text is appended to the RV64 generator,
//! shifted by the generator's current code length so disassembly/debug output
//! can name those instruction addresses correctly. Non-global text labels
//! remain local to their assembly fragment.
use std::lang::alloc;
use std::lang::strings;
use std::lang::gen;
use std::collections::dict;
use std::arch::rv64::encode;
use std::arch::rv64;

/// Assembler scanner module.
export mod scanner;
/// Assembler parser module.
export mod parser;
/// Assembler emission and fixup module.
export mod emit;
/// Tests.
@test mod tests;
/// In-memory result of assembling one RV64 assembly fragment.
///
/// [`Program`] is the boundary between the textual assembler and the rest of
/// the compiler. The assembler reads an assembly source file, encodes all
/// instructions, lays out all data bytes, resolves fixups that can be resolved
/// inside the fragment, and returns these three slices as the assembled
/// program.
///
/// The value is intentionally not a standalone object file or linked
/// executable. It carries only the sections and symbol table needed by the
/// compiler driver. The slices point at caller-owned storage: `text` and
/// `data` are backed by the buffers passed to [`assemble`], while symbol names
/// are interned in the assembler's string pool.
///
/// Symbol offsets are section-local byte offsets. Text symbols name positions
/// in `text`; data symbols name positions in `data`. When the compiler
/// consumes the program, [`rv64::addAssembly`] appends the text words to the
/// generated text stream and registers text labels at their relocated offsets.
/// The driver copies `data` into the final read-only data prefix; the data
/// base supplied to [`assemble`] lets the assembler resolve data addresses as
/// they will appear in that final layout.
export record Program {
    /// Encoded instructions in the text section, one `u32` per instruction word.
    text: *[u32],
    /// Raw bytes in the data section.
    data: *[u8],
    /// Symbols defined by the source.
    symbols: *[Symbol],
}
/// Errors reported while assembling source text.
export union Error {
    /// Invalid syntax or operand form at a source offset.
    ///
    /// `offset` locates the problem in the source and `message` describes it.
    Invalid { offset: u32, message: *[u8] },
    /// The source emitted more text words than the caller-provided buffer holds.
    TextOverflow,
    /// The source emitted more data bytes than the caller-provided buffer holds.
    DataOverflow,
}
/// Active output section.
///
/// Selects whether output goes to the instruction stream or the data bytes,
/// and records which section a [`Symbol`] belongs to.
export union Section {
    /// Instruction section.
    Text,
    /// Data byte section.
    Data,
}
/// Branch opcode that needs fixup.
///
/// Carried by branch table entries and by [`FixupInfo::Branch`] so the branch
/// can be encoded once its target label is known.
export union BranchOp {
    /// Branch if equal.
    Beq,
    /// Branch if not equal.
    Bne,
    /// Branch if less than, signed.
    Blt,
    /// Branch if greater than or equal, signed.
    Bge,
    /// Branch if less than, unsigned.
    Bltu,
    /// Branch if greater than or equal, unsigned.
    Bgeu,
    /// Branch if less than or equal, signed pseudo-instruction.
    Ble,
    /// Branch if greater than, signed pseudo-instruction.
    Bgt,
}
/// Parser and encoder behavior for one instruction mnemonic.
///
/// Each variant names an operand form. Variants that carry an `enc` function
/// delegate the final word encoding to it; the remaining variants carry only
/// the data needed to identify the instruction (or nothing at all).
export union InstructionEncoder {
    /// No-operand instruction encoded by a fixed encoder.
    NoOperand { enc: fn() -> u32 },
    /// Load-immediate pseudo-instruction.
    Li,
    /// Load-address pseudo-instruction.
    La,
    /// Two-register instruction or pseudo-instruction.
    RR { enc: fn(gen::Reg, gen::Reg) -> u32 },
    /// Three-register instruction.
    RRR { enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32 },
    /// Register, register, immediate instruction.
    RRI { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
    /// Shift-immediate instruction with RV64 shift bounds.
    Shift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
    /// Shift-immediate instruction with RV64 W-mode shift bounds.
    WordShift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
    /// Load instruction with memory operand syntax.
    Load { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
    /// Store instruction with memory operand syntax.
    Store { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
    /// Two-register branch instruction.
    Branch { op: BranchOp },
    /// One-register branch-to-zero pseudo-instruction.
    BranchZero { op: BranchOp },
    /// `jal` instruction with explicit destination register.
    Jal,
    /// Jump pseudo-instruction with fixed destination register.
    Jump { rd: gen::Reg },
    /// CSR read-style operand form.
    RdCsr { enc: fn(gen::Reg, u32) -> u32 },
    /// CSR write-style operand form.
    CsrRs1 { enc: fn(u32, gen::Reg) -> u32 },
    /// CSR read/write operand form.
    Csrrw,
    /// CSR immediate operand form.
    Csrsi,
    /// Upper-immediate operand form.
    Upper { enc: fn(gen::Reg, i32) -> u32 },
}
/// Classified directive name.
///
/// One variant per row of [`DIRECTIVES`].
export union DirectiveKind {
    /// `.align` directive.
    Align,
    /// `.ascii` directive.
    Ascii,
    /// `.byte` directive.
    Byte,
    /// `.constant` directive.
    Constant,
    /// `.data` directive.
    Data,
    /// `.dword` directive.
    Dword,
    /// `.export` directive.
    Export,
    /// `.space` directive.
    Space,
    /// `.text` directive.
    Text,
    /// `.word` directive.
    Word,
}
/// Instruction descriptor table row.
///
/// Element type of [`INSTRUCTIONS`].
record InstructionEntry {
    /// Assembly mnemonic text.
    name: *[u8],
    /// Operand parser and encoder behavior.
    encoder: InstructionEncoder,
}
/// Directive descriptor table row.
///
/// Element type of [`DIRECTIVES`].
record DirectiveEntry {
    /// Directive name without the leading `.`.
    name: *[u8],
    /// Parser behavior for the directive.
    kind: DirectiveKind,
}
/// Register descriptor table row.
///
/// Element type of [`REGISTERS`].
record RegisterEntry {
    /// Register alias text without the leading `%`.
    name: *[u8],
    /// Numeric register selected by the alias.
    reg: gen::Reg,
}
/// CSR descriptor table row.
///
/// Element type of [`CSRS`].
record CsrEntry {
    /// CSR name text.
    name: *[u8],
    /// Numeric CSR address.
    csr: u32,
}
/// Width of an integer data directive.
export union DataWidth {
    /// 32-bit data value.
    Word,
    /// 64-bit data value.
    Dword,
}
/// Extra slot used when sizing source-derived symbol and fixup buffers.
export constant SOURCE_CAP_PADDING: u32 = 1;
/// Scale factor used to keep assembler hash tables sparse.
export constant TABLE_CAPACITY_SCALE: u32 = 4;
/// Minimum hash-table capacity used by the assembler.
export constant MIN_TABLE_CAPACITY: u32 = 8;
/// `@label` names exclude the leading sigil byte when interned.
export constant LABEL_SIGIL_LEN: u32 = 1;
/// `.directive` names exclude the leading sigil byte when matched.
export constant DIRECTIVE_SIGIL_LEN: u32 = 1;
/// String and character literals are delimited by one byte on each side.
export constant QUOTE_DELIM_LEN: u32 = 1;
/// Number of bits in one byte.
export constant BITS_PER_BYTE: u64 = 8;
/// Mask for extracting one encoded byte.
export constant BYTE_MASK: u64 = 0xFF;
/// Largest signed 32-bit assembler value.
export constant I32_MAX_VALUE: i64 = 2147483647;
/// Magnitude of the smallest signed 32-bit assembler value.
///
/// Stored as a positive `i64` because the magnitude does not fit in `i32`.
export constant I32_MIN_MAGNITUDE: i64 = 2147483648;
/// Largest unsigned 32-bit assembler value.
export constant U32_MAX_VALUE: i64 = 4294967295;
/// Largest unsigned 8-bit assembler value.
export constant U8_MAX_VALUE: i64 = 255;
/// Upper bound for CSR immediate operands.
export constant CSR_IMM_LIMIT: i64 = 32;
/// Upper bound for RV64 W-mode shift immediates.
export constant WORD_SHIFT_LIMIT: i32 = 32;
/// Upper bound for RV64 shift immediates.
export constant SHIFT_LIMIT: i32 = 64;
/// Largest `lui` or `auipc` immediate.
export constant UPPER_IMM_MAX_VALUE: i64 = 0xFFFFF;
/// Sorted instruction descriptor table used by the assembler parser.
///
/// Rows are ordered by mnemonic in ascending byte order; keep that order when
/// adding a row and update the array length to match.
/// NOTE(review): the parser presumably relies on this ordering for lookup;
/// confirm in `parser` before inserting out of order.
///
/// `j` and `tail` share the same encoder (`Jump` with `rv64::ZERO` as the
/// destination), while `call` uses `Jump` with `rv64::RA`.
export constant INSTRUCTIONS: [InstructionEntry; 87] = [
    { name: "add",    encoder: InstructionEncoder::RRR { enc: encode::add } },
    { name: "addi",   encoder: InstructionEncoder::RRI { enc: encode::addi } },
    { name: "addiw",  encoder: InstructionEncoder::RRI { enc: encode::addiw } },
    { name: "addw",   encoder: InstructionEncoder::RRR { enc: encode::addw } },
    { name: "and",    encoder: InstructionEncoder::RRR { enc: encode::and_ } },
    { name: "andi",   encoder: InstructionEncoder::RRI { enc: encode::andi } },
    { name: "auipc",  encoder: InstructionEncoder::Upper { enc: encode::auipc } },
    { name: "beq",    encoder: InstructionEncoder::Branch { op: BranchOp::Beq } },
    { name: "beqz",   encoder: InstructionEncoder::BranchZero { op: BranchOp::Beq } },
    { name: "bge",    encoder: InstructionEncoder::Branch { op: BranchOp::Bge } },
    { name: "bgeu",   encoder: InstructionEncoder::Branch { op: BranchOp::Bgeu } },
    { name: "bgt",    encoder: InstructionEncoder::Branch { op: BranchOp::Bgt } },
    { name: "ble",    encoder: InstructionEncoder::Branch { op: BranchOp::Ble } },
    { name: "blt",    encoder: InstructionEncoder::Branch { op: BranchOp::Blt } },
    { name: "bltu",   encoder: InstructionEncoder::Branch { op: BranchOp::Bltu } },
    { name: "bne",    encoder: InstructionEncoder::Branch { op: BranchOp::Bne } },
    { name: "bnez",   encoder: InstructionEncoder::BranchZero { op: BranchOp::Bne } },
    { name: "call",   encoder: InstructionEncoder::Jump { rd: rv64::RA } },
    { name: "csrc",   encoder: InstructionEncoder::CsrRs1 { enc: encode::csrc } },
    { name: "csrr",   encoder: InstructionEncoder::RdCsr { enc: encode::csrr } },
    { name: "csrrw",  encoder: InstructionEncoder::Csrrw },
    { name: "csrsi",  encoder: InstructionEncoder::Csrsi },
    { name: "csrw",   encoder: InstructionEncoder::CsrRs1 { enc: encode::csrw } },
    { name: "div",    encoder: InstructionEncoder::RRR { enc: encode::div } },
    { name: "divu",   encoder: InstructionEncoder::RRR { enc: encode::divu } },
    { name: "divuw",  encoder: InstructionEncoder::RRR { enc: encode::divuw } },
    { name: "divw",   encoder: InstructionEncoder::RRR { enc: encode::divw } },
    { name: "ebreak", encoder: InstructionEncoder::NoOperand { enc: encode::ebreak } },
    { name: "ecall",  encoder: InstructionEncoder::NoOperand { enc: encode::ecall } },
    { name: "j",      encoder: InstructionEncoder::Jump { rd: rv64::ZERO } },
    { name: "jal",    encoder: InstructionEncoder::Jal },
    { name: "jalr",   encoder: InstructionEncoder::RRI { enc: encode::jalr } },
    { name: "la",     encoder: InstructionEncoder::La },
    { name: "lb",     encoder: InstructionEncoder::Load { enc: encode::lb } },
    { name: "lbu",    encoder: InstructionEncoder::Load { enc: encode::lbu } },
    { name: "ld",     encoder: InstructionEncoder::Load { enc: encode::ld } },
    { name: "lh",     encoder: InstructionEncoder::Load { enc: encode::lh } },
    { name: "lhu",    encoder: InstructionEncoder::Load { enc: encode::lhu } },
    { name: "li",     encoder: InstructionEncoder::Li },
    { name: "lui",    encoder: InstructionEncoder::Upper { enc: encode::lui } },
    { name: "lw",     encoder: InstructionEncoder::Load { enc: encode::lw } },
    { name: "lwu",    encoder: InstructionEncoder::Load { enc: encode::lwu } },
    { name: "mret",   encoder: InstructionEncoder::NoOperand { enc: encode::mret } },
    { name: "mul",    encoder: InstructionEncoder::RRR { enc: encode::mul } },
    { name: "mulh",   encoder: InstructionEncoder::RRR { enc: encode::mulh } },
    { name: "mulhsu", encoder: InstructionEncoder::RRR { enc: encode::mulhsu } },
    { name: "mulhu",  encoder: InstructionEncoder::RRR { enc: encode::mulhu } },
    { name: "mulw",   encoder: InstructionEncoder::RRR { enc: encode::mulw } },
    { name: "mv",     encoder: InstructionEncoder::RR { enc: encode::mv } },
    { name: "neg",    encoder: InstructionEncoder::RR { enc: encode::neg } },
    { name: "nop",    encoder: InstructionEncoder::NoOperand { enc: encode::nop } },
    { name: "not",    encoder: InstructionEncoder::RR { enc: encode::not_ } },
    { name: "or",     encoder: InstructionEncoder::RRR { enc: encode::or_ } },
    { name: "ori",    encoder: InstructionEncoder::RRI { enc: encode::ori } },
    { name: "rem",    encoder: InstructionEncoder::RRR { enc: encode::rem } },
    { name: "remu",   encoder: InstructionEncoder::RRR { enc: encode::remu } },
    { name: "remuw",  encoder: InstructionEncoder::RRR { enc: encode::remuw } },
    { name: "remw",   encoder: InstructionEncoder::RRR { enc: encode::remw } },
    { name: "ret",    encoder: InstructionEncoder::NoOperand { enc: encode::ret } },
    { name: "sb",     encoder: InstructionEncoder::Store { enc: encode::sb } },
    { name: "sd",     encoder: InstructionEncoder::Store { enc: encode::sd } },
    { name: "seqz",   encoder: InstructionEncoder::RR { enc: encode::seqz } },
    { name: "sh",     encoder: InstructionEncoder::Store { enc: encode::sh } },
    { name: "sll",    encoder: InstructionEncoder::RRR { enc: encode::sll } },
    { name: "slli",   encoder: InstructionEncoder::Shift { enc: encode::slli } },
    { name: "slliw",  encoder: InstructionEncoder::WordShift { enc: encode::slliw } },
    { name: "sllw",   encoder: InstructionEncoder::RRR { enc: encode::sllw } },
    { name: "slt",    encoder: InstructionEncoder::RRR { enc: encode::slt } },
    { name: "slti",   encoder: InstructionEncoder::RRI { enc: encode::slti } },
    { name: "sltiu",  encoder: InstructionEncoder::RRI { enc: encode::sltiu } },
    { name: "sltu",   encoder: InstructionEncoder::RRR { enc: encode::sltu } },
    { name: "snez",   encoder: InstructionEncoder::RR { enc: encode::snez } },
    { name: "sra",    encoder: InstructionEncoder::RRR { enc: encode::sra } },
    { name: "srai",   encoder: InstructionEncoder::Shift { enc: encode::srai } },
    { name: "sraiw",  encoder: InstructionEncoder::WordShift { enc: encode::sraiw } },
    { name: "sraw",   encoder: InstructionEncoder::RRR { enc: encode::sraw } },
    { name: "srl",    encoder: InstructionEncoder::RRR { enc: encode::srl } },
    { name: "srli",   encoder: InstructionEncoder::Shift { enc: encode::srli } },
    { name: "srliw",  encoder: InstructionEncoder::WordShift { enc: encode::srliw } },
    { name: "srlw",   encoder: InstructionEncoder::RRR { enc: encode::srlw } },
    { name: "sub",    encoder: InstructionEncoder::RRR { enc: encode::sub } },
    { name: "subw",   encoder: InstructionEncoder::RRR { enc: encode::subw } },
    { name: "sw",     encoder: InstructionEncoder::Store { enc: encode::sw } },
    { name: "tail",   encoder: InstructionEncoder::Jump { rd: rv64::ZERO } },
    { name: "wfi",    encoder: InstructionEncoder::NoOperand { enc: encode::wfi } },
    { name: "xor",    encoder: InstructionEncoder::RRR { enc: encode::xor } },
    { name: "xori",   encoder: InstructionEncoder::RRI { enc: encode::xori } },
];
/// Sorted directive lookup table used by the assembler parser.
///
/// Names omit the leading `.` and are ordered in ascending byte order; keep
/// that order when adding a row and update the array length to match.
export constant DIRECTIVES: [DirectiveEntry; 10] = [
    { name: "align",    kind: DirectiveKind::Align },
    { name: "ascii",    kind: DirectiveKind::Ascii },
    { name: "byte",     kind: DirectiveKind::Byte },
    { name: "constant", kind: DirectiveKind::Constant },
    { name: "data",     kind: DirectiveKind::Data },
    { name: "dword",    kind: DirectiveKind::Dword },
    { name: "export",   kind: DirectiveKind::Export },
    { name: "space",    kind: DirectiveKind::Space },
    { name: "text",     kind: DirectiveKind::Text },
    { name: "word",     kind: DirectiveKind::Word },
];
/// Sorted register-name lookup table used by the assembler parser.
///
/// Names omit the leading `%` and are ordered in ascending byte order, which
/// is why `s10` and `s11` come before `s2`. Both `fp` and `s0` are listed as
/// separate rows.
export constant REGISTERS: [RegisterEntry; 33] = [
    { name: "a0",   reg: rv64::A0 },
    { name: "a1",   reg: rv64::A1 },
    { name: "a2",   reg: rv64::A2 },
    { name: "a3",   reg: rv64::A3 },
    { name: "a4",   reg: rv64::A4 },
    { name: "a5",   reg: rv64::A5 },
    { name: "a6",   reg: rv64::A6 },
    { name: "a7",   reg: rv64::A7 },
    { name: "fp",   reg: rv64::FP },
    { name: "gp",   reg: rv64::GP },
    { name: "ra",   reg: rv64::RA },
    { name: "s0",   reg: rv64::S0 },
    { name: "s1",   reg: rv64::S1 },
    { name: "s10",  reg: rv64::S10 },
    { name: "s11",  reg: rv64::S11 },
    { name: "s2",   reg: rv64::S2 },
    { name: "s3",   reg: rv64::S3 },
    { name: "s4",   reg: rv64::S4 },
    { name: "s5",   reg: rv64::S5 },
    { name: "s6",   reg: rv64::S6 },
    { name: "s7",   reg: rv64::S7 },
    { name: "s8",   reg: rv64::S8 },
    { name: "s9",   reg: rv64::S9 },
    { name: "sp",   reg: rv64::SP },
    { name: "t0",   reg: rv64::T0 },
    { name: "t1",   reg: rv64::T1 },
    { name: "t2",   reg: rv64::T2 },
    { name: "t3",   reg: rv64::T3 },
    { name: "t4",   reg: rv64::T4 },
    { name: "t5",   reg: rv64::T5 },
    { name: "t6",   reg: rv64::T6 },
    { name: "tp",   reg: rv64::TP },
    { name: "zero", reg: rv64::ZERO },
];
/// Sorted CSR-name lookup table used by the assembler parser.
///
/// Rows are ordered by name in ascending byte order and map each name to its
/// numeric CSR address.
export constant CSRS: [CsrEntry; 9] = [
    { name: "mcause",   csr: 0x342 },
    { name: "mepc",     csr: 0x341 },
    { name: "mhartid",  csr: 0xF14 },
    { name: "mie",      csr: 0x304 },
    { name: "mip",      csr: 0x344 },
    { name: "mscratch", csr: 0x340 },
    { name: "mstatus",  csr: 0x300 },
    { name: "mtval",    csr: 0x343 },
    { name: "mtvec",    csr: 0x305 },
];
/// Recorded symbol definition.
export record Symbol {
    /// Symbol name.
    name: *[u8],
    /// Section the symbol belongs to.
    section: Section,
    /// Byte offset within the section.
    offset: i32,
    /// Whether `.export` exported this symbol outside its assembly fragment.
    isExported: bool,
}
/// Information needed to resolve a pending symbol reference.
///
/// The instruction variants carry the operands already parsed plus an
/// `index`; the data variants carry an `offset`.
/// NOTE(review): `index` presumably addresses a word in the text buffer and
/// `offset` a byte in the data buffer; confirm in `emit`.
export union FixupInfo {
    /// Branch to a text label.
    Branch { op: BranchOp, rs1: gen::Reg, rs2: gen::Reg, index: u32 },
    /// JAL-like jump to a text label.
    Jal { rd: gen::Reg, index: u32 },
    /// Absolute address materialization into a register.
    Addr { rd: gen::Reg, index: u32 },
    /// A 32-bit data word referring to a symbol offset.
    Word { offset: u32 },
    /// A 64-bit data word referring to a symbol offset.
    Dword { offset: u32 },
}
/// Pending symbol reference.
///
/// Pairs the referenced symbol name with the payload describing what to patch.
export record Fixup {
    /// Referenced symbol.
    symbol: *[u8],
    /// Fixup payload.
    info: FixupInfo,
}
/// Parser and emission state.
///
/// Built by [`assemble`] and passed by mutable reference to
/// `parser::parseProgram` and then `emit::finishProgram`.
export record Assembler {
    /// Allocation arena for temporary assembler state.
    arena: *mut alloc::Arena,
    /// Assembler lexical scanner.
    scan: scanner::Scanner,
    /// Output text buffer.
    text: *mut [u32],
    /// Output data buffer.
    data: *mut [u8],
    /// Current output section.
    section: Section,
    /// Defined symbols.
    symbols: *mut [Symbol],
    /// Name-to-symbol index map.
    symbolMap: dict::Dict,
    /// Name-to-integer map.
    constMap: dict::Dict,
    /// Names marked by `.export`.
    exportMap: dict::Dict,
    /// Pending fixups.
    fixups: *mut [Fixup],
    /// Absolute runtime address of data-section offset zero.
    dataBase: u32,
}
/// Assemble source using `dataBase` as the runtime address of the data-section.
///
/// The symbol and fixup buffers and the three dictionaries (symbols,
/// constants, exports) are allocated from `arena` and sized from
/// `source.len`. Assembly starts in the text section. `textBuf` and `dataBuf`
/// back the assembler's `text` and `data` output, and `pool` is handed to the
/// scanner together with `sourceKind` and `source`.
///
/// Returns a [`Program`] holding the assembler's final `text`, `data`, and
/// `symbols` slices once parsing and fixup resolution have both succeeded.
///
/// Throws [`Error`] if `parser::parseProgram` or `emit::finishProgram` throws.
/// The arena allocations use `try!` and so are not reported through [`Error`].
export fn assemble(
    sourceKind: scanner::SourceKind,
    source: *[u8],
    textBuf: *mut [u32],
    dataBuf: *mut [u8],
    arena: *mut alloc::Arena,
    pool: *mut strings::Pool,
    dataBase: u32
) -> Program throws (Error) {
    // Symbol and fixup capacity is derived from the source length; the hash
    // tables get a scaled, power-of-two capacity so they stay sparse.
    let slotCap = source.len + SOURCE_CAP_PADDING;
    let tableCap = nextPowerOfTwo(slotCap * TABLE_CAPACITY_SCALE);

    let symbols = try! alloc::allocSlice(arena, @sizeOf(Symbol), @alignOf(Symbol), slotCap);
    let fixups = try! alloc::allocSlice(arena, @sizeOf(Fixup), @alignOf(Fixup), slotCap);
    let entries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
    let constEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
    let exportEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);

    // NOTE(review): `@sliceOf(ptr, 0, n)` presumably yields an empty slice
    // over storage of `n` elements, so the output slices start empty and
    // grow as the parser emits; confirm against the `@sliceOf` definition.
    let mut a = Assembler {
        arena,
        scan: scanner::scanner(sourceKind, source, pool),
        text: @sliceOf(textBuf.ptr, 0, textBuf.len),
        data: @sliceOf(dataBuf.ptr, 0, dataBuf.len),
        section: Section::Text,
        symbols: @sliceOf((symbols as *mut [Symbol]).ptr, 0, (symbols as *mut [Symbol]).len),
        symbolMap: dict::init(entries as *mut [dict::Entry]),
        constMap: dict::init(constEntries as *mut [dict::Entry]),
        exportMap: dict::init(exportEntries as *mut [dict::Entry]),
        fixups: @sliceOf((fixups as *mut [Fixup]).ptr, 0, (fixups as *mut [Fixup]).len),
        dataBase,
    };
    // Parse assembly source and emit instructions.
    try parser::parseProgram(&mut a);
    // Resolve fixups and finalize program.
    try emit::finishProgram(&mut a);

    return Program {
        text: a.text,
        data: a.data,
        symbols: a.symbols,
    };
}
/// Return the smallest power of two that is at least `value`, but never less
/// than `MIN_TABLE_CAPACITY`.
///
/// The search starts at `MIN_TABLE_CAPACITY` and doubles until it reaches
/// `value`, so the result is a power of two only because
/// `MIN_TABLE_CAPACITY` is one.
///
/// NOTE(review): no `u32` power of two is at least a `value` above 2^31, so
/// the loop's behavior for such inputs depends on how `<<=` handles overflow.
/// Confirm callers keep `value` well below that bound.
fn nextPowerOfTwo(value: u32) -> u32 {
    let mut n: u32 = MIN_TABLE_CAPACITY;
    while n < value {
        set n <<= 1;
    }
    return n;
}