compiler/
lib/
examples/
std/
arch/
rv64/
asm/
asm.rad
22.6 KiB
decode.rad
14.3 KiB
emit.rad
26.0 KiB
encode.rad
21.5 KiB
isel.rad
47.0 KiB
printer.rad
12.6 KiB
tests.rad
17.1 KiB
rv64.rad
11.9 KiB
char/
collections/
lang/
sys/
arch.rad
68 B
char.rad
855 B
collections.rad
39 B
fmt.rad
8.5 KiB
intrinsics.rad
391 B
io.rad
1.3 KiB
lang.rad
258 B
mem.rad
2.2 KiB
sys.rad
173 B
testing.rad
2.4 KiB
tests.rad
14.8 KiB
vec.rad
3.2 KiB
std.rad
281 B
scripts/
seed/
sublime/
test/
vim/
.gitignore
366 B
.gitsigners
112 B
LICENSE
1.1 KiB
Makefile
3.6 KiB
README
2.5 KiB
STYLE
2.5 KiB
std.lib
1.2 KiB
std.lib.test
347 B
lib/std/arch/rv64/asm.rad
raw
| 1 | //! Minimal RV64 assembler. |
| 2 | //! |
| 3 | //! This module assembles `.ras` source files into RV64 text words plus a raw |
| 4 | //! data prefix that can be linked into a compiler-generated program. It exists |
| 5 | //! so the Radiance driver can mix hand-written RV64 assembly with generated IL |
| 6 | //! output without invoking an external assembler or linker. |
| 7 | //! |
| 8 | //! Assembly is intentionally direct and buffer-oriented. The caller provides a |
| 9 | //! text buffer, a data buffer, an arena, and the runtime base address where the |
| 10 | //! data buffer will be loaded. The parser writes encoded instructions into the |
| 11 | //! text buffer as it reads them and writes directive bytes into the data buffer |
| 12 | //! while in `.data`. The returned [`Program`] only contains slices into those |
| 13 | //! caller-provided buffers, so no ownership transfer or late copy is needed. |
| 14 | //! |
| 15 | //! The scanner is assembly-specific. It produces tokens for registers (`%a0`), |
| 16 | //! labels (`@name`), directives, strings, characters, numbers, and |
| 17 | //! punctuation. The parser consumes those tokens as a small line-oriented |
| 18 | //! language: *directives* declare sections or emit data, *labels* define |
| 19 | //! symbols at the current section offset, and *instructions* are validated |
| 20 | //! against RV64 operand forms before being encoded. |
| 21 | //! |
| 22 | //! Labels are defined at the current text instruction index or data byte |
| 23 | //! offset. The parser is single-pass because it keeps assembly cheap and lets |
| 24 | //! instructions and data be emitted immediately, but forward references mean |
| 25 | //! some operands cannot be encoded when first seen. Branches, jumps, |
| 26 | //! load-address operands, and data directives that reference labels therefore |
| 27 | //! record fixups. After parsing reaches EOF, the emitter resolves the final |
| 28 | //! symbol table and patches every recorded use with the correct PC-relative |
| 29 | //! offset, absolute data address, or encoded data value. |
| 30 | //! |
| 31 | //! Data labels are resolved relative to the data base address. The compiler |
| 32 | //! driver accumulates all assembly data in a RO data prefix, passes |
| 33 | //! [`RO_DATA_BASE`] + `currentPrefixLen` for each input, then appends the |
| 34 | //! input's emitted data to that prefix. Global text symbols are exported for |
| 35 | //! call resolution when the assembled text is appended to the RV64 generator, |
| 36 | //! shifted by the generator's current code length so disassembly/debug output |
| 37 | //! can name those instruction addresses correctly. Non-global text labels |
| 38 | //! remain local to their assembly fragment. |
| 39 | use std::lang::alloc; |
| 40 | use std::lang::strings; |
| 41 | use std::lang::gen; |
| 42 | use std::collections::dict; |
| 43 | use std::arch::rv64::encode; |
| 44 | use std::arch::rv64; |
| 45 | |
| 46 | /// Assembler scanner module. |
| 47 | export mod scanner; |
| 48 | /// Assembler parser module. |
| 49 | export mod parser; |
| 50 | /// Assembler emission and fixup module. |
| 51 | export mod emit; |
| 52 | /// Tests. |
| 53 | @test mod tests; |
| 54 | |
| 55 | /// In-memory result of assembling one RV64 assembly fragment. |
| 56 | /// |
| 57 | /// [`Program`] is the boundary between the textual assembler and the rest of |
| 58 | /// the compiler. The assembler reads an assembly source file, encodes all |
| 59 | /// instructions, lays out all data bytes, resolves fixups that can be resolved |
| 60 | /// inside the fragment, and returns these three slices as the assembled |
| 61 | /// program. |
| 62 | /// |
| 63 | /// The value is intentionally not a standalone object file or linked |
| 64 | /// executable. It carries only the sections and symbol table needed by the |
| 65 | /// compiler driver. The slices point at caller-owned storage: `text` and |
| 66 | /// `data` are backed by the buffers passed to [`assemble`], while symbol names |
| 67 | /// are interned in the assembler's string pool. |
| 68 | /// |
| 69 | /// Symbol offsets are section-local byte offsets. Text symbols name positions |
| 70 | /// in `text`; data symbols name positions in `data`. When the compiler |
| 71 | /// consumes the program, [`rv64::addAssembly`] appends the text words to the |
| 72 | /// generated text stream and registers text labels at their relocated offsets. |
| 73 | /// The driver copies `data` into the final read-only data prefix; the data |
| 74 | /// base supplied to [`assemble`] lets the assembler resolve data addresses as |
| 75 | /// they will appear in that final layout. |
| 76 | export record Program { |
| 77 | /// Encoded instructions in the text section. |
| 78 | text: *[u32], |
| 79 | /// Raw bytes in the data section. |
| 80 | data: *[u8], |
| 81 | /// Symbols defined by the source. |
| 82 | symbols: *[Symbol], |
| 83 | } |
| 84 | |
| 85 | /// Errors reported while assembling source text. |
| 86 | export union Error { |
| 87 | /// Invalid syntax or operand form at a source offset. |
| 88 | Invalid { offset: u32, message: *[u8] }, |
| 89 | /// The source emitted more text words than the caller-provided buffer holds. |
| 90 | TextOverflow, |
| 91 | /// The source emitted more data bytes than the caller-provided buffer holds. |
| 92 | DataOverflow, |
| 93 | } |
| 94 | |
| 95 | /// Active output section. |
| 96 | export union Section { |
| 97 | /// Instruction section. |
| 98 | Text, |
| 99 | /// Data byte section. |
| 100 | Data, |
| 101 | } |
| 102 | |
| 103 | /// Branch opcode that needs fixup. |
| 104 | export union BranchOp { |
| 105 | /// Branch if equal. |
| 106 | Beq, |
| 107 | /// Branch if not equal. |
| 108 | Bne, |
| 109 | /// Branch if less than, signed. |
| 110 | Blt, |
| 111 | /// Branch if greater than or equal, signed. |
| 112 | Bge, |
| 113 | /// Branch if less than, unsigned. |
| 114 | Bltu, |
| 115 | /// Branch if greater than or equal, unsigned. |
| 116 | Bgeu, |
| 117 | /// Branch if less than or equal, signed pseudo-instruction. |
| 118 | Ble, |
| 119 | /// Branch if greater than, signed pseudo-instruction. |
| 120 | Bgt, |
| 121 | } |
| 122 | |
| 123 | /// Parser and encoder behavior for one instruction mnemonic. |
| 124 | export union InstructionEncoder { |
| 125 | /// No-operand instruction encoded by a fixed encoder. |
| 126 | NoOperand { enc: fn() -> u32 }, |
| 127 | /// Load-immediate pseudo-instruction. |
| 128 | Li, |
| 129 | /// Load-address pseudo-instruction. |
| 130 | La, |
| 131 | /// Two-register instruction or pseudo-instruction. |
| 132 | RR { enc: fn(gen::Reg, gen::Reg) -> u32 }, |
| 133 | /// Three-register instruction. |
| 134 | RRR { enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32 }, |
| 135 | /// Register, register, immediate instruction. |
| 136 | RRI { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
| 137 | /// Shift-immediate instruction with RV64 shift bounds. |
| 138 | Shift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
| 139 | /// Shift-immediate instruction with RV64 W-mode shift bounds. |
| 140 | WordShift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
| 141 | /// Load instruction with memory operand syntax. |
| 142 | Load { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
| 143 | /// Store instruction with memory operand syntax. |
| 144 | Store { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
| 145 | /// Two-register branch instruction. |
| 146 | Branch { op: BranchOp }, |
| 147 | /// One-register branch-to-zero pseudo-instruction. |
| 148 | BranchZero { op: BranchOp }, |
| 149 | /// `jal` instruction with explicit destination register. |
| 150 | Jal, |
| 151 | /// Jump pseudo-instruction with fixed destination register. |
| 152 | Jump { rd: gen::Reg }, |
| 153 | /// CSR read-style operand form. |
| 154 | RdCsr { enc: fn(gen::Reg, u32) -> u32 }, |
| 155 | /// CSR write-style operand form. |
| 156 | CsrRs1 { enc: fn(u32, gen::Reg) -> u32 }, |
| 157 | /// CSR read/write operand form. |
| 158 | Csrrw, |
| 159 | /// CSR immediate operand form. |
| 160 | Csrsi, |
| 161 | /// Upper-immediate operand form. |
| 162 | Upper { enc: fn(gen::Reg, i32) -> u32 }, |
| 163 | } |
| 164 | |
| 165 | /// Classified directive name. |
| 166 | export union DirectiveKind { |
| 167 | /// `.align` directive. |
| 168 | Align, |
| 169 | /// `.ascii` directive. |
| 170 | Ascii, |
| 171 | /// `.byte` directive. |
| 172 | Byte, |
| 173 | /// `.constant` directive. |
| 174 | Constant, |
| 175 | /// `.data` directive. |
| 176 | Data, |
| 177 | /// `.dword` directive. |
| 178 | Dword, |
| 179 | /// `.export` directive. |
| 180 | Export, |
| 181 | /// `.space` directive. |
| 182 | Space, |
| 183 | /// `.text` directive. |
| 184 | Text, |
| 185 | /// `.word` directive. |
| 186 | Word, |
| 187 | } |
| 188 | |
| 189 | /// Instruction descriptor table row. |
| 190 | record InstructionEntry { |
| 191 | /// Assembly mnemonic text. |
| 192 | name: *[u8], |
| 193 | /// Operand parser and encoder behavior. |
| 194 | encoder: InstructionEncoder, |
| 195 | } |
| 196 | |
| 197 | /// Directive descriptor table row. |
| 198 | record DirectiveEntry { |
| 199 | /// Directive name without the leading `.`. |
| 200 | name: *[u8], |
| 201 | /// Parser behavior for the directive. |
| 202 | kind: DirectiveKind, |
| 203 | } |
| 204 | |
| 205 | /// Register descriptor table row. |
| 206 | record RegisterEntry { |
| 207 | /// Register alias text without the leading `%`. |
| 208 | name: *[u8], |
| 209 | /// Numeric register selected by the alias. |
| 210 | reg: gen::Reg, |
| 211 | } |
| 212 | |
| 213 | /// CSR descriptor table row. |
| 214 | record CsrEntry { |
| 215 | /// CSR name text. |
| 216 | name: *[u8], |
| 217 | /// Numeric CSR address. |
| 218 | csr: u32, |
| 219 | } |
| 220 | |
| 221 | /// Width of an integer data directive. |
| 222 | export union DataWidth { |
| 223 | /// 32-bit data value. |
| 224 | Word, |
| 225 | /// 64-bit data value. |
| 226 | Dword, |
| 227 | } |
| 228 | |
| 229 | /// Extra slot used when sizing source-derived symbol and fixup buffers. |
| 230 | export constant SOURCE_CAP_PADDING: u32 = 1; |
| 231 | /// Scale factor used to keep assembler hash tables sparse. |
| 232 | export constant TABLE_CAPACITY_SCALE: u32 = 4; |
| 233 | /// Minimum hash-table capacity used by the assembler. |
| 234 | export constant MIN_TABLE_CAPACITY: u32 = 8; |
| 235 | /// `@label` names exclude the leading sigil byte when interned. |
| 236 | export constant LABEL_SIGIL_LEN: u32 = 1; |
| 237 | /// `.directive` names exclude the leading sigil byte when matched. |
| 238 | export constant DIRECTIVE_SIGIL_LEN: u32 = 1; |
| 239 | /// String and character literals are delimited by one byte on each side. |
| 240 | export constant QUOTE_DELIM_LEN: u32 = 1; |
| 241 | /// Number of bits in one byte. |
| 242 | export constant BITS_PER_BYTE: u64 = 8; |
| 243 | /// Mask for extracting one encoded byte. |
| 244 | export constant BYTE_MASK: u64 = 0xFF; |
| 245 | /// Largest signed 32-bit assembler value. |
| 246 | export constant I32_MAX_VALUE: i64 = 2147483647; |
| 247 | /// Magnitude of the smallest signed 32-bit assembler value. |
| 248 | export constant I32_MIN_MAGNITUDE: i64 = 2147483648; |
| 249 | /// Largest unsigned 32-bit assembler value. |
| 250 | export constant U32_MAX_VALUE: i64 = 4294967295; |
| 251 | /// Largest unsigned 8-bit assembler value. |
| 252 | export constant U8_MAX_VALUE: i64 = 255; |
| 253 | /// Upper bound for CSR immediate operands; the field is 5 bits wide, so 32 is one past the largest encodable value. |
| 254 | export constant CSR_IMM_LIMIT: i64 = 32; |
| 255 | /// Upper bound for RV64 W-mode shift immediates; the shift amount is 5 bits wide, so 32 is one past the largest encodable value. |
| 256 | export constant WORD_SHIFT_LIMIT: i32 = 32; |
| 257 | /// Upper bound for RV64 shift immediates; the shift amount is 6 bits wide, so 64 is one past the largest encodable value. |
| 258 | export constant SHIFT_LIMIT: i32 = 64; |
| 259 | /// Largest `lui` or `auipc` immediate. |
| 260 | export constant UPPER_IMM_MAX_VALUE: i64 = 0xFFFFF; |
| 261 | |
| 262 | /// Sorted instruction descriptor table used by the assembler parser. |
| 263 | export constant INSTRUCTIONS: [InstructionEntry; 87] = [ |
| 264 | { name: "add", encoder: InstructionEncoder::RRR { enc: encode::add } }, |
| 265 | { name: "addi", encoder: InstructionEncoder::RRI { enc: encode::addi } }, |
| 266 | { name: "addiw", encoder: InstructionEncoder::RRI { enc: encode::addiw } }, |
| 267 | { name: "addw", encoder: InstructionEncoder::RRR { enc: encode::addw } }, |
| 268 | { name: "and", encoder: InstructionEncoder::RRR { enc: encode::and_ } }, |
| 269 | { name: "andi", encoder: InstructionEncoder::RRI { enc: encode::andi } }, |
| 270 | { name: "auipc", encoder: InstructionEncoder::Upper { enc: encode::auipc } }, |
| 271 | { name: "beq", encoder: InstructionEncoder::Branch { op: BranchOp::Beq } }, |
| 272 | { name: "beqz", encoder: InstructionEncoder::BranchZero { op: BranchOp::Beq } }, |
| 273 | { name: "bge", encoder: InstructionEncoder::Branch { op: BranchOp::Bge } }, |
| 274 | { name: "bgeu", encoder: InstructionEncoder::Branch { op: BranchOp::Bgeu } }, |
| 275 | { name: "bgt", encoder: InstructionEncoder::Branch { op: BranchOp::Bgt } }, |
| 276 | { name: "ble", encoder: InstructionEncoder::Branch { op: BranchOp::Ble } }, |
| 277 | { name: "blt", encoder: InstructionEncoder::Branch { op: BranchOp::Blt } }, |
| 278 | { name: "bltu", encoder: InstructionEncoder::Branch { op: BranchOp::Bltu } }, |
| 279 | { name: "bne", encoder: InstructionEncoder::Branch { op: BranchOp::Bne } }, |
| 280 | { name: "bnez", encoder: InstructionEncoder::BranchZero { op: BranchOp::Bne } }, |
| 281 | { name: "call", encoder: InstructionEncoder::Jump { rd: rv64::RA } }, |
| 282 | { name: "csrc", encoder: InstructionEncoder::CsrRs1 { enc: encode::csrc } }, |
| 283 | { name: "csrr", encoder: InstructionEncoder::RdCsr { enc: encode::csrr } }, |
| 284 | { name: "csrrw", encoder: InstructionEncoder::Csrrw }, |
| 285 | { name: "csrsi", encoder: InstructionEncoder::Csrsi }, |
| 286 | { name: "csrw", encoder: InstructionEncoder::CsrRs1 { enc: encode::csrw } }, |
| 287 | { name: "div", encoder: InstructionEncoder::RRR { enc: encode::div } }, |
| 288 | { name: "divu", encoder: InstructionEncoder::RRR { enc: encode::divu } }, |
| 289 | { name: "divuw", encoder: InstructionEncoder::RRR { enc: encode::divuw } }, |
| 290 | { name: "divw", encoder: InstructionEncoder::RRR { enc: encode::divw } }, |
| 291 | { name: "ebreak", encoder: InstructionEncoder::NoOperand { enc: encode::ebreak } }, |
| 292 | { name: "ecall", encoder: InstructionEncoder::NoOperand { enc: encode::ecall } }, |
| 293 | { name: "j", encoder: InstructionEncoder::Jump { rd: rv64::ZERO } }, |
| 294 | { name: "jal", encoder: InstructionEncoder::Jal }, |
| 295 | { name: "jalr", encoder: InstructionEncoder::RRI { enc: encode::jalr } }, |
| 296 | { name: "la", encoder: InstructionEncoder::La }, |
| 297 | { name: "lb", encoder: InstructionEncoder::Load { enc: encode::lb } }, |
| 298 | { name: "lbu", encoder: InstructionEncoder::Load { enc: encode::lbu } }, |
| 299 | { name: "ld", encoder: InstructionEncoder::Load { enc: encode::ld } }, |
| 300 | { name: "lh", encoder: InstructionEncoder::Load { enc: encode::lh } }, |
| 301 | { name: "lhu", encoder: InstructionEncoder::Load { enc: encode::lhu } }, |
| 302 | { name: "li", encoder: InstructionEncoder::Li }, |
| 303 | { name: "lui", encoder: InstructionEncoder::Upper { enc: encode::lui } }, |
| 304 | { name: "lw", encoder: InstructionEncoder::Load { enc: encode::lw } }, |
| 305 | { name: "lwu", encoder: InstructionEncoder::Load { enc: encode::lwu } }, |
| 306 | { name: "mret", encoder: InstructionEncoder::NoOperand { enc: encode::mret } }, |
| 307 | { name: "mul", encoder: InstructionEncoder::RRR { enc: encode::mul } }, |
| 308 | { name: "mulh", encoder: InstructionEncoder::RRR { enc: encode::mulh } }, |
| 309 | { name: "mulhsu", encoder: InstructionEncoder::RRR { enc: encode::mulhsu } }, |
| 310 | { name: "mulhu", encoder: InstructionEncoder::RRR { enc: encode::mulhu } }, |
| 311 | { name: "mulw", encoder: InstructionEncoder::RRR { enc: encode::mulw } }, |
| 312 | { name: "mv", encoder: InstructionEncoder::RR { enc: encode::mv } }, |
| 313 | { name: "neg", encoder: InstructionEncoder::RR { enc: encode::neg } }, |
| 314 | { name: "nop", encoder: InstructionEncoder::NoOperand { enc: encode::nop } }, |
| 315 | { name: "not", encoder: InstructionEncoder::RR { enc: encode::not_ } }, |
| 316 | { name: "or", encoder: InstructionEncoder::RRR { enc: encode::or_ } }, |
| 317 | { name: "ori", encoder: InstructionEncoder::RRI { enc: encode::ori } }, |
| 318 | { name: "rem", encoder: InstructionEncoder::RRR { enc: encode::rem } }, |
| 319 | { name: "remu", encoder: InstructionEncoder::RRR { enc: encode::remu } }, |
| 320 | { name: "remuw", encoder: InstructionEncoder::RRR { enc: encode::remuw } }, |
| 321 | { name: "remw", encoder: InstructionEncoder::RRR { enc: encode::remw } }, |
| 322 | { name: "ret", encoder: InstructionEncoder::NoOperand { enc: encode::ret } }, |
| 323 | { name: "sb", encoder: InstructionEncoder::Store { enc: encode::sb } }, |
| 324 | { name: "sd", encoder: InstructionEncoder::Store { enc: encode::sd } }, |
| 325 | { name: "seqz", encoder: InstructionEncoder::RR { enc: encode::seqz } }, |
| 326 | { name: "sh", encoder: InstructionEncoder::Store { enc: encode::sh } }, |
| 327 | { name: "sll", encoder: InstructionEncoder::RRR { enc: encode::sll } }, |
| 328 | { name: "slli", encoder: InstructionEncoder::Shift { enc: encode::slli } }, |
| 329 | { name: "slliw", encoder: InstructionEncoder::WordShift { enc: encode::slliw } }, |
| 330 | { name: "sllw", encoder: InstructionEncoder::RRR { enc: encode::sllw } }, |
| 331 | { name: "slt", encoder: InstructionEncoder::RRR { enc: encode::slt } }, |
| 332 | { name: "slti", encoder: InstructionEncoder::RRI { enc: encode::slti } }, |
| 333 | { name: "sltiu", encoder: InstructionEncoder::RRI { enc: encode::sltiu } }, |
| 334 | { name: "sltu", encoder: InstructionEncoder::RRR { enc: encode::sltu } }, |
| 335 | { name: "snez", encoder: InstructionEncoder::RR { enc: encode::snez } }, |
| 336 | { name: "sra", encoder: InstructionEncoder::RRR { enc: encode::sra } }, |
| 337 | { name: "srai", encoder: InstructionEncoder::Shift { enc: encode::srai } }, |
| 338 | { name: "sraiw", encoder: InstructionEncoder::WordShift { enc: encode::sraiw } }, |
| 339 | { name: "sraw", encoder: InstructionEncoder::RRR { enc: encode::sraw } }, |
| 340 | { name: "srl", encoder: InstructionEncoder::RRR { enc: encode::srl } }, |
| 341 | { name: "srli", encoder: InstructionEncoder::Shift { enc: encode::srli } }, |
| 342 | { name: "srliw", encoder: InstructionEncoder::WordShift { enc: encode::srliw } }, |
| 343 | { name: "srlw", encoder: InstructionEncoder::RRR { enc: encode::srlw } }, |
| 344 | { name: "sub", encoder: InstructionEncoder::RRR { enc: encode::sub } }, |
| 345 | { name: "subw", encoder: InstructionEncoder::RRR { enc: encode::subw } }, |
| 346 | { name: "sw", encoder: InstructionEncoder::Store { enc: encode::sw } }, |
| 347 | { name: "tail", encoder: InstructionEncoder::Jump { rd: rv64::ZERO } }, |
| 348 | { name: "wfi", encoder: InstructionEncoder::NoOperand { enc: encode::wfi } }, |
| 349 | { name: "xor", encoder: InstructionEncoder::RRR { enc: encode::xor } }, |
| 350 | { name: "xori", encoder: InstructionEncoder::RRI { enc: encode::xori } }, |
| 351 | ]; |
| 352 | |
| 353 | /// Sorted directive lookup table used by the assembler parser. |
| 354 | export constant DIRECTIVES: [DirectiveEntry; 10] = [ |
| 355 | { name: "align", kind: DirectiveKind::Align }, |
| 356 | { name: "ascii", kind: DirectiveKind::Ascii }, |
| 357 | { name: "byte", kind: DirectiveKind::Byte }, |
| 358 | { name: "constant", kind: DirectiveKind::Constant }, |
| 359 | { name: "data", kind: DirectiveKind::Data }, |
| 360 | { name: "dword", kind: DirectiveKind::Dword }, |
| 361 | { name: "export", kind: DirectiveKind::Export }, |
| 362 | { name: "space", kind: DirectiveKind::Space }, |
| 363 | { name: "text", kind: DirectiveKind::Text }, |
| 364 | { name: "word", kind: DirectiveKind::Word }, |
| 365 | ]; |
| 366 | |
| 367 | /// Sorted register-name lookup table used by the assembler parser. |
| 368 | export constant REGISTERS: [RegisterEntry; 33] = [ |
| 369 | { name: "a0", reg: rv64::A0 }, |
| 370 | { name: "a1", reg: rv64::A1 }, |
| 371 | { name: "a2", reg: rv64::A2 }, |
| 372 | { name: "a3", reg: rv64::A3 }, |
| 373 | { name: "a4", reg: rv64::A4 }, |
| 374 | { name: "a5", reg: rv64::A5 }, |
| 375 | { name: "a6", reg: rv64::A6 }, |
| 376 | { name: "a7", reg: rv64::A7 }, |
| 377 | { name: "fp", reg: rv64::FP }, |
| 378 | { name: "gp", reg: rv64::GP }, |
| 379 | { name: "ra", reg: rv64::RA }, |
| 380 | { name: "s0", reg: rv64::S0 }, |
| 381 | { name: "s1", reg: rv64::S1 }, |
| 382 | { name: "s10", reg: rv64::S10 }, |
| 383 | { name: "s11", reg: rv64::S11 }, |
| 384 | { name: "s2", reg: rv64::S2 }, |
| 385 | { name: "s3", reg: rv64::S3 }, |
| 386 | { name: "s4", reg: rv64::S4 }, |
| 387 | { name: "s5", reg: rv64::S5 }, |
| 388 | { name: "s6", reg: rv64::S6 }, |
| 389 | { name: "s7", reg: rv64::S7 }, |
| 390 | { name: "s8", reg: rv64::S8 }, |
| 391 | { name: "s9", reg: rv64::S9 }, |
| 392 | { name: "sp", reg: rv64::SP }, |
| 393 | { name: "t0", reg: rv64::T0 }, |
| 394 | { name: "t1", reg: rv64::T1 }, |
| 395 | { name: "t2", reg: rv64::T2 }, |
| 396 | { name: "t3", reg: rv64::T3 }, |
| 397 | { name: "t4", reg: rv64::T4 }, |
| 398 | { name: "t5", reg: rv64::T5 }, |
| 399 | { name: "t6", reg: rv64::T6 }, |
| 400 | { name: "tp", reg: rv64::TP }, |
| 401 | { name: "zero", reg: rv64::ZERO }, |
| 402 | ]; |
| 403 | |
| 404 | /// Sorted CSR-name lookup table used by the assembler parser. |
| 405 | export constant CSRS: [CsrEntry; 9] = [ |
| 406 | { name: "mcause", csr: 0x342 }, |
| 407 | { name: "mepc", csr: 0x341 }, |
| 408 | { name: "mhartid", csr: 0xF14 }, |
| 409 | { name: "mie", csr: 0x304 }, |
| 410 | { name: "mip", csr: 0x344 }, |
| 411 | { name: "mscratch", csr: 0x340 }, |
| 412 | { name: "mstatus", csr: 0x300 }, |
| 413 | { name: "mtval", csr: 0x343 }, |
| 414 | { name: "mtvec", csr: 0x305 }, |
| 415 | ]; |
| 416 | |
| 417 | /// Recorded symbol definition. |
| 418 | export record Symbol { |
| 419 | /// Symbol name. |
| 420 | name: *[u8], |
| 421 | /// Section the symbol belongs to. |
| 422 | section: Section, |
| 423 | /// Byte offset within the section. |
| 424 | offset: i32, |
| 425 | /// Whether `.export` exported this symbol outside its assembly fragment. |
| 426 | isExported: bool, |
| 427 | } |
| 428 | |
| 429 | /// Information needed to resolve a pending symbol reference. |
| 430 | export union FixupInfo { |
| 431 | /// Branch to a text label. |
| 432 | Branch { op: BranchOp, rs1: gen::Reg, rs2: gen::Reg, index: u32 }, |
| 433 | /// JAL-like jump to a text label. |
| 434 | Jal { rd: gen::Reg, index: u32 }, |
| 435 | /// Absolute address materialization into a register. |
| 436 | Addr { rd: gen::Reg, index: u32 }, |
| 437 | /// A 32-bit data word referring to a symbol offset. |
| 438 | Word { offset: u32 }, |
| 439 | /// A 64-bit data word referring to a symbol offset. |
| 440 | Dword { offset: u32 }, |
| 441 | } |
| 442 | |
| 443 | /// Pending symbol reference. |
| 444 | export record Fixup { |
| 445 | /// Referenced symbol. |
| 446 | symbol: *[u8], |
| 447 | /// Fixup payload. |
| 448 | info: FixupInfo, |
| 449 | } |
| 450 | |
| 451 | /// Parser and emission state. |
| 452 | export record Assembler { |
| 453 | /// Allocation arena for temporary assembler state. |
| 454 | arena: *mut alloc::Arena, |
| 455 | /// Assembler lexical scanner. |
| 456 | scan: scanner::Scanner, |
| 457 | /// Output text buffer. |
| 458 | text: *mut [u32], |
| 459 | /// Output data buffer. |
| 460 | data: *mut [u8], |
| 461 | /// Current output section. |
| 462 | section: Section, |
| 463 | /// Defined symbols. |
| 464 | symbols: *mut [Symbol], |
| 465 | /// Name-to-symbol index map. |
| 466 | symbolMap: dict::Dict, |
| 467 | /// Name-to-integer map. |
| 468 | constMap: dict::Dict, |
| 469 | /// Names marked by `.export`. |
| 470 | exportMap: dict::Dict, |
| 471 | /// Pending fixups. |
| 472 | fixups: *mut [Fixup], |
| 473 | /// Absolute runtime address of data-section offset zero. |
| 474 | dataBase: u32, |
| 475 | } |
| 476 | |
| 477 | /// Assemble `source` into a [`Program`] backed by `textBuf`, `dataBuf`, and `arena`, using `dataBase` as the runtime address of data-section offset zero; throws any [`Error`] raised while parsing or resolving fixups. |
| 478 | export fn assemble( |
| 479 | sourceKind: scanner::SourceKind, |
| 480 | source: *[u8], |
| 481 | textBuf: *mut [u32], |
| 482 | dataBuf: *mut [u8], |
| 483 | arena: *mut alloc::Arena, |
| 484 | pool: *mut strings::Pool, |
| 485 | dataBase: u32 |
| 486 | ) -> Program throws (Error) { |
| 487 | let slotCap = source.len + SOURCE_CAP_PADDING; |
| 488 | let tableCap = nextPowerOfTwo(slotCap * TABLE_CAPACITY_SCALE); |
| 489 | // Symbol and fixup buffers get one slot per source byte plus padding; each of the three dictionaries gets `tableCap` entries. |
| 490 | let symbols = try! alloc::allocSlice(arena, @sizeOf(Symbol), @alignOf(Symbol), slotCap); |
| 491 | let fixups = try! alloc::allocSlice(arena, @sizeOf(Fixup), @alignOf(Fixup), slotCap); |
| 492 | let entries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap); |
| 493 | let constEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap); |
| 494 | let exportEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap); |
| 495 | // Assembly starts in the text section. NOTE(review): `@sliceOf(ptr, 0, len)` presumably yields an empty slice with `len` capacity that emission appends to -- confirm. |
| 496 | let mut a = Assembler { |
| 497 | arena, |
| 498 | scan: scanner::scanner(sourceKind, source, pool), |
| 499 | text: @sliceOf(textBuf.ptr, 0, textBuf.len), |
| 500 | data: @sliceOf(dataBuf.ptr, 0, dataBuf.len), |
| 501 | section: Section::Text, |
| 502 | symbols: @sliceOf((symbols as *mut [Symbol]).ptr, 0, (symbols as *mut [Symbol]).len), |
| 503 | symbolMap: dict::init(entries as *mut [dict::Entry]), |
| 504 | constMap: dict::init(constEntries as *mut [dict::Entry]), |
| 505 | exportMap: dict::init(exportEntries as *mut [dict::Entry]), |
| 506 | fixups: @sliceOf((fixups as *mut [Fixup]).ptr, 0, (fixups as *mut [Fixup]).len), |
| 507 | dataBase, |
| 508 | }; |
| 509 | // Parse the source, emitting instructions and data and recording symbols and fixups as they are seen. |
| 510 | try parser::parseProgram(&mut a); |
| 511 | // Patch every recorded fixup against the final symbol table and finalize the program. |
| 512 | try emit::finishProgram(&mut a); |
| 513 | |
| 514 | return Program { |
| 515 | text: a.text, |
| 516 | data: a.data, |
| 517 | symbols: a.symbols, |
| 518 | }; |
| 519 | } |
| 520 | |
| 521 | /// Largest power of two representable in a `u32`. |
| 522 | constant MAX_TABLE_CAPACITY: u32 = 0x80000000; |
| 523 | |
| 524 | /// Round `value` up to a power of two, never returning less than |
| 525 | /// [`MIN_TABLE_CAPACITY`]. |
| 526 | /// |
| 527 | /// Doubling stops at [`MAX_TABLE_CAPACITY`]: shifting that value left once |
| 528 | /// more would wrap to zero and the loop would never terminate. Requests above |
| 529 | /// that ceiling therefore return the ceiling itself, which is smaller than |
| 530 | /// `value`; the caller's allocation is then expected to fail instead of the |
| 531 | /// assembler hanging. |
| 532 | fn nextPowerOfTwo(value: u32) -> u32 { |
| 533 | let mut n: u32 = MIN_TABLE_CAPACITY; |
| 534 | while n < value { |
| 535 | if n >= MAX_TABLE_CAPACITY { |
| 536 | // `n << 1` no longer fits in a `u32`. |
| 537 | return n; |
| 538 | } |
| 539 | set n <<= 1; |
| 540 | } |
| 541 | return n; |
| 542 | } |