compiler/
lib/
examples/
std/
arch/
rv64/
asm/
scanner/
emit.rad
8.2 KiB
parser.rad
29.8 KiB
scanner.rad
9.0 KiB
tests.rad
6.3 KiB
asm.rad
22.6 KiB
decode.rad
14.3 KiB
emit.rad
26.0 KiB
encode.rad
21.5 KiB
isel.rad
47.0 KiB
printer.rad
12.6 KiB
tests.rad
17.1 KiB
rv64.rad
11.9 KiB
char/
collections/
lang/
sys/
arch.rad
68 B
char.rad
855 B
collections.rad
39 B
fmt.rad
8.5 KiB
intrinsics.rad
391 B
io.rad
1.3 KiB
lang.rad
258 B
mem.rad
2.2 KiB
sys.rad
173 B
testing.rad
2.4 KiB
tests.rad
14.8 KiB
vec.rad
3.2 KiB
std.rad
281 B
scripts/
seed/
sublime/
test/
vim/
.gitignore
366 B
.gitsigners
112 B
LICENSE
1.1 KiB
Makefile
3.6 KiB
README
2.5 KiB
STYLE
2.5 KiB
std.lib
1.2 KiB
std.lib.test
347 B
lib/std/arch/rv64/asm/parser.rad
raw
| 1 | //! Assembler parser pass. |
| 2 | use std::mem; |
| 3 | use std::fmt; |
| 4 | use std::lang::alloc; |
| 5 | use std::lang::strings; |
| 6 | use std::lang::parser; |
| 7 | use std::lang::gen; |
| 8 | use std::collections::dict; |
| 9 | use std::arch::rv64::encode; |
| 10 | use std::arch::rv64; |
| 11 | |
| 12 | use super::emit; |
| 13 | use super::scanner; |
| 14 | |
| 15 | /// Result of parsing an `offset(base)` memory operand: a base register plus a signed byte offset. |
| 16 | record MemOperand { |
| 17 | /// Register named inside the operand's parentheses. |
| 18 | base: gen::Reg, |
| 19 | /// Signed byte offset written before the parentheses; `parseMemory` leaves it at 0 when none is written. |
| 20 | offset: i32, |
| 21 | } |
| 22 | |
| 23 | /// Parse assembler source into `a`: load the first token, then parse top-level items until `Eof`, propagating the first error thrown. |
| 24 | export fn parseProgram(a: *mut super::Assembler) throws (super::Error) { |
| 25 | advance(a); |
| 26 | |
| 27 | while a.scan.current.kind <> scanner::TokenKind::Eof { |
| 28 | try parseItem(a); |
| 29 | } |
| 30 | } |
| 31 | |
| 32 | /// Align `value` upward to `alignment` via `mem::alignUp`, returning nil when `value` exceeds `parser::U32_MAX - (alignment - 1)`. |
| 33 | fn checkedAlignUp(value: u32, alignment: u32) -> ?u32 { |
| 34 | let ceiling = parser::U32_MAX - (alignment - 1); |
| 35 | if value > ceiling { |
| 36 | return nil; |
| 37 | } |
| 38 | return mem::alignUp(value, alignment); |
| 39 | } |
| 40 | |
| 41 | /// Shift the token window forward: `current` is saved into `previous`, then `current` is refilled from `scanner::next`. |
| 42 | fn advance(a: *mut super::Assembler) { |
| 43 | set a.scan.previous = a.scan.current; |
| 44 | set a.scan.current = scanner::next(&mut a.scan); |
| 45 | } |
| 46 | |
| 47 | /// Advance past the current token and return `true` if it has `kind`; otherwise leave the scanner untouched and return `false`. |
| 48 | fn consume(a: *mut super::Assembler, kind: scanner::TokenKind) -> bool { |
| 49 | if a.scan.current.kind <> kind { |
| 50 | return false; |
| 51 | } |
| 52 | advance(a); |
| 53 | return true; |
| 54 | } |
| 55 | |
| 56 | /// Build (without throwing) an `Invalid` error that carries `message` and the offset of the current token. |
| 57 | fn fail(a: *super::Assembler, message: *[u8]) -> super::Error { |
| 58 | return super::Error::Invalid { offset: a.scan.current.offset, message }; |
| 59 | } |
| 60 | |
| 61 | /// Build (without throwing) an `Invalid` error that carries `message` and the offset of `tok`. |
| 62 | fn failOnToken(tok: scanner::Token, message: *[u8]) -> super::Error { |
| 63 | return super::Error::Invalid { offset: tok.offset, message }; |
| 64 | } |
| 65 | |
| 66 | /// Throw an error located at `tok` unless the assembler's current section is `Data`. |
| 67 | fn expectDataSection(a: *super::Assembler, tok: scanner::Token) throws (super::Error) { |
| 68 | if a.section <> super::Section::Data { |
| 69 | throw failOnToken(tok, "data directive is only valid in the data section"); |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | /// Consume the current token if it has `kind`; otherwise throw `message` at the current token, which is left unconsumed. |
| 74 | fn expect(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) throws (super::Error) { |
| 75 | if not consume(a, kind) { |
| 76 | throw fail(a, message); |
| 77 | } |
| 78 | } |
| 79 | |
| 80 | /// Consume a token of `kind` and return it (read back from `scan.previous`), or throw `message` at the current token. |
| 81 | fn expectToken(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) -> scanner::Token throws (super::Error) { |
| 82 | try expect(a, kind, message); |
| 83 | return a.scan.previous; |
| 84 | } |
| 85 | |
| 86 | /// Throw `message` at the current token unless it is a semicolon; the semicolon itself is not consumed here. |
| 87 | fn expectTerminator(a: *super::Assembler, message: *[u8]) throws (super::Error) { |
| 88 | if a.scan.current.kind <> scanner::TokenKind::Semicolon { |
| 89 | throw fail(a, message); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | /// Narrow `value` to i32, throwing `message` when it lies outside [-I32_MIN_MAGNITUDE, I32_MAX_VALUE]. The error offset is the current token; callers in this file invoke it after the value was consumed, so that is the token following the value. |
| 94 | fn expectI32Value(a: *super::Assembler, value: i64, message: *[u8]) -> i32 throws (super::Error) { |
| 95 | if value < -super::I32_MIN_MAGNITUDE or value > super::I32_MAX_VALUE { |
| 96 | throw fail(a, message); |
| 97 | } |
| 98 | return value as i32; |
| 99 | } |
| 100 | |
| 101 | /// Narrow `value` to i32 after `encode::isSmallImm64` accepts it as a signed 12-bit immediate; otherwise throw "immediate out of range" at the current token. |
| 102 | fn expectSmallImmValue(a: *super::Assembler, value: i64) -> i32 throws (super::Error) { |
| 103 | if not encode::isSmallImm64(value) { |
| 104 | throw fail(a, "immediate out of range"); |
| 105 | } |
| 106 | return value as i32; |
| 107 | } |
| 108 | |
| 109 | /// Define label `name` at the current text or data offset via `emit::defineSymbol`, first throwing "duplicate label" at `tok` if `symbolMap` already contains the name. |
| 110 | fn defineSymbol(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) { |
| 111 | if dict::get(&a.symbolMap, name) <> nil { |
| 112 | throw failOnToken(tok, "duplicate label"); |
| 113 | } |
| 114 | emit::defineSymbol(a, name); |
| 115 | } |
| 116 | |
| 117 | /// Emit `value` as a data item of `width`: `Word` values must first pass the i32 range check ("word literal out of range"), while `Dword` values are handed to `emit::emitDataValue` as-is. |
| 118 | fn emitDataValue(a: *mut super::Assembler, value: i64, width: super::DataWidth) throws (super::Error) { |
| 119 | match width { |
| 120 | case super::DataWidth::Word => |
| 121 | try emit::emitDataValue(a, (try expectI32Value(a, value, "word literal out of range")) as i64, width), |
| 122 | case super::DataWidth::Dword => |
| 123 | try emit::emitDataValue(a, value, width), |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | /// Parse a leading token of `kind` followed by zero or more `::Ident` segments and intern the covered source text, skipping the first `trimPrefix` bytes of the leading token. |
| 128 | fn parseScopedName( |
| 129 | a: *mut super::Assembler, |
| 130 | kind: scanner::TokenKind, |
| 131 | message: *[u8], |
| 132 | trimPrefix: u32 |
| 133 | ) -> *[u8] throws (super::Error) { |
| 134 | let head = try expectToken(a, kind, message); |
| 135 | let mut stop = head.offset + head.source.len; |
| 136 | let begin = head.offset + trimPrefix; |
| 137 | while consume(a, scanner::TokenKind::ColonColon) { |
| 138 | let tail = try expectToken(a, scanner::TokenKind::Ident, "expected identifier after `::`"); |
| 139 | set stop = tail.offset + tail.source.len; |
| 140 | } |
| 141 | let text = &a.scan.source[begin..stop]; |
| 142 | return strings::intern(a.scan.pool, text); |
| 143 | } |
| 144 | |
| 145 | /// Parse a bare symbol name: an `Ident` optionally followed by `::`-separated identifier segments, with no prefix trimmed. |
| 146 | fn parseSymbolName(a: *mut super::Assembler) -> *[u8] throws (super::Error) { |
| 147 | return try parseScopedName(a, scanner::TokenKind::Ident, "expected symbol name", 0); |
| 148 | } |
| 149 | |
| 150 | /// Return `true` when the token kind `tok` is `Label` or `QuotedLabel`. |
| 151 | fn isLabel(tok: scanner::TokenKind) -> bool { |
| 152 | return tok == scanner::TokenKind::Label or tok == scanner::TokenKind::QuotedLabel; |
| 153 | } |
| 154 | |
| 155 | /// Consume a quoted label token, strip its sigil and quote delimiters, unescape the remainder into arena storage, and intern the decoded name. |
| 156 | fn parseQuotedLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) { |
| 157 | let quoted = try expectToken(a, scanner::TokenKind::QuotedLabel, "expected label name"); |
| 158 | let bodyStart = super::LABEL_SIGIL_LEN + super::QUOTE_DELIM_LEN; |
| 159 | let bodyEnd = quoted.source.len - super::QUOTE_DELIM_LEN; |
| 160 | let escaped = &quoted.source[bodyStart..bodyEnd]; |
| 161 | let buf = try alloc::allocSlice(a.arena, 1, 1, escaped.len) catch { |
| 162 | panic "asm: out of memory allocating quoted label"; |
| 163 | } as *mut [u8]; |
| 164 | let written = fmt::unescapeString(escaped, buf); |
| 165 | return strings::intern(a.scan.pool, &buf[..written]); |
| 166 | } |
| 167 | |
| 168 | /// Parse a label name in either form: a quoted label is decoded, any other token is parsed as a scoped `Label` with its sigil trimmed. |
| 169 | fn parseLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) { |
| 170 | if a.scan.current.kind <> scanner::TokenKind::QuotedLabel { |
| 171 | return try parseScopedName(a, scanner::TokenKind::Label, "expected label name", super::LABEL_SIGIL_LEN); |
| 172 | } |
| 173 | return try parseQuotedLabelName(a); |
| 174 | } |
| 175 | |
| 176 | /// Consume a `Directive` token and return its source text with the first `DIRECTIVE_SIGIL_LEN` bytes (the leading `.`) dropped. |
| 177 | fn parseDirectiveName(a: *mut super::Assembler) -> *[u8] throws (super::Error) { |
| 178 | let name = try expectToken(a, scanner::TokenKind::Directive, "expected directive name"); |
| 179 | return &name.source[super::DIRECTIVE_SIGIL_LEN..]; |
| 180 | } |
| 181 | |
| 182 | /// Parse one top-level item by dispatching on the current token: an instruction or directive (each must end with `;`), or a label definition (no terminator); a number or any other token is an error. |
| 183 | fn parseItem(a: *mut super::Assembler) throws (super::Error) { |
| 184 | match a.scan.current.kind { |
| 185 | case scanner::TokenKind::Ident => { |
| 186 | let tok = a.scan.current; |
| 187 | let name = try parseSymbolName(a); |
| 188 | try parseInstruction(a, name, tok); |
| 189 | try expect(a, scanner::TokenKind::Semicolon, "expected `;` after instruction"); |
| 190 | } |
| 191 | case scanner::TokenKind::Number => { |
| 192 | let tok = a.scan.current; |
| 193 | advance(a); |
| 194 | throw failOnToken(tok, "unexpected number at top level"); |
| 195 | } |
| 196 | case scanner::TokenKind::Label, scanner::TokenKind::QuotedLabel => { |
| 197 | let tok = a.scan.current; |
| 198 | let name = try parseLabelName(a); |
| 199 | try defineSymbol(a, name, tok); |
| 200 | } |
| 201 | case scanner::TokenKind::Directive => { |
| 202 | let tok = a.scan.current; |
| 203 | let name = try parseDirectiveName(a); |
| 204 | try parseDirective(a, name, tok); |
| 205 | try expect(a, scanner::TokenKind::Semicolon, "expected `;` after directive"); |
| 206 | } |
| 207 | else => throw fail(a, "expected label, instruction, or directive"), |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | /// Binary-search a table of `len` names (read through `getName`, expected in `mem::cmp` order) for `name`; returns the matching index or nil. Only `mem::cmp` results of -1 and 1 steer the search; every other result counts as a match. |
| 212 | fn findSortedNameIndex(name: *[u8], len: u32, getName: fn(u32) -> *[u8]) -> ?u32 { |
| 213 | let mut lo: u32 = 0; |
| 214 | let mut hi: u32 = len; |
| 215 | while lo < hi { |
| 216 | let probe = lo + ((hi - lo) / 2); |
| 217 | let order = mem::cmp(name, getName(probe)); |
| 218 | if order == -1 { |
| 219 | set hi = probe; |
| 220 | } else if order == 1 { |
| 221 | set lo = probe + 1; |
| 222 | } else { |
| 223 | return probe; |
| 224 | } |
| 225 | } |
| 226 | return nil; |
| 227 | } |
| 228 | |
| 229 | /// Adapter for [`findSortedNameIndex`]: return the mnemonic stored at `index` in `super::INSTRUCTIONS`. |
| 230 | fn instructionNameAt(index: u32) -> *[u8] { |
| 231 | return super::INSTRUCTIONS[index].name; |
| 232 | } |
| 233 | |
| 234 | /// Adapter for [`findSortedNameIndex`]: return the directive name stored at `index` in `super::DIRECTIVES`. |
| 235 | fn directiveNameAt(index: u32) -> *[u8] { |
| 236 | return super::DIRECTIVES[index].name; |
| 237 | } |
| 238 | |
| 239 | /// Adapter for [`findSortedNameIndex`]: return the register name stored at `index` in `super::REGISTERS`. |
| 240 | fn registerNameAt(index: u32) -> *[u8] { |
| 241 | return super::REGISTERS[index].name; |
| 242 | } |
| 243 | |
| 244 | /// Adapter for [`findSortedNameIndex`]: return the CSR name stored at `index` in `super::CSRS`. |
| 245 | fn csrNameAt(index: u32) -> *[u8] { |
| 246 | return super::CSRS[index].name; |
| 247 | } |
| 248 | |
| 249 | /// Return the `InstructionEncoder` registered for mnemonic `name` in `super::INSTRUCTIONS`, or nil when the mnemonic is unknown. |
| 250 | fn lookupInstruction(name: *[u8]) -> ?super::InstructionEncoder { |
| 251 | if let slot = findSortedNameIndex(name, super::INSTRUCTIONS.len, instructionNameAt) { |
| 252 | return super::INSTRUCTIONS[slot].encoder; |
| 253 | } |
| 254 | return nil; |
| 255 | } |
| 256 | |
| 257 | /// Return the `DirectiveKind` registered for `name` in `super::DIRECTIVES`, or nil when the directive is unknown. |
| 258 | fn classifyDirective(name: *[u8]) -> ?super::DirectiveKind { |
| 259 | if let slot = findSortedNameIndex(name, super::DIRECTIVES.len, directiveNameAt) { |
| 260 | return super::DIRECTIVES[slot].kind; |
| 261 | } |
| 262 | return nil; |
| 263 | } |
| 264 | |
| 265 | /// Return the register registered for `name` in `super::REGISTERS`, or nil when unknown; `name` is the register text with its `%` prefix already removed. |
| 266 | fn lookupRegister(name: *[u8]) -> ?gen::Reg { |
| 267 | if let slot = findSortedNameIndex(name, super::REGISTERS.len, registerNameAt) { |
| 268 | return super::REGISTERS[slot].reg; |
| 269 | } |
| 270 | return nil; |
| 271 | } |
| 272 | |
| 273 | /// Return the CSR number registered for `name` in `super::CSRS`, or nil when the name is unknown. |
| 274 | fn lookupCsr(name: *[u8]) -> ?u32 { |
| 275 | if let slot = findSortedNameIndex(name, super::CSRS.len, csrNameAt) { |
| 276 | return super::CSRS[slot].csr; |
| 277 | } |
| 278 | return nil; |
| 279 | } |
| 280 | |
| 281 | /// Parse the operands of instruction `name`, whose mnemonic token `tok` has already been consumed, and emit or record its encoding. Throws at `tok` when outside the text section or when the mnemonic is unknown. |
| 282 | fn parseInstruction(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) { |
| 283 | if a.section <> super::Section::Text { |
| 284 | throw failOnToken(tok, "instructions are only valid in the text section"); |
| 285 | } |
| 286 | let form = lookupInstruction(name) else { |
| 287 | throw failOnToken(tok, "unknown instruction"); |
| 288 | }; |
| 289 | match form { |
| 290 | case super::InstructionEncoder::NoOperand { enc } => { |
| 291 | // Reuse the shared terminator check (as the `.text`/`.data` directive arms do) |
| 292 | // rather than re-testing for the semicolon by hand; the `;` stays unconsumed. |
| 293 | try expectTerminator(a, "unexpected operand"); |
| 294 | try emit::emitText(a, enc()); |
| 295 | return; |
| 296 | } |
| 297 | case super::InstructionEncoder::Li => return try parseLi(a), |
| 298 | case super::InstructionEncoder::La => return try parseLa(a), |
| 299 | case super::InstructionEncoder::RR { enc } => return try parseRR(a, enc), |
| 300 | case super::InstructionEncoder::RRR { enc } => return try parseRRR(a, enc), |
| 301 | case super::InstructionEncoder::RRI { enc } => return try parseRRI(a, enc), |
| 302 | case super::InstructionEncoder::Shift { enc } => |
| 303 | return try parseShift(a, enc, super::SHIFT_LIMIT, "shift amount out of range"), |
| 304 | case super::InstructionEncoder::WordShift { enc } => |
| 305 | return try parseShift(a, enc, super::WORD_SHIFT_LIMIT, "word shift amount out of range"), |
| 306 | case super::InstructionEncoder::Load { enc } => return try parseLoad(a, enc), |
| 307 | case super::InstructionEncoder::Store { enc } => return try parseStore(a, enc), |
| 308 | case super::InstructionEncoder::Branch { op } => return try parseBranch(a, op), |
| 309 | case super::InstructionEncoder::BranchZero { op } => return try parseBranchZero(a, op), |
| 310 | case super::InstructionEncoder::Jal => return try parseJal(a), |
| 311 | case super::InstructionEncoder::Jump { rd } => return try parseJ(a, rd), |
| 312 | case super::InstructionEncoder::RdCsr { enc } => return try parseRdCsr(a, enc), |
| 313 | case super::InstructionEncoder::CsrRs1 { enc } => return try parseCsrRs1(a, enc), |
| 314 | case super::InstructionEncoder::Csrrw => return try parseCsrrw(a), |
| 315 | case super::InstructionEncoder::Csrsi => return try parseCsrsi(a), |
| 316 | case super::InstructionEncoder::Upper { enc } => return try parseUpper(a, enc), |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | /// Parse the operands of the `li` pseudo-instruction (destination register, then value): a value accepted by `encode::isSmallImm64` becomes one `addi` from the zero register; any other value must fit i32 and is split into `lui` + `addi`. |
| 321 | fn parseLi(a: *mut super::Assembler) throws (super::Error) { |
| 322 | let rd = try parseRegister(a); |
| 323 | let value = try parseValue(a); |
| 324 | if encode::isSmallImm64(value) { |
| 325 | try emit::emitText(a, encode::addi(rd, rv64::ZERO, value as i32)); |
| 326 | return; |
| 327 | } |
| 328 | let imm = try expectI32Value(a, value, "li immediate out of range"); |
| 329 | let split = rv64::emit::splitImm(imm); |
| 330 | |
| 331 | try emit::emitText(a, encode::lui(rd, split.hi)); |
| 332 | try emit::emitText(a, encode::addi(rd, rd, split.lo)); |
| 333 | } |
| 334 | |
| 335 | /// Parse the operands of the `la` pseudo-instruction (destination register, then label) and record an `Addr` text fixup at the current text index; the trailing 2 is presumably the number of instruction words reserved (confirm in `emit::recordTextFixup`). |
| 336 | fn parseLa(a: *mut super::Assembler) throws (super::Error) { |
| 337 | let rd = try parseRegister(a); |
| 338 | let target = try parseLabelName(a); |
| 339 | let index = a.text.len; |
| 340 | |
| 341 | try emit::recordTextFixup(a, target, super::FixupInfo::Addr { rd, index }, 2); |
| 342 | } |
| 343 | |
| 344 | /// Parse a CSR read-like instruction: operands are the destination register followed by the CSR name; emits `enc(rd, csr)`. |
| 345 | fn parseRdCsr(a: *mut super::Assembler, enc: fn(gen::Reg, u32) -> u32) throws (super::Error) { |
| 346 | let rd = try parseRegister(a); |
| 347 | let csr = try parseCsr(a); |
| 348 | |
| 349 | try emit::emitText(a, enc(rd, csr)); |
| 350 | } |
| 351 | |
| 352 | /// Parse a CSR write-like instruction: operands are the CSR name followed by the source register; emits `enc(csr, rs1)`. |
| 353 | fn parseCsrRs1(a: *mut super::Assembler, enc: fn(u32, gen::Reg) -> u32) throws (super::Error) { |
| 354 | let csr = try parseCsr(a); |
| 355 | let rs1 = try parseRegister(a); |
| 356 | |
| 357 | try emit::emitText(a, enc(csr, rs1)); |
| 358 | } |
| 359 | |
| 360 | /// Parse the operands of `csrrw` (destination register, CSR name, source register) and emit `encode::csrrw`. |
| 361 | fn parseCsrrw(a: *mut super::Assembler) throws (super::Error) { |
| 362 | let rd = try parseRegister(a); |
| 363 | let csr = try parseCsr(a); |
| 364 | let rs1 = try parseRegister(a); |
| 365 | |
| 366 | try emit::emitText(a, encode::csrrw(rd, csr, rs1)); |
| 367 | } |
| 368 | |
| 369 | /// Parse a CSR immediate instruction: a CSR name, then an immediate that must satisfy `0 <= imm < CSR_IMM_LIMIT`; emits `encode::csrsi`. |
| 370 | fn parseCsrsi(a: *mut super::Assembler) throws (super::Error) { |
| 371 | let csr = try parseCsr(a); |
| 372 | let imm = try parseValue(a); |
| 373 | if imm < 0 or imm >= super::CSR_IMM_LIMIT { |
| 374 | throw fail(a, "CSR immediate out of range"); |
| 375 | } |
| 376 | try emit::emitText(a, encode::csrsi(csr, imm as u32)); |
| 377 | } |
| 378 | |
| 379 | /// Parse a two-register instruction (destination, then source) and emit `enc(rd, rs)`. |
| 380 | fn parseRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg) -> u32) throws (super::Error) { |
| 381 | let rd = try parseRegister(a); |
| 382 | let rs = try parseRegister(a); |
| 383 | |
| 384 | try emit::emitText(a, enc(rd, rs)); |
| 385 | } |
| 386 | |
| 387 | /// Parse a three-register instruction (destination, first source, second source) and emit `enc(rd, rs1, rs2)`. |
| 388 | fn parseRRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32) throws (super::Error) { |
| 389 | let rd = try parseRegister(a); |
| 390 | let rs1 = try parseRegister(a); |
| 391 | let rs2 = try parseRegister(a); |
| 392 | |
| 393 | try emit::emitText(a, enc(rd, rs1, rs2)); |
| 394 | } |
| 395 | |
| 396 | /// Parse a register-register-immediate instruction (destination, source, then an immediate parsed by `parseSmallImm`) and emit `enc(rd, rs1, imm)`. |
| 397 | fn parseRRI(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) { |
| 398 | let rd = try parseRegister(a); |
| 399 | let rs1 = try parseRegister(a); |
| 400 | let imm = try parseSmallImm(a); |
| 401 | |
| 402 | try emit::emitText(a, enc(rd, rs1, imm)); |
| 403 | } |
| 404 | |
| 405 | /// Parse a shift-immediate instruction (destination, source, shift amount), rejecting negative amounts and amounts `>= limit` (the latter with the caller-supplied `message`) before emitting `enc(rd, rs1, shamt)`. |
| 406 | fn parseShift( |
| 407 | a: *mut super::Assembler, |
| 408 | enc: fn(gen::Reg, gen::Reg, i32) -> u32, |
| 409 | limit: i32, |
| 410 | message: *[u8] |
| 411 | ) throws (super::Error) { |
| 412 | let rd = try parseRegister(a); |
| 413 | let rs1 = try parseRegister(a); |
| 414 | let shamt64 = try parseValue(a); |
| 415 | |
| 416 | if shamt64 < 0 { |
| 417 | throw fail(a, "shift amount must be non-negative"); |
| 418 | } |
| 419 | if shamt64 >= limit as i64 { |
| 420 | throw fail(a, message); |
| 421 | } |
| 422 | let shamt = shamt64 as i32; |
| 423 | |
| 424 | try emit::emitText(a, enc(rd, rs1, shamt)); |
| 425 | } |
| 426 | |
| 427 | /// Parse a load instruction (destination register, then a memory operand) and emit `enc(rd, base, offset)`. |
| 428 | fn parseLoad(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) { |
| 429 | let rd = try parseRegister(a); |
| 430 | let memop = try parseMemory(a); |
| 431 | |
| 432 | try emit::emitText(a, enc(rd, memop.base, memop.offset)); |
| 433 | } |
| 434 | |
| 435 | /// Parse a store instruction (source register, then a memory operand) and emit `enc(rs2, base, offset)`. |
| 436 | fn parseStore(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) { |
| 437 | let rs2 = try parseRegister(a); |
| 438 | let memop = try parseMemory(a); |
| 439 | |
| 440 | try emit::emitText(a, enc(rs2, memop.base, memop.offset)); |
| 441 | } |
| 442 | |
| 443 | /// Parse a two-register branch: both compared registers, then the branch target handled by `parseBranchLabel`. |
| 444 | fn parseBranch(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) { |
| 445 | let rs1 = try parseRegister(a); |
| 446 | let rs2 = try parseRegister(a); |
| 447 | |
| 448 | try parseBranchLabel(a, op, rs1, rs2); |
| 449 | } |
| 450 | |
| 451 | /// Parse a label name if the current token is a label form; otherwise consume nothing and return nil. |
| 452 | fn parseOptionalLabel(a: *mut super::Assembler) -> ?*[u8] throws (super::Error) { |
| 453 | if isLabel(a.scan.current.kind) { |
| 454 | return try parseLabelName(a); |
| 455 | } |
| 456 | return nil; |
| 457 | } |
| 458 | |
| 459 | /// Parse a branch target: a label records a `Branch` fixup at the current text index; otherwise an immediate is parsed with `parseBranchImm` and the branch is encoded and emitted directly. |
| 460 | fn parseBranchLabel(a: *mut super::Assembler, op: super::BranchOp, rs1: gen::Reg, rs2: gen::Reg) throws (super::Error) { |
| 461 | let index = a.text.len; |
| 462 | if let target = try parseOptionalLabel(a) { |
| 463 | try emit::recordTextFixup(a, target, super::FixupInfo::Branch { op, rs1, rs2, index }, 1); |
| 464 | return; |
| 465 | } |
| 466 | let imm = try parseBranchImm(a); |
| 467 | try emit::emitText(a, emit::encodeBranch(op, rs1, rs2, imm)); |
| 468 | } |
| 469 | |
| 470 | /// Parse a branch-to-zero pseudo-instruction: one register, compared against `rv64::ZERO`, then the branch target. |
| 471 | fn parseBranchZero(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) { |
| 472 | let rs = try parseRegister(a); |
| 473 | try parseBranchLabel(a, op, rs, rv64::ZERO); |
| 474 | } |
| 475 | |
| 476 | /// Parse `jal` with an explicit destination register, then delegate the jump target to `parseJ`. |
| 477 | fn parseJal(a: *mut super::Assembler) throws (super::Error) { |
| 478 | let rd = try parseRegister(a); |
| 479 | try parseJ(a, rd); |
| 480 | } |
| 481 | |
| 482 | /// Parse a jump target for `jal` or a jump pseudo-instruction writing `rd`: a label records a `Jal` fixup at the current text index; otherwise an immediate is parsed with `parseJumpImm` and `encode::jal` is emitted directly. |
| 483 | fn parseJ(a: *mut super::Assembler, rd: gen::Reg) throws (super::Error) { |
| 484 | let index = a.text.len; |
| 485 | if let target = try parseOptionalLabel(a) { |
| 486 | try emit::recordTextFixup(a, target, super::FixupInfo::Jal { rd, index }, 1); |
| 487 | return; |
| 488 | } |
| 489 | let imm = try parseJumpImm(a); |
| 490 | try emit::emitText(a, encode::jal(rd, imm)); |
| 491 | } |
| 492 | |
| 493 | /// Parse an upper-immediate instruction: a destination register, then an immediate that must lie in [0, UPPER_IMM_MAX_VALUE]; emits `enc(rd, imm)`. |
| 494 | fn parseUpper(a: *mut super::Assembler, enc: fn(gen::Reg, i32) -> u32) throws (super::Error) { |
| 495 | let rd = try parseRegister(a); |
| 496 | let imm64 = try parseValue(a); |
| 497 | if imm64 < 0 or imm64 > super::UPPER_IMM_MAX_VALUE { |
| 498 | throw fail(a, "upper immediate out of range"); |
| 499 | } |
| 500 | try emit::emitText(a, enc(rd, imm64 as i32)); |
| 501 | } |
| 502 | |
| 503 | /// Dispatch directive `name`, whose token `tok` has already been consumed: `Text`/`Data` switch section and take no operand; `Ascii`, `Byte`, `Dword`, `Space` and `Word` first require the data section; `Align`, `Constant` and `Export` are parsed without a section check here. Unknown names throw at `tok`. |
| 504 | fn parseDirective(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) { |
| 505 | let directive = classifyDirective(name) else { |
| 506 | throw failOnToken(tok, "unknown directive"); |
| 507 | }; |
| 508 | match directive { |
| 509 | case super::DirectiveKind::Text => { |
| 510 | try expectTerminator(a, "unexpected operand"); |
| 511 | set a.section = super::Section::Text; |
| 512 | return; |
| 513 | } |
| 514 | case super::DirectiveKind::Data => { |
| 515 | try expectTerminator(a, "unexpected operand"); |
| 516 | set a.section = super::Section::Data; |
| 517 | return; |
| 518 | } |
| 519 | case super::DirectiveKind::Align => |
| 520 | return try parseAlignDirective(a), |
| 521 | case super::DirectiveKind::Ascii => { |
| 522 | try expectDataSection(a, tok); |
| 523 | return try parseStringDirective(a); |
| 524 | } |
| 525 | case super::DirectiveKind::Byte => { |
| 526 | try expectDataSection(a, tok); |
| 527 | return try parseByteDirective(a); |
| 528 | } |
| 529 | case super::DirectiveKind::Constant => |
| 530 | return try parseConstantDirective(a), |
| 531 | case super::DirectiveKind::Dword => { |
| 532 | try expectDataSection(a, tok); |
| 533 | return try parseIntDirective(a, super::DataWidth::Dword); |
| 534 | } |
| 535 | case super::DirectiveKind::Export => |
| 536 | return try parseExportDirective(a), |
| 537 | case super::DirectiveKind::Space => { |
| 538 | try expectDataSection(a, tok); |
| 539 | return try parseSpaceDirective(a); |
| 540 | } |
| 541 | case super::DirectiveKind::Word => { |
| 542 | try expectDataSection(a, tok); |
| 543 | return try parseIntDirective(a, super::DataWidth::Word); |
| 544 | } |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | /// Parse a `.constant` directive: a symbol name followed by an expression whose value must fit i32, stored in `constMap` under that name. |
| 549 | fn parseConstantDirective(a: *mut super::Assembler) throws (super::Error) { |
| 550 | let key = try parseSymbolName(a); |
| 551 | let wide = try parseExpr(a); |
| 552 | let narrow = try expectI32Value(a, wide, "constant out of range"); |
| 553 | dict::insert(&mut a.constMap, key, narrow); |
| 554 | } |
| 555 | |
| 556 | /// Parse a `.export` directive: record the label name in `exportMap` and, when the symbol is already present in `symbolMap`, mark it exported right away. |
| 557 | fn parseExportDirective(a: *mut super::Assembler) throws (super::Error) { |
| 558 | let name = try parseLabelName(a); |
| 559 | dict::insert(&mut a.exportMap, name, 1); |
| 560 | if let idx = dict::get(&a.symbolMap, name) { |
| 561 | set a.symbols[idx as u32].isExported = true; |
| 562 | } |
| 563 | } |
| 564 | |
| 565 | /// Parse a `.space` directive and emit `count` zero bytes; negative counts are rejected and `DataOverflow` is thrown up front when `count` exceeds the room left in the data buffer. |
| 566 | fn parseSpaceDirective(a: *mut super::Assembler) throws (super::Error) { |
| 567 | let count = try parseValue(a); |
| 568 | if count < 0 { |
| 569 | throw fail(a, "space size must be non-negative"); |
| 570 | } |
| 571 | let remaining = a.data.cap - a.data.len; |
| 572 | if count > remaining as i64 { |
| 573 | throw super::Error::DataOverflow; |
| 574 | } |
| 575 | for _ in 0..count as u32 { |
| 576 | try emit::emitByte(a, 0); |
| 577 | } |
| 578 | } |
| 579 | |
| 580 | /// Parse an `.align` directive for the current section: the amount must be a positive power of two within u32 (and, in text, a multiple of the instruction size); text is padded with whole instruction words, data with zero bytes, and the section's overflow error is thrown when the padding does not fit. |
| 581 | fn parseAlignDirective(a: *mut super::Assembler) throws (super::Error) { |
| 582 | let amount64 = try parseValue(a); |
| 583 | if amount64 <= 0 { |
| 584 | throw fail(a, "alignment must be positive"); |
| 585 | } |
| 586 | if amount64 > super::U32_MAX_VALUE { |
| 587 | throw fail(a, "alignment out of range"); |
| 588 | } |
| 589 | let amount = amount64 as u32; |
| 590 | if (amount & (amount - 1)) <> 0 { |
| 591 | throw fail(a, "alignment must be a power of two"); |
| 592 | } |
| 593 | match a.section { |
| 594 | case super::Section::Text => { |
| 595 | if amount % rv64::INSTR_SIZE as u32 <> 0 { |
| 596 | throw fail(a, "text alignment must be a multiple of 4"); |
| 597 | } |
| 598 | let bytes = a.text.len * rv64::INSTR_SIZE as u32; |
| 599 | let aligned = checkedAlignUp(bytes, amount) else { |
| 600 | throw super::Error::TextOverflow; |
| 601 | }; |
| 602 | let words = (aligned - bytes) / rv64::INSTR_SIZE as u32; |
| 603 | if words > a.text.cap - a.text.len { |
| 604 | throw super::Error::TextOverflow; |
| 605 | } |
| 606 | try emit::emitTextPadding(a, words); |
| 607 | } |
| 608 | case super::Section::Data => { |
| 609 | let aligned = checkedAlignUp(a.data.len, amount) else { |
| 610 | throw super::Error::DataOverflow; |
| 611 | }; |
| 612 | if aligned > a.data.cap { |
| 613 | throw super::Error::DataOverflow; |
| 614 | } |
| 615 | for _ in a.data.len..aligned { |
| 616 | try emit::emitByte(a, 0); |
| 617 | } |
| 618 | } |
| 619 | } |
| 620 | } |
| 621 | |
| 622 | /// Parse a `.byte` directive: a comma-separated list whose items are either char literals or values in [0, U8_MAX_VALUE], emitting one byte per item. |
| 623 | fn parseByteDirective(a: *mut super::Assembler) throws (super::Error) { |
| 624 | loop { |
| 625 | if a.scan.current.kind == scanner::TokenKind::Char { |
| 626 | let ch = parseCharLiteral(a.scan.current) else { |
| 627 | throw fail(a, "invalid char literal"); |
| 628 | }; |
| 629 | try emit::emitByte(a, ch); |
| 630 | advance(a); |
| 631 | } else { |
| 632 | let value = try parseValue(a); |
| 633 | if value < 0 or value > super::U8_MAX_VALUE { |
| 634 | throw fail(a, "byte literal out of range"); |
| 635 | } |
| 636 | try emit::emitByte(a, value as u8); |
| 637 | } |
| 638 | if not consume(a, scanner::TokenKind::Comma) { |
| 639 | return; |
| 640 | } |
| 641 | } |
| 642 | } |
| 643 | |
| 644 | /// Parse a fixed-width integer data directive: a comma-separated list where a label records a data fixup of `width`, and char literals or numeric values are emitted through `emitDataValue`. |
| 645 | fn parseIntDirective(a: *mut super::Assembler, width: super::DataWidth) throws (super::Error) { |
| 646 | loop { |
| 647 | if isLabel(a.scan.current.kind) { |
| 648 | let target = try parseLabelName(a); |
| 649 | try emit::recordDataFixup(a, target, width); |
| 650 | } else if a.scan.current.kind == scanner::TokenKind::Char { |
| 651 | let ch = parseCharLiteral(a.scan.current) else { |
| 652 | throw fail(a, "invalid char literal"); |
| 653 | }; |
| 654 | advance(a); |
| 655 | try emitDataValue(a, ch as i64, width); |
| 656 | } else { |
| 657 | try emitDataValue(a, try parseValue(a), width); |
| 658 | } |
| 659 | if not consume(a, scanner::TokenKind::Comma) { |
| 660 | return; |
| 661 | } |
| 662 | } |
| 663 | } |
| 664 | |
| 665 | /// Parse a `.ascii` directive: a comma-separated list of string literals, each emitted through `emit::emitDecodedString`. |
| 666 | fn parseStringDirective(a: *mut super::Assembler) throws (super::Error) { |
| 667 | loop { |
| 668 | let literal = try expectToken(a, scanner::TokenKind::String, "expected string literal"); |
| 669 | try emit::emitDecodedString(a, literal.source); |
| 670 | if not consume(a, scanner::TokenKind::Comma) { |
| 671 | return; |
| 672 | } |
| 673 | } |
| 674 | } |
| 675 | |
| 676 | /// Consume a `Register` token and resolve the name after its one-byte prefix; throws "unknown register" at that token when the name is not in the register table. |
| 677 | fn parseRegister(a: *mut super::Assembler) -> gen::Reg throws (super::Error) { |
| 678 | let operand = try expectToken(a, scanner::TokenKind::Register, "expected register"); |
| 679 | if let found = lookupRegister(&operand.source[1..]) { |
| 680 | return found; |
| 681 | } |
| 682 | throw super::Error::Invalid { offset: operand.offset, message: "unknown register" }; |
| 683 | } |
| 684 | |
| 685 | /// Parse a simple immediate: a number or named constant, optionally preceded by a single `-` that negates it. |
| 686 | fn parseValue(a: *mut super::Assembler) -> i64 throws (super::Error) { |
| 687 | if not consume(a, scanner::TokenKind::Minus) { |
| 688 | return try parseValuePrimary(a); |
| 689 | } |
| 690 | return -(try parseValuePrimary(a)); |
| 691 | } |
| 692 | |
| 693 | /// Parse the unsigned part of a simple immediate: an integer literal or a named constant; any other token throws "expected number or constant". |
| 694 | fn parseValuePrimary(a: *mut super::Assembler) -> i64 throws (super::Error) { |
| 695 | if a.scan.current.kind == scanner::TokenKind::Number { |
| 696 | return try parseInteger(a); |
| 697 | } |
| 698 | if a.scan.current.kind == scanner::TokenKind::Ident { |
| 699 | return try parseConstantValue(a); |
| 700 | } |
| 701 | throw fail(a, "expected number or constant"); |
| 702 | } |
| 703 | |
| 704 | /// Parse an additive constant expression: a left-to-right chain of `+`/`-` over multiplicative terms, returning the i64 result; no range check is applied to the arithmetic here. |
| 705 | fn parseExpr(a: *mut super::Assembler) -> i64 throws (super::Error) { |
| 706 | let mut value = try parseExprMul(a); |
| 707 | |
| 708 | while a.scan.current.kind == scanner::TokenKind::Plus or a.scan.current.kind == scanner::TokenKind::Minus { |
| 709 | let op = a.scan.current.kind; |
| 710 | advance(a); |
| 711 | |
| 712 | let rhs = try parseExprMul(a); |
| 713 | if op == scanner::TokenKind::Plus { |
| 714 | set value += rhs; |
| 715 | } else { |
| 716 | set value -= rhs; |
| 717 | } |
| 718 | } |
| 719 | return value; |
| 720 | } |
| 721 | |
| 722 | /// Parse a multiplicative expression: a left-to-right chain of `*`/`/` over unary terms, throwing "division by zero" when a divisor evaluates to 0. |
| 723 | fn parseExprMul(a: *mut super::Assembler) -> i64 throws (super::Error) { |
| 724 | let mut value = try parseExprUnary(a); |
| 725 | |
| 726 | while a.scan.current.kind == scanner::TokenKind::Star or a.scan.current.kind == scanner::TokenKind::Slash { |
| 727 | let op = a.scan.current.kind; |
| 728 | advance(a); |
| 729 | |
| 730 | let rhs = try parseExprUnary(a); |
| 731 | if op == scanner::TokenKind::Star { |
| 732 | set value *= rhs; |
| 733 | } else { |
| 734 | if rhs == 0 { |
| 735 | throw fail(a, "division by zero"); |
| 736 | } |
| 737 | set value /= rhs; |
| 738 | } |
| 739 | } |
| 740 | return value; |
| 741 | } |
| 742 | |
| 743 | /// Parse any number of prefix `-` (negate) or `+` (no-op) signs followed by an expression atom. |
| 744 | fn parseExprUnary(a: *mut super::Assembler) -> i64 throws (super::Error) { |
| 745 | if consume(a, scanner::TokenKind::Minus) { |
| 746 | return -(try parseExprUnary(a)); |
| 747 | } |
| 748 | if consume(a, scanner::TokenKind::Plus) { |
| 749 | return try parseExprUnary(a); |
| 750 | } |
| 751 | return try parseExprPrimary(a); |
| 752 | } |
| 753 | |
| 754 | /// Parse an expression atom: a parenthesized expression, an integer literal, or a named constant; any other token throws "expected expression". |
| 755 | fn parseExprPrimary(a: *mut super::Assembler) -> i64 throws (super::Error) { |
| 756 | if consume(a, scanner::TokenKind::LParen) { |
| 757 | let value = try parseExpr(a); |
| 758 | try expect(a, scanner::TokenKind::RParen, "expected `)`"); |
| 759 | return value; |
| 760 | } |
| 761 | if a.scan.current.kind == scanner::TokenKind::Number { |
| 762 | return try parseInteger(a); |
| 763 | } |
| 764 | if a.scan.current.kind == scanner::TokenKind::Ident { |
| 765 | return try parseConstantValue(a); |
| 766 | } |
| 767 | throw fail(a, "expected expression"); |
| 768 | } |
| 769 | |
/// Read a symbol name and return the value stored for it in the assembler's
/// constant map, throwing "undefined constant" when no entry exists.
fn parseConstantValue(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let ident = try parseSymbolName(a);
    // Errors are reported at the offset of the previous token.
    let errorOffset = a.scan.previous.offset;
    let resolved = dict::get(&a.constMap, ident) else {
        throw super::Error::Invalid { offset: errorOffset, message: "undefined constant" };
    };
    return resolved as i64;
}
| 778 | |
/// Read a symbol name and translate it through `lookupCsr`, throwing
/// "unknown CSR" when the lookup yields nothing.
fn parseCsr(a: *mut super::Assembler) -> u32 throws (super::Error) {
    let ident = try parseSymbolName(a);
    // Errors are reported at the offset of the previous token.
    let errorOffset = a.scan.previous.offset;
    let number = lookupCsr(ident) else {
        throw super::Error::Invalid { offset: errorOffset, message: "unknown CSR" };
    };
    return number;
}
| 787 | |
/// Parse a memory operand of the form `offset(base)`.
///
/// The offset is optional: when the operand starts directly with `(`, the
/// offset is zero.
fn parseMemory(a: *mut super::Assembler) -> MemOperand throws (super::Error) {
    let mut disp: i32 = 0;
    if a.scan.current.kind <> scanner::TokenKind::LParen {
        // An explicit offset precedes the parenthesized base register.
        let raw = try parseValue(a);
        set disp = try expectSmallImmValue(a, raw);
    }
    try expect(a, scanner::TokenKind::LParen, "expected `(`");
    let reg = try parseRegister(a);
    try expect(a, scanner::TokenKind::RParen, "expected `)`");

    return MemOperand { base: reg, offset: disp };
}
| 800 | |
/// Parse a value and validate it as a small (signed 12-bit) immediate.
fn parseSmallImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    return try expectSmallImmValue(a, raw);
}
| 805 | |
/// Parse a value, narrow it to i32, and require that it is a valid branch
/// immediate according to `encode::isBranchImm`.
fn parseBranchImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let imm = try expectI32Value(a, raw, "branch immediate out of range");
    if encode::isBranchImm(imm) {
        return imm;
    }
    throw fail(a, "branch immediate out of range");
}
| 814 | |
/// Parse a value, narrow it to i32, and require that it is a valid jump
/// immediate according to `encode::isJumpImm`.
fn parseJumpImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let imm = try expectI32Value(a, raw, "jump immediate out of range");
    if encode::isJumpImm(imm) {
        return imm;
    }
    throw fail(a, "jump immediate out of range");
}
| 823 | |
/// Consume a number token and convert its source text to an i64.
///
/// Throws "expected number" when the current token is not a number, and
/// "invalid integer literal" when the text cannot be converted.
fn parseInteger(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let numberTok = try expectToken(a, scanner::TokenKind::Number, "expected number");
    let parsed = parseIntegerText(numberTok.source) else {
        throw failOnToken(numberTok, "invalid integer literal");
    };
    return parsed;
}
| 832 | |
/// Convert integer literal text to an i64.
///
/// Returns nil when the text fails to parse or when the signed value does not
/// fit in an i64.
fn parseIntegerText(text: *[u8]) -> ?i64 {
    let lit = try fmt::parseInt(text) catch {
        return nil;
    };
    if not lit.negative {
        // Non-negative: the magnitude must not exceed the i64 maximum.
        if lit.magnitude > parser::I64_MAX_MAGNITUDE {
            return nil;
        }
        return lit.magnitude as i64;
    }
    if lit.magnitude > parser::I64_MIN_MAGNITUDE {
        return nil;
    }
    // The minimum is returned as a constant instead of negating its magnitude.
    if lit.magnitude == parser::I64_MIN_MAGNITUDE {
        return parser::I64_MIN;
    }
    return -(lit.magnitude as i64);
}
| 852 | |
/// Decode a character literal token's source text into a single byte,
/// returning nil when `fmt::parseChar` fails.
fn parseCharLiteral(tok: scanner::Token) -> ?u8 {
    let byte = try fmt::parseChar(tok.source) catch {
        return nil;
    };
    return byte;
}