Add RV64 assembly support
364f0a910e6be6ef9be91ffcf1c6f159d8a4ae6eb70c4f403278389c2f7a9c98
This allows for modules written in RV64 assembly language to be compiled and linked with Radiance modules.
1 parent
e5efba42
Makefile
+13 -2
| 67 | 67 | # Binary Tests |
|
| 68 | 68 | ||
| 69 | 69 | BIN_TEST_DIR := test/tests |
|
| 70 | 70 | # Only tests with `//! returns:` are compiled to binaries and executed. |
|
| 71 | 71 | BIN_TEST_EXE_SRC := $(shell grep -rl '^//! returns:' $(BIN_TEST_DIR)) |
|
| 72 | - | BIN_TEST_EXE_BIN := $(BIN_TEST_EXE_SRC:.rad=.rv64) |
|
| 72 | + | BIN_TEST_RAD_EXE_SRC := $(filter %.rad,$(BIN_TEST_EXE_SRC)) |
|
| 73 | + | BIN_TEST_RAS_EXE_SRC := $(filter %.ras,$(BIN_TEST_EXE_SRC)) |
|
| 74 | + | BIN_TEST_RAD_ASM_SRC := $(wildcard $(BIN_TEST_RAD_EXE_SRC:.rad=.ras)) |
|
| 75 | + | BIN_TEST_EXE_BIN := $(patsubst %.rad,%.rv64,$(BIN_TEST_RAD_EXE_SRC)) \ |
|
| 76 | + | $(patsubst %.ras,%.rv64,$(BIN_TEST_RAS_EXE_SRC)) |
|
| 73 | 77 | BIN_RUNNER := test/runner.rv64 |
|
| 74 | 78 | BIN_TEST_RUN := test/run |
|
| 75 | 79 | ||
| 76 | 80 | bin-test: $(BIN_RUNNER) $(BIN_TEST_EXE_BIN) |
|
| 77 | 81 | @echo |
| 80 | 84 | # Runner binary: the lowering IL checker. |
|
| 81 | 85 | $(BIN_RUNNER): test/runner.rad $(STD_LIB) $(RAD_BIN) |
|
| 82 | 86 | @echo "radiance test/runner.rad => $@" |
|
| 83 | 87 | @$(RADIANCE) $(STD) -pkg runner -mod test/runner.rad -entry runner -o $@ |
|
| 84 | 88 | ||
| 89 | + | # A `.rad` executable test can have a same-basename `.ras` module. |
|
| 90 | + | $(patsubst %.ras,%.rv64,$(BIN_TEST_RAD_ASM_SRC)): %.rv64: %.ras |
|
| 91 | + | ||
| 85 | 92 | # Compile each executable test to a binary. |
|
| 86 | 93 | $(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.rad $(RAD_BIN) |
|
| 87 | 94 | @echo "radiance $< => $@" |
|
| 88 | - | @$(RADIANCE) -pkg test -mod $< -o $@ |
|
| 95 | + | @$(RADIANCE) -pkg test -mod $< $(patsubst %,-mod %,$(wildcard $(@:.rv64=.ras))) -o $@ |
|
| 96 | + | ||
| 97 | + | $(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.ras $(BIN_RUNNER) |
|
| 98 | + | @echo "asm $< => $@" |
|
| 99 | + | @$(EMU) $(EMU_FLAGS) -run $(BIN_RUNNER) -- assemble $< $@ |
|
| 89 | 100 | ||
| 90 | 101 | clean-bin-test: |
|
| 91 | 102 | @rm -f $(BIN_RUNNER) \ |
|
| 92 | 103 | $(BIN_RUNNER:.rv64=.rv64.debug) \ |
|
| 93 | 104 | $(BIN_RUNNER:.rv64=.rv64.s) \ |
compiler/radiance.rad
+203 -97
| 11 | 11 | use std::lang::strings; |
|
| 12 | 12 | use std::lang::package; |
|
| 13 | 13 | use std::lang::il; |
|
| 14 | 14 | use std::lang::lower; |
|
| 15 | 15 | use std::arch::rv64; |
|
| 16 | + | use std::arch::rv64::asm; |
|
| 16 | 17 | use std::arch::rv64::printer; |
|
| 17 | 18 | use std::lang::sexpr; |
|
| 18 | 19 | use std::lang::gen::data; |
|
| 19 | 20 | use std::lang::gen::types; |
|
| 20 | 21 | use std::sys; |
| 29 | 30 | constant MAX_TOTAL_MODULES: u32 = 192; |
|
| 30 | 31 | /// Source code buffer arena (2 MB). |
|
| 31 | 32 | constant MAX_SOURCES_SIZE: u32 = 2097152; |
|
| 32 | 33 | /// Maximum number of test functions we can discover. |
|
| 33 | 34 | constant MAX_TESTS: u32 = 1024; |
|
| 35 | + | /// Maximum number of assembly source paths we can load per package. |
|
| 36 | + | constant MAX_ASM_MODULES: u32 = 64; |
|
| 34 | 37 | ||
| 35 | 38 | /// Temporary arena size (32 MB) - retains all parsed AST until resolution. |
|
| 36 | 39 | /// Used for: AST during parsing, then codegen scratch space. |
|
| 37 | 40 | constant TEMP_ARENA_SIZE: u32 = 33554432; |
|
| 38 | 41 | /// Main arena size (64 MB) - lives throughout compilation. |
| 66 | 69 | ||
| 67 | 70 | /// Read-only data file extension. |
|
| 68 | 71 | constant RO_DATA_EXT: *[u8] = ".ro.data"; |
|
| 69 | 72 | /// Read-write data file extension. |
|
| 70 | 73 | constant RW_DATA_EXT: *[u8] = ".rw.data"; |
|
| 71 | - | /// Maximum rodata size (1MB). |
|
| 72 | - | constant MAX_RO_DATA_SIZE: u32 = 1048576; |
|
| 73 | - | /// Maximum rwdata size (1MB). |
|
| 74 | - | constant MAX_RW_DATA_SIZE: u32 = 1048576; |
|
| 74 | + | /// Maximum rodata size (4MB). |
|
| 75 | + | constant MAX_RO_DATA_SIZE: u32 = 4194304; |
|
| 76 | + | /// Maximum rwdata size (4MB). |
|
| 77 | + | constant MAX_RW_DATA_SIZE: u32 = 4194304; |
|
| 75 | 78 | /// Maximum path length. |
|
| 76 | 79 | constant MAX_PATH_LEN: u32 = 256; |
|
| 77 | 80 | /// Read-only data buffer. |
|
| 78 | 81 | static RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined; |
|
| 79 | 82 | /// Read-write data buffer. |
|
| 80 | 83 | static RW_DATA_BUF: [u8; MAX_RW_DATA_SIZE] = undefined; |
|
/// Capacity of the assembler text buffer, in 32-bit words (262144 words = 1 MB).
constant MAX_ASM_TEXT_WORDS: u32 = 262144;

/// Assembly module source buffer.
static ASM_SOURCE_BUF: [u8; MAX_SOURCES_SIZE] = undefined;
/// Temporary assembly text buffer.
static ASM_TEXT_BUF: [u32; MAX_ASM_TEXT_WORDS] = undefined;
/// Temporary assembly data buffer.
static ASM_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
/// Accumulated assembly read-only data.
static ASM_RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;

/// Assembly source file extension.
constant ASM_SOURCE_EXT: *[u8] = ".ras";
| 81 | 95 | ||
| 82 | 96 | /// Usage string. |
|
| 83 | 97 | constant USAGE: *[u8] = |
|
| 84 | 98 | "usage: radiance -pkg <name> -mod <input>.. [-pkg <name> -mod <input>..] -entry <pkg> -o <output>\n"; |
|
| 85 | 99 |
| 108 | 122 | modPath: *[*[u8]], |
|
| 109 | 123 | /// Test function name (eg. "testFoo"). |
|
| 110 | 124 | fnName: *[u8], |
|
| 111 | 125 | } |
|
| 112 | 126 | ||
/// Source inputs belonging to one command-line package.
///
/// Filled in while parsing arguments: a `-mod` path ending in `.ras` is
/// stored in `asmPaths`, every other `-mod` path is stored in `radPaths`.
record PackageInput {
    /// Package name from the `-pkg` argument.
    name: *[u8],
    /// Radiance source paths for this package.
    /// Only the first `radPathCount` slots are initialized.
    radPaths: [*[u8]; MAX_LOADED_MODULES],
    /// Number of Radiance source paths.
    radPathCount: u32,
    /// Assembly source paths for this package.
    /// Only the first `asmPathCount` slots are initialized.
    asmPaths: [*[u8]; MAX_ASM_MODULES],
    /// Number of assembly source paths.
    asmPathCount: u32,
}
| 140 | + | ||
| 113 | 141 | /// Compilation context. |
|
| 114 | 142 | record CompileContext { |
|
| 115 | 143 | /// Array of packages to compile. |
|
| 116 | 144 | packages: [package::Package; MAX_PACKAGES], |
|
| 145 | + | /// Driver inputs for each package slot. |
|
| 146 | + | inputs: [PackageInput; MAX_PACKAGES], |
|
| 117 | 147 | /// Number of packages. |
|
| 118 | 148 | packageCount: u32, |
|
| 119 | 149 | /// Index of entry package. |
|
| 120 | 150 | entryPkgIdx: ?u32, |
|
| 121 | 151 | /// Global module graph shared by all packages. |
| 162 | 192 | debug: bool, |
|
| 163 | 193 | /// How the generated program should handle entry. |
|
| 164 | 194 | entryMode: CodegenEntryMode, |
|
| 165 | 195 | } |
|
| 166 | 196 | ||
/// Report a driver error on the error stream and return `Error::Other`.
///
/// The output is the `radiance: ` prefix, then every element of `msg` with a
/// single space between neighbours, then a newline. Returning the error value
/// lets call sites write `throw error(&[...])` as one statement.
fn error(msg: *[*[u8]]) -> Error {
    io::printError("radiance: ");

    for word, n in msg {
        // The separator goes in front of every part except the first.
        if n > 0 {
            io::printError(" ");
        }
        io::printError(word);
    }
    io::printError("\n");

    return Error::Other;
}
| 210 | + | ||
| 167 | 211 | /// Print a log line for the given package. |
|
| 168 | 212 | fn pkgLog(pkg: *package::Package, msg: *[*[u8]]) { |
|
| 169 | 213 | io::printError("radiance: "); |
|
| 170 | 214 | io::printError(pkg.name); |
|
| 171 | 215 | io::printError(": "); |
| 177 | 221 | } |
|
| 178 | 222 | } |
|
| 179 | 223 | io::printError("\n"); |
|
| 180 | 224 | } |
|
| 181 | 225 | ||
/// Check whether `ext` is a suffix of `path`.
///
/// A `path` shorter than `ext` never matches; otherwise the last `ext.len`
/// bytes of `path` are compared against `ext` with `mem::eq`.
fn hasExtension(path: *[u8], ext: *[u8]) -> bool {
    if path.len >= ext.len {
        let tail = &path[path.len - ext.len..];
        return mem::eq(tail, ext);
    }
    return false;
}
| 234 | + | ||
/// Build the initial `PackageInput` for the package called `name`.
///
/// Both path counts start at zero and the path arrays are left `undefined`.
fn packageInput(name: *[u8]) -> PackageInput {
    let empty = PackageInput {
        name,
        radPaths: undefined,
        radPathCount: 0,
        asmPaths: undefined,
        asmPathCount: 0,
    };
    return empty;
}
| 245 | + | ||
| 182 | 246 | /// Register, load, and parse `path` within `pkg`. |
|
| 183 | 247 | fn processModule( |
|
| 184 | 248 | pkg: *mut package::Package, |
|
| 185 | 249 | graph: *mut module::ModuleGraph, |
|
| 186 | 250 | path: *[u8], |
| 188 | 252 | sourceArena: *mut alloc::Arena |
|
| 189 | 253 | ) throws (Error) { |
|
| 190 | 254 | pkgLog(pkg, &["parsing", "(", path, ")", ".."]); |
|
| 191 | 255 | ||
| 192 | 256 | let moduleId = try package::registerModule(pkg, graph, path) catch { |
|
| 193 | - | io::printError("radiance: error registering module\n"); |
|
| 194 | - | throw Error::Other; |
|
| 257 | + | throw error(&["error registering module"]); |
|
| 195 | 258 | }; |
|
| 196 | 259 | // Read file into remaining arena space. |
|
| 197 | 260 | let buffer = alloc::remainingBuf(sourceArena); |
|
| 198 | 261 | if buffer.len == 0 { |
|
| 199 | - | io::printError("radiance: fatal: source arena exhausted\n"); |
|
| 200 | - | throw Error::Other; |
|
| 262 | + | throw error(&["fatal:", "source arena exhausted"]); |
|
| 201 | 263 | } |
|
| 202 | 264 | let source = unix::readFile(path, buffer) else { |
|
| 203 | - | io::printError("radiance: error reading file\n"); |
|
| 204 | - | throw Error::Other; |
|
| 265 | + | throw error(&["error reading file"]); |
|
| 205 | 266 | }; |
|
| 206 | 267 | if source.len == buffer.len { |
|
| 207 | - | io::printError("radiance: fatal: source arena too small, file truncated: "); |
|
| 208 | - | io::printError(path); |
|
| 209 | - | io::printError("\n"); |
|
| 210 | - | throw Error::Other; |
|
| 268 | + | throw error(&["fatal:", "source arena too small, file truncated:", path]); |
|
| 211 | 269 | } |
|
| 212 | 270 | // Commit only what was read. |
|
| 213 | 271 | alloc::commit(sourceArena, source.len); |
|
| 214 | 272 | ||
| 215 | 273 | let ast = try parser::parse(scanner::SourceLoc::File(path), source, nodeArena, &mut STRING_POOL) catch { |
|
| 216 | 274 | throw Error::Other; |
|
| 217 | 275 | }; |
|
| 218 | 276 | try module::setAst(graph, moduleId, ast) catch { |
|
| 219 | - | io::printError("radiance: error setting AST\n"); |
|
| 220 | - | throw Error::Other; |
|
| 277 | + | throw error(&["error setting AST"]); |
|
| 221 | 278 | }; |
|
| 222 | 279 | try module::setSource(graph, moduleId, source) catch { |
|
| 223 | - | io::printError("radiance: error setting source\n"); |
|
| 224 | - | throw Error::Other; |
|
| 280 | + | throw error(&["error setting source"]); |
|
| 225 | 281 | }; |
|
| 226 | 282 | } |
|
| 227 | 283 | ||
/// Advance `*idx` to the following argument and return that argument.
///
/// `*idx` is incremented unconditionally. If that moves it past the end of
/// `args`, the parts in `msg` are reported through `error` and the resulting
/// error is thrown.
fn nextArg(args: *[*[u8]], idx: *mut u32, msg: *[*[u8]]) -> *[u8] throws (Error) {
    set *idx += 1;

    if *idx < args.len {
        return args[*idx];
    }
    throw error(msg);
}
| 237 | 292 | ||
| 238 | 293 | /// Parse CLI arguments and return compilation context. |
| 244 | 299 | let mut debugEnabled = false; |
|
| 245 | 300 | let mut outputPath: ?*[u8] = nil; |
|
| 246 | 301 | let mut dump = Dump::None; |
|
| 247 | 302 | let mut entryPkgName: ?*[u8] = nil; |
|
| 248 | 303 | ||
| 249 | - | // Per-package module path tracking. |
|
| 250 | - | let mut moduleCounts: [u32; MAX_PACKAGES] = undefined; |
|
| 251 | - | let mut modulePaths: [[*[u8]; MAX_LOADED_MODULES]; MAX_PACKAGES] = undefined; |
|
| 252 | - | let mut pkgNames: [*[u8]; MAX_PACKAGES] = undefined; |
|
| 304 | + | // Per-package source path tracking. |
|
| 305 | + | let mut inputs: [PackageInput; MAX_PACKAGES] = undefined; |
|
| 253 | 306 | let mut pkgCount: u32 = 0; |
|
| 254 | 307 | let mut currentPkgIdx: ?u32 = nil; |
|
| 255 | 308 | ||
| 256 | - | for i in 0..MAX_PACKAGES { |
|
| 257 | - | set moduleCounts[i] = 0; |
|
| 258 | - | } |
|
| 259 | 309 | if args.len == 0 { |
|
| 260 | 310 | io::printError(USAGE); |
|
| 261 | 311 | throw Error::Other; |
|
| 262 | 312 | } |
|
| 263 | 313 | let mut idx: u32 = 0; |
|
| 264 | 314 | ||
| 265 | 315 | while idx < args.len { |
|
| 266 | 316 | let arg = args[idx]; |
|
| 267 | 317 | if mem::eq(arg, "-pkg") { |
|
| 268 | - | try nextArg(args, &mut idx, "radiance: `-pkg` requires a package name\n"); |
|
| 318 | + | try nextArg(args, &mut idx, &["`-pkg` requires a package name"]); |
|
| 269 | 319 | if pkgCount >= MAX_PACKAGES { |
|
| 270 | - | io::printError("radiance: too many packages specified\n"); |
|
| 271 | - | throw Error::Other; |
|
| 320 | + | throw error(&["too many packages specified"]); |
|
| 272 | 321 | } |
|
| 273 | - | set pkgNames[pkgCount] = args[idx]; |
|
| 322 | + | set inputs[pkgCount] = packageInput(args[idx]); |
|
| 274 | 323 | set currentPkgIdx = pkgCount; |
|
| 275 | 324 | set pkgCount += 1; |
|
| 276 | 325 | } else if mem::eq(arg, "-mod") { |
|
| 277 | - | try nextArg(args, &mut idx, "radiance: `-mod` requires a module path\n"); |
|
| 326 | + | try nextArg(args, &mut idx, &["`-mod` requires a module path"]); |
|
| 278 | 327 | let pkgIdx = currentPkgIdx else { |
|
| 279 | - | io::printError("radiance: `-mod` must follow a `-pkg` argument\n"); |
|
| 280 | - | throw Error::Other; |
|
| 328 | + | throw error(&["`-mod` must follow a `-pkg` argument"]); |
|
| 281 | 329 | }; |
|
| 282 | - | if moduleCounts[pkgIdx] >= MAX_LOADED_MODULES { |
|
| 283 | - | io::printError("radiance: too many modules specified for package\n"); |
|
| 284 | - | throw Error::Other; |
|
| 330 | + | let input = &mut inputs[pkgIdx]; |
|
| 331 | + | if hasExtension(args[idx], ASM_SOURCE_EXT) { |
|
| 332 | + | if input.asmPathCount >= MAX_ASM_MODULES { |
|
| 333 | + | throw error(&["too many assembly modules specified"]); |
|
| 334 | + | } |
|
| 335 | + | set input.asmPaths[input.asmPathCount] = args[idx]; |
|
| 336 | + | set input.asmPathCount += 1; |
|
| 337 | + | } else { |
|
| 338 | + | if input.radPathCount >= MAX_LOADED_MODULES { |
|
| 339 | + | throw error(&["too many modules specified for package"]); |
|
| 340 | + | } |
|
| 341 | + | set input.radPaths[input.radPathCount] = args[idx]; |
|
| 342 | + | set input.radPathCount += 1; |
|
| 285 | 343 | } |
|
| 286 | - | set modulePaths[pkgIdx][moduleCounts[pkgIdx]] = args[idx]; |
|
| 287 | - | set moduleCounts[pkgIdx] += 1; |
|
| 288 | 344 | } else if mem::eq(arg, "-entry") { |
|
| 289 | - | try nextArg(args, &mut idx, "radiance: `-entry` requires a package name\n"); |
|
| 345 | + | try nextArg(args, &mut idx, &["`-entry` requires a package name"]); |
|
| 290 | 346 | set entryPkgName = args[idx]; |
|
| 291 | 347 | } else if mem::eq(arg, "-test") { |
|
| 292 | 348 | set buildTest = true; |
|
| 293 | 349 | } else if mem::eq(arg, "-debug") { |
|
| 294 | 350 | set debugEnabled = true; |
|
| 295 | 351 | } else if mem::eq(arg, "-o") { |
|
| 296 | - | try nextArg(args, &mut idx, "radiance: `-o` requires an output path\n"); |
|
| 352 | + | try nextArg(args, &mut idx, &["`-o` requires an output path"]); |
|
| 297 | 353 | set outputPath = args[idx]; |
|
| 298 | 354 | } else if mem::eq(arg, "-dump") { |
|
| 299 | - | try nextArg(args, &mut idx, "radiance: `-dump` requires a mode (eg. ast)\n"); |
|
| 355 | + | try nextArg(args, &mut idx, &["`-dump` requires a mode (eg. ast)"]); |
|
| 300 | 356 | let mode = args[idx]; |
|
| 301 | 357 | if mem::eq(mode, "ast") { |
|
| 302 | 358 | set dump = Dump::Ast; |
|
| 303 | 359 | } else if mem::eq(mode, "graph") { |
|
| 304 | 360 | set dump = Dump::Graph; |
|
| 305 | 361 | } else if mem::eq(mode, "il") { |
|
| 306 | 362 | set dump = Dump::Il; |
|
| 307 | 363 | } else if mem::eq(mode, "asm") { |
|
| 308 | 364 | set dump = Dump::Asm; |
|
| 309 | 365 | } else { |
|
| 310 | - | io::printError("radiance: unknown dump mode `"); |
|
| 311 | - | io::printError(mode); |
|
| 312 | - | io::printError("` (expected: ast, graph, il, asm)\n"); |
|
| 313 | - | throw Error::Other; |
|
| 366 | + | throw error(&["unknown dump mode", mode, "(expected: ast, graph, il, asm)"]); |
|
| 314 | 367 | } |
|
| 315 | 368 | } else { |
|
| 316 | - | io::printError("radiance: unknown argument `"); |
|
| 317 | - | io::printError(arg); |
|
| 318 | - | io::printError("`\n"); |
|
| 319 | - | throw Error::Other; |
|
| 369 | + | throw error(&["unknown argument", arg]); |
|
| 320 | 370 | } |
|
| 321 | 371 | set idx += 1; |
|
| 322 | 372 | } |
|
| 323 | 373 | if pkgCount == 0 { |
|
| 324 | - | io::printError("radiance: no package specified\n"); |
|
| 325 | - | throw Error::Other; |
|
| 374 | + | throw error(&["no package specified"]); |
|
| 375 | + | } |
|
| 376 | + | for i in 0..pkgCount { |
|
| 377 | + | if inputs[i].radPathCount == 0 { |
|
| 378 | + | throw error(&["package", inputs[i].name, "has no Radiance modules specified"]); |
|
| 379 | + | } |
|
| 326 | 380 | } |
|
| 327 | 381 | ||
| 328 | 382 | // Determine entry package index. |
|
| 329 | 383 | let mut entryPkgIdx: ?u32 = nil; |
|
| 330 | 384 | if pkgCount == 1 { |
|
| 331 | 385 | // Single package: it is the entry. |
|
| 332 | 386 | set entryPkgIdx = 0; |
|
| 333 | 387 | } else { |
|
| 334 | 388 | // Multiple packages: need -entry. |
|
| 335 | 389 | let entryName = entryPkgName else { |
|
| 336 | - | io::printError("radiance: `-entry` required when multiple packages specified\n"); |
|
| 337 | - | throw Error::Other; |
|
| 390 | + | throw error(&["`-entry` required when multiple packages specified"]); |
|
| 338 | 391 | }; |
|
| 339 | 392 | for i in 0..pkgCount { |
|
| 340 | - | if mem::eq(pkgNames[i], entryName) { |
|
| 393 | + | if mem::eq(inputs[i].name, entryName) { |
|
| 341 | 394 | set entryPkgIdx = i; |
|
| 342 | 395 | break; |
|
| 343 | 396 | } |
|
| 344 | 397 | } |
|
| 345 | 398 | if entryPkgIdx == nil { |
|
| 346 | - | io::printError("radiance: fatal: entry package `"); |
|
| 347 | - | io::printError(entryName); |
|
| 348 | - | io::printError("` not found\n"); |
|
| 349 | - | ||
| 350 | - | throw Error::Other; |
|
| 399 | + | throw error(&["fatal:", "entry package", entryName, "not found"]); |
|
| 351 | 400 | } |
|
| 352 | 401 | } |
|
| 353 | 402 | let graph = module::moduleGraph(&mut MODULE_ENTRIES[..], &mut STRING_POOL, arena); |
|
| 354 | 403 | let mut ctx = CompileContext { |
|
| 355 | 404 | packages: undefined, |
|
| 405 | + | inputs, |
|
| 356 | 406 | packageCount: pkgCount, |
|
| 357 | 407 | entryPkgIdx, |
|
| 358 | 408 | graph, |
|
| 359 | 409 | config: resolver::Config { buildTest }, |
|
| 360 | 410 | dump, |
| 362 | 412 | debug: debugEnabled, |
|
| 363 | 413 | }; |
|
| 364 | 414 | // Initialize and parse all packages. |
|
| 365 | 415 | let mut sourceArena = alloc::new(&mut MODULE_SOURCES[..]); |
|
| 366 | 416 | for i in 0..pkgCount { |
|
| 367 | - | package::init(&mut ctx.packages[i], i as u16, pkgNames[i], &mut STRING_POOL); |
|
| 417 | + | package::init(&mut ctx.packages[i], i as u16, ctx.inputs[i].name, &mut STRING_POOL); |
|
| 368 | 418 | ||
| 369 | - | for j in 0..moduleCounts[i] { |
|
| 370 | - | let path = modulePaths[i][j]; |
|
| 419 | + | for j in 0..ctx.inputs[i].radPathCount { |
|
| 420 | + | let path = ctx.inputs[i].radPaths[j]; |
|
| 371 | 421 | try processModule(&mut ctx.packages[i], &mut ctx.graph, path, arena, &mut sourceArena); |
|
| 372 | 422 | } |
|
| 373 | 423 | } |
|
| 374 | 424 | return ctx; |
|
| 375 | 425 | } |
|
| 376 | 426 | ||
/// Look up the package selected as the program entry.
///
/// Throws after reporting an error when the context has no entry package index.
fn getEntryPackage(ctx: *CompileContext) -> *package::Package throws (Error) {
    if let entryIdx = ctx.entryPkgIdx {
        return &ctx.packages[entryIdx];
    }
    throw error(&["no entry package specified"]);
}
| 385 | 434 | ||
/// Fetch the root module entry of `pkg` together with its AST.
///
/// Three things must be present, checked in this order: the package's root
/// module id, the graph entry for that id, and the entry's AST. The first
/// missing one is reported and an error is thrown.
fn getRootModule(pkg: *package::Package, graph: *module::ModuleGraph) -> RootModule throws (Error) {
    if let rootId = pkg.rootModuleId {
        if let rootEntry = module::get(graph, rootId) {
            if let rootAst = rootEntry.ast {
                return RootModule { entry: rootEntry, ast: rootAst };
            }
            throw error(&["root module has no AST"]);
        }
        throw error(&["root module entry not found"]);
    }
    throw error(&["no root module found"]);
}
| 402 | 448 | ||
| 403 | 449 | /// Dump the module graph. |
| 463 | 509 | low: *mut lower::Lowerer, |
|
| 464 | 510 | pkg: *mut package::Package, |
|
| 465 | 511 | isEntry: bool |
|
| 466 | 512 | ) throws (Error) { |
|
| 467 | 513 | let rootId = pkg.rootModuleId else { |
|
| 468 | - | io::printError("radiance: no root module found\n"); |
|
| 469 | - | throw Error::Other; |
|
| 514 | + | throw error(&["no root module found"]); |
|
| 470 | 515 | }; |
|
| 471 | 516 | // Set lowerer's package context for qualified name generation. |
|
| 472 | 517 | // TODO: We shouldn't have to call this manually. |
|
| 473 | 518 | lower::setPackage(low, &ctx.graph, pkg.name); |
|
| 474 | 519 |
| 483 | 528 | modId: u16, |
|
| 484 | 529 | isRoot: bool, |
|
| 485 | 530 | pkg: *package::Package |
|
| 486 | 531 | ) throws (Error) { |
|
| 487 | 532 | let entry = module::get(graph, modId) else { |
|
| 488 | - | io::printError("radiance: module entry not found\n"); |
|
| 489 | - | throw Error::Other; |
|
| 533 | + | throw error(&["module entry not found"]); |
|
| 490 | 534 | }; |
|
| 491 | 535 | let modAst = entry.ast else { |
|
| 492 | - | io::printError("radiance: module has no AST\n"); |
|
| 493 | - | throw Error::Other; |
|
| 536 | + | throw error(&["module has no AST"]); |
|
| 494 | 537 | }; |
|
| 495 | 538 | pkgLog(pkg, &["lowering", "(", entry.filePath, ")", ".."]); |
|
| 496 | 539 | ||
| 497 | 540 | try lower::lowerModule(low, modId, modAst, isRoot) catch err { |
|
| 498 | - | io::printError("radiance: internal error during lowering: "); |
|
| 541 | + | io::printError("radiance: "); |
|
| 542 | + | io::printError("internal error during lowering: "); |
|
| 499 | 543 | lower::printError(err); |
|
| 500 | 544 | io::printError("\n"); |
|
| 545 | + | ||
| 501 | 546 | throw Error::Other; |
|
| 502 | 547 | }; |
|
| 503 | 548 | // Recurse into children. |
|
| 504 | 549 | for i in 0..entry.childrenLen { |
|
| 505 | 550 | let childId = module::childAt(entry, i); |
| 623 | 668 | if let entry = module::get(&ctx.graph, modIdx as u16) { |
|
| 624 | 669 | collectModuleTests(entry, &mut tests[..], &mut testCount); |
|
| 625 | 670 | } |
|
| 626 | 671 | } |
|
| 627 | 672 | if testCount == 0 { |
|
| 628 | - | io::printError("radiance: fatal: no test functions found\n"); |
|
| 629 | - | throw Error::Other; |
|
| 673 | + | throw error(&["fatal:", "no test functions found"]); |
|
| 630 | 674 | } |
|
| 631 | 675 | let mut countBuf: [u8; 10] = undefined; |
|
| 632 | 676 | let countStr = fmt::formatU32(testCount, &mut countBuf[..]); |
|
| 633 | 677 | pkgLog(entryPkg, &["found", countStr, "test(s)"]); |
|
| 634 | 678 |
| 725 | 769 | set pos += try! mem::copy(&mut path[pos..], basePath); |
|
| 726 | 770 | set pos += try! mem::copy(&mut path[pos..], ext); |
|
| 727 | 771 | set path[pos] = 0; // Null-terminate for syscall. |
|
| 728 | 772 | ||
| 729 | 773 | if not unix::writeFile(&path[..pos], data) { |
|
| 730 | - | io::printError("radiance: fatal: failed to write data file\n"); |
|
| 731 | - | throw Error::Other; |
|
| 774 | + | throw error(&["fatal:", "failed to write data file"]); |
|
| 732 | 775 | } |
|
| 733 | 776 | } |
|
| 734 | 777 | ||
| 735 | 778 | /// Serialize debug entries and write the `.debug` file. |
|
| 736 | 779 | /// Resolves module IDs to file paths via the module graph. |
| 779 | 822 | pkgScope: &mut RESOLVER_PKG_SCOPE, |
|
| 780 | 823 | errors: &mut RESOLVER_ERRORS[..], |
|
| 781 | 824 | }; |
|
| 782 | 825 | let mut res = resolver::resolver(storage, ctx.config); |
|
| 783 | 826 | ||
| 784 | - | // Build package inputs. |
|
| 785 | - | let mut packages: [resolver::Pkg; MAX_PACKAGES] = undefined; |
|
| 827 | + | // Build the semantic package list consumed by the resolver. |
|
| 828 | + | let mut resolverPkgs: [resolver::Pkg; MAX_PACKAGES] = undefined; |
|
| 829 | + | let mut resolverPackageCount: u32 = 0; |
|
| 786 | 830 | for i in 0..ctx.packageCount { |
|
| 787 | 831 | let pkg = &ctx.packages[i]; |
|
| 788 | 832 | let root = try getRootModule(pkg, &ctx.graph); |
|
| 789 | 833 | ||
| 790 | - | set packages[i] = resolver::Pkg { |
|
| 834 | + | set resolverPkgs[resolverPackageCount] = resolver::Pkg { |
|
| 791 | 835 | rootEntry: root.entry, |
|
| 792 | 836 | rootAst: root.ast, |
|
| 793 | 837 | }; |
|
| 838 | + | set resolverPackageCount += 1; |
|
| 794 | 839 | } |
|
| 795 | 840 | ||
| 796 | 841 | // Resolve all packages. |
|
| 797 | 842 | // TODO: Fix this error printing dance. |
|
| 798 | - | let diags = try resolver::resolve(&mut res, &ctx.graph, &packages[..ctx.packageCount]) catch { |
|
| 843 | + | let diags = try resolver::resolve(&mut res, &ctx.graph, &resolverPkgs[..resolverPackageCount]) catch { |
|
| 799 | 844 | let diags = resolver::Diagnostics { errors: res.errors }; |
|
| 800 | 845 | resolver::printer::printDiagnostics(&diags, &res); |
|
| 801 | 846 | throw Error::Other; |
|
| 802 | 847 | }; |
|
| 803 | 848 | if not resolver::success(&diags) { |
|
| 804 | 849 | resolver::printer::printDiagnostics(&diags, &res); |
|
| 805 | - | io::print("radiance: failed: "); |
|
| 806 | - | io::printU32(diags.errors.len); |
|
| 807 | - | io::printLn(" errors"); |
|
| 808 | - | throw Error::Other; |
|
| 850 | + | let mut countBuf: [u8; 10] = undefined; |
|
| 851 | + | let countStr = fmt::formatU32(diags.errors.len, &mut countBuf[..]); |
|
| 852 | + | throw error(&["failed:", countStr, "errors"]); |
|
| 809 | 853 | } |
|
| 810 | 854 | return res; |
|
| 811 | 855 | } |
|
| 812 | 856 | ||
| 813 | - | /// Emit one lowered function to RV64 codegen and reclaim its IL arena. |
|
| 857 | + | /// Emit one lowered function to machine code and reclaim its IL arena. |
|
| 814 | 858 | fn generateLoweredFn(ctxPtr: *mut opaque, func: *il::Fn, role: lower::FnRole) { |
|
| 815 | 859 | let ctx = ctxPtr as *mut CodegenSinkContext; |
|
| 816 | 860 | ||
| 817 | 861 | match role { |
|
| 818 | 862 | case lower::FnRole::Default => { |
| 827 | 871 | } |
|
| 828 | 872 | rv64::generateFunction(ctx.generator, func, ctx.codegenArena); |
|
| 829 | 873 | alloc::reset(ctx.fnArena); |
|
| 830 | 874 | } |
|
| 831 | 875 | ||
/// Assemble the `.ras` file at `path` and hand the result to `generator`.
///
/// The file is read into `ASM_SOURCE_BUF` and assembled with `ASM_TEXT_BUF` and
/// `ASM_DATA_BUF` as output storage, using `rv64::RO_DATA_BASE + *asmDataLen`
/// as the data base address. The module's data bytes are then copied into
/// `ASM_RO_DATA_BUF` at offset `*asmDataLen`, `*asmDataLen` is moved past them
/// so the next module gets the following base address, and the assembled
/// program is passed to `rv64::addAssembly`.
///
/// Throws when the file cannot be read, when the read fills the whole source
/// buffer, when assembly fails, or when the data would not fit in
/// `ASM_RO_DATA_BUF`.
fn assembleAsmModule(
    generator: *mut rv64::Generator,
    pkg: *package::Package,
    path: *[u8],
    asmDataLen: *mut u32,
    arena: *mut alloc::Arena
) throws (Error) {
    pkgLog(pkg, &["asm:", "parsing", "(", path, ")", ".."]);

    let contents = unix::readFile(path, &mut ASM_SOURCE_BUF[..]) else {
        throw error(&["error reading assembly file"]);
    };
    // A read that fills the buffer completely is treated as too large.
    if contents.len == ASM_SOURCE_BUF.len {
        throw error(&["fatal:", "assembly source too large:", path]);
    }
    // This module's data starts right after the bytes accumulated so far.
    let dataOffset = *asmDataLen;
    let assembled = try asm::assemble(
        asm::scanner::SourceKind::File { path },
        contents,
        &mut ASM_TEXT_BUF[..],
        &mut ASM_DATA_BUF[..],
        arena,
        &mut STRING_POOL,
        rv64::RO_DATA_BASE + dataOffset
    ) catch {
        throw error(&["assembly failed:", path]);
    };
    let dataEnd = dataOffset + assembled.data.len;
    if dataEnd > ASM_RO_DATA_BUF.len {
        throw error(&["fatal:", "assembly rodata too large"]);
    }
    try! mem::copy(&mut ASM_RO_DATA_BUF[dataOffset..], assembled.data);
    set *asmDataLen = dataEnd;

    rv64::addAssembly(generator, assembled);
}
| 915 | + | ||
/// Assemble every assembly path recorded in `ctx.inputs`, package by package.
///
/// Each path is passed to `assembleAsmModule` together with a running count of
/// the data bytes accumulated so far. Returns the slice of `ASM_RO_DATA_BUF`
/// covering that count.
fn assembleAsmInputs(
    ctx: *CompileContext,
    generator: *mut rv64::Generator,
    arena: *mut alloc::Arena
) -> *[u8] throws (Error) {
    let mut total: u32 = 0;

    for p in 0..ctx.packageCount {
        let owner = &ctx.packages[p];
        let count = ctx.inputs[p].asmPathCount;

        for k in 0..count {
            let asmPath = ctx.inputs[p].asmPaths[k];
            try assembleAsmModule(generator, owner, asmPath, &mut total, arena);
        }
    }
    return &ASM_RO_DATA_BUF[..total];
}
| 938 | + | ||
| 832 | 939 | /// Lower all packages while streaming each lowered function into RV64 codegen. |
|
| 833 | 940 | fn lowerAndGenerateAllPackages( |
|
| 834 | 941 | ctx: *mut CompileContext, |
|
| 835 | 942 | res: *mut resolver::Resolver, |
|
| 836 | 943 | fnArena: *mut alloc::Arena, |
| 867 | 974 | set low.output = lower::FnOutput::Stream(lower::FnSink { |
|
| 868 | 975 | ctx: &mut codegenCtx as *mut opaque, |
|
| 869 | 976 | emitFn: generateLoweredFn, |
|
| 870 | 977 | }); |
|
| 871 | 978 | try lowerAllPackagesInto(ctx, res, &mut low); |
|
| 979 | + | let asmData = try assembleAsmInputs(ctx, &mut generator, &mut res.arena); |
|
| 872 | 980 | ||
| 873 | 981 | match generator.entryPatch { |
|
| 874 | 982 | case rv64::EntryPatch::Reserved(targetName) => { |
|
| 875 | 983 | if targetName == nil { |
|
| 876 | - | io::printError("radiance: fatal: no default function found\n"); |
|
| 877 | - | throw Error::Other; |
|
| 984 | + | throw error(&["fatal:", "no default function found"]); |
|
| 878 | 985 | } |
|
| 879 | 986 | } |
|
| 880 | 987 | else => {} |
|
| 881 | 988 | } |
|
| 882 | 989 | if let path = codegenOptions.logPath { |
|
| 883 | 990 | pkgLog(entryPkg, &["generating code", "(", path, ")", ".."]); |
|
| 884 | 991 | } |
|
| 885 | - | return rv64::finishProgram(&mut generator, &low.data[..], storage, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]); |
|
| 992 | + | return rv64::finishProgram(&mut generator, &low.data[..], storage, asmData, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]); |
|
| 886 | 993 | } |
|
| 887 | 994 | ||
| 888 | 995 | /// Lower, optionally dump, and optionally generate binary output. |
|
| 889 | 996 | fn compile( |
|
| 890 | 997 | ctx: *mut CompileContext, |
| 922 | 1029 | debug: ctx.debug, |
|
| 923 | 1030 | entryMode: CodegenEntryMode::DefaultEntry, |
|
| 924 | 1031 | }); |
|
| 925 | 1032 | ||
| 926 | 1033 | if not writeCode(result.code, outPath) { |
|
| 927 | - | io::printError("radiance: fatal: failed to write output file\n"); |
|
| 928 | - | throw Error::Other; |
|
| 1034 | + | throw error(&["fatal:", "failed to write output file"]); |
|
| 929 | 1035 | } |
|
| 930 | 1036 | // Write data files. |
|
| 931 | 1037 | try writeDataWithExt(&RO_DATA_BUF[..result.roDataSize], outPath, RO_DATA_EXT); |
|
| 932 | 1038 | try writeDataWithExt(&RW_DATA_BUF[..result.rwDataSize], outPath, RW_DATA_EXT); |
|
| 933 | 1039 |
lib/std.rad
+1 -0
| 1 | 1 | //! The Radiance Standard Library. |
|
| 2 | 2 | ||
| 3 | 3 | export mod io; |
|
| 4 | 4 | export mod collections; |
|
| 5 | + | export mod char; |
|
| 5 | 6 | export mod lang; |
|
| 6 | 7 | export mod sys; |
|
| 7 | 8 | export mod arch; |
|
| 8 | 9 | export mod fmt; |
|
| 9 | 10 | export mod mem; |
lib/std/arch/rv64.rad
+43 -4
| 13 | 13 | export mod encode; |
|
| 14 | 14 | export mod decode; |
|
| 15 | 15 | export mod emit; |
|
| 16 | 16 | export mod isel; |
|
| 17 | 17 | export mod printer; |
|
| 18 | + | export mod asm; |
|
| 18 | 19 | ||
| 19 | 20 | @test mod tests; |
|
| 20 | 21 | ||
| 21 | 22 | use std::mem; |
|
| 22 | 23 | use std::collections::dict; |
| 238 | 239 | ||
| 239 | 240 | // Reclaim unused memory after instruction selection. |
|
| 240 | 241 | alloc::restore(arena, checkpoint); |
|
| 241 | 242 | } |
|
| 242 | 243 | ||
| 244 | + | /// Add the text section of an assembled program to the generator. |
|
| 245 | + | /// |
|
| 246 | + | /// This function snapshots the generator's current code length as the base |
|
| 247 | + | /// index, converts each text symbol's byte offset to an instruction index, adds |
|
| 248 | + | /// that base, and records the final address for printing. Only `.export` text |
|
| 249 | + | /// symbols are exported to the emitter's function-offset table for extern call |
|
| 250 | + | /// resolution. Local labels must not escape their assembly fragment because |
|
| 251 | + | /// separate assembly inputs may reuse the same local names. |
|
| 252 | + | /// |
|
| 253 | + | /// Non-text symbols are ignored here because assembled data is not appended to |
|
| 254 | + | /// the generator's text stream. The driver merges assembled data into the RO data |
|
| 255 | + | /// prefix separately and passes that data to [`finishProgram`]. |
|
| 256 | + | export fn addAssembly(generator: *mut Generator, program: asm::Program) { |
|
| 257 | + | let baseIndex = generator.e.codeLen; |
|
| 258 | + | ||
| 259 | + | for symbol in program.symbols { |
|
| 260 | + | if symbol.section == asm::Section::Text { |
|
| 261 | + | let index = baseIndex + ((symbol.offset as u32) / INSTR_SIZE as u32); |
|
| 262 | + | emit::recordFuncAt(&mut generator.e, symbol.name, index); |
|
| 263 | + | if symbol.isExported { |
|
| 264 | + | emit::recordFuncOffsetAt(&mut generator.e, symbol.name, index); |
|
| 265 | + | } |
|
| 266 | + | } |
|
| 267 | + | } |
|
| 268 | + | for word in program.text { |
|
| 269 | + | emit::emit(&mut generator.e, word); |
|
| 270 | + | } |
|
| 271 | + | } |
|
| 272 | + | ||
| 243 | 273 | /// Finish RV64 code generation and return the emitted program. |
|
| 244 | 274 | export fn finishProgram( |
|
| 245 | 275 | generator: *mut Generator, |
|
| 246 | 276 | globalData: *[il::Data], |
|
| 247 | 277 | storage: Storage, |
|
| 278 | + | roDataPrefix: *[u8], |
|
| 248 | 279 | roDataBuf: *mut [u8], |
|
| 249 | 280 | rwDataBuf: *mut [u8] |
|
| 250 | 281 | ) -> Program { |
|
| 251 | 282 | // Build data map after function lowering. Function-local literals can add |
|
| 252 | 283 | // global data while functions are lowered, so final layout belongs here. |
|
| 253 | 284 | let mut dataSymCount: u32 = 0; |
|
| 254 | - | let roLayoutSize = data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, true); |
|
| 285 | + | let roLayoutSize = data::layoutSectionAtOffset( |
|
| 286 | + | globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, roDataPrefix.len, true |
|
| 287 | + | ); |
|
| 255 | 288 | data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RW_DATA_BASE, false); |
|
| 256 | 289 | ||
| 257 | 290 | let dataSyms = &storage.dataSyms[..dataSymCount]; |
|
| 258 | 291 | let dataSymMap = data::buildMap(dataSyms, storage.dataSymEntries); |
|
| 259 | 292 | let codeBase = mem::alignUp(RO_DATA_BASE + roLayoutSize, DWORD_SIZE as u32); |
| 274 | 307 | // Patch function calls and address loads now that all functions are emitted. |
|
| 275 | 308 | emit::patchCalls(&mut generator.e); |
|
| 276 | 309 | emit::patchAddrLoads(&mut generator.e, &dataSymMap); |
|
| 277 | 310 | ||
| 278 | 311 | // Emit data sections. |
|
| 279 | - | let roDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true); |
|
| 280 | - | let rwDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false); |
|
| 281 | - | ||
| 312 | + | assert roDataPrefix.len <= roDataBuf.len, "finishProgram: rodata prefix buffer overflow"; |
|
| 313 | + | try! mem::copy(roDataBuf, roDataPrefix); |
|
| 314 | + | ||
| 315 | + | let roDataSize = data::emitSectionAtOffset( |
|
| 316 | + | globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true, roDataPrefix.len |
|
| 317 | + | ); |
|
| 318 | + | let rwDataSize = data::emitSection( |
|
| 319 | + | globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false |
|
| 320 | + | ); |
|
| 282 | 321 | return Program { |
|
| 283 | 322 | code: emit::getCode(&generator.e), |
|
| 284 | 323 | funcs: emit::getFuncs(&generator.e), |
|
| 285 | 324 | roDataSize, |
|
| 286 | 325 | rwDataSize, |
lib/std/arch/rv64/asm.rad
added
+528 -0
| 1 | + | //! Minimal RV64 assembler. |
|
| 2 | + | //! |
|
| 3 | + | //! This module assembles `.ras` source files into RV64 text words plus a raw |
|
| 4 | + | //! data prefix that can be linked into a compiler-generated program. It exists |
|
| 5 | + | //! so the Radiance driver can mix hand-written RV64 assembly with generated IL |
|
| 6 | + | //! output without invoking an external assembler or linker. |
|
| 7 | + | //! |
|
| 8 | + | //! Assembly is intentionally direct and buffer-oriented. The caller provides a |
|
| 9 | + | //! text buffer, a data buffer, an arena, and the runtime base address where the |
|
| 10 | + | //! data buffer will be loaded. The parser writes encoded instructions into the |
|
| 11 | + | //! text buffer as it reads them and writes directive bytes into the data buffer |
|
| 12 | + | //! while in `.data`. The returned [`Program`] only contains slices into those |
|
| 13 | + | //! caller-provided buffers, so no ownership transfer or late copy is needed. |
|
| 14 | + | //! |
|
| 15 | + | //! The scanner is assembly-specific. It produces tokens for registers (`%a0`), |
|
| 16 | + | //! labels (`@name`), directives, strings, characters, numbers, and |
|
| 17 | + | //! punctuation. The parser consumes those tokens as a small line-oriented |
|
| 18 | + | //! language: *directives* declare sections or emit data, *labels* define |
|
| 19 | + | //! symbols at the current section offset, and *instructions* are validated |
|
| 20 | + | //! against RV64 operand forms before being encoded. |
|
| 21 | + | //! |
|
| 22 | + | //! Labels are defined at the current byte offset of the active text or data |
|
| 23 | + | //! section. The parser is single-pass because it keeps assembly cheap and lets |
|
| 24 | + | //! instructions and data be emitted immediately, but forward references mean |
|
| 25 | + | //! some operands cannot be encoded when first seen. Branches, jumps, |
|
| 26 | + | //! load-address operands, and data directives that reference labels therefore |
|
| 27 | + | //! record fixups. After parsing reaches EOF, the emitter resolves the final |
|
| 28 | + | //! symbol table and patches every recorded use with the correct PC-relative |
|
| 29 | + | //! offset, absolute data address, or encoded data value. |
|
| 30 | + | //! |
|
| 31 | + | //! Data labels are resolved relative to the data base address. The compiler |
|
| 32 | + | //! driver accumulates all assembly data in a RO data prefix, passes |
|
| 33 | + | //! [`RO_DATA_BASE`] + `currentPrefixLen` for each input, then appends the |
|
| 34 | + | //! input's emitted data to that prefix. When the assembled text is appended to |
|
| 35 | + | //! the RV64 generator, every text symbol is shifted by the generator's current |
|
| 36 | + | //! code length so disassembly/debug output can name its address correctly. Only |
|
| 37 | + | //! symbols marked with `.export` are registered for call resolution; all other |
|
| 38 | + | //! text labels remain local to their assembly fragment. |
|
| 39 | + | use std::lang::alloc; |
|
| 40 | + | use std::lang::strings; |
|
| 41 | + | use std::lang::gen; |
|
| 42 | + | use std::collections::dict; |
|
| 43 | + | use std::arch::rv64::encode; |
|
| 44 | + | use std::arch::rv64; |
|
| 45 | + | ||
| 46 | + | /// Assembler scanner module. |
|
| 47 | + | export mod scanner; |
|
| 48 | + | /// Assembler parser module. |
|
| 49 | + | export mod parser; |
|
| 50 | + | /// Assembler emission and fixup module. |
|
| 51 | + | export mod emit; |
|
| 52 | + | /// Tests. |
|
| 53 | + | @test mod tests; |
|
| 54 | + | ||
| 55 | + | /// In-memory result of assembling one RV64 assembly fragment. |
|
| 56 | + | /// |
|
| 57 | + | /// [`Program`] is the boundary between the textual assembler and the rest of |
|
| 58 | + | /// the compiler. The assembler reads an assembly source file, encodes all |
|
| 59 | + | /// instructions, lays out all data bytes, resolves fixups that can be resolved |
|
| 60 | + | /// inside the fragment, and returns these three slices as the assembled |
|
| 61 | + | /// program. |
|
| 62 | + | /// |
|
| 63 | + | /// The value is intentionally not a standalone object file or linked |
|
| 64 | + | /// executable. It carries only the sections and symbol table needed by the |
|
| 65 | + | /// compiler driver. The slices point at caller-owned storage: `text` and |
|
| 66 | + | /// `data` are backed by the buffers passed to [`assemble`], while symbol names |
|
| 67 | + | /// are interned in the assembler's string pool. |
|
| 68 | + | /// |
|
| 69 | + | /// Symbol offsets are section-local byte offsets. Text symbols name positions |
|
| 70 | + | /// in `text`; data symbols name positions in `data`. When the compiler |
|
| 71 | + | /// consumes the program, [`rv64::addAssembly`] appends the text words to the |
|
| 72 | + | /// generated text stream and registers text labels at their relocated offsets. |
|
| 73 | + | /// The driver copies `data` into the final read-only data prefix; the data |
|
| 74 | + | /// base supplied to [`assemble`] lets the assembler resolve data addresses as |
|
| 75 | + | /// they will appear in that final layout. |
|
| 76 | + | export record Program { |
|
| 77 | + | /// Encoded instructions in the text section. |
|
| 78 | + | text: *[u32], |
|
| 79 | + | /// Raw bytes in the data section. |
|
| 80 | + | data: *[u8], |
|
| 81 | + | /// Symbols defined by the source. |
|
| 82 | + | symbols: *[Symbol], |
|
| 83 | + | } |
|
| 84 | + | ||
| 85 | + | /// Errors reported while assembling source text. |
|
| 86 | + | export union Error { |
|
| 87 | + | /// Invalid syntax or operand form at a source offset. |
|
| 88 | + | Invalid { offset: u32, message: *[u8] }, |
|
| 89 | + | /// The source emitted more text words than the caller-provided buffer holds. |
|
| 90 | + | TextOverflow, |
|
| 91 | + | /// The source emitted more data bytes than the caller-provided buffer holds. |
|
| 92 | + | DataOverflow, |
|
| 93 | + | } |
|
| 94 | + | ||
| 95 | + | /// Active output section. |
|
| 96 | + | export union Section { |
|
| 97 | + | /// Instruction section. |
|
| 98 | + | Text, |
|
| 99 | + | /// Data byte section. |
|
| 100 | + | Data, |
|
| 101 | + | } |
|
| 102 | + | ||
| 103 | + | /// Branch opcode that needs fixup. |
|
| 104 | + | export union BranchOp { |
|
| 105 | + | /// Branch if equal. |
|
| 106 | + | Beq, |
|
| 107 | + | /// Branch if not equal. |
|
| 108 | + | Bne, |
|
| 109 | + | /// Branch if less than, signed. |
|
| 110 | + | Blt, |
|
| 111 | + | /// Branch if greater than or equal, signed. |
|
| 112 | + | Bge, |
|
| 113 | + | /// Branch if less than, unsigned. |
|
| 114 | + | Bltu, |
|
| 115 | + | /// Branch if greater than or equal, unsigned. |
|
| 116 | + | Bgeu, |
|
| 117 | + | /// Branch if less than or equal, signed pseudo-instruction. |
|
| 118 | + | Ble, |
|
| 119 | + | /// Branch if greater than, signed pseudo-instruction. |
|
| 120 | + | Bgt, |
|
| 121 | + | } |
|
| 122 | + | ||
| 123 | + | /// Parser and encoder behavior for one instruction mnemonic. |
|
| 124 | + | export union InstructionEncoder { |
|
| 125 | + | /// No-operand instruction encoded by a fixed encoder. |
|
| 126 | + | NoOperand { enc: fn() -> u32 }, |
|
| 127 | + | /// Load-immediate pseudo-instruction. |
|
| 128 | + | Li, |
|
| 129 | + | /// Load-address pseudo-instruction. |
|
| 130 | + | La, |
|
| 131 | + | /// Two-register instruction or pseudo-instruction. |
|
| 132 | + | RR { enc: fn(gen::Reg, gen::Reg) -> u32 }, |
|
| 133 | + | /// Three-register instruction. |
|
| 134 | + | RRR { enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32 }, |
|
| 135 | + | /// Register, register, immediate instruction. |
|
| 136 | + | RRI { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
|
| 137 | + | /// Shift-immediate instruction with RV64 shift bounds. |
|
| 138 | + | Shift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
|
| 139 | + | /// Shift-immediate instruction with RV64 W-mode shift bounds. |
|
| 140 | + | WordShift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
|
| 141 | + | /// Load instruction with memory operand syntax. |
|
| 142 | + | Load { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
|
| 143 | + | /// Store instruction with memory operand syntax. |
|
| 144 | + | Store { enc: fn(gen::Reg, gen::Reg, i32) -> u32 }, |
|
| 145 | + | /// Two-register branch instruction. |
|
| 146 | + | Branch { op: BranchOp }, |
|
| 147 | + | /// One-register branch-to-zero pseudo-instruction. |
|
| 148 | + | BranchZero { op: BranchOp }, |
|
| 149 | + | /// `jal` instruction with explicit destination register. |
|
| 150 | + | Jal, |
|
| 151 | + | /// Jump pseudo-instruction with fixed destination register. |
|
| 152 | + | Jump { rd: gen::Reg }, |
|
| 153 | + | /// CSR read-style operand form. |
|
| 154 | + | RdCsr { enc: fn(gen::Reg, u32) -> u32 }, |
|
| 155 | + | /// CSR write-style operand form. |
|
| 156 | + | CsrRs1 { enc: fn(u32, gen::Reg) -> u32 }, |
|
| 157 | + | /// CSR read/write operand form. |
|
| 158 | + | Csrrw, |
|
| 159 | + | /// CSR immediate operand form. |
|
| 160 | + | Csrsi, |
|
| 161 | + | /// Upper-immediate operand form. |
|
| 162 | + | Upper { enc: fn(gen::Reg, i32) -> u32 }, |
|
| 163 | + | } |
|
| 164 | + | ||
| 165 | + | /// Classified directive name. |
|
| 166 | + | export union DirectiveKind { |
|
| 167 | + | /// `.align` directive. |
|
| 168 | + | Align, |
|
| 169 | + | /// `.ascii` directive. |
|
| 170 | + | Ascii, |
|
| 171 | + | /// `.byte` directive. |
|
| 172 | + | Byte, |
|
| 173 | + | /// `.constant` directive. |
|
| 174 | + | Constant, |
|
| 175 | + | /// `.data` directive. |
|
| 176 | + | Data, |
|
| 177 | + | /// `.dword` directive. |
|
| 178 | + | Dword, |
|
| 179 | + | /// `.export` directive. |
|
| 180 | + | Export, |
|
| 181 | + | /// `.space` directive. |
|
| 182 | + | Space, |
|
| 183 | + | /// `.text` directive. |
|
| 184 | + | Text, |
|
| 185 | + | /// `.word` directive. |
|
| 186 | + | Word, |
|
| 187 | + | } |
|
| 188 | + | ||
| 189 | + | /// Instruction descriptor table row. |
|
| 190 | + | record InstructionEntry { |
|
| 191 | + | /// Assembly mnemonic text. |
|
| 192 | + | name: *[u8], |
|
| 193 | + | /// Operand parser and encoder behavior. |
|
| 194 | + | encoder: InstructionEncoder, |
|
| 195 | + | } |
|
| 196 | + | ||
| 197 | + | /// Directive descriptor table row. |
|
| 198 | + | record DirectiveEntry { |
|
| 199 | + | /// Directive name without the leading `.`. |
|
| 200 | + | name: *[u8], |
|
| 201 | + | /// Parser behavior for the directive. |
|
| 202 | + | kind: DirectiveKind, |
|
| 203 | + | } |
|
| 204 | + | ||
| 205 | + | /// Register descriptor table row. |
|
| 206 | + | record RegisterEntry { |
|
| 207 | + | /// Register alias text without the leading `%`. |
|
| 208 | + | name: *[u8], |
|
| 209 | + | /// Numeric register selected by the alias. |
|
| 210 | + | reg: gen::Reg, |
|
| 211 | + | } |
|
| 212 | + | ||
| 213 | + | /// CSR descriptor table row. |
|
| 214 | + | record CsrEntry { |
|
| 215 | + | /// CSR name text. |
|
| 216 | + | name: *[u8], |
|
| 217 | + | /// Numeric CSR address. |
|
| 218 | + | csr: u32, |
|
| 219 | + | } |
|
| 220 | + | ||
| 221 | + | /// Width of an integer data directive. |
|
| 222 | + | export union DataWidth { |
|
| 223 | + | /// 32-bit data value. |
|
| 224 | + | Word, |
|
| 225 | + | /// 64-bit data value. |
|
| 226 | + | Dword, |
|
| 227 | + | } |
|
| 228 | + | ||
| 229 | + | /// Extra slot used when sizing source-derived symbol and fixup buffers. |
|
| 230 | + | export constant SOURCE_CAP_PADDING: u32 = 1; |
|
| 231 | + | /// Scale factor used to keep assembler hash tables sparse. |
|
| 232 | + | export constant TABLE_CAPACITY_SCALE: u32 = 4; |
|
| 233 | + | /// Minimum hash-table capacity used by the assembler. |
|
| 234 | + | export constant MIN_TABLE_CAPACITY: u32 = 8; |
|
| 235 | + | /// `@label` names exclude the leading sigil byte when interned. |
|
| 236 | + | export constant LABEL_SIGIL_LEN: u32 = 1; |
|
| 237 | + | /// `.directive` names exclude the leading sigil byte when matched. |
|
| 238 | + | export constant DIRECTIVE_SIGIL_LEN: u32 = 1; |
|
| 239 | + | /// String and character literals are delimited by one byte on each side. |
|
| 240 | + | export constant QUOTE_DELIM_LEN: u32 = 1; |
|
| 241 | + | /// Number of bits in one byte. |
|
| 242 | + | export constant BITS_PER_BYTE: u64 = 8; |
|
| 243 | + | /// Mask for extracting one encoded byte. |
|
| 244 | + | export constant BYTE_MASK: u64 = 0xFF; |
|
| 245 | + | /// Largest signed 32-bit assembler value. |
|
| 246 | + | export constant I32_MAX_VALUE: i64 = 2147483647; |
|
| 247 | + | /// Magnitude of the smallest signed 32-bit assembler value. |
|
| 248 | + | export constant I32_MIN_MAGNITUDE: i64 = 2147483648; |
|
| 249 | + | /// Largest unsigned 32-bit assembler value. |
|
| 250 | + | export constant U32_MAX_VALUE: i64 = 4294967295; |
|
| 251 | + | /// Largest unsigned 8-bit assembler value. |
|
| 252 | + | export constant U8_MAX_VALUE: i64 = 255; |
|
| 253 | + | /// Exclusive upper bound for CSR immediate operands. |
|
| 254 | + | export constant CSR_IMM_LIMIT: i64 = 32; |
|
| 255 | + | /// Exclusive upper bound for RV64 W-mode shift immediates. |
|
| 256 | + | export constant WORD_SHIFT_LIMIT: i32 = 32; |
|
| 257 | + | /// Exclusive upper bound for RV64 shift immediates. |
|
| 258 | + | export constant SHIFT_LIMIT: i32 = 64; |
|
| 259 | + | /// Largest `lui` or `auipc` immediate. |
|
| 260 | + | export constant UPPER_IMM_MAX_VALUE: i64 = 0xFFFFF; |
|
| 261 | + | ||
| 262 | + | /// Sorted instruction descriptor table used by the assembler parser. |
|
| 263 | + | export constant INSTRUCTIONS: [InstructionEntry; 87] = [ |
|
| 264 | + | { name: "add", encoder: InstructionEncoder::RRR { enc: encode::add } }, |
|
| 265 | + | { name: "addi", encoder: InstructionEncoder::RRI { enc: encode::addi } }, |
|
| 266 | + | { name: "addiw", encoder: InstructionEncoder::RRI { enc: encode::addiw } }, |
|
| 267 | + | { name: "addw", encoder: InstructionEncoder::RRR { enc: encode::addw } }, |
|
| 268 | + | { name: "and", encoder: InstructionEncoder::RRR { enc: encode::and_ } }, |
|
| 269 | + | { name: "andi", encoder: InstructionEncoder::RRI { enc: encode::andi } }, |
|
| 270 | + | { name: "auipc", encoder: InstructionEncoder::Upper { enc: encode::auipc } }, |
|
| 271 | + | { name: "beq", encoder: InstructionEncoder::Branch { op: BranchOp::Beq } }, |
|
| 272 | + | { name: "beqz", encoder: InstructionEncoder::BranchZero { op: BranchOp::Beq } }, |
|
| 273 | + | { name: "bge", encoder: InstructionEncoder::Branch { op: BranchOp::Bge } }, |
|
| 274 | + | { name: "bgeu", encoder: InstructionEncoder::Branch { op: BranchOp::Bgeu } }, |
|
| 275 | + | { name: "bgt", encoder: InstructionEncoder::Branch { op: BranchOp::Bgt } }, |
|
| 276 | + | { name: "ble", encoder: InstructionEncoder::Branch { op: BranchOp::Ble } }, |
|
| 277 | + | { name: "blt", encoder: InstructionEncoder::Branch { op: BranchOp::Blt } }, |
|
| 278 | + | { name: "bltu", encoder: InstructionEncoder::Branch { op: BranchOp::Bltu } }, |
|
| 279 | + | { name: "bne", encoder: InstructionEncoder::Branch { op: BranchOp::Bne } }, |
|
| 280 | + | { name: "bnez", encoder: InstructionEncoder::BranchZero { op: BranchOp::Bne } }, |
|
| 281 | + | { name: "call", encoder: InstructionEncoder::Jump { rd: rv64::RA } }, |
|
| 282 | + | { name: "csrc", encoder: InstructionEncoder::CsrRs1 { enc: encode::csrc } }, |
|
| 283 | + | { name: "csrr", encoder: InstructionEncoder::RdCsr { enc: encode::csrr } }, |
|
| 284 | + | { name: "csrrw", encoder: InstructionEncoder::Csrrw }, |
|
| 285 | + | { name: "csrsi", encoder: InstructionEncoder::Csrsi }, |
|
| 286 | + | { name: "csrw", encoder: InstructionEncoder::CsrRs1 { enc: encode::csrw } }, |
|
| 287 | + | { name: "div", encoder: InstructionEncoder::RRR { enc: encode::div } }, |
|
| 288 | + | { name: "divu", encoder: InstructionEncoder::RRR { enc: encode::divu } }, |
|
| 289 | + | { name: "divuw", encoder: InstructionEncoder::RRR { enc: encode::divuw } }, |
|
| 290 | + | { name: "divw", encoder: InstructionEncoder::RRR { enc: encode::divw } }, |
|
| 291 | + | { name: "ebreak", encoder: InstructionEncoder::NoOperand { enc: encode::ebreak } }, |
|
| 292 | + | { name: "ecall", encoder: InstructionEncoder::NoOperand { enc: encode::ecall } }, |
|
| 293 | + | { name: "j", encoder: InstructionEncoder::Jump { rd: rv64::ZERO } }, |
|
| 294 | + | { name: "jal", encoder: InstructionEncoder::Jal }, |
|
| 295 | + | { name: "jalr", encoder: InstructionEncoder::RRI { enc: encode::jalr } }, |
|
| 296 | + | { name: "la", encoder: InstructionEncoder::La }, |
|
| 297 | + | { name: "lb", encoder: InstructionEncoder::Load { enc: encode::lb } }, |
|
| 298 | + | { name: "lbu", encoder: InstructionEncoder::Load { enc: encode::lbu } }, |
|
| 299 | + | { name: "ld", encoder: InstructionEncoder::Load { enc: encode::ld } }, |
|
| 300 | + | { name: "lh", encoder: InstructionEncoder::Load { enc: encode::lh } }, |
|
| 301 | + | { name: "lhu", encoder: InstructionEncoder::Load { enc: encode::lhu } }, |
|
| 302 | + | { name: "li", encoder: InstructionEncoder::Li }, |
|
| 303 | + | { name: "lui", encoder: InstructionEncoder::Upper { enc: encode::lui } }, |
|
| 304 | + | { name: "lw", encoder: InstructionEncoder::Load { enc: encode::lw } }, |
|
| 305 | + | { name: "lwu", encoder: InstructionEncoder::Load { enc: encode::lwu } }, |
|
| 306 | + | { name: "mret", encoder: InstructionEncoder::NoOperand { enc: encode::mret } }, |
|
| 307 | + | { name: "mul", encoder: InstructionEncoder::RRR { enc: encode::mul } }, |
|
| 308 | + | { name: "mulh", encoder: InstructionEncoder::RRR { enc: encode::mulh } }, |
|
| 309 | + | { name: "mulhsu", encoder: InstructionEncoder::RRR { enc: encode::mulhsu } }, |
|
| 310 | + | { name: "mulhu", encoder: InstructionEncoder::RRR { enc: encode::mulhu } }, |
|
| 311 | + | { name: "mulw", encoder: InstructionEncoder::RRR { enc: encode::mulw } }, |
|
| 312 | + | { name: "mv", encoder: InstructionEncoder::RR { enc: encode::mv } }, |
|
| 313 | + | { name: "neg", encoder: InstructionEncoder::RR { enc: encode::neg } }, |
|
| 314 | + | { name: "nop", encoder: InstructionEncoder::NoOperand { enc: encode::nop } }, |
|
| 315 | + | { name: "not", encoder: InstructionEncoder::RR { enc: encode::not_ } }, |
|
| 316 | + | { name: "or", encoder: InstructionEncoder::RRR { enc: encode::or_ } }, |
|
| 317 | + | { name: "ori", encoder: InstructionEncoder::RRI { enc: encode::ori } }, |
|
| 318 | + | { name: "rem", encoder: InstructionEncoder::RRR { enc: encode::rem } }, |
|
| 319 | + | { name: "remu", encoder: InstructionEncoder::RRR { enc: encode::remu } }, |
|
| 320 | + | { name: "remuw", encoder: InstructionEncoder::RRR { enc: encode::remuw } }, |
|
| 321 | + | { name: "remw", encoder: InstructionEncoder::RRR { enc: encode::remw } }, |
|
| 322 | + | { name: "ret", encoder: InstructionEncoder::NoOperand { enc: encode::ret } }, |
|
| 323 | + | { name: "sb", encoder: InstructionEncoder::Store { enc: encode::sb } }, |
|
| 324 | + | { name: "sd", encoder: InstructionEncoder::Store { enc: encode::sd } }, |
|
| 325 | + | { name: "seqz", encoder: InstructionEncoder::RR { enc: encode::seqz } }, |
|
| 326 | + | { name: "sh", encoder: InstructionEncoder::Store { enc: encode::sh } }, |
|
| 327 | + | { name: "sll", encoder: InstructionEncoder::RRR { enc: encode::sll } }, |
|
| 328 | + | { name: "slli", encoder: InstructionEncoder::Shift { enc: encode::slli } }, |
|
| 329 | + | { name: "slliw", encoder: InstructionEncoder::WordShift { enc: encode::slliw } }, |
|
| 330 | + | { name: "sllw", encoder: InstructionEncoder::RRR { enc: encode::sllw } }, |
|
| 331 | + | { name: "slt", encoder: InstructionEncoder::RRR { enc: encode::slt } }, |
|
| 332 | + | { name: "slti", encoder: InstructionEncoder::RRI { enc: encode::slti } }, |
|
| 333 | + | { name: "sltiu", encoder: InstructionEncoder::RRI { enc: encode::sltiu } }, |
|
| 334 | + | { name: "sltu", encoder: InstructionEncoder::RRR { enc: encode::sltu } }, |
|
| 335 | + | { name: "snez", encoder: InstructionEncoder::RR { enc: encode::snez } }, |
|
| 336 | + | { name: "sra", encoder: InstructionEncoder::RRR { enc: encode::sra } }, |
|
| 337 | + | { name: "srai", encoder: InstructionEncoder::Shift { enc: encode::srai } }, |
|
| 338 | + | { name: "sraiw", encoder: InstructionEncoder::WordShift { enc: encode::sraiw } }, |
|
| 339 | + | { name: "sraw", encoder: InstructionEncoder::RRR { enc: encode::sraw } }, |
|
| 340 | + | { name: "srl", encoder: InstructionEncoder::RRR { enc: encode::srl } }, |
|
| 341 | + | { name: "srli", encoder: InstructionEncoder::Shift { enc: encode::srli } }, |
|
| 342 | + | { name: "srliw", encoder: InstructionEncoder::WordShift { enc: encode::srliw } }, |
|
| 343 | + | { name: "srlw", encoder: InstructionEncoder::RRR { enc: encode::srlw } }, |
|
| 344 | + | { name: "sub", encoder: InstructionEncoder::RRR { enc: encode::sub } }, |
|
| 345 | + | { name: "subw", encoder: InstructionEncoder::RRR { enc: encode::subw } }, |
|
| 346 | + | { name: "sw", encoder: InstructionEncoder::Store { enc: encode::sw } }, |
|
| 347 | + | { name: "tail", encoder: InstructionEncoder::Jump { rd: rv64::ZERO } }, |
|
| 348 | + | { name: "wfi", encoder: InstructionEncoder::NoOperand { enc: encode::wfi } }, |
|
| 349 | + | { name: "xor", encoder: InstructionEncoder::RRR { enc: encode::xor } }, |
|
| 350 | + | { name: "xori", encoder: InstructionEncoder::RRI { enc: encode::xori } }, |
|
| 351 | + | ]; |
|
| 352 | + | ||
| 353 | + | /// Sorted directive lookup table used by the assembler parser. |
|
| 354 | + | export constant DIRECTIVES: [DirectiveEntry; 10] = [ |
|
| 355 | + | { name: "align", kind: DirectiveKind::Align }, |
|
| 356 | + | { name: "ascii", kind: DirectiveKind::Ascii }, |
|
| 357 | + | { name: "byte", kind: DirectiveKind::Byte }, |
|
| 358 | + | { name: "constant", kind: DirectiveKind::Constant }, |
|
| 359 | + | { name: "data", kind: DirectiveKind::Data }, |
|
| 360 | + | { name: "dword", kind: DirectiveKind::Dword }, |
|
| 361 | + | { name: "export", kind: DirectiveKind::Export }, |
|
| 362 | + | { name: "space", kind: DirectiveKind::Space }, |
|
| 363 | + | { name: "text", kind: DirectiveKind::Text }, |
|
| 364 | + | { name: "word", kind: DirectiveKind::Word }, |
|
| 365 | + | ]; |
|
| 366 | + | ||
| 367 | + | /// Sorted register-name lookup table used by the assembler parser. |
|
| 368 | + | export constant REGISTERS: [RegisterEntry; 33] = [ |
|
| 369 | + | { name: "a0", reg: rv64::A0 }, |
|
| 370 | + | { name: "a1", reg: rv64::A1 }, |
|
| 371 | + | { name: "a2", reg: rv64::A2 }, |
|
| 372 | + | { name: "a3", reg: rv64::A3 }, |
|
| 373 | + | { name: "a4", reg: rv64::A4 }, |
|
| 374 | + | { name: "a5", reg: rv64::A5 }, |
|
| 375 | + | { name: "a6", reg: rv64::A6 }, |
|
| 376 | + | { name: "a7", reg: rv64::A7 }, |
|
| 377 | + | { name: "fp", reg: rv64::FP }, |
|
| 378 | + | { name: "gp", reg: rv64::GP }, |
|
| 379 | + | { name: "ra", reg: rv64::RA }, |
|
| 380 | + | { name: "s0", reg: rv64::S0 }, |
|
| 381 | + | { name: "s1", reg: rv64::S1 }, |
|
| 382 | + | { name: "s10", reg: rv64::S10 }, |
|
| 383 | + | { name: "s11", reg: rv64::S11 }, |
|
| 384 | + | { name: "s2", reg: rv64::S2 }, |
|
| 385 | + | { name: "s3", reg: rv64::S3 }, |
|
| 386 | + | { name: "s4", reg: rv64::S4 }, |
|
| 387 | + | { name: "s5", reg: rv64::S5 }, |
|
| 388 | + | { name: "s6", reg: rv64::S6 }, |
|
| 389 | + | { name: "s7", reg: rv64::S7 }, |
|
| 390 | + | { name: "s8", reg: rv64::S8 }, |
|
| 391 | + | { name: "s9", reg: rv64::S9 }, |
|
| 392 | + | { name: "sp", reg: rv64::SP }, |
|
| 393 | + | { name: "t0", reg: rv64::T0 }, |
|
| 394 | + | { name: "t1", reg: rv64::T1 }, |
|
| 395 | + | { name: "t2", reg: rv64::T2 }, |
|
| 396 | + | { name: "t3", reg: rv64::T3 }, |
|
| 397 | + | { name: "t4", reg: rv64::T4 }, |
|
| 398 | + | { name: "t5", reg: rv64::T5 }, |
|
| 399 | + | { name: "t6", reg: rv64::T6 }, |
|
| 400 | + | { name: "tp", reg: rv64::TP }, |
|
| 401 | + | { name: "zero", reg: rv64::ZERO }, |
|
| 402 | + | ]; |
|
| 403 | + | ||
| 404 | + | /// Sorted CSR-name lookup table used by the assembler parser. |
|
| 405 | + | export constant CSRS: [CsrEntry; 9] = [ |
|
| 406 | + | { name: "mcause", csr: 0x342 }, |
|
| 407 | + | { name: "mepc", csr: 0x341 }, |
|
| 408 | + | { name: "mhartid", csr: 0xF14 }, |
|
| 409 | + | { name: "mie", csr: 0x304 }, |
|
| 410 | + | { name: "mip", csr: 0x344 }, |
|
| 411 | + | { name: "mscratch", csr: 0x340 }, |
|
| 412 | + | { name: "mstatus", csr: 0x300 }, |
|
| 413 | + | { name: "mtval", csr: 0x343 }, |
|
| 414 | + | { name: "mtvec", csr: 0x305 }, |
|
| 415 | + | ]; |
|
| 416 | + | ||
| 417 | + | /// Recorded symbol definition. |
|
| 418 | + | export record Symbol { |
|
| 419 | + | /// Symbol name. |
|
| 420 | + | name: *[u8], |
|
| 421 | + | /// Section the symbol belongs to. |
|
| 422 | + | section: Section, |
|
| 423 | + | /// Byte offset within the section. |
|
| 424 | + | offset: i32, |
|
| 425 | + | /// Whether `.export` exported this symbol outside its assembly fragment. |
|
| 426 | + | isExported: bool, |
|
| 427 | + | } |
|
| 428 | + | ||
| 429 | + | /// Information needed to resolve a pending symbol reference. |
|
| 430 | + | export union FixupInfo { |
|
| 431 | + | /// Branch to a text label. |
|
| 432 | + | Branch { op: BranchOp, rs1: gen::Reg, rs2: gen::Reg, index: u32 }, |
|
| 433 | + | /// JAL-like jump to a text label. |
|
| 434 | + | Jal { rd: gen::Reg, index: u32 }, |
|
| 435 | + | /// Absolute address materialization into a register. |
|
| 436 | + | Addr { rd: gen::Reg, index: u32 }, |
|
| 437 | + | /// A 32-bit data word referring to a symbol offset. |
|
| 438 | + | Word { offset: u32 }, |
|
| 439 | + | /// A 64-bit data word referring to a symbol offset. |
|
| 440 | + | Dword { offset: u32 }, |
|
| 441 | + | } |
|
| 442 | + | ||
| 443 | + | /// Pending symbol reference. |
|
| 444 | + | export record Fixup { |
|
| 445 | + | /// Referenced symbol. |
|
| 446 | + | symbol: *[u8], |
|
| 447 | + | /// Fixup payload. |
|
| 448 | + | info: FixupInfo, |
|
| 449 | + | } |
|
| 450 | + | ||
/// Parser and emission state shared by the parser and emit passes.
export record Assembler {
    /// Allocation arena for temporary assembler state.
    arena: *mut alloc::Arena,
    /// Assembler lexical scanner.
    scan: scanner::Scanner,
    /// Output text buffer, one `u32` per instruction word.
    text: *mut [u32],
    /// Output data buffer, in bytes.
    data: *mut [u8],
    /// Current output section.
    section: Section,
    /// Defined symbols, in definition order.
    symbols: *mut [Symbol],
    /// Name-to-symbol index map (values index into `symbols`).
    symbolMap: dict::Dict,
    /// Name-to-integer map.
    constMap: dict::Dict,
    /// Names marked by `.export`.
    exportMap: dict::Dict,
    /// Pending fixups.
    fixups: *mut [Fixup],
    /// Absolute runtime address of data-section offset zero.
    dataBase: u32,
}
| 476 | + | ||
/// Assemble source using `dataBase` as the runtime address of the data-section.
///
/// Instruction words are written into `textBuf` and data bytes into `dataBuf`;
/// the returned `Program` holds slices over those buffers plus the symbol table,
/// which is allocated from `arena`. Scratch tables (symbols, fixups and the three
/// dictionaries) are sized from the source length.
///
/// Throws whatever `parser::parseProgram` or `emit::finishProgram` throw.
export fn assemble(
    sourceKind: scanner::SourceKind,
    source: *[u8],
    textBuf: *mut [u32],
    dataBuf: *mut [u8],
    arena: *mut alloc::Arena,
    pool: *mut strings::Pool,
    dataBase: u32
) -> Program throws (Error) {
    // Capacity of the symbol and fixup buffers, derived from the source length.
    let slotCap = source.len + SOURCE_CAP_PADDING;
    // Capacity of each dictionary's entry table.
    let tableCap = nextPowerOfTwo(slotCap * TABLE_CAPACITY_SCALE);

    let symbols = try! alloc::allocSlice(arena, @sizeOf(Symbol), @alignOf(Symbol), slotCap);
    let fixups = try! alloc::allocSlice(arena, @sizeOf(Fixup), @alignOf(Fixup), slotCap);
    let entries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
    let constEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
    let exportEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);

    let mut a = Assembler {
        arena,
        scan: scanner::scanner(sourceKind, source, pool),
        // Output buffers start empty (length 0) with the caller's buffer length as capacity.
        text: @sliceOf(textBuf.ptr, 0, textBuf.len),
        data: @sliceOf(dataBuf.ptr, 0, dataBuf.len),
        // Assembly starts in the text section.
        section: Section::Text,
        symbols: @sliceOf((symbols as *mut [Symbol]).ptr, 0, (symbols as *mut [Symbol]).len),
        symbolMap: dict::init(entries as *mut [dict::Entry]),
        constMap: dict::init(constEntries as *mut [dict::Entry]),
        exportMap: dict::init(exportEntries as *mut [dict::Entry]),
        fixups: @sliceOf((fixups as *mut [Fixup]).ptr, 0, (fixups as *mut [Fixup]).len),
        dataBase,
    };
    // Parse assembly source and emit instructions.
    try parser::parseProgram(&mut a);
    // Resolve fixups and finalize program.
    try emit::finishProgram(&mut a);

    return Program {
        text: a.text,
        data: a.data,
        symbols: a.symbols,
    };
}
| 520 | + | ||
/// Return the smallest value of the form `MIN_TABLE_CAPACITY << k` that is at
/// least as large as `value`.
///
/// The result is never smaller than `MIN_TABLE_CAPACITY`. It is a power of two
/// provided `MIN_TABLE_CAPACITY` is one (assumed; the constant is defined
/// elsewhere in this module).
///
/// Panics when no such capacity fits in `u32`. Without this check the doubling
/// would wrap to zero and the loop would never terminate.
fn nextPowerOfTwo(value: u32) -> u32 {
    let mut n: u32 = MIN_TABLE_CAPACITY;
    while n < value {
        let doubled = n << 1;
        // A doubling that does not grow means the shift dropped the top bit
        // (or `n` is zero); either way further shifting can never reach `value`.
        if doubled <= n {
            panic "asm: table capacity overflow";
        }
        set n = doubled;
    }
    return n;
}
lib/std/arch/rv64/asm/emit.rad
added
+210 -0
| 1 | + | //! Assembler emission and fixup helpers. |
|
| 2 | + | use std::arch::rv64::emit; |
|
| 3 | + | use std::arch::rv64::encode; |
|
| 4 | + | use std::arch::rv64; |
|
| 5 | + | use std::fmt; |
|
| 6 | + | ||
| 7 | + | use std::collections::dict; |
|
| 8 | + | use std::lang::gen; |
|
| 9 | + | ||
/// Record a symbol called `name` at the current position of the active section.
///
/// The stored offset is in bytes: the data length when assembling the data
/// section, otherwise the number of emitted text words times the instruction
/// size. The symbol is flagged as exported when `name` is already present in
/// the export map. Panics when the symbol buffer is full.
export fn defineSymbol(a: *mut super::Assembler, name: *[u8]) {
    let slot = a.symbols.len;
    if slot >= a.symbols.cap {
        panic "asm: symbol buffer full";
    }
    let mut offset: i32 = 0;
    if a.section == super::Section::Data {
        set offset = a.data.len as i32;
    } else {
        set offset = a.text.len as i32 * rv64::INSTR_SIZE;
    }
    let isExported = dict::get(&a.exportMap, name) <> nil;

    // Grow the slice by one element, then fill the new slot.
    set a.symbols = @sliceOf(a.symbols.ptr, slot + 1, a.symbols.cap);
    set a.symbols[slot] = super::Symbol { name, section: a.section, offset, isExported };
    dict::insert(&mut a.symbolMap, name, slot as i32);
}
| 29 | + | ||
/// Append a single encoded instruction word to the text section.
///
/// Throws `Error::TextOverflow` when the text buffer has no capacity left.
export fn emitText(a: *mut super::Assembler, word: u32) throws (super::Error) {
    let slot = a.text.len;
    if slot >= a.text.cap {
        throw super::Error::TextOverflow;
    }
    // Extend the slice by one word and store into the new slot.
    set a.text = @sliceOf(a.text.ptr, slot + 1, a.text.cap);
    set a.text[slot] = word;
}
| 39 | + | ||
/// Append `words` no-op instructions to the text section.
///
/// Propagates `Error::TextOverflow` from `emitText`; words emitted before the
/// overflow stay in the buffer.
export fn emitTextPadding(a: *mut super::Assembler, words: u32) throws (super::Error) {
    let mut emitted: u32 = 0;
    while emitted < words {
        try emitText(a, encode::nop());
        set emitted += 1;
    }
}
| 46 | + | ||
/// Append a single byte to the data section.
///
/// Throws `Error::DataOverflow` when the data buffer has no capacity left.
export fn emitByte(a: *mut super::Assembler, byte: u8) throws (super::Error) {
    let slot = a.data.len;
    if slot >= a.data.cap {
        throw super::Error::DataOverflow;
    }
    // Extend the slice by one byte and store into the new slot.
    set a.data = @sliceOf(a.data.ptr, slot + 1, a.data.cap);
    set a.data[slot] = byte;
}
| 56 | + | ||
/// Append the low `bytes` bytes of `bits` to the data section, least
/// significant byte first.
fn emitDataInt(a: *mut super::Assembler, bits: u64, bytes: u32) throws (super::Error) {
    for i in 0..bytes {
        let shift = (i as u64) * super::BITS_PER_BYTE;
        let byte = ((bits >> shift) & super::BYTE_MASK) as u8;
        try emitByte(a, byte);
    }
}
| 63 | + | ||
/// Overwrite `bytes` bytes of the data section starting at `offset` with the
/// low bytes of `bits`, least significant byte first.
///
/// The target bytes must already exist in the data slice.
fn patchDataInt(a: *mut super::Assembler, offset: u32, bits: u64, bytes: u32) {
    for i in 0..bytes {
        let shift = (i as u64) * super::BITS_PER_BYTE;
        let byte = ((bits >> shift) & super::BYTE_MASK) as u8;
        set a.data[offset + i] = byte;
    }
}
| 70 | + | ||
/// Emit `value` into the data section as a little-endian integer of the
/// given `width`.
export fn emitDataValue(a: *mut super::Assembler, value: i64, width: super::DataWidth) throws (super::Error) {
    let bits = value as u64;
    match width {
        case super::DataWidth::Word => {
            try emitDataInt(a, bits, rv64::WORD_SIZE as u32);
        }
        case super::DataWidth::Dword => {
            try emitDataInt(a, bits, rv64::DWORD_SIZE as u32);
        }
    }
}
| 78 | + | ||
/// Record a fixup for a data directive that refers to `target` and reserve
/// zero-filled bytes of the given `width` for the later patch.
export fn recordDataFixup(a: *mut super::Assembler, target: *[u8], width: super::DataWidth) throws (super::Error) {
    // The placeholder starts at the current end of the data section.
    let at = a.data.len;
    match width {
        case super::DataWidth::Word => {
            recordFixup(a, target, super::FixupInfo::Word { offset: at });
            try emitDataInt(a, 0, rv64::WORD_SIZE as u32);
        }
        case super::DataWidth::Dword => {
            recordFixup(a, target, super::FixupInfo::Dword { offset: at });
            try emitDataInt(a, 0, rv64::DWORD_SIZE as u32);
        }
    }
}
| 93 | + | ||
/// Append a pending fixup for `symbol` to the fixup list.
///
/// Panics when the fixup buffer is full.
fn recordFixup(a: *mut super::Assembler, symbol: *[u8], info: super::FixupInfo) {
    if a.fixups.len >= a.fixups.cap {
        panic "asm: fixup buffer full";
    }
    let slot = a.fixups.len as u32;
    let entry = super::Fixup { symbol, info };

    set a.fixups = @sliceOf(a.fixups.ptr, slot + 1, a.fixups.cap);
    set a.fixups[slot] = entry;
}
| 103 | + | ||
/// Record a fixup for `symbol` and reserve `words` instruction slots in the
/// text section, filled with no-ops until the fixup is resolved.
export fn recordTextFixup(a: *mut super::Assembler, symbol: *[u8], info: super::FixupInfo, words: u32) throws (super::Error) {
    recordFixup(a, symbol, info);
    for _ in 0..words {
        try emitText(a, encode::nop());
    }
}
| 109 | + | ||
/// Look up a defined symbol by name, returning nil when `name` is unknown.
fn findSymbol(a: *super::Assembler, name: *[u8]) -> ?super::Symbol {
    let slot = dict::get(&a.symbolMap, name) else {
        return nil;
    };
    return a.symbols[slot as u32];
}
| 116 | + | ||
/// Return the final address of a data symbol: its section offset plus the
/// assembler's data base.
///
/// Throws `Error::Invalid` when `symbol` is not in the data section.
fn dataSymbolAddr(a: *super::Assembler, symbol: super::Symbol) -> i32 throws (super::Error) {
    if symbol.section == super::Section::Data {
        return symbol.offset + (a.dataBase as i32);
    }
    throw super::Error::Invalid { offset: 0, message: "data address target must be in data section" };
}
| 124 | + | ||
/// Resolve every pending fixup against the defined symbols and patch the
/// reserved text words and data bytes.
///
/// Throws `Error::Invalid` (always with offset 0) when a referenced symbol is
/// undefined, is in the wrong section for the fixup kind, or is out of range
/// for a branch or jump immediate.
export fn finishProgram(a: *mut super::Assembler) throws (super::Error) {
    for i in 0..a.fixups.len {
        let pending = a.fixups[i];
        let target = findSymbol(a, pending.symbol) else {
            throw super::Error::Invalid { offset: 0, message: "undefined symbol" };
        };
        match pending.info {
            case super::FixupInfo::Branch { op, rs1, rs2, index } => {
                if target.section <> super::Section::Text {
                    throw super::Error::Invalid { offset: 0, message: "branch target must be in text section" };
                }
                // Byte distance from the branch instruction to its target.
                let rel = target.offset - (index as i32 * rv64::INSTR_SIZE);
                if not encode::isBranchImm(rel) {
                    throw super::Error::Invalid { offset: 0, message: "branch target out of range" };
                }
                set a.text[index] = encodeBranch(op, rs1, rs2, rel);
            }
            case super::FixupInfo::Jal { rd, index } => {
                if target.section <> super::Section::Text {
                    throw super::Error::Invalid { offset: 0, message: "jump target must be in text section" };
                }
                // Byte distance from the jump instruction to its target.
                let rel = target.offset - (index as i32 * rv64::INSTR_SIZE);
                if not encode::isJumpImm(rel) {
                    throw super::Error::Invalid { offset: 0, message: "jump target out of range" };
                }
                set a.text[index] = encode::jal(rd, rel);
            }
            case super::FixupInfo::Addr { rd, index } => {
                if target.section == super::Section::Data {
                    // Data symbols: data base plus offset, built with `lui` + `addi`.
                    let split = emit::splitImm(target.offset + (a.dataBase as i32));
                    set a.text[index] = encode::lui(rd, split.hi);
                    set a.text[index + 1] = encode::addi(rd, rd, split.lo);
                } else {
                    // Other symbols: distance from the first reserved word,
                    // built with `auipc` + `addi`.
                    let split = emit::splitImm(target.offset - (index as i32 * rv64::INSTR_SIZE));
                    set a.text[index] = encode::auipc(rd, split.hi);
                    set a.text[index + 1] = encode::addi(rd, rd, split.lo);
                }
            }
            case super::FixupInfo::Word { offset } => {
                let addr = try dataSymbolAddr(a, target);
                patchDataInt(a, offset, addr as u64, rv64::WORD_SIZE as u32);
            }
            case super::FixupInfo::Dword { offset } => {
                let addr = try dataSymbolAddr(a, target);
                patchDataInt(a, offset, addr as u64, rv64::DWORD_SIZE as u32);
            }
        }
    }
}
| 181 | + | ||
/// Encode the branch instruction selected by `op` with the given source
/// registers and immediate `imm`.
export fn encodeBranch(op: super::BranchOp, rs1: gen::Reg, rs2: gen::Reg, imm: i32) -> u32 {
    match op {
        case super::BranchOp::Beq => {
            return encode::beq(rs1, rs2, imm);
        }
        case super::BranchOp::Bne => {
            return encode::bne(rs1, rs2, imm);
        }
        case super::BranchOp::Blt => {
            return encode::blt(rs1, rs2, imm);
        }
        case super::BranchOp::Bge => {
            return encode::bge(rs1, rs2, imm);
        }
        case super::BranchOp::Bltu => {
            return encode::bltu(rs1, rs2, imm);
        }
        case super::BranchOp::Bgeu => {
            return encode::bgeu(rs1, rs2, imm);
        }
        case super::BranchOp::Ble => {
            return encode::ble(rs1, rs2, imm);
        }
        case super::BranchOp::Bgt => {
            return encode::bgt(rs1, rs2, imm);
        }
    }
}
| 195 | + | ||
/// Emit the bytes of a quoted string literal into the data section, decoding
/// backslash escapes.
///
/// `literal` includes its surrounding quote delimiters, which are stripped
/// first. A backslash that is the final character of the body is emitted
/// unchanged.
export fn emitDecodedString(a: *mut super::Assembler, literal: *[u8]) throws (super::Error) {
    let body = &literal[super::QUOTE_DELIM_LEN..literal.len - super::QUOTE_DELIM_LEN];
    let mut pos: u32 = 0;

    while pos < body.len {
        let isEscape = body[pos] == '\\' and pos + 1 < body.len;
        if not isEscape {
            try emitByte(a, body[pos]);
            set pos += 1;
        } else {
            // Decode the character that follows the backslash.
            try emitByte(a, fmt::decodeAsciiEscape(body[pos + 1]));
            set pos += 2;
        }
    }
}
lib/std/arch/rv64/asm/parser.rad
added
+858 -0
| 1 | + | //! Assembler parser pass. |
|
| 2 | + | use std::mem; |
|
| 3 | + | use std::fmt; |
|
| 4 | + | use std::lang::alloc; |
|
| 5 | + | use std::lang::strings; |
|
| 6 | + | use std::lang::parser; |
|
| 7 | + | use std::lang::gen; |
|
| 8 | + | use std::collections::dict; |
|
| 9 | + | use std::arch::rv64::encode; |
|
| 10 | + | use std::arch::rv64; |
|
| 11 | + | ||
| 12 | + | use super::emit; |
|
| 13 | + | use super::scanner; |
|
| 14 | + | ||
/// Parsed memory operand with base register and signed byte offset.
record MemOperand {
    /// Base register inside the memory operand parentheses.
    base: gen::Reg,
    /// Signed byte offset preceding the base register.
    offset: i32,
}
| 22 | + | ||
/// Parse assembler source into the supplied assembler state.
///
/// Parses top-level items one at a time until the scanner reports end of
/// input; the first error thrown by `parseItem` aborts the parse.
export fn parseProgram(a: *mut super::Assembler) throws (super::Error) {
    // Prime the scanner so that `current` holds the first token.
    advance(a);

    while a.scan.current.kind <> scanner::TokenKind::Eof {
        try parseItem(a);
    }
}
| 31 | + | ||
/// Round `value` up to a multiple of `alignment`, or return nil when adding
/// the padding would overflow `u32`.
// NOTE(review): `alignment` is assumed to be non-zero; `alignment - 1` is
// computed unguarded — confirm callers validate it.
fn checkedAlignUp(value: u32, alignment: u32) -> ?u32 {
    // Largest input that can still absorb `alignment - 1` bytes of padding.
    let limit = parser::U32_MAX - (alignment - 1);
    if value <= limit {
        return mem::alignUp(value, alignment);
    }
    return nil;
}
| 40 | + | ||
/// Advance the parser by one token, preserving the previous token.
fn advance(a: *mut super::Assembler) {
    // Save the current token first: the next line overwrites it.
    set a.scan.previous = a.scan.current;
    set a.scan.current = scanner::next(&mut a.scan);
}
| 46 | + | ||
/// Advance past the current token when it has `kind`.
///
/// Returns `true` when a token was consumed, `false` when the current token
/// did not match and the parser was left untouched.
fn consume(a: *mut super::Assembler, kind: scanner::TokenKind) -> bool {
    if a.scan.current.kind <> kind {
        return false;
    }
    advance(a);
    return true;
}
| 55 | + | ||
/// Build an `Error::Invalid` located at the current token.
fn fail(a: *super::Assembler, message: *[u8]) -> super::Error {
    let offset = a.scan.current.offset;
    return super::Error::Invalid { offset, message };
}
| 60 | + | ||
/// Build an `Error::Invalid` located at the source offset of `tok`.
fn failOnToken(tok: scanner::Token, message: *[u8]) -> super::Error {
    let offset = tok.offset;
    return super::Error::Invalid { offset, message };
}
| 65 | + | ||
/// Check that the assembler is currently in the data section.
///
/// Throws an error located at `tok` when a data directive appears elsewhere.
fn expectDataSection(a: *super::Assembler, tok: scanner::Token) throws (super::Error) {
    if a.section == super::Section::Data {
        return;
    }
    throw failOnToken(tok, "data directive is only valid in the data section");
}
| 72 | + | ||
/// Consume a token of `kind`, or throw `message` at the current token when
/// the current token has a different kind.
fn expect(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) throws (super::Error) {
    if consume(a, kind) {
        return;
    }
    throw fail(a, message);
}
| 79 | + | ||
/// Consume a token of `kind` and return it, or throw `message` at the
/// current token when the kinds differ.
fn expectToken(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) -> scanner::Token throws (super::Error) {
    if not consume(a, kind) {
        throw fail(a, message);
    }
    // After a successful consume the matched token is the previous one.
    return a.scan.previous;
}
| 85 | + | ||
/// Check, without consuming anything, that the current token is the `;`
/// that terminates an item; otherwise throw `message` at the current token.
fn expectTerminator(a: *super::Assembler, message: *[u8]) throws (super::Error) {
    if a.scan.current.kind == scanner::TokenKind::Semicolon {
        return;
    }
    throw fail(a, message);
}
| 92 | + | ||
/// Narrow `value` to `i32`, throwing `message` at the current token when it
/// lies outside the signed 32-bit range.
fn expectI32Value(a: *super::Assembler, value: i64, message: *[u8]) -> i32 throws (super::Error) {
    let tooSmall = value < -super::I32_MIN_MAGNITUDE;
    let tooLarge = value > super::I32_MAX_VALUE;
    if tooSmall or tooLarge {
        throw fail(a, message);
    }
    return value as i32;
}
| 100 | + | ||
/// Narrow `value` to `i32` after checking it with `encode::isSmallImm64`;
/// throws "immediate out of range" at the current token otherwise.
fn expectSmallImmValue(a: *super::Assembler, value: i64) -> i32 throws (super::Error) {
    if encode::isSmallImm64(value) {
        return value as i32;
    }
    throw fail(a, "immediate out of range");
}
| 108 | + | ||
/// Define the label `name` at the current text or data offset.
///
/// Throws "duplicate label" at `tok` when `name` is already in the symbol map.
fn defineSymbol(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
    let alreadyDefined = dict::get(&a.symbolMap, name) <> nil;
    if alreadyDefined {
        throw failOnToken(tok, "duplicate label");
    }
    emit::defineSymbol(a, name);
}
| 116 | + | ||
/// Emit a parsed integer data value after applying source-level range checks.
///
/// Word values must fit in the signed 32-bit range; dword values are emitted
/// unchecked.
fn emitDataValue(a: *mut super::Assembler, value: i64, width: super::DataWidth) throws (super::Error) {
    match width {
        case super::DataWidth::Word => {
            let checked = try expectI32Value(a, value, "word literal out of range");
            try emit::emitDataValue(a, checked as i64, width);
        }
        case super::DataWidth::Dword => {
            try emit::emitDataValue(a, value, width);
        }
    }
}
| 126 | + | ||
/// Parse a name made of one or more `::`-separated segments and intern it.
///
/// The first segment must be a token of `kind` (else `message` is thrown);
/// later segments must be identifiers. `trimPrefix` bytes are dropped from the
/// start of the first token. The interned name is the source text from there
/// to the end of the last segment.
fn parseScopedName(
    a: *mut super::Assembler,
    kind: scanner::TokenKind,
    message: *[u8],
    trimPrefix: u32
) -> *[u8] throws (super::Error) {
    let head = try expectToken(a, kind, message);
    let nameStart = head.offset + trimPrefix;
    let mut nameEnd = head.offset + head.source.len;

    while consume(a, scanner::TokenKind::ColonColon) {
        let part = try expectToken(a, scanner::TokenKind::Ident, "expected identifier after `::`");
        // Extend the name to cover the segment just parsed.
        set nameEnd = part.offset + part.source.len;
    }
    let text = &a.scan.source[nameStart..nameEnd];
    return strings::intern(a.scan.pool, text);
}
| 144 | + | ||
/// Parse a bare (sigil-free) symbol name, possibly `::`-scoped, starting at
/// an identifier token.
fn parseSymbolName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    let name = try parseScopedName(a, scanner::TokenKind::Ident, "expected symbol name", 0);
    return name;
}
| 149 | + | ||
/// Return `true` when [`tok`] is a plain or quoted label token kind.
fn isLabel(tok: scanner::TokenKind) -> bool {
    match tok {
        case scanner::TokenKind::Label, scanner::TokenKind::QuotedLabel => return true,
        else => return false,
    }
}
| 154 | + | ||
/// Parse the contents of a quoted label token, decoding escapes as needed.
///
/// Throws "expected label name" when the current token is not a quoted label.
/// Panics when the arena cannot provide storage for the decoded name.
fn parseQuotedLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    let tok = try expectToken(a, scanner::TokenKind::QuotedLabel, "expected label name");
    // Skip the label sigil and opening quote; stop before the closing quote.
    let rawStart = super::LABEL_SIGIL_LEN + super::QUOTE_DELIM_LEN;
    let raw = &tok.source[rawStart..tok.source.len - super::QUOTE_DELIM_LEN];
    // Scratch buffer the same length as the raw text.
    let storage = try alloc::allocSlice(a.arena, 1, 1, raw.len) catch {
        panic "asm: out of memory allocating quoted label";
    } as *mut [u8];
    let len = fmt::unescapeString(raw, storage);

    return strings::intern(a.scan.pool, &storage[..len]);
}
| 167 | + | ||
/// Parse the name of a label reference or definition, in either its plain
/// (possibly `::`-scoped) or quoted form.
fn parseLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    if a.scan.current.kind <> scanner::TokenKind::QuotedLabel {
        // Plain label: drop the leading sigil from the interned name.
        return try parseScopedName(a, scanner::TokenKind::Label, "expected label name", super::LABEL_SIGIL_LEN);
    }
    return try parseQuotedLabelName(a);
}
| 175 | + | ||
/// Consume a directive token and return its name with the leading sigil
/// removed.
fn parseDirectiveName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    let tok = try expectToken(a, scanner::TokenKind::Directive, "expected directive name");
    let text = tok.source;
    return &text[super::DIRECTIVE_SIGIL_LEN..];
}
| 181 | + | ||
/// Parse one top-level item: an instruction, a label definition, or a
/// directive.
///
/// Instructions and directives must end with `;`; label definitions take no
/// terminator. Any other leading token is an error.
fn parseItem(a: *mut super::Assembler) throws (super::Error) {
    // Keep the item's first token so errors can point at it.
    let start = a.scan.current;

    match start.kind {
        case scanner::TokenKind::Ident => {
            let mnemonic = try parseSymbolName(a);
            try parseInstruction(a, mnemonic, start);
            try expect(a, scanner::TokenKind::Semicolon, "expected `;` after instruction");
        }
        case scanner::TokenKind::Number => {
            advance(a);
            throw failOnToken(start, "unexpected number at top level");
        }
        case scanner::TokenKind::Label, scanner::TokenKind::QuotedLabel => {
            let label = try parseLabelName(a);
            try defineSymbol(a, label, start);
        }
        case scanner::TokenKind::Directive => {
            let directive = try parseDirectiveName(a);
            try parseDirective(a, directive, start);
            try expect(a, scanner::TokenKind::Semicolon, "expected `;` after directive");
        }
        else => throw fail(a, "expected label, instruction, or directive"),
    }
}
| 210 | + | ||
/// Binary-search a sorted descriptor table of `len` entries for `name`.
///
/// `getName` returns the name stored at a given index. Returns the index of
/// the matching entry, or nil when `name` is absent.
// NOTE(review): any `mem::cmp` result other than -1 or 1 is treated as a
// match — confirm it only ever returns -1, 0 or 1.
fn findSortedNameIndex(name: *[u8], len: u32, getName: fn(u32) -> *[u8]) -> ?u32 {
    // Search window is the half-open range [lo, hi).
    let mut lo: u32 = 0;
    let mut hi: u32 = len;

    while lo < hi {
        let probe = lo + ((hi - lo) / 2);

        match mem::cmp(name, getName(probe)) {
            case -1 => set hi = probe,
            case 1 => set lo = probe + 1,
            else => return probe,
        }
    }
    return nil;
}
| 228 | + | ||
/// Name accessor passed to [`findSortedNameIndex`]: the mnemonic of the
/// instruction table entry at `index`.
fn instructionNameAt(index: u32) -> *[u8] {
    let entry = super::INSTRUCTIONS[index];
    return entry.name;
}
| 233 | + | ||
/// Name accessor passed to [`findSortedNameIndex`]: the name of the
/// directive table entry at `index`.
fn directiveNameAt(index: u32) -> *[u8] {
    let entry = super::DIRECTIVES[index];
    return entry.name;
}
| 238 | + | ||
/// Name accessor passed to [`findSortedNameIndex`]: the name of the
/// register table entry at `index`.
fn registerNameAt(index: u32) -> *[u8] {
    let entry = super::REGISTERS[index];
    return entry.name;
}
| 243 | + | ||
/// Name accessor passed to [`findSortedNameIndex`]: the name of the CSR
/// table entry at `index`.
fn csrNameAt(index: u32) -> *[u8] {
    let entry = super::CSRS[index];
    return entry.name;
}
| 248 | + | ||
/// Find the encoder descriptor for the instruction mnemonic `name`, or nil
/// when the mnemonic is not in the instruction table.
fn lookupInstruction(name: *[u8]) -> ?super::InstructionEncoder {
    let found = findSortedNameIndex(name, super::INSTRUCTIONS.len, instructionNameAt)
        else return nil;
    return super::INSTRUCTIONS[found].encoder;
}
| 256 | + | ||
/// Map a directive name to its kind, or nil when the name is not in the
/// directive table.
fn classifyDirective(name: *[u8]) -> ?super::DirectiveKind {
    let found = findSortedNameIndex(name, super::DIRECTIVES.len, directiveNameAt)
        else return nil;
    return super::DIRECTIVES[found].kind;
}
| 264 | + | ||
/// Resolve a register name (with its `%` prefix already removed) to a
/// register, or nil when the name is not in the register table.
fn lookupRegister(name: *[u8]) -> ?gen::Reg {
    let found = findSortedNameIndex(name, super::REGISTERS.len, registerNameAt)
        else return nil;
    return super::REGISTERS[found].reg;
}
| 272 | + | ||
/// Resolve a CSR name to its CSR number, or nil when the name is not in the
/// CSR table.
fn lookupCsr(name: *[u8]) -> ?u32 {
    let found = findSortedNameIndex(name, super::CSRS.len, csrNameAt)
        else return nil;
    return super::CSRS[found].csr;
}
| 280 | + | ||
/// Parse the operands of an instruction whose mnemonic `name` has already
/// been consumed, and emit its encoding.
///
/// `tok` is the mnemonic token, used to locate errors. Throws when the
/// assembler is not in the text section or the mnemonic is unknown; operand
/// errors come from the per-form parsers.
fn parseInstruction(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
    if a.section <> super::Section::Text {
        throw failOnToken(tok, "instructions are only valid in the text section");
    }
    let encoder = lookupInstruction(name) else {
        throw failOnToken(tok, "unknown instruction");
    };
    // Dispatch on the operand form recorded in the instruction table.
    match encoder {
        case super::InstructionEncoder::NoOperand { enc } => {
            // Operand-less instructions must be followed directly by `;`.
            try expectTerminator(a, "unexpected operand");
            try emit::emitText(a, enc());
        }
        case super::InstructionEncoder::Li => try parseLi(a),
        case super::InstructionEncoder::La => try parseLa(a),
        case super::InstructionEncoder::RR { enc } => try parseRR(a, enc),
        case super::InstructionEncoder::RRR { enc } => try parseRRR(a, enc),
        case super::InstructionEncoder::RRI { enc } => try parseRRI(a, enc),
        case super::InstructionEncoder::Shift { enc } =>
            try parseShift(a, enc, super::SHIFT_LIMIT, "shift amount out of range"),
        case super::InstructionEncoder::WordShift { enc } =>
            try parseShift(a, enc, super::WORD_SHIFT_LIMIT, "word shift amount out of range"),
        case super::InstructionEncoder::Load { enc } => try parseLoad(a, enc),
        case super::InstructionEncoder::Store { enc } => try parseStore(a, enc),
        case super::InstructionEncoder::Branch { op } => try parseBranch(a, op),
        case super::InstructionEncoder::BranchZero { op } => try parseBranchZero(a, op),
        case super::InstructionEncoder::Jal => try parseJal(a),
        case super::InstructionEncoder::Jump { rd } => try parseJ(a, rd),
        case super::InstructionEncoder::RdCsr { enc } => try parseRdCsr(a, enc),
        case super::InstructionEncoder::CsrRs1 { enc } => try parseCsrRs1(a, enc),
        case super::InstructionEncoder::Csrrw => try parseCsrrw(a),
        case super::InstructionEncoder::Csrsi => try parseCsrsi(a),
        case super::InstructionEncoder::Upper { enc } => try parseUpper(a, enc),
    }
}
| 319 | + | ||
/// Parse the `li` pseudo-instruction: a destination register and a value.
///
/// Values accepted by `encode::isSmallImm64` become a single `addi` from the
/// zero register. Larger values must fit in `i32` and become `lui` + `addi`;
/// anything else throws "li immediate out of range".
// NOTE(review): on RV64 `lui` sign-extends its result — confirm the `lui` +
// `addi` pair is correct for values just below 2^31.
fn parseLi(a: *mut super::Assembler) throws (super::Error) {
    let rd = try parseRegister(a);
    let value = try parseValue(a);

    if not encode::isSmallImm64(value) {
        let imm = try expectI32Value(a, value, "li immediate out of range");
        let parts = rv64::emit::splitImm(imm);

        try emit::emitText(a, encode::lui(rd, parts.hi));
        try emit::emitText(a, encode::addi(rd, rd, parts.lo));
        return;
    }
    try emit::emitText(a, encode::addi(rd, rv64::ZERO, value as i32));
}
| 334 | + | ||
/// Parse the `la` pseudo-instruction: a destination register and a label.
///
/// The address is not known yet, so two instruction slots are reserved and
/// an `Addr` fixup is recorded at the first one.
fn parseLa(a: *mut super::Assembler) throws (super::Error) {
    let rd = try parseRegister(a);
    let label = try parseLabelName(a);
    // Word index of the first reserved slot.
    let fixup = super::FixupInfo::Addr { rd, index: a.text.len };

    try emit::recordTextFixup(a, label, fixup, 2);
}
| 343 | + | ||
/// Parse a CSR instruction whose operands are a destination register
/// followed by a CSR, and emit the word produced by `enc`.
fn parseRdCsr(a: *mut super::Assembler, enc: fn(gen::Reg, u32) -> u32) throws (super::Error) {
    let rd = try parseRegister(a);
    let csr = try parseCsr(a);
    let word = enc(rd, csr);

    try emit::emitText(a, word);
}
| 351 | + | ||
/// Parse a CSR instruction whose operands are a CSR followed by a source
/// register, and emit the word produced by `enc`.
fn parseCsrRs1(a: *mut super::Assembler, enc: fn(u32, gen::Reg) -> u32) throws (super::Error) {
    let csr = try parseCsr(a);
    let rs1 = try parseRegister(a);
    let word = enc(csr, rs1);

    try emit::emitText(a, word);
}
| 359 | + | ||
/// Parse `csrrw`: destination register, CSR, then source register.
fn parseCsrrw(a: *mut super::Assembler) throws (super::Error) {
    let rd = try parseRegister(a);
    let csr = try parseCsr(a);
    let rs1 = try parseRegister(a);
    let word = encode::csrrw(rd, csr, rs1);

    try emit::emitText(a, word);
}
| 368 | + | ||
/// Parse a CSR immediate instruction: a CSR followed by an immediate.
///
/// The immediate must be in `0..CSR_IMM_LIMIT`; otherwise
/// "CSR immediate out of range" is thrown at the current token.
fn parseCsrsi(a: *mut super::Assembler) throws (super::Error) {
    let csr = try parseCsr(a);
    let imm = try parseValue(a);

    let isNegative = imm < 0;
    let isTooLarge = imm >= super::CSR_IMM_LIMIT;
    if isNegative or isTooLarge {
        throw fail(a, "CSR immediate out of range");
    }
    try emit::emitText(a, encode::csrsi(csr, imm as u32));
}
| 378 | + | ||
/// Handle an instruction with two register operands, `rd, rs`, and emit the
/// word produced by `enc`.
fn parseRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg) -> u32) throws (super::Error) {
    let dest = try parseRegister(a);
    let source = try parseRegister(a);
    let word = enc(dest, source);

    try emit::emitText(a, word);
}
|
| 386 | + | ||
/// Handle an instruction with three register operands, `rd, rs1, rs2`, and
/// emit the word produced by `enc`.
fn parseRRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32) throws (super::Error) {
    let dest = try parseRegister(a);
    let lhs = try parseRegister(a);
    let rhs = try parseRegister(a);
    let word = enc(dest, lhs, rhs);

    try emit::emitText(a, word);
}
|
| 395 | + | ||
/// Handle a register-register-immediate instruction, `rd, rs1, imm`.
///
/// The immediate is read with `parseSmallImm`, which is the range check used
/// here; the resulting word comes from `enc`.
fn parseRRI(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
    let dest = try parseRegister(a);
    let source = try parseRegister(a);
    let immediate = try parseSmallImm(a);
    let word = enc(dest, source, immediate);

    try emit::emitText(a, word);
}
|
| 404 | + | ||
/// Handle a shift-by-immediate instruction, `rd, rs1, shamt`.
///
/// The shift amount must satisfy `0 <= shamt < limit`. A negative amount is
/// reported with a fixed message, an amount at or above `limit` with the
/// caller-supplied `message`.
fn parseShift(
    a: *mut super::Assembler,
    enc: fn(gen::Reg, gen::Reg, i32) -> u32,
    limit: i32,
    message: *[u8]
) throws (super::Error) {
    let dest = try parseRegister(a);
    let source = try parseRegister(a);
    let amount = try parseValue(a);

    if amount < 0 {
        throw fail(a, "shift amount must be non-negative");
    }
    // Compare in 64 bits so the narrowing cast below only sees in-range values.
    let bound = limit as i64;
    if amount >= bound {
        throw fail(a, message);
    }
    let word = enc(dest, source, amount as i32);

    try emit::emitText(a, word);
}
|
| 426 | + | ||
/// Handle a load written as `rd, offset(base)`.
///
/// `enc` is called with the destination register, then the base register and
/// offset taken from the memory operand.
fn parseLoad(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
    let dest = try parseRegister(a);
    let mem = try parseMemory(a);
    let word = enc(dest, mem.base, mem.offset);

    try emit::emitText(a, word);
}
|
| 434 | + | ||
/// Handle a store written as `rs2, offset(base)`.
///
/// `enc` is called with the value register first, then the base register and
/// offset taken from the memory operand.
fn parseStore(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
    let valueReg = try parseRegister(a);
    let mem = try parseMemory(a);
    let word = enc(valueReg, mem.base, mem.offset);

    try emit::emitText(a, word);
}
|
| 442 | + | ||
/// Handle a branch that compares two registers: reads `rs1, rs2` and leaves
/// the target operand to `parseBranchLabel`.
fn parseBranch(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) {
    let lhs = try parseRegister(a);
    let rhs = try parseRegister(a);

    try parseBranchLabel(a, op, lhs, rhs);
}
|
| 450 | + | ||
/// Read a label operand when the current token is a label; otherwise return
/// `nil` without consuming anything.
fn parseOptionalLabel(a: *mut super::Assembler) -> ?*[u8] throws (super::Error) {
    if isLabel(a.scan.current.kind) {
        return try parseLabelName(a);
    }
    return nil;
}
|
| 458 | + | ||
/// Read a branch target, which is either a label or a literal offset.
///
/// A label registers a `Branch` fixup at the current text position; a literal
/// offset is validated by `parseBranchImm` and encoded immediately.
fn parseBranchLabel(a: *mut super::Assembler, op: super::BranchOp, rs1: gen::Reg, rs2: gen::Reg) throws (super::Error) {
    // Sampled before the operand is read; nothing is emitted in between.
    let index = a.text.len;

    if let label = try parseOptionalLabel(a) {
        let info = super::FixupInfo::Branch { op, rs1, rs2, index };
        try emit::recordTextFixup(a, label, info, 1);
        return;
    }
    let offset = try parseBranchImm(a);
    let word = emit::encodeBranch(op, rs1, rs2, offset);

    try emit::emitText(a, word);
}
|
| 469 | + | ||
/// Handle a compare-against-zero branch pseudo-instruction: one register is
/// read and `rv64::ZERO` is supplied as the second comparison operand.
fn parseBranchZero(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) {
    let operand = try parseRegister(a);

    try parseBranchLabel(a, op, operand, rv64::ZERO);
}
|
| 475 | + | ||
/// Handle `jal` in its explicit form: read the link register, then let
/// `parseJ` deal with the target.
fn parseJal(a: *mut super::Assembler) throws (super::Error) {
    let link = try parseRegister(a);

    try parseJ(a, link);
}
|
| 481 | + | ||
/// Read the target of `jal` or of a jump pseudo-instruction.
///
/// A label registers a `Jal` fixup at the current text position; a literal
/// offset is validated by `parseJumpImm` and encoded with `encode::jal`.
fn parseJ(a: *mut super::Assembler, rd: gen::Reg) throws (super::Error) {
    // Sampled before the operand is read; nothing is emitted in between.
    let index = a.text.len;

    if let label = try parseOptionalLabel(a) {
        let info = super::FixupInfo::Jal { rd, index };
        try emit::recordTextFixup(a, label, info, 1);
        return;
    }
    let offset = try parseJumpImm(a);
    let word = encode::jal(rd, offset);

    try emit::emitText(a, word);
}
|
| 492 | + | ||
/// Handle an upper-immediate instruction written as `rd, imm`.
///
/// The immediate must lie in `0 ..= super::UPPER_IMM_MAX_VALUE`; it is passed
/// to `enc` as given, after narrowing to `i32`.
fn parseUpper(a: *mut super::Assembler, enc: fn(gen::Reg, i32) -> u32) throws (super::Error) {
    let dest = try parseRegister(a);
    let raw = try parseValue(a);

    if raw >= 0 and raw <= super::UPPER_IMM_MAX_VALUE {
        try emit::emitText(a, enc(dest, raw as i32));
        return;
    }
    throw fail(a, "upper immediate out of range");
}
|
| 502 | + | ||
/// Dispatch a directive whose name token has already been consumed.
///
/// `name` is classified with `classifyDirective`; an unrecognised name is
/// reported against `tok`. `.text` and `.data` switch the active section once
/// `expectTerminator` has accepted the rest of the line. Directives that write
/// data go through `expectDataSection` first, and the others are handed
/// straight to their own parser.
fn parseDirective(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
    let kind = classifyDirective(name) else {
        throw failOnToken(tok, "unknown directive");
    };
    match kind {
        case super::DirectiveKind::Text => {
            try expectTerminator(a, "unexpected operand");
            set a.section = super::Section::Text;
        }
        case super::DirectiveKind::Data => {
            try expectTerminator(a, "unexpected operand");
            set a.section = super::Section::Data;
        }
        case super::DirectiveKind::Align => {
            try parseAlignDirective(a);
        }
        case super::DirectiveKind::Ascii => {
            try expectDataSection(a, tok);
            try parseStringDirective(a);
        }
        case super::DirectiveKind::Byte => {
            try expectDataSection(a, tok);
            try parseByteDirective(a);
        }
        case super::DirectiveKind::Constant => {
            try parseConstantDirective(a);
        }
        case super::DirectiveKind::Dword => {
            try expectDataSection(a, tok);
            try parseIntDirective(a, super::DataWidth::Dword);
        }
        case super::DirectiveKind::Export => {
            try parseExportDirective(a);
        }
        case super::DirectiveKind::Space => {
            try expectDataSection(a, tok);
            try parseSpaceDirective(a);
        }
        case super::DirectiveKind::Word => {
            try expectDataSection(a, tok);
            try parseIntDirective(a, super::DataWidth::Word);
        }
    }
}
|
| 547 | + | ||
/// Handle `.constant NAME EXPR`.
///
/// The expression is evaluated in 64 bits, must pass `expectI32Value`, and is
/// then stored in `a.constMap` under `NAME`.
fn parseConstantDirective(a: *mut super::Assembler) throws (super::Error) {
    let name = try parseSymbolName(a);
    let wide = try parseExpr(a);
    let value = try expectI32Value(a, wide, "constant out of range");

    dict::insert(&mut a.constMap, name, value);
}
|
| 555 | + | ||
/// Handle `.export LABEL`.
///
/// The label is recorded in `a.exportMap`; if a symbol with that name is
/// already present in `a.symbolMap`, it is flagged as exported right away.
fn parseExportDirective(a: *mut super::Assembler) throws (super::Error) {
    let label = try parseLabelName(a);

    dict::insert(&mut a.exportMap, label, 1);
    if let slot = dict::get(&a.symbolMap, label) {
        set a.symbols[slot as u32].isExported = true;
    }
}
|
| 564 | + | ||
/// Handle `.space N` by appending `N` zero bytes to the data section.
///
/// `N` must be non-negative and must not exceed the room left in `a.data`;
/// the latter is reported as `DataOverflow` before any byte is written.
fn parseSpaceDirective(a: *mut super::Assembler) throws (super::Error) {
    let size = try parseValue(a);
    if size < 0 {
        throw fail(a, "space size must be non-negative");
    }
    let room = a.data.cap - a.data.len;
    if size > room as i64 {
        throw super::Error::DataOverflow;
    }
    // In range for `u32` here: non-negative and bounded by the buffer capacity.
    let total = size as u32;
    for _ in 0..total {
        try emit::emitByte(a, 0);
    }
}
|
| 579 | + | ||
/// Handle `.align N` for whichever section is currently active.
///
/// `N` must be a positive power of two that fits in `u32`. In the text
/// section it must also be a multiple of the instruction size, and the gap is
/// filled through `emit::emitTextPadding`; in the data section the gap is
/// filled with zero bytes. Running out of buffer space is reported as
/// `TextOverflow` or `DataOverflow`.
fn parseAlignDirective(a: *mut super::Assembler) throws (super::Error) {
    let requested = try parseValue(a);
    if requested <= 0 {
        throw fail(a, "alignment must be positive");
    }
    if requested > super::U32_MAX_VALUE {
        throw fail(a, "alignment out of range");
    }
    let amount = requested as u32;
    // A power of two has a single bit set, so clearing its lowest set bit leaves zero.
    if (amount & (amount - 1)) <> 0 {
        throw fail(a, "alignment must be a power of two");
    }
    match a.section {
        case super::Section::Text => {
            let instrSize = rv64::INSTR_SIZE as u32;
            if amount % instrSize <> 0 {
                throw fail(a, "text alignment must be a multiple of 4");
            }
            // `a.text.len` counts instruction words; alignment is done in bytes.
            let usedBytes = a.text.len * instrSize;
            let targetBytes = checkedAlignUp(usedBytes, amount) else {
                throw super::Error::TextOverflow;
            };
            let padWords = (targetBytes - usedBytes) / instrSize;
            let freeWords = a.text.cap - a.text.len;
            if padWords > freeWords {
                throw super::Error::TextOverflow;
            }
            try emit::emitTextPadding(a, padWords);
        }
        case super::Section::Data => {
            let start = a.data.len;
            let target = checkedAlignUp(start, amount) else {
                throw super::Error::DataOverflow;
            };
            if target > a.data.cap {
                throw super::Error::DataOverflow;
            }
            for _ in start..target {
                try emit::emitByte(a, 0);
            }
        }
    }
}
|
| 621 | + | ||
/// Handle `.byte`, a comma-separated list of byte values.
///
/// Each item is either a character literal or a value in
/// `0 ..= super::U8_MAX_VALUE`. The list ends at the first item that is not
/// followed by a comma.
fn parseByteDirective(a: *mut super::Assembler) throws (super::Error) {
    loop {
        if a.scan.current.kind <> scanner::TokenKind::Char {
            let raw = try parseValue(a);
            if raw < 0 or raw > super::U8_MAX_VALUE {
                throw fail(a, "byte literal out of range");
            }
            try emit::emitByte(a, raw as u8);
        } else {
            let byte = parseCharLiteral(a.scan.current) else {
                throw fail(a, "invalid char literal");
            };
            // The byte is written before the literal token is consumed.
            try emit::emitByte(a, byte);
            advance(a);
        }
        if not consume(a, scanner::TokenKind::Comma) {
            return;
        }
    }
}
|
| 643 | + | ||
/// Handle a fixed-width integer data directive: a comma-separated list of
/// items emitted at `width`.
///
/// An item may be a label (recorded as a data fixup), a character literal, or
/// a plain value. Literal items are written by `emitDataValue`.
fn parseIntDirective(a: *mut super::Assembler, width: super::DataWidth) throws (super::Error) {
    loop {
        if isLabel(a.scan.current.kind) {
            let label = try parseLabelName(a);
            try emit::recordDataFixup(a, label, width);
        } else if a.scan.current.kind == scanner::TokenKind::Char {
            let byte = parseCharLiteral(a.scan.current) else {
                throw fail(a, "invalid char literal");
            };
            advance(a);
            try emitDataValue(a, byte as i64, width);
        } else {
            let raw = try parseValue(a);
            try emitDataValue(a, raw, width);
        }
        if not consume(a, scanner::TokenKind::Comma) {
            return;
        }
    }
}
|
| 664 | + | ||
/// Handle `.ascii`, a comma-separated list of string literals.
///
/// Each literal's token text is passed to `emit::emitDecodedString`. The list
/// ends at the first literal that is not followed by a comma.
fn parseStringDirective(a: *mut super::Assembler) throws (super::Error) {
    loop {
        let strTok = try expectToken(a, scanner::TokenKind::String, "expected string literal");
        try emit::emitDecodedString(a, strTok.source);

        if not consume(a, scanner::TokenKind::Comma) {
            return;
        }
    }
}
|
| 675 | + | ||
/// Read a register token and resolve it to a `gen::Reg`.
///
/// The first byte of the token text (the sigil) is dropped before the lookup.
/// A name `lookupRegister` does not know is reported at the token's offset.
fn parseRegister(a: *mut super::Assembler) -> gen::Reg throws (super::Error) {
    let regTok = try expectToken(a, scanner::TokenKind::Register, "expected register");
    let resolved = lookupRegister(&regTok.source[1..]) else {
        throw super::Error::Invalid { offset: regTok.offset, message: "unknown register" };
    };
    return resolved;
}
|
| 684 | + | ||
/// Parse a simple signed immediate or constant value.
///
/// Accepts an optional standalone `-` followed by a number or a named
/// constant, and returns the resulting 64-bit value.
///
/// Throws when the operand is missing or malformed, and when negating it
/// would not fit in an `i64`.
fn parseValue(a: *mut super::Assembler) -> i64 throws (super::Error) {
    if not consume(a, scanner::TokenKind::Minus) {
        return try parseValuePrimary(a);
    }
    let value = try parseValuePrimary(a);
    // The scanner folds a sign that is directly followed by a digit into the
    // number token, so `- -9223372036854775808` arrives here as a Minus token
    // plus a number equal to `I64_MIN`. Its negation has no `i64`
    // representation, so reject it instead of overflowing.
    if value == parser::I64_MIN {
        throw fail(a, "value out of range");
    }
    return -value;
}
|
| 692 | + | ||
/// Read the unsigned part of a simple immediate: a number token or a named
/// constant. Any other token is an error.
fn parseValuePrimary(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let kind = a.scan.current.kind;

    if kind == scanner::TokenKind::Number {
        return try parseInteger(a);
    }
    if kind == scanner::TokenKind::Ident {
        return try parseConstantValue(a);
    }
    throw fail(a, "expected number or constant");
}
|
| 703 | + | ||
/// Evaluate an additive constant expression: multiplicative terms joined by
/// `+` or `-`, folded left to right in 64-bit arithmetic.
fn parseExpr(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let mut total = try parseExprMul(a);

    while a.scan.current.kind == scanner::TokenKind::Plus or a.scan.current.kind == scanner::TokenKind::Minus {
        // Remember the operator before stepping past it.
        let isAdd = a.scan.current.kind == scanner::TokenKind::Plus;
        advance(a);

        let term = try parseExprMul(a);
        if isAdd {
            set total += term;
        } else {
            set total -= term;
        }
    }
    return total;
}
|
| 721 | + | ||
/// Evaluate a multiplicative constant expression: unary operands joined by
/// `*` or `/`, folded left to right. Dividing by zero is reported as an error.
fn parseExprMul(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let mut product = try parseExprUnary(a);

    while a.scan.current.kind == scanner::TokenKind::Star or a.scan.current.kind == scanner::TokenKind::Slash {
        // Remember the operator before stepping past it.
        let isDivide = a.scan.current.kind == scanner::TokenKind::Slash;
        advance(a);

        let factor = try parseExprUnary(a);
        if isDivide {
            if factor == 0 {
                throw fail(a, "division by zero");
            }
            set product /= factor;
        } else {
            set product *= factor;
        }
    }
    return product;
}
|
| 742 | + | ||
/// Evaluate a unary expression: any number of leading `-` or `+` signs in
/// front of a primary expression.
fn parseExprUnary(a: *mut super::Assembler) -> i64 throws (super::Error) {
    if consume(a, scanner::TokenKind::Minus) {
        let operand = try parseExprUnary(a);
        return -operand;
    }
    if consume(a, scanner::TokenKind::Plus) {
        let operand = try parseExprUnary(a);
        return operand;
    }
    let atom = try parseExprPrimary(a);
    return atom;
}
|
| 753 | + | ||
/// Evaluate an expression atom: a parenthesised expression, a number, or a
/// named constant.
fn parseExprPrimary(a: *mut super::Assembler) -> i64 throws (super::Error) {
    if consume(a, scanner::TokenKind::LParen) {
        let inner = try parseExpr(a);
        try expect(a, scanner::TokenKind::RParen, "expected `)`");
        return inner;
    }

    let kind = a.scan.current.kind;
    if kind == scanner::TokenKind::Number {
        return try parseInteger(a);
    }
    if kind == scanner::TokenKind::Ident {
        return try parseConstantValue(a);
    }
    throw fail(a, "expected expression");
}
|
| 769 | + | ||
/// Read a constant name and return the value stored for it in `a.constMap`,
/// widened to `i64`. An unknown name is reported at the offset of the token
/// that was just consumed.
fn parseConstantValue(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let name = try parseSymbolName(a);
    let offset = a.scan.previous.offset;

    let stored = dict::get(&a.constMap, name) else {
        throw super::Error::Invalid { offset, message: "undefined constant" };
    };
    return stored as i64;
}
|
| 778 | + | ||
/// Read a CSR name and resolve it with `lookupCsr`. An unknown name is
/// reported at the offset of the token that was just consumed.
fn parseCsr(a: *mut super::Assembler) -> u32 throws (super::Error) {
    let name = try parseSymbolName(a);
    let offset = a.scan.previous.offset;

    let resolved = lookupCsr(name) else {
        throw super::Error::Invalid { offset, message: "unknown CSR" };
    };
    return resolved;
}
|
| 787 | + | ||
/// Read a memory operand of the form `offset(base)`.
///
/// The offset may be left out, in which case it is zero. When present it must
/// pass `expectSmallImmValue`.
fn parseMemory(a: *mut super::Assembler) -> MemOperand throws (super::Error) {
    let mut displacement: i32 = 0;

    // Anything other than `(` at this point has to be the explicit offset.
    if a.scan.current.kind <> scanner::TokenKind::LParen {
        let raw = try parseValue(a);
        set displacement = try expectSmallImmValue(a, raw);
    }
    try expect(a, scanner::TokenKind::LParen, "expected `(`");
    let base = try parseRegister(a);
    try expect(a, scanner::TokenKind::RParen, "expected `)`");

    return MemOperand { base, offset: displacement };
}
|
| 800 | + | ||
/// Read a value and check it with `expectSmallImmValue`, returning it as an
/// `i32` (the original doc describes the accepted range as a signed 12-bit field).
fn parseSmallImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    return try expectSmallImmValue(a, raw);
}
|
| 805 | + | ||
/// Read a literal branch offset. It must fit in an `i32` and be accepted by
/// `encode::isBranchImm`; both failures produce the same message.
fn parseBranchImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let offset = try expectI32Value(a, raw, "branch immediate out of range");

    if encode::isBranchImm(offset) {
        return offset;
    }
    throw fail(a, "branch immediate out of range");
}
|
| 814 | + | ||
/// Read a literal jump offset. It must fit in an `i32` and be accepted by
/// `encode::isJumpImm`; both failures produce the same message.
fn parseJumpImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let offset = try expectI32Value(a, raw, "jump immediate out of range");

    if encode::isJumpImm(offset) {
        return offset;
    }
    throw fail(a, "jump immediate out of range");
}
|
| 823 | + | ||
/// Consume a number token and convert its text to an `i64`. Text that
/// `parseIntegerText` rejects is reported against the token.
fn parseInteger(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let numTok = try expectToken(a, scanner::TokenKind::Number, "expected number");
    let parsed = parseIntegerText(numTok.source) else {
        throw failOnToken(numTok, "invalid integer literal");
    };
    return parsed;
}
|
| 832 | + | ||
/// Convert integer literal text to an `i64`.
///
/// Returns `nil` when `fmt::parseInt` rejects the text or when the signed
/// value does not fit in an `i64`.
fn parseIntegerText(text: *[u8]) -> ?i64 {
    let literal = try fmt::parseInt(text) catch {
        return nil;
    };
    let magnitude = literal.magnitude;

    if not literal.negative {
        if magnitude > parser::I64_MAX_MAGNITUDE {
            return nil;
        }
        return magnitude as i64;
    }
    if magnitude > parser::I64_MIN_MAGNITUDE {
        return nil;
    }
    // The most negative value is returned as a constant rather than by
    // negating its magnitude.
    if magnitude == parser::I64_MIN_MAGNITUDE {
        return parser::I64_MIN;
    }
    return -(magnitude as i64);
}
|
| 852 | + | ||
/// Decode a character literal token into a single byte, or return `nil` when
/// `fmt::parseChar` rejects the token text.
fn parseCharLiteral(tok: scanner::Token) -> ?u8 {
    let byte = try fmt::parseChar(tok.source) catch {
        return nil;
    };
    return byte;
}
lib/std/arch/rv64/asm/scanner.rad
added
+315 -0
| 1 | + | //! Assembly-specific lexical scanner. |
|
| 2 | + | @test mod tests; |
|
| 3 | + | ||
| 4 | + | use std::char; |
|
| 5 | + | use std::lang::strings; |
|
| 6 | + | ||
/// Kinds of token the assembler scanner can produce.
export union TokenKind {
    /// End-of-file marker, produced once the input is exhausted.
    Eof,
    /// Invalid token; [`Token::source`] holds an error message instead of source text.
    Invalid,

    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `::`
    ColonColon,
    /// `;`
    Semicolon,
    /// `-`
    Minus,
    /// `+`
    Plus,
    /// `/`
    Slash,
    /// `*`
    Star,

    /// Bare identifier: mnemonics, constants, CSR names, and symbol segments.
    Ident,
    /// Identifier-shaped label; the token text keeps the leading `@`.
    Label,
    /// Quoted label; the token text keeps the leading `@` and the quotes.
    QuotedLabel,
    /// Directive; the token text keeps the leading `.`.
    Directive,
    /// Register; the token text keeps the leading `%`.
    Register,

    /// String literal; the token text keeps the delimiters.
    String,
    /// Character literal; the token text keeps the delimiters.
    Char,
    /// Integer literal.
    Number,
}
|
| 43 | + | ||
/// Where a piece of assembler source came from.
export union SourceKind {
    /// The source was read from the file at `path`.
    File { path: *[u8] },
    /// The source was supplied directly as a string.
    String,
}
|
| 51 | + | ||
/// State of the lexical scanner over one assembler source buffer.
export record Scanner {
    /// Where the source being scanned came from.
    sourceKind: SourceKind,
    /// The bytes being scanned.
    source: *[u8],
    /// Offset in `source` where the token being scanned starts.
    token: u32,
    /// Offset in `source` of the next unread byte.
    cursor: u32,
    /// Token the parser is currently looking at.
    current: Token,
    /// Token the parser consumed most recently.
    previous: Token,
    /// Pool used to intern the text of identifier-shaped tokens.
    pool: *mut strings::Pool,
}
|
| 69 | + | ||
/// A single token: its kind, its text, and where it starts.
export record Token {
    /// What kind of token this is.
    kind: TokenKind,
    /// Text of the token; for `Invalid` tokens this is the error message.
    source: *[u8],
    /// Byte offset of the token within the input buffer.
    offset: u32,
}
|
| 79 | + | ||
/// Build a scanner positioned at the start of `source`.
///
/// `current` and `previous` both start out as an invalid token with an empty
/// message at offset 0; no token is scanned by this call.
export fn scanner(sourceKind: SourceKind, source: *[u8], pool: *mut strings::Pool) -> Scanner {
    let placeholder = invalid(0, "");

    return Scanner {
        sourceKind,
        source,
        token: 0,
        cursor: 0,
        current: placeholder,
        previous: placeholder,
        pool,
    };
}
|
| 93 | + | ||
/// Build an `Invalid` token at `offset` whose `source` field carries `message`.
export fn invalid(offset: u32, message: *[u8]) -> Token {
    let token = Token { kind: TokenKind::Invalid, source: message, offset };
    return token;
}
|
| 98 | + | ||
/// Report whether the cursor has reached or passed the end of the source buffer.
export fn isEof(s: *Scanner) -> bool {
    let atEnd = s.cursor >= s.source.len;
    return atEnd;
}
|
| 103 | + | ||
/// Look at the byte under the cursor without consuming it; `nil` at end of input.
fn current(s: *Scanner) -> ?u8 {
    if not isEof(s) {
        return s.source[s.cursor];
    }
    return nil;
}
|
| 111 | + | ||
/// Look at the byte one past the cursor without consuming anything; `nil`
/// when that position is outside the source buffer.
fn peek(s: *Scanner) -> ?u8 {
    let ahead = s.cursor + 1;

    if ahead >= s.source.len {
        return nil;
    }
    return s.source[ahead];
}
|
| 119 | + | ||
/// Consume the byte under the cursor and return it.
///
/// No end-of-input check is made here; callers are expected to have checked.
fn advance(s: *mut Scanner) -> u8 {
    let consumed = s.source[s.cursor];
    set s.cursor += 1;
    return consumed;
}
|
| 125 | + | ||
/// Consume the byte under the cursor if it equals `expected`.
///
/// Returns `true` when a byte was consumed, and `false` at end of input or on
/// a mismatch (the cursor is then left untouched).
fn consume(s: *mut Scanner, expected: u8) -> bool {
    let ch = current(s) else {
        return false;
    };
    if ch <> expected {
        return false;
    }
    advance(s);
    return true;
}
|
| 134 | + | ||
/// Move the cursor past spaces, tabs, carriage returns, newlines, and `//`
/// line comments.
///
/// A `/` that does not start a comment is left in place for the caller.
fn skipWhitespace(s: *mut Scanner) {
    while let ch = current(s) {
        match ch {
            case ' ', '\n', '\r', '\t' => advance(s),
            case '/' => {
                let following = peek(s) else {
                    return;
                };
                if following <> '/' {
                    return;
                }
                // Skip the comment body; the newline itself is consumed by
                // the next iteration of the outer loop.
                while let lineCh = current(s); lineCh <> '\n' {
                    advance(s);
                }
            }
            else => return,
        }
    }
}
|
| 153 | + | ||
/// Scan and return the next token.
///
/// Leading whitespace and comments are skipped first. The first byte of the
/// token then selects the scanning routine; a byte that starts no known
/// token yields an `Invalid` token.
export fn next(s: *mut Scanner) -> Token {
    skipWhitespace(s);
    set s.token = s.cursor;

    if isEof(s) {
        return tok(s, TokenKind::Eof);
    }
    let first = advance(s);

    if char::isDigit(first) {
        return scanNumber(s);
    }
    if char::isAlpha(first) or first == '_' {
        return scanIdentToken(s, TokenKind::Ident);
    }

    match first {
        case '(' => return tok(s, TokenKind::LParen),
        case ')' => return tok(s, TokenKind::RParen),
        case ',' => return tok(s, TokenKind::Comma),
        case ';' => return tok(s, TokenKind::Semicolon),
        case ':' => {
            // Only the doubled form is accepted by this routine.
            if not consume(s, ':') {
                return invalid(s.token, "unexpected `:`");
            }
            return tok(s, TokenKind::ColonColon);
        }
        case '"' => return scanString(s),
        case '\'' => return scanChar(s),
        case '.' => return scanPrefixedToken(s, TokenKind::Directive, "expected directive name after `.`"),
        case '@' => return scanLabelToken(s),
        case '%' => return scanPrefixedToken(s, TokenKind::Register, "expected register after `%`"),
        case '-' => return scanSignedNumberOrToken(s, TokenKind::Minus),
        case '+' => return scanSignedNumberOrToken(s, TokenKind::Plus),
        case '/' => return tok(s, TokenKind::Slash),
        case '*' => return tok(s, TokenKind::Star),
        else => return invalid(s.token, "unexpected character"),
    }
}
|
| 194 | + | ||
/// Build a token of `kind` whose text is the source range from the token
/// start up to (not including) the cursor.
fn tok(s: *Scanner, kind: TokenKind) -> Token {
    let text = &s.source[s.token..s.cursor];
    return Token { kind, source: text, offset: s.token };
}
|
| 199 | + | ||
/// Consume identifier continuation bytes (letters, digits, `_`) starting at
/// the cursor, stopping at the first byte that is none of these.
fn scanIdentifierBody(s: *mut Scanner) {
    while let ch = current(s); ch == '_' or char::isAlpha(ch) or char::isDigit(ch) {
        advance(s);
    }
}
|
| 206 | + | ||
/// Decide what an already-consumed `+` or `-` means.
///
/// If a digit follows immediately, the sign becomes part of a number token;
/// otherwise a punctuation token of `kind` is returned.
fn scanSignedNumberOrToken(s: *mut Scanner, kind: TokenKind) -> Token {
    let following = current(s) else {
        return tok(s, kind);
    };
    if char::isDigit(following) {
        return scanNumber(s);
    }
    return tok(s, kind);
}
|
| 214 | + | ||
/// Scan a numeric literal.
///
/// On entry the cursor sits just past the literal's first byte, which is
/// either a digit or a sign that is directly followed by a digit. Decimal
/// literals and `0x`/`0X` hexadecimal literals are recognised; no value
/// conversion happens here.
///
/// Returns a `Number` token spanning the literal, or an `Invalid` token when
/// a hex prefix is not followed by at least one hex digit.
fn scanNumber(s: *mut Scanner) -> Token {
    let first = s.source[s.cursor - 1];
    if first == '-' or first == '+' {
        // Step over the first digit so the prefix check below looks at it.
        advance(s);
    }
    if s.source[s.cursor - 1] == '0' {
        if let ch = current(s); ch == 'x' or ch == 'X' {
            advance(s);
            // A prefix at the very end of the input has no digits either, so
            // end of input is rejected the same way as a non-hex byte.
            let firstDigit = current(s) else {
                return invalid(s.token, "invalid hex literal");
            };
            if not char::isHexDigit(firstDigit) {
                return invalid(s.token, "invalid hex literal");
            }
            while let hexCh = current(s); char::isHexDigit(hexCh) {
                advance(s);
            }
            return tok(s, TokenKind::Number);
        }
    }
    while let digit = current(s); char::isDigit(digit) {
        advance(s);
    }
    return tok(s, TokenKind::Number);
}
|
| 238 | + | ||
/// Scan the body of a delimited literal up to and including `delim`.
///
/// Returns a token of `kind` on success, an `Invalid` token when a
/// non-printable byte is found, and `nil` when the input ends before the
/// closing delimiter. The byte after a backslash is consumed without being
/// compared to `delim`.
fn scanCharsUntil(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
    while let ch = current(s); ch <> delim {
        if not char::isPrint(ch) {
            return invalid(s.token, "invalid character");
        }
        // After a backslash, the `advance` below consumes the escaped byte.
        if consume(s, '\\') {
            if isEof(s) {
                return nil;
            }
        }
        advance(s);
    }
    if consume(s, delim) {
        return tok(s, kind);
    }
    return nil;
}
|
| 257 | + | ||
| 258 | + | /// Scan a `"`-delimited string literal; a missing closing quote yields an "unterminated string" token. |

| 259 | + | fn scanString(s: *mut Scanner) -> Token { |

| 260 | + | let literal = scanCharsUntil(s, '"', TokenKind::String) else { |

| 261 | + | return invalid(s.token, "unterminated string"); |

| 262 | + | }; |

| 263 | + | return literal; |

| 264 | + | } |
|
| 265 | + | ||
| 266 | + | /// Scan a `'`-delimited character literal; a missing closing quote yields an "unterminated character" token. |

| 267 | + | fn scanChar(s: *mut Scanner) -> Token { |

| 268 | + | let literal = scanCharsUntil(s, '\'', TokenKind::Char) else { |

| 269 | + | return invalid(s.token, "unterminated character"); |

| 270 | + | }; |

| 271 | + | return literal; |

| 272 | + | } |
|
| 273 | + | ||
| 274 | + | /// Consume the rest of an identifier and return it as a `kind` token whose text is interned in the scanner's string pool. |

| 275 | + | fn scanIdentToken(s: *mut Scanner, kind: TokenKind) -> Token { |

| 276 | + | scanIdentifierBody(s); |

| 277 | + ||
| 278 | + | let text = strings::intern(s.pool, &s.source[s.token..s.cursor]); |

| 279 | + | return Token { kind, source: text, offset: s.token }; |

| 280 | + | } |
|
| 284 | + | ||
| 285 | + | /// Scan the name that must directly follow a sigil. When the next character is a letter or `_`, the result is an interned `kind` token; otherwise it is an invalid token carrying `message`. |

| 286 | + | fn scanPrefixedToken(s: *mut Scanner, kind: TokenKind, message: *[u8]) -> Token { |

| 287 | + | if let first = current(s); char::isAlpha(first) or first == '_' { |

| 288 | + | return scanIdentToken(s, kind); |

| 289 | + | } |

| 290 | + | return invalid(s.token, message); |

| 291 | + | } |
|
| 301 | + | ||
| 302 | + | /// Scan the label that follows `@`: a bare `@name` becomes a `Label` token and `@"quoted"` becomes a `QuotedLabel` token. |

| 303 | + | fn scanLabelToken(s: *mut Scanner) -> Token { |

| 304 | + | let first = current(s) else { |

| 305 | + | return invalid(s.token, "expected label after `@`"); |

| 306 | + | }; |

| 307 | + | if first <> '"' { |

| 308 | + | return scanPrefixedToken(s, TokenKind::Label, "expected label after `@`"); |

| 309 | + | } |

| 310 | + | advance(s); |

| 311 | + | let quoted = scanCharsUntil(s, '"', TokenKind::QuotedLabel) else { |

| 312 | + | return invalid(s.token, "unterminated quoted label"); |

| 313 | + | }; |

| 314 | + | return quoted; |

| 315 | + | } |
lib/std/arch/rv64/asm/scanner/tests.rad
added
+140 -0
| 1 | + | use std::mem; |
|
| 2 | + | use std::testing; |
|
| 3 | + | use std::lang::strings; |
|
| 4 | + | ||
| 5 | + | /// String pool used by assembler scanner tests. |
|
| 6 | + | static TEST_STRING_POOL: strings::Pool = strings::Pool { table: undefined, count: 0 }; |
|
| 7 | + | ||
| 8 | + | /// Create a scanner for test input. |
|
| 9 | + | fn testScanner(source: *[u8]) -> super::Scanner { |
|
| 10 | + | return super::scanner(super::SourceKind::String, source, &mut TEST_STRING_POOL); |
|
| 11 | + | } |
|
| 12 | + | ||
| 13 | + | /// Scanner recognizes assembler-specific sigils and scoped names. |
|
| 14 | + | @test fn testScanRegisterDirectiveAndLabelTokens() throws (testing::TestError) { |
|
| 15 | + | let mut s = testScanner( |
|
| 16 | + | ".text %sp @entry name::tail 42" |
|
| 17 | + | ); |
|
| 18 | + | let directive = super::next(&mut s); |
|
| 19 | + | try testing::expect(directive.kind == super::TokenKind::Directive); |
|
| 20 | + | try testing::expect(mem::eq(directive.source, ".text")); |
|
| 21 | + | ||
| 22 | + | let reg = super::next(&mut s); |
|
| 23 | + | try testing::expect(reg.kind == super::TokenKind::Register); |
|
| 24 | + | try testing::expect(mem::eq(reg.source, "%sp")); |
|
| 25 | + | ||
| 26 | + | let label = super::next(&mut s); |
|
| 27 | + | try testing::expect(label.kind == super::TokenKind::Label); |
|
| 28 | + | try testing::expect(mem::eq(label.source, "@entry")); |
|
| 29 | + | ||
| 30 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident); |
|
| 31 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::ColonColon); |
|
| 32 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident); |
|
| 33 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Number); |
|
| 34 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Eof); |
|
| 35 | + | } |
|
| 36 | + | ||
| 37 | + | /// Keyword-shaped text remains plain assembler identifiers. |
|
| 38 | + | @test fn testScanKeywordShapedAsmNamesRemainAsmTokens() throws (testing::TestError) { |
|
| 39 | + | let mut s = testScanner( |
|
| 40 | + | "and or not align addi .text @label" |
|
| 41 | + | ); |
|
| 42 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident); |
|
| 43 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident); |
|
| 44 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident); |
|
| 45 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident); |
|
| 46 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident); |
|
| 47 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Directive); |
|
| 48 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Label); |
|
| 49 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Eof); |
|
| 50 | + | } |
|
| 51 | + | ||
| 52 | + | /// Quoted labels can spell symbol names that are not identifier-shaped. |
|
| 53 | + | @test fn testScanQuotedLabelToken() throws (testing::TestError) { |
|
| 54 | + | let mut s = testScanner( |
|
| 55 | + | "@\"foo.bar.baz\"" |
|
| 56 | + | ); |
|
| 57 | + | let label = super::next(&mut s); |
|
| 58 | + | try testing::expect(label.kind == super::TokenKind::QuotedLabel); |
|
| 59 | + | try testing::expect(mem::eq(label.source, "@\"foo.bar.baz\"")); |
|
| 60 | + | try testing::expect(super::next(&mut s).kind == super::TokenKind::Eof); |
|
| 61 | + | } |
|
| 62 | + | ||
| 63 | + | /// Sigil-prefixed tokens require the name to start immediately after the sigil. |
|
| 64 | + | @test fn testScanSigilsRequireAdjacency() throws (testing::TestError) { |
|
| 65 | + | let mut regScan = testScanner("% a0"); |
|
| 66 | + | try testing::expect(super::next(&mut regScan).kind == super::TokenKind::Invalid); |
|
| 67 | + | ||
| 68 | + | let mut labelScan = testScanner("@ entry"); |
|
| 69 | + | try testing::expect(super::next(&mut labelScan).kind == super::TokenKind::Invalid); |
|
| 70 | + | ||
| 71 | + | let mut directiveScan = testScanner(". text"); |
|
| 72 | + | try testing::expect(super::next(&mut directiveScan).kind == super::TokenKind::Invalid); |
|
| 73 | + | } |
|
| 74 | + | ||
| 75 | + | /// Scanner reaches EOF after trailing whitespace and comments. |
|
| 76 | + | @test fn testScanProgramEndingWithNewline() throws (testing::TestError) { |
|
| 77 | + | let mut s = testScanner( |
|
| 78 | + | ".text;\n@start\naddi %a0 %zero 42;\nsd %a0 8(%sp);\n// comment\nbeq %a0 %zero @done;\n@done\nret;\n" |
|
| 79 | + | ); |
|
| 80 | + | loop { |
|
| 81 | + | let tok = super::next(&mut s); |
|
| 82 | + | if tok.kind == super::TokenKind::Eof { |
|
| 83 | + | try testing::expect(tok.source.len == 0); |
|
| 84 | + | return; |
|
| 85 | + | } |
|
| 86 | + | } |
|
| 87 | + | } |
|
| 88 | + | ||
| 89 | + | /// Signed numbers scan only the numeric formats supported by the assembler scanner. |
|
| 90 | + | @test fn testScanSignedHexAndUnsupportedNumericForms() throws (testing::TestError) { |
|
| 91 | + | let mut s = testScanner( |
|
| 92 | + | "+0x2a -0b10 45.5" |
|
| 93 | + | ); |
|
| 94 | + | let mut tok = super::next(&mut s); |
|
| 95 | + | try testing::expect(tok.kind == super::TokenKind::Number); |
|
| 96 | + | try testing::expect(mem::eq(tok.source, "+0x2a")); |
|
| 97 | + | ||
| 98 | + | set tok = super::next(&mut s); |
|
| 99 | + | try testing::expect(tok.kind == super::TokenKind::Number); |
|
| 100 | + | try testing::expect(mem::eq(tok.source, "-0")); |
|
| 101 | + | ||
| 102 | + | set tok = super::next(&mut s); |
|
| 103 | + | try testing::expect(tok.kind == super::TokenKind::Ident); |
|
| 104 | + | try testing::expect(mem::eq(tok.source, "b10")); |
|
| 105 | + | ||
| 106 | + | set tok = super::next(&mut s); |
|
| 107 | + | try testing::expect(tok.kind == super::TokenKind::Number); |
|
| 108 | + | try testing::expect(mem::eq(tok.source, "45")); |
|
| 109 | + | ||
| 110 | + | set tok = super::next(&mut s); |
|
| 111 | + | try testing::expect(tok.kind == super::TokenKind::Invalid); |
|
| 112 | + | try testing::expect(mem::eq(tok.source, "expected directive name after `.`")); |
|
| 113 | + | ||
| 114 | + | set tok = super::next(&mut s); |
|
| 115 | + | try testing::expect(tok.kind == super::TokenKind::Number); |
|
| 116 | + | try testing::expect(mem::eq(tok.source, "5")); |
|
| 117 | + | } |
|
| 118 | + | ||
| 119 | + | /// Unterminated string and character literals report invalid tokens. |
|
| 120 | + | @test fn testScanUnterminatedDelimitedLiterals() throws (testing::TestError) { |
|
| 121 | + | let mut stringScan = testScanner("\"unterminated"); |
|
| 122 | + | let stringTok = super::next(&mut stringScan); |
|
| 123 | + | try testing::expect(stringTok.kind == super::TokenKind::Invalid); |
|
| 124 | + | try testing::expect(mem::eq(stringTok.source, "unterminated string")); |
|
| 125 | + | ||
| 126 | + | let mut escapedStringScan = testScanner("\"unterminated\\"); |
|
| 127 | + | let escapedStringTok = super::next(&mut escapedStringScan); |
|
| 128 | + | try testing::expect(escapedStringTok.kind == super::TokenKind::Invalid); |
|
| 129 | + | try testing::expect(mem::eq(escapedStringTok.source, "unterminated string")); |
|
| 130 | + | ||
| 131 | + | let mut charScan = testScanner("'x"); |
|
| 132 | + | let charTok = super::next(&mut charScan); |
|
| 133 | + | try testing::expect(charTok.kind == super::TokenKind::Invalid); |
|
| 134 | + | try testing::expect(mem::eq(charTok.source, "unterminated character")); |
|
| 135 | + | ||
| 136 | + | let mut escapedCharScan = testScanner("'\\"); |
|
| 137 | + | let escapedCharTok = super::next(&mut escapedCharScan); |
|
| 138 | + | try testing::expect(escapedCharTok.kind == super::TokenKind::Invalid); |
|
| 139 | + | try testing::expect(mem::eq(escapedCharTok.source, "unterminated character")); |
|
| 140 | + | } |
lib/std/arch/rv64/asm/tests.rad
added
+211 -0
| 1 | + | //! RV64 assembler tests. |
|
| 2 | + | ||
| 3 | + | use std::testing; |
|
| 4 | + | use std::mem; |
|
| 5 | + | use std::lang::alloc; |
|
| 6 | + | use std::lang::sexpr; |
|
| 7 | + | use std::lang::strings; |
|
| 8 | + | use std::arch::rv64; |
|
| 9 | + | use std::arch::rv64::encode; |
|
| 10 | + | use std::arch::rv64::printer; |
|
| 11 | + | ||
| 12 | + | use super::scanner; |
|
| 13 | + | ||
| 14 | + | static ASM_ARENA_STORAGE: [u8; 65536] = undefined; |
|
| 15 | + | static ASM_TEXT_STORAGE: [u32; 256] = undefined; |
|
| 16 | + | static ASM_DATA_STORAGE: [u8; 1024] = undefined; |
|
| 17 | + | static ASM_STRING_POOL: strings::Pool = strings::Pool { table: undefined, count: 0 }; |
|
| 18 | + | static PRINT_ARENA_STORAGE: [u8; 1024] = undefined; |
|
| 19 | + | static PRINT_BUFFER: [u8; 128] = undefined; |
|
| 20 | + | ||
| 21 | + | fn assembleSource(source: *[u8]) -> super::Program throws (testing::TestError) { |
|
| 22 | + | let mut arena = alloc::new(&mut ASM_ARENA_STORAGE[..]); |
|
| 23 | + | return try super::assemble( |
|
| 24 | + | scanner::SourceKind::String, |
|
| 25 | + | source, |
|
| 26 | + | &mut ASM_TEXT_STORAGE[..], |
|
| 27 | + | &mut ASM_DATA_STORAGE[..], |
|
| 28 | + | &mut arena, |
|
| 29 | + | &mut ASM_STRING_POOL, |
|
| 30 | + | rv64::RO_DATA_BASE |
|
| 31 | + | ) catch { |
|
| 32 | + | throw testing::TestError::Failed; |
|
| 33 | + | }; |
|
| 34 | + | } |
|
| 35 | + | ||
| 36 | + | fn expectAssembleFail(source: *[u8]) throws (testing::TestError) { |
|
| 37 | + | let mut arena = alloc::new(&mut ASM_ARENA_STORAGE[..]); |
|
| 38 | + | try super::assemble( |
|
| 39 | + | scanner::SourceKind::String, |
|
| 40 | + | source, |
|
| 41 | + | &mut ASM_TEXT_STORAGE[..], |
|
| 42 | + | &mut ASM_DATA_STORAGE[..], |
|
| 43 | + | &mut arena, |
|
| 44 | + | &mut ASM_STRING_POOL, |
|
| 45 | + | rv64::RO_DATA_BASE |
|
| 46 | + | ) catch { |
|
| 47 | + | return; |
|
| 48 | + | }; |
|
| 49 | + | throw testing::TestError::Failed; |
|
| 50 | + | } |
|
| 51 | + | ||
| 52 | + | fn printInstrText(instr: u32) -> *[u8] { |
|
| 53 | + | let mut arena = alloc::new(&mut PRINT_ARENA_STORAGE[..]); |
|
| 54 | + | let mut pos: u32 = 0; |
|
| 55 | + | let mut out = sexpr::Output::Buffer { buf: &mut PRINT_BUFFER[..], pos: &mut pos }; |
|
| 56 | + | printer::printInstr(&mut out, &mut arena, instr); |
|
| 57 | + | return &PRINT_BUFFER[..pos]; |
|
| 58 | + | } |
|
| 59 | + | ||
| 60 | + | @test fn testAssemblePercentPrefixedRegisters() throws (testing::TestError) { |
|
| 61 | + | let program = try assembleSource( |
|
| 62 | + | ".text;\naddi %a0 %zero 42;\nsd %a0 8(%sp);\n" |
|
| 63 | + | ); |
|
| 64 | + | try testing::expect(program.text.len == 2); |
|
| 65 | + | try testing::expect(program.text[0] == encode::addi(rv64::A0, rv64::ZERO, 42)); |
|
| 66 | + | try testing::expect(program.text[1] == encode::sd(rv64::A0, rv64::SP, 8)); |
|
| 67 | + | } |
|
| 68 | + | ||
| 69 | + | @test fn testAssembleDataAddressUsesRoDataBase() throws (testing::TestError) { |
|
| 70 | + | let program = try assembleSource( |
|
| 71 | + | ".text;\nla %t0 @value;\n.data;\n.byte 0;\n@value\n.byte 1;\n" |
|
| 72 | + | ); |
|
| 73 | + | try testing::expect(program.text.len == 2); |
|
| 74 | + | try testing::expect(program.text[0] == encode::lui(rv64::T0, 0x10)); |
|
| 75 | + | try testing::expect(program.text[1] == encode::addi(rv64::T0, rv64::T0, 1)); |
|
| 76 | + | } |
|
| 77 | + | ||
| 78 | + | /// `la` of a text label assembles to `auipc` + `addi` with a PC-relative offset, followed by the `ret` at the label. |

| 79 | + | @test fn testAssembleTextAddressUsesPcRelative() throws (testing::TestError) { |

| 80 | + | let program = try assembleSource( |

| 81 | + | ".text;\nla %t0 @target;\n@target\nret;\n" |

| 82 | + | ); |

| 83 | + | try testing::expect(program.text.len == 3); |

| 84 | + | try testing::expect(program.text[0] == encode::auipc(rv64::T0, 0)); |

| 85 | + | try testing::expect(program.text[1] == encode::addi(rv64::T0, rv64::T0, 8)); |

| 86 | + | try testing::expect(program.text[2] == encode::jalr(rv64::ZERO, rv64::RA, 0)); // The third word is the `ret` at `@target`. |

| 87 | + | } |
|
| 86 | + | ||
| 87 | + | @test fn testAssembleQuotedLabelNames() throws (testing::TestError) { |
|
| 88 | + | let program = try assembleSource( |
|
| 89 | + | ".text;\nj @\"foo.bar.baz\";\n@\"foo.bar.baz\"\nret;\n" |
|
| 90 | + | ); |
|
| 91 | + | try testing::expect(program.text.len == 2); |
|
| 92 | + | try testing::expect(program.text[0] == encode::jal(rv64::ZERO, 4)); |
|
| 93 | + | try testing::expect(program.text[1] == encode::jalr(rv64::ZERO, rv64::RA, 0)); |
|
| 94 | + | } |
|
| 95 | + | ||
| 96 | + | @test fn testAssembleGlobalMarksOnlyDeclaredSymbols() throws (testing::TestError) { |
|
| 97 | + | let program = try assembleSource( |
|
| 98 | + | ".text;\n.export @exported;\n@local\nret;\n@exported\nret;\n@late\n.export @late;\nret;\n" |
|
| 99 | + | ); |
|
| 100 | + | try testing::expect(program.symbols.len == 3); |
|
| 101 | + | try testing::expect(not program.symbols[0].isExported); |
|
| 102 | + | try testing::expect(program.symbols[1].isExported); |
|
| 103 | + | try testing::expect(program.symbols[2].isExported); |
|
| 104 | + | } |
|
| 105 | + | ||
| 106 | + | @test fn testAssembleInvalidOperandsFail() throws (testing::TestError) { |
|
| 107 | + | try expectAssembleFail( |
|
| 108 | + | ".text;\nbeq %a0 %a1 @missing;\n" |
|
| 109 | + | ); |
|
| 110 | + | try expectAssembleFail( |
|
| 111 | + | ".text;\naddi a0 zero 1;\n" |
|
| 112 | + | ); |
|
| 113 | + | try expectAssembleFail( |
|
| 114 | + | ".text;\naddi % a0 %zero 1;\n" |
|
| 115 | + | ); |
|
| 116 | + | try expectAssembleFail( |
|
| 117 | + | ".text;\nli %a0 UNKNOWN;\n" |
|
| 118 | + | ); |
|
| 119 | + | try expectAssembleFail( |
|
| 120 | + | ".text;\n@start\nj start;\n" |
|
| 121 | + | ); |
|
| 122 | + | } |
|
| 123 | + | ||
| 124 | + | @test fn testAssembleInvalidSyntaxFails() throws (testing::TestError) { |
|
| 125 | + | try expectAssembleFail( |
|
| 126 | + | ".text;\n@dup\n@dup\nret;\n" |
|
| 127 | + | ); |
|
| 128 | + | try expectAssembleFail( |
|
| 129 | + | ".text;\naddi %a0, %zero, 1\n" |
|
| 130 | + | ); |
|
| 131 | + | try expectAssembleFail( |
|
| 132 | + | ".constant PAGE, 4096;\n" |
|
| 133 | + | ); |
|
| 134 | + | try expectAssembleFail( |
|
| 135 | + | ".text;\naddi %a0, %zero, 1;\n" |
|
| 136 | + | ); |
|
| 137 | + | try expectAssembleFail( |
|
| 138 | + | ".export @kernel::main, @data::sym;\n" |
|
| 139 | + | ); |
|
| 140 | + | } |
|
| 141 | + | ||
| 142 | + | @test fn testAssembleInvalidSectionsFail() throws (testing::TestError) { |
|
| 143 | + | try expectAssembleFail( |
|
| 144 | + | ".data;\n.dword @target;\n.text;\n@target\nret;\n" |
|
| 145 | + | ); |
|
| 146 | + | try expectAssembleFail( |
|
| 147 | + | ".data;\naddi %a0 %zero 1;\n" |
|
| 148 | + | ); |
|
| 149 | + | try expectAssembleFail( |
|
| 150 | + | ".text;\n.byte 1;\n" |
|
| 151 | + | ); |
|
| 152 | + | try expectAssembleFail( |
|
| 153 | + | ".text;\n.word 1;\n" |
|
| 154 | + | ); |
|
| 155 | + | try expectAssembleFail( |
|
| 156 | + | ".text;\n.dword 1;\n" |
|
| 157 | + | ); |
|
| 158 | + | try expectAssembleFail( |
|
| 159 | + | ".text;\n.ascii \"x\";\n" |
|
| 160 | + | ); |
|
| 161 | + | try expectAssembleFail( |
|
| 162 | + | ".data;\n@value\n.byte 1;\n.text;\nj @value;\n" |
|
| 163 | + | ); |
|
| 164 | + | } |
|
| 165 | + | ||
| 166 | + | @test fn testAssembleInvalidDirectivesFail() throws (testing::TestError) { |
|
| 167 | + | try expectAssembleFail( |
|
| 168 | + | ".data;\n.ascii 'x';\n" |
|
| 169 | + | ); |
|
| 170 | + | try expectAssembleFail( |
|
| 171 | + | ".data;\n.byte 1 + 2;\n" |
|
| 172 | + | ); |
|
| 173 | + | try expectAssembleFail( |
|
| 174 | + | ".data;\n.byte 256;\n" |
|
| 175 | + | ); |
|
| 176 | + | try expectAssembleFail( |
|
| 177 | + | ".data;\n.word 2147483648;\n" |
|
| 178 | + | ); |
|
| 179 | + | try expectAssembleFail( |
|
| 180 | + | ".data;\n.space 4294967296;\n" |
|
| 181 | + | ); |
|
| 182 | + | try expectAssembleFail( |
|
| 183 | + | ".data;\n.align 3;\n" |
|
| 184 | + | ); |
|
| 185 | + | try expectAssembleFail( |
|
| 186 | + | ".text;\n.align 12;\n" |
|
| 187 | + | ); |
|
| 188 | + | try expectAssembleFail( |
|
| 189 | + | ".data;\n.align 4294967296;\n" |
|
| 190 | + | ); |
|
| 191 | + | } |
|
| 192 | + | ||
| 193 | + | @test fn testAssembleInvalidImmediateRangesFail() throws (testing::TestError) { |
|
| 194 | + | try expectAssembleFail( |
|
| 195 | + | ".text;\nslli %a0 %a1 64;\n" |
|
| 196 | + | ); |
|
| 197 | + | try expectAssembleFail( |
|
| 198 | + | ".text;\nslli %a0 %a1 4294967296;\n" |
|
| 199 | + | ); |
|
| 200 | + | try expectAssembleFail( |
|
| 201 | + | ".text;\nslliw %a0 %a1 2147483648;\n" |
|
| 202 | + | ); |
|
| 203 | + | try expectAssembleFail( |
|
| 204 | + | ".text;\ncsrsi mstatus 32;\n" |
|
| 205 | + | ); |
|
| 206 | + | } |
|
| 207 | + | ||
| 208 | + | @test fn testPrintInstrUsesPercentPrefixedRegisters() throws (testing::TestError) { |
|
| 209 | + | let text = printInstrText(encode::addi(rv64::A0, rv64::SP, 42)); |
|
| 210 | + | try testing::expect(mem::eq(text, "addi %a0, %sp, 42")); |
|
| 211 | + | } |
lib/std/arch/rv64/emit.rad
+13 -3
| 235 | 235 | labels::recordBlock(&mut e.labels, blockIdx, e.codeLen as i32 * super::INSTR_SIZE); |
|
| 236 | 236 | } |
|
| 237 | 237 | ||
| 238 | 238 | /// Record a function's code offset for call resolution. |
|
| 239 | 239 | export fn recordFuncOffset(e: *mut Emitter, name: *[u8]) { |
|
| 240 | - | assert e.codeLen <= MAX_CODE_LEN; |
|
| 241 | - | dict::insert(&mut e.labels.funcs, name, e.codeLen as i32 * super::INSTR_SIZE); |
|
| 240 | + | recordFuncOffsetAt(e, name, e.codeLen); |
|
| 241 | + | } |
|
| 242 | + | ||
| 243 | + | /// Record a function's code offset at `index` for call resolution. |
|
| 244 | + | export fn recordFuncOffsetAt(e: *mut Emitter, name: *[u8], index: u32) { |
|
| 245 | + | assert index <= MAX_CODE_LEN; |
|
| 246 | + | dict::insert(&mut e.labels.funcs, name, index as i32 * super::INSTR_SIZE); |
|
| 242 | 247 | } |
|
| 243 | 248 | ||
| 244 | 249 | /// Record a function's start position for printing. |
|
| 245 | 250 | export fn recordFunc(e: *mut Emitter, name: *[u8]) { |
|
| 251 | + | recordFuncAt(e, name, e.codeLen); |
|
| 252 | + | } |
|
| 253 | + | ||
| 254 | + | /// Record a function's start position at `index` for printing. |
|
| 255 | + | export fn recordFuncAt(e: *mut Emitter, name: *[u8], index: u32) { |
|
| 246 | 256 | assert e.funcsLen < e.funcs.len, "recordFunc: funcs buffer full"; |
|
| 247 | - | set e.funcs[e.funcsLen] = types::FuncAddr { name, index: e.codeLen }; |
|
| 257 | + | set e.funcs[e.funcsLen] = types::FuncAddr { name, index }; |
|
| 248 | 258 | set e.funcsLen += 1; |
|
| 249 | 259 | } |
|
| 250 | 260 | ||
| 251 | 261 | /// Record a local branch needing later patching. |
|
| 252 | 262 | /// Unconditional jumps use a single slot (J-type, +-1MB range). |
lib/std/arch/rv64/encode.rad
+63 -0
| 53 | 53 | export constant F3_BLT: u32 = 0x4; |
|
| 54 | 54 | export constant F3_BGE: u32 = 0x5; |
|
| 55 | 55 | export constant F3_BLTU: u32 = 0x6; |
|
| 56 | 56 | export constant F3_BGEU: u32 = 0x7; |
|
| 57 | 57 | ||
| 58 | + | // CSR/system operations |
|
| 59 | + | ||
| 60 | + | export constant F3_CSRRW: u32 = 0x1; |
|
| 61 | + | export constant F3_CSRRS: u32 = 0x2; |
|
| 62 | + | export constant F3_CSRRC: u32 = 0x3; |
|
| 63 | + | export constant F3_CSRRWI: u32 = 0x5; |
|
| 64 | + | export constant F3_CSRRSI: u32 = 0x6; |
|
| 65 | + | export constant F3_CSRRCI: u32 = 0x7; |
|
| 66 | + | ||
| 58 | 67 | ////////////////////// |
|
| 59 | 68 | // Funct7 Constants // |
|
| 60 | 69 | ////////////////////// |
|
| 61 | 70 | ||
| 62 | 71 | export constant F7_NORMAL: u32 = 0b0000000; |
| 535 | 544 | /// Environment break (debugger breakpoint). |
|
| 536 | 545 | export fn ebreak() -> u32 { |
|
| 537 | 546 | return encodeI(OP_SYSTEM, super::ZERO, super::ZERO, 0, 1); |
|
| 538 | 547 | } |
|
| 539 | 548 | ||
| 549 | + | /// Pack a CSR instruction word whose source operand is the register `rs1`; every field is masked to its width. |

| 550 | + | fn encodeCsr(op: u32, rd: gen::Reg, csr: u32, funct3: u32, rs1: gen::Reg) -> u32 { |

| 551 | + | return ((csr & 0xFFF) << 20) |

| 552 | + | | ((*rs1 as u32 & 0x1F) << 15) |

| 553 | + | | ((funct3 & 0x07) << 12) |

| 554 | + | | ((*rd as u32 & 0x1F) << 7) |

| 555 | + | | (op & 0x7F); |

| 556 | + | } |
|
| 557 | + | ||
| 558 | + | /// Pack a CSR instruction word whose source operand is a 5-bit immediate; asserts that `imm` is below 32. |

| 559 | + | fn encodeCsrImm(op: u32, rd: gen::Reg, csr: u32, funct3: u32, imm: u32) -> u32 { |

| 560 | + | assert imm < 32; |

| 561 | + | return ((csr & 0xFFF) << 20) |

| 562 | + | | ((imm & 0x1F) << 15) |

| 563 | + | | ((funct3 & 0x07) << 12) |

| 564 | + | | ((*rd as u32 & 0x1F) << 7) |

| 565 | + | | (op & 0x7F); |

| 566 | + | } |
|
| 567 | + | ||
| 568 | + | /// Read `csr` into `rd`; encoded as a CSRRS whose source register is `zero`. |

| 569 | + | export fn csrr(rd: gen::Reg, csr: u32) -> u32 { |

| 570 | + | return encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRS, super::ZERO); |

| 571 | + | } |
|
| 572 | + | ||
| 573 | + | /// Encode `csrrw rd, csr, rs1`: the old CSR value goes to `rd` and `rs1` is written to the CSR. |

| 574 | + | export fn csrrw(rd: gen::Reg, csr: u32, rs1: gen::Reg) -> u32 { |

| 575 | + | return encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRW, rs1); |

| 576 | + | } |
|
| 577 | + | ||
| 578 | + | /// Write `rs1` into `csr`; encoded as a CSRRW whose destination is `zero`, so the old value is discarded. |

| 579 | + | export fn csrw(csr: u32, rs1: gen::Reg) -> u32 { |

| 580 | + | return encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRW, rs1); |

| 581 | + | } |
|
| 582 | + | ||
| 583 | + | /// Clear the bits of `csr` selected by `rs1`; encoded as a CSRRC whose destination is `zero`, so the old value is discarded. |

| 584 | + | export fn csrc(csr: u32, rs1: gen::Reg) -> u32 { |

| 585 | + | return encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRC, rs1); |

| 586 | + | } |
|
| 587 | + | ||
| 588 | + | /// Set the bits of `csr` selected by a 5-bit immediate (`imm` must be below 32); encoded as a CSRRSI whose destination is `zero`, so the old value is discarded. |

| 589 | + | export fn csrsi(csr: u32, imm: u32) -> u32 { |

| 590 | + | return encodeCsrImm(OP_SYSTEM, super::ZERO, csr, F3_CSRRSI, imm); |

| 591 | + | } |
|
| 592 | + | ||
| 593 | + | /// Wait for interrupt (`wfi`): returns the fixed instruction word `0x10500073`. |

| 594 | + | export fn wfi() -> u32 { |

| 595 | + | return 0x10500073; |

| 596 | + | } |
|
| 597 | + | ||
| 598 | + | /// Return from a machine-mode trap (`mret`): returns the fixed instruction word `0x30200073`. |

| 599 | + | export fn mret() -> u32 { |

| 600 | + | return 0x30200073; |

| 601 | + | } |
|
| 602 | + | ||
| 540 | 603 | ///////////////////////// |
|
| 541 | 604 | // Pseudo-instructions // |
|
| 542 | 605 | ///////////////////////// |
|
| 543 | 606 | ||
| 544 | 607 | /// No operation: `addi zero, zero, 0`. |
lib/std/arch/rv64/printer.rad
+4 -4
| 16 | 16 | // Register Names // |
|
| 17 | 17 | ///////////////////// |
|
| 18 | 18 | ||
| 19 | 19 | /// ABI register names. |
|
| 20 | 20 | constant REG_NAMES: [*[u8]; 32] = [ |
|
| 21 | - | "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", |
|
| 22 | - | "fp", "s1", "a0", "a1", "a2", "a3", "a4", "a5", |
|
| 23 | - | "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", |
|
| 24 | - | "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6" |
|
| 21 | + | "%zero", "%ra", "%sp", "%gp", "%tp", "%t0", "%t1", "%t2", |
|
| 22 | + | "%fp", "%s1", "%a0", "%a1", "%a2", "%a3", "%a4", "%a5", |
|
| 23 | + | "%a6", "%a7", "%s2", "%s3", "%s4", "%s5", "%s6", "%s7", |
|
| 24 | + | "%s8", "%s9", "%s10", "%s11", "%t3", "%t4", "%t5", "%t6" |
|
| 25 | 25 | ]; |
|
| 26 | 26 | ||
| 27 | 27 | /// Get register name from number. |
|
| 28 | 28 | fn regName(n: u8) -> *[u8] { |
|
| 29 | 29 | return "?" if n >= 32 else REG_NAMES[n as u32]; |
lib/std/arch/rv64/tests.rad
+42 -0
| 2 | 2 | //! |
|
| 3 | 3 | //! These tests verify that instruction encodings match the RISC-V specification |
|
| 4 | 4 | //! by comparing against known-good values. |
|
| 5 | 5 | ||
| 6 | 6 | use std::testing; |
|
| 7 | + | use std::lang::alloc; |
|
| 8 | + | use std::collections::dict; |
|
| 9 | + | ||
| 7 | 10 | use super::encode; |
|
| 11 | + | use super::asm; |
|
| 12 | + | ||
| 13 | + | static ASSEMBLY_ARENA_STORAGE: [u8; 16777216] = undefined; |
|
| 14 | + | static ASSEMBLY_TEXT_STORAGE: [u32; 2] = undefined; |
|
| 8 | 15 | ||
| 9 | 16 | /// Helper to check encoding equals expected value. |
|
| 10 | 17 | fn expectEncoding(actual: u32, expected: u32) throws (testing::TestError) { |
|
| 11 | 18 | try testing::expect(actual == expected); |
|
| 12 | 19 | } |
|
| 13 | 20 | ||
| 21 | + | @test fn testAddAssemblyExportsOnlyGlobalTextSymbols() throws (testing::TestError) { |
|
| 22 | + | let mut arena = alloc::new(&mut ASSEMBLY_ARENA_STORAGE[..]); |
|
| 23 | + | let symbols = try alloc::allocSlice(&mut arena, @sizeOf(asm::Symbol), @alignOf(asm::Symbol), 2) catch { |
|
| 24 | + | throw testing::TestError::Failed; |
|
| 25 | + | }; |
|
| 26 | + | let mut symbolSlice = @sliceOf((symbols as *mut [asm::Symbol]).ptr, 2, 2); |
|
| 27 | + | set symbolSlice[0] = asm::Symbol { |
|
| 28 | + | name: "local", |
|
| 29 | + | section: asm::Section::Text, |
|
| 30 | + | offset: 0, |
|
| 31 | + | isExported: false, |
|
| 32 | + | }; |
|
| 33 | + | set symbolSlice[1] = asm::Symbol { |
|
| 34 | + | name: "exported", |
|
| 35 | + | section: asm::Section::Text, |
|
| 36 | + | offset: super::INSTR_SIZE, |
|
| 37 | + | isExported: true, |
|
| 38 | + | }; |
|
| 39 | + | ||
| 40 | + | let mut generator = super::beginProgram( |
|
| 41 | + | super::ProgramOptions { entryPatch: super::EntryPatch::None, debug: false }, |
|
| 42 | + | &mut arena |
|
| 43 | + | ); |
|
| 44 | + | super::addAssembly( |
|
| 45 | + | &mut generator, |
|
| 46 | + | asm::Program { text: &ASSEMBLY_TEXT_STORAGE[..], data: &[], symbols: symbolSlice } |
|
| 47 | + | ); |
|
| 48 | + | ||
| 49 | + | try testing::expect(dict::get(&generator.e.labels.funcs, "local") == nil); |
|
| 50 | + | let exportedOffset = dict::get(&generator.e.labels.funcs, "exported") else { |
|
| 51 | + | throw testing::TestError::Failed; |
|
| 52 | + | }; |
|
| 53 | + | try testing::expect(exportedOffset == super::INSTR_SIZE); |
|
| 54 | + | } |
|
| 55 | + | ||
| 14 | 56 | /////////////////////// |
|
| 15 | 57 | // R-type ALU tests // |
|
| 16 | 58 | /////////////////////// |
|
| 17 | 59 | ||
| 18 | 60 | @test fn testEncodeAdd() throws (testing::TestError) { |
lib/std/char.rad
added
+31 -0
| 1 | + | //! ASCII character classification helpers shared across the standard library. |
|
| 2 | + | ||
| 3 | + | @test mod tests; |
|
| 4 | + | ||
| 5 | + | /// Report whether `ch` lies in the ASCII range `'0'..'9'`. |

| 6 | + | export fn isDigit(ch: u8) -> bool { |

| 7 | + | return not (ch < '0' or ch > '9'); |

| 8 | + | } |
|
| 9 | + | ||
| 10 | + | /// Report whether `ch` is an ASCII hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`. |

| 11 | + | export fn isHexDigit(ch: u8) -> bool { |

| 12 | + | return ('A' <= ch and ch <= 'F') |

| 13 | + | or ('a' <= ch and ch <= 'f') |

| 14 | + | or ('0' <= ch and ch <= '9'); |

| 15 | + | } |
|
| 16 | + | ||
| 17 | + | /// Report whether `ch` is one of the two binary digits, `'0'` or `'1'`. |

| 18 | + | export fn isBinDigit(ch: u8) -> bool { |

| 19 | + | return not (ch <> '0' and ch <> '1'); |

| 20 | + | } |
|
| 21 | + | ||
| 22 | + | /// Report whether `ch` is an ASCII letter, upper or lower case. |

| 23 | + | export fn isAlpha(ch: u8) -> bool { |

| 24 | + | return ('A' <= ch and ch <= 'Z') |

| 25 | + | or ('a' <= ch and ch <= 'z'); |

| 26 | + | } |
|
| 27 | + | ||
| 28 | + | /// Report whether `ch` is printable ASCII, i.e. between space and `~` inclusive. |

| 29 | + | export fn isPrint(ch: u8) -> bool { |

| 30 | + | return not (ch < ' ' or ch > '~'); |

| 31 | + | } |
lib/std/char/tests.rad
added
+42 -0
| 1 | + | use std::testing; |
|
| 2 | + | ||
| 3 | + | @test fn testIsDigit() throws (testing::TestError) { |
|
| 4 | + | try testing::expect(super::isDigit('0')); |
|
| 5 | + | try testing::expect(super::isDigit('9')); |
|
| 6 | + | try testing::expectNot(super::isDigit('/')); |
|
| 7 | + | try testing::expectNot(super::isDigit(':')); |
|
| 8 | + | } |
|
| 9 | + | ||
| 10 | + | @test fn testIsHexDigit() throws (testing::TestError) { |
|
| 11 | + | try testing::expect(super::isHexDigit('0')); |
|
| 12 | + | try testing::expect(super::isHexDigit('9')); |
|
| 13 | + | try testing::expect(super::isHexDigit('a')); |
|
| 14 | + | try testing::expect(super::isHexDigit('f')); |
|
| 15 | + | try testing::expect(super::isHexDigit('A')); |
|
| 16 | + | try testing::expect(super::isHexDigit('F')); |
|
| 17 | + | try testing::expectNot(super::isHexDigit('g')); |
|
| 18 | + | try testing::expectNot(super::isHexDigit('G')); |
|
| 19 | + | } |
|
| 20 | + | ||
| 21 | + | @test fn testIsBinDigit() throws (testing::TestError) { |
|
| 22 | + | try testing::expect(super::isBinDigit('0')); |
|
| 23 | + | try testing::expect(super::isBinDigit('1')); |
|
| 24 | + | try testing::expectNot(super::isBinDigit('2')); |
|
| 25 | + | try testing::expectNot(super::isBinDigit('a')); |
|
| 26 | + | } |
|
| 27 | + | ||
| 28 | + | @test fn testIsAlpha() throws (testing::TestError) { |
|
| 29 | + | try testing::expect(super::isAlpha('a')); |
|
| 30 | + | try testing::expect(super::isAlpha('z')); |
|
| 31 | + | try testing::expect(super::isAlpha('A')); |
|
| 32 | + | try testing::expect(super::isAlpha('Z')); |
|
| 33 | + | try testing::expectNot(super::isAlpha('0')); |
|
| 34 | + | try testing::expectNot(super::isAlpha('_')); |
|
| 35 | + | } |
|
| 36 | + | ||
| 37 | + | @test fn testIsPrint() throws (testing::TestError) { |
|
| 38 | + | try testing::expect(super::isPrint(' ')); |
|
| 39 | + | try testing::expect(super::isPrint('~')); |
|
| 40 | + | try testing::expectNot(super::isPrint(31)); |
|
| 41 | + | try testing::expectNot(super::isPrint(127)); |
|
| 42 | + | } |
lib/std/fmt.rad
+168 -0
| 1 | 1 | //! Formatting utilities for converting values to strings. |
|
| 2 | 2 | use super::mem; |
|
| 3 | 3 | ||
| 4 | + | /// Maximum `u64` value. |
|
| 5 | + | export constant U64_MAX: u64 = 0xFFFFFFFFFFFFFFFF; |
|
| 4 | 6 | /// Maximum string length for a formatted u32 (eg. "4294967295"). |
|
| 5 | 7 | export constant U32_STR_LEN: u32 = 10; |
|
| 6 | 8 | /// Maximum string length for a formatted i32 (eg. "-2147483648"). |
|
| 7 | 9 | export constant I32_STR_LEN: u32 = U32_STR_LEN + 1; |
|
| 8 | 10 | /// Maximum string length for a formatted u64 (eg. "18446744073709551615"). |
| 10 | 12 | /// Maximum string length for a formatted i64 (eg. "-9223372036854775808"). |
|
| 11 | 13 | export constant I64_STR_LEN: u32 = 20; |
|
| 12 | 14 | /// Maximum string length for a formatted bool (eg. "false"). |
|
| 13 | 15 | export constant BOOL_STR_LEN: u32 = 5; |
|
| 14 | 16 | ||
| 17 | + | /// Radix/base of a parsed integer literal. |
|
| 18 | + | export union Radix { |
|
| 19 | + | /// Binary literal (0b...). |
|
| 20 | + | Binary, |
|
| 21 | + | /// Decimal literal. |
|
| 22 | + | Decimal, |
|
| 23 | + | /// Hexadecimal literal (0x...). |
|
| 24 | + | Hex, |
|
| 25 | + | } |
|
| 26 | + | ||
| 27 | + | /// Errors reported while parsing literal text. |
|
| 28 | + | export union ParseError { |
|
| 29 | + | /// Literal text was empty or missing required digits. |
|
| 30 | + | Invalid, |
|
| 31 | + | /// Literal contained an invalid digit for its radix. |
|
| 32 | + | InvalidDigit, |
|
| 33 | + | /// Literal value exceeded the supported range. |
|
| 34 | + | Overflow, |
|
| 35 | + | } |
|
| 36 | + | ||
| 37 | + | /// Parsed integer literal metadata. |
|
| 38 | + | export record IntLiteral { |
|
| 39 | + | /// Raw characters that comprised the literal. |
|
| 40 | + | text: *[u8], |
|
| 41 | + | /// Absolute magnitude parsed from the literal. |
|
| 42 | + | magnitude: u64, |
|
| 43 | + | /// Radix used by the literal. |
|
| 44 | + | radix: Radix, |
|
| 45 | + | /// Whether the literal spelled an explicit sign. |
|
| 46 | + | signed: bool, |
|
| 47 | + | /// Whether the literal used a negative sign. |
|
| 48 | + | negative: bool, |
|
| 49 | + | } |
|
| 50 | + | ||
| 15 | 51 | /// Format a u32 by writing it to the provided buffer. |
|
| 16 | 52 | export fn formatU32(val: u32, buffer: *mut [u8]) -> *[u8] { |
|
| 17 | 53 | assert buffer.len >= U32_STR_LEN; |
|
| 18 | 54 | ||
| 19 | 55 | let mut x: u32 = val; |
| 135 | 171 | } else { |
|
| 136 | 172 | try! mem::copy(buffer, "false"); |
|
| 137 | 173 | return &buffer[..5]; |
|
| 138 | 174 | } |
|
| 139 | 175 | } |
|
| 176 | + | ||
| 177 | + | /// Convert a single ASCII digit into its numeric value for the given radix. |
|
| 178 | + | export fn digitFromAscii(ch: u8, radix: u32) -> ?u32 { |
|
| 179 | + | assert radix >= 2 and radix <= 36; |
|
| 180 | + | ||
| 181 | + | // Default to an out-of-range value so non-digits fall through to `nil`. |
|
| 182 | + | let mut value: u32 = 36; |
|
| 183 | + | ||
| 184 | + | if ch >= '0' and ch <= '9' { |
|
| 185 | + | set value = (ch - '0') as u32; |
|
| 186 | + | } else if radix > 10 { |
|
| 187 | + | // Mask to convert ASCII letters to uppercase. |
|
| 188 | + | let upper = ch & 0xDF; |
|
| 189 | + | if upper >= 'A' and upper <= 'Z' { |
|
| 190 | + | set value = (upper - 'A') as u32 + 10; |
|
| 191 | + | } |
|
| 192 | + | } |
|
| 193 | + | if value < radix { |
|
| 194 | + | return value; |
|
| 195 | + | } |
|
| 196 | + | return nil; |
|
| 197 | + | } |
|
| 198 | + | ||
| 199 | + | /// Decode a single-byte ASCII escape. |
|
| 200 | + | export fn decodeAsciiEscape(ch: u8) -> u8 { |
|
| 201 | + | match ch { |
|
| 202 | + | case 'n' => return '\n', |
|
| 203 | + | case 't' => return '\t', |
|
| 204 | + | case 'r' => return '\r', |
|
| 205 | + | case '\\' => return '\\', |
|
| 206 | + | case '"' => return '"', |
|
| 207 | + | case '\'' => return '\'', |
|
| 208 | + | case '0' => return 0, |
|
| 209 | + | else => return ch, |
|
| 210 | + | } |
|
| 211 | + | } |
|
| 212 | + | ||
| 213 | + | /// Parse an integer literal (binary, decimal, or hexadecimal) including an optional sign. |
|
| 214 | + | export fn parseInt(text: *[u8]) -> IntLiteral throws (ParseError) { |
|
| 215 | + | if text.len == 0 { |
|
| 216 | + | throw ParseError::Invalid; |
|
| 217 | + | } |
|
| 218 | + | let first = text[0]; |
|
| 219 | + | let negative = first == '-'; |
|
| 220 | + | let signed: bool = negative or (first == '+'); |
|
| 221 | + | ||
| 222 | + | let mut start: u32 = 0; |
|
| 223 | + | let mut radix: u32 = 10; |
|
| 224 | + | let mut radixType = Radix::Decimal; |
|
| 225 | + | ||
| 226 | + | if signed { |
|
| 227 | + | set start = 1; |
|
| 228 | + | if start >= text.len { |
|
| 229 | + | throw ParseError::Invalid; |
|
| 230 | + | } |
|
| 231 | + | } |
|
| 232 | + | if start + 1 < text.len and text[start] == '0' { |
|
| 233 | + | let prefix = text[start + 1]; |
|
| 234 | + | if prefix == 'x' or prefix == 'X' { |
|
| 235 | + | set radix = 16; |
|
| 236 | + | set radixType = Radix::Hex; |
|
| 237 | + | set start += 2; |
|
| 238 | + | } else if prefix == 'b' or prefix == 'B' { |
|
| 239 | + | set radix = 2; |
|
| 240 | + | set radixType = Radix::Binary; |
|
| 241 | + | set start += 2; |
|
| 242 | + | } |
|
| 243 | + | if start >= text.len { |
|
| 244 | + | throw ParseError::Invalid; |
|
| 245 | + | } |
|
| 246 | + | } |
|
| 247 | + | let mut value: u64 = 0; |
|
| 248 | + | let radix64: u64 = radix as u64; |
|
| 249 | + | for i in start..text.len { |
|
| 250 | + | let ch = text[i]; |
|
| 251 | + | let digit = digitFromAscii(ch, radix) else { |
|
| 252 | + | throw ParseError::InvalidDigit; |
|
| 253 | + | }; |
|
| 254 | + | if value > (U64_MAX / radix64) { |
|
| 255 | + | throw ParseError::Overflow; |
|
| 256 | + | } |
|
| 257 | + | set value *= radix64; |
|
| 258 | + | ||
| 259 | + | if value > U64_MAX - (digit as u64) { |
|
| 260 | + | throw ParseError::Overflow; |
|
| 261 | + | } |
|
| 262 | + | set value += (digit as u64); |
|
| 263 | + | } |
|
| 264 | + | return IntLiteral { |
|
| 265 | + | text, magnitude: value, radix: radixType, signed, negative, |
|
| 266 | + | }; |
|
| 267 | + | } |
|
| 268 | + | ||
| 269 | + | /// Process escape sequences in a raw string, writing the result into `dst`. |
|
| 270 | + | /// Returns the number of bytes written. |
|
| 271 | + | export fn unescapeString(raw: *[u8], dst: *mut [u8]) -> u32 { |
|
| 272 | + | let mut i: u32 = 0; |
|
| 273 | + | let mut j: u32 = 0; |
|
| 274 | + | ||
| 275 | + | while i < raw.len { |
|
| 276 | + | if raw[i] == '\\' and i + 1 < raw.len { |
|
| 277 | + | set dst[j] = decodeAsciiEscape(raw[i + 1]); |
|
| 278 | + | set i += 2; |
|
| 279 | + | } else { |
|
| 280 | + | set dst[j] = raw[i]; |
|
| 281 | + | set i += 1; |
|
| 282 | + | } |
|
| 283 | + | set j += 1; |
|
| 284 | + | } |
|
| 285 | + | return j; |
|
| 286 | + | } |
|
| 287 | + | ||
| 288 | + | /// Parse a single-byte character literal, including the single quotes. |
|
| 289 | + | export fn parseChar(text: *[u8]) -> u8 throws (ParseError) { |
|
| 290 | + | if text.len < 2 { |
|
| 291 | + | throw ParseError::Invalid; |
|
| 292 | + | } |
|
| 293 | + | let raw = &text[1..text.len - 1]; |
|
| 294 | + | if raw.len == 0 { |
|
| 295 | + | throw ParseError::Invalid; |
|
| 296 | + | } |
|
| 297 | + | if raw[0] == '\\' { |
|
| 298 | + | if raw.len <> 2 { |
|
| 299 | + | throw ParseError::Invalid; |
|
| 300 | + | } |
|
| 301 | + | return decodeAsciiEscape(raw[1]); |
|
| 302 | + | } |
|
| 303 | + | if raw.len <> 1 { |
|
| 304 | + | throw ParseError::Invalid; |
|
| 305 | + | } |
|
| 306 | + | return raw[0]; |
|
| 307 | + | } |
lib/std/lang/ast.rad
+2 -25
| 1 | 1 | //! Radiance AST modules. |
|
| 2 | 2 | export mod printer; |
|
| 3 | 3 | ||
| 4 | 4 | use std::io; |
|
| 5 | + | use std::fmt; |
|
| 5 | 6 | use std::lang::alloc; |
|
| 6 | 7 | ||
| 7 | 8 | /// Maximum number of trait methods. |
|
| 8 | 9 | export constant MAX_TRAIT_METHODS: u32 = 8; |
|
| 9 | 10 |
| 75 | 76 | Signed, |
|
| 76 | 77 | /// Unsigned, eg. `u32`. |
|
| 77 | 78 | Unsigned, |
|
| 78 | 79 | } |
|
| 79 | 80 | ||
| 80 | - | /// Radix/base of a number. |
|
| 81 | - | export union Radix { |
|
| 82 | - | /// Binary literal (0b...). |
|
| 83 | - | Binary, |
|
| 84 | - | /// Decimal literal. |
|
| 85 | - | Decimal, |
|
| 86 | - | /// Hexadecimal literal (0x...). |
|
| 87 | - | Hex, |
|
| 88 | - | } |
|
| 89 | - | ||
| 90 | - | /// Parsed integer literal metadata. |
|
| 91 | - | export record IntLiteral { |
|
| 92 | - | /// Raw characters that comprised the literal. |
|
| 93 | - | text: *[u8], |
|
| 94 | - | /// Absolute magnitude parsed from the literal. |
|
| 95 | - | magnitude: u64, |
|
| 96 | - | /// Radix used by the literal. |
|
| 97 | - | radix: Radix, |
|
| 98 | - | /// Whether the literal spelled an explicit sign. |
|
| 99 | - | signed: bool, |
|
| 100 | - | /// Whether the literal used a negative sign. |
|
| 101 | - | negative: bool, |
|
| 102 | - | } |
|
| 103 | - | ||
| 104 | 81 | /// Binary operator kinds used in numeric expressions. |
|
| 105 | 82 | export union BinaryOp { |
|
| 106 | 83 | /// Addition (`+`). |
|
| 107 | 84 | Add, |
|
| 108 | 85 | /// Subtraction (`-`). |
| 616 | 593 | /// String literal like `"Hello World!"`. |
|
| 617 | 594 | String(*[u8]), |
|
| 618 | 595 | /// Identifier expression. |
|
| 619 | 596 | Ident(*[u8]), |
|
| 620 | 597 | /// Numeric literal such as `42` or `0xFF`. |
|
| 621 | - | Number(IntLiteral), |
|
| 598 | + | Number(fmt::IntLiteral), |
|
| 622 | 599 | /// Range expression such as `0..10` or `..`. |
|
| 623 | 600 | Range(Range), |
|
| 624 | 601 | /// Array literal expression. |
|
| 625 | 602 | ArrayLit(*mut [*Node]), |
|
| 626 | 603 | /// Array repeat literal expression. |
lib/std/lang/gen/data.rad
+32 -6
| 40 | 40 | syms: *mut [DataSym], |
|
| 41 | 41 | count: *mut u32, |
|
| 42 | 42 | base: u32, |
|
| 43 | 43 | readOnly: bool |
|
| 44 | 44 | ) -> u32 { |
|
| 45 | - | let mut offset: u32 = 0; |
|
| 45 | + | return layoutSectionAtOffset(items, syms, count, base, 0, readOnly); |
|
| 46 | + | } |
|
| 47 | + | ||
| 48 | + | /// Lay out data symbols for a single section starting at [`startOffset`]. |
|
| 49 | + | export fn layoutSectionAtOffset( |
|
| 50 | + | items: *[il::Data], |
|
| 51 | + | syms: *mut [DataSym], |
|
| 52 | + | count: *mut u32, |
|
| 53 | + | base: u32, |
|
| 54 | + | startOffset: u32, |
|
| 55 | + | readOnly: bool |
|
| 56 | + | ) -> u32 { |
|
| 57 | + | let mut offset: u32 = startOffset; |
|
| 46 | 58 | ||
| 47 | 59 | // Initialized data first. |
|
| 48 | 60 | for i in 0..items.len { |
|
| 49 | 61 | let data = &items[i]; |
|
| 50 | 62 | if data.readOnly == readOnly and not data.isUndefined { |
| 76 | 88 | fnLabels: *labels::Labels, |
|
| 77 | 89 | codeBase: u32, |
|
| 78 | 90 | buf: *mut [u8], |
|
| 79 | 91 | readOnly: bool |
|
| 80 | 92 | ) -> u32 { |
|
| 81 | - | let mut offset: u32 = 0; |
|
| 93 | + | return emitSectionAtOffset(items, dataSymMap, fnLabels, codeBase, buf, readOnly, 0); |
|
| 94 | + | } |
|
| 95 | + | ||
| 96 | + | /// Emit data bytes for a single section starting at `startOffset`. |
|
| 97 | + | export fn emitSectionAtOffset( |
|
| 98 | + | items: *[il::Data], |
|
| 99 | + | dataSymMap: *DataSymMap, |
|
| 100 | + | fnLabels: *labels::Labels, |
|
| 101 | + | codeBase: u32, |
|
| 102 | + | buf: *mut [u8], |
|
| 103 | + | readOnly: bool, |
|
| 104 | + | startOffset: u32 |
|
| 105 | + | ) -> u32 { |
|
| 106 | + | let mut offset: u32 = startOffset; |
|
| 82 | 107 | ||
| 83 | 108 | for i in 0..items.len { |
|
| 84 | 109 | let data = &items[i]; |
|
| 85 | 110 | if data.readOnly == readOnly and not data.isUndefined { |
|
| 86 | 111 | set offset = mem::alignUp(offset, data.alignment); |
|
| 87 | - | assert offset + data.size <= buf.len, "emitSection: buffer overflow"; |
|
| 112 | + | assert offset + data.size <= buf.len, "emitSectionAtOffset: buffer overflow"; |
|
| 88 | 113 | for j in 0..data.values.len { |
|
| 89 | 114 | let v = &data.values[j]; |
|
| 90 | 115 | for _ in 0..v.count { |
|
| 91 | 116 | match v.item { |
|
| 92 | 117 | case il::DataItem::Val { typ, val } => { |
|
| 93 | 118 | let size = il::typeSize(typ); |
|
| 94 | 119 | let valPtr = &val as *u8; |
|
| 95 | 120 | try! mem::copy(&mut buf[offset..], @sliceOf(valPtr, size)); |
|
| 121 | + | ||
| 96 | 122 | set offset += size; |
|
| 97 | 123 | }, |
|
| 98 | 124 | case il::DataItem::Sym(name) => { |
|
| 99 | 125 | let addr = lookupAddr(dataSymMap, name) else { |
|
| 100 | - | panic "emitSection: data symbol not found"; |
|
| 126 | + | panic "emitSectionAtOffset: data symbol not found"; |
|
| 101 | 127 | }; |
|
| 102 | 128 | let addr64: u64 = addr as u64; |
|
| 103 | 129 | let addrPtr = &addr64 as *u8; |
|
| 104 | 130 | ||
| 105 | 131 | try! mem::copy(&mut buf[offset..], @sliceOf(addrPtr, 8)); |
|
| 106 | 132 | ||
| 107 | - | set offset += 8; |
|
| 133 | + | set offset += @sizeOf(u64); |
|
| 108 | 134 | }, |
|
| 109 | 135 | case il::DataItem::Fn(name) => { |
|
| 110 | 136 | let addr = codeBase + labels::funcOffset(fnLabels, name) as u32; |
|
| 111 | 137 | let addr64: u64 = addr as u64; |
|
| 112 | 138 | let addrPtr = &addr64 as *u8; |
|
| 113 | 139 | ||
| 114 | 140 | try! mem::copy(&mut buf[offset..], @sliceOf(addrPtr, 8)); |
|
| 115 | 141 | ||
| 116 | - | set offset += 8; |
|
| 142 | + | set offset += @sizeOf(*u8); |
|
| 117 | 143 | }, |
|
| 118 | 144 | case il::DataItem::Str(s) => { |
|
| 119 | 145 | try! mem::copy(&mut buf[offset..], s); |
|
| 120 | 146 | set offset += s.len; |
|
| 121 | 147 | }, |
lib/std/lang/parser.rad
+23 -118
| 1 | 1 | //! Recursive descent parser for the Radiance programming language. |
|
| 2 | 2 | @test export mod tests; |
|
| 3 | 3 | ||
| 4 | 4 | use std::mem; |
|
| 5 | 5 | use std::io; |
|
| 6 | + | use std::fmt; |
|
| 6 | 7 | use std::lang::alloc; |
|
| 7 | 8 | use std::lang::ast; |
|
| 8 | 9 | use std::lang::strings; |
|
| 9 | 10 | use std::lang::scanner; |
|
| 10 | 11 | ||
| 11 | 12 | /// Maximum `u32` value. |
|
| 12 | 13 | export constant U32_MAX: u32 = 0xFFFFFFFF; |
|
| 14 | + | /// Minimum `i64` value. |
|
| 15 | + | export constant I64_MIN: i64 = -0x8000000000000000; |
|
| 16 | + | /// Largest magnitude representable by a negative `i64`. |
|
| 17 | + | export constant I64_MIN_MAGNITUDE: u64 = 0x8000000000000000; |
|
| 18 | + | /// Maximum representable `i64` magnitude. |
|
| 19 | + | export constant I64_MAX_MAGNITUDE: u64 = 0x7FFFFFFFFFFFFFFF; |
|
| 13 | 20 | /// Maximum representable `u64` value. |
|
| 14 | 21 | export constant U64_MAX: u64 = 0xFFFFFFFFFFFFFFFF; |
|
| 15 | 22 | /// Maximum number of fields in a record. |
|
| 16 | 23 | export constant MAX_RECORD_FIELDS: u32 = 32; |
|
| 17 | 24 |
| 169 | 176 | /// Emit a `true` or `false` literal node. |
|
| 170 | 177 | fn nodeBool(p: *mut Parser, value: bool) -> *ast::Node { |
|
| 171 | 178 | return node(p, ast::NodeValue::Bool(value)); |
|
| 172 | 179 | } |
|
| 173 | 180 | ||
| 174 | - | /// Convert a single ASCII digit into its numeric value for the given radix. |
|
| 175 | - | export fn digitFromAscii(ch: u8, radix: u32) -> ?u32 { |
|
| 176 | - | assert radix >= 2 and radix <= 36; |
|
| 177 | - | ||
| 178 | - | // Default to an out-of-range value so non-digits fall through to `nil`. |
|
| 179 | - | let mut value: u32 = 36; |
|
| 180 | - | ||
| 181 | - | if ch >= '0' and ch <= '9' { |
|
| 182 | - | set value = (ch - '0') as u32; |
|
| 183 | - | } else if radix > 10 { |
|
| 184 | - | // Mask to convert ASCII letters to uppercase. |
|
| 185 | - | let upper = ch & 0xDF; |
|
| 186 | - | if upper >= 'A' and upper <= 'Z' { |
|
| 187 | - | set value = (upper - 'A') as u32 + 10; |
|
| 188 | - | } |
|
| 189 | - | } |
|
| 190 | - | if value < radix { |
|
| 191 | - | return value; |
|
| 192 | - | } |
|
| 193 | - | return nil; |
|
| 194 | - | } |
|
| 195 | - | ||
| 196 | - | /// Parse an integer literal (binary, decimal, or hexadecimal) including an optional sign. |
|
| 197 | - | fn parseIntLiteral(p: *mut Parser, text: *[u8]) -> ast::IntLiteral |
|
| 181 | + | /// Parse an integer literal while mapping shared errors into parser diagnostics. |
|
| 182 | + | fn parseIntLiteral(p: *mut Parser, text: *[u8]) -> fmt::IntLiteral |
|
| 198 | 183 | throws (ParseError) |
|
| 199 | 184 | { |
|
| 200 | - | if text.len == 0 { |
|
| 201 | - | throw failParsing(p, "integer literal is empty"); |
|
| 202 | - | } |
|
| 203 | - | let first = text[0]; |
|
| 204 | - | let negative = first == '-'; |
|
| 205 | - | let signed: bool = negative or (first == '+'); |
|
| 206 | - | ||
| 207 | - | let mut start: u32 = 0; |
|
| 208 | - | let mut radix: u32 = 10; |
|
| 209 | - | let mut radixType = ast::Radix::Decimal; |
|
| 210 | - | ||
| 211 | - | if signed { |
|
| 212 | - | set start = 1; |
|
| 213 | - | if start >= text.len { |
|
| 214 | - | throw failParsing(p, "integer literal requires digits after sign"); |
|
| 215 | - | } |
|
| 216 | - | } |
|
| 217 | - | if start + 1 < text.len and text[start] == '0' { |
|
| 218 | - | let prefix = text[start + 1]; |
|
| 219 | - | if prefix == 'x' or prefix == 'X' { |
|
| 220 | - | set radix = 16; |
|
| 221 | - | set radixType = ast::Radix::Hex; |
|
| 222 | - | set start += 2; |
|
| 223 | - | } else if prefix == 'b' or prefix == 'B' { |
|
| 224 | - | set radix = 2; |
|
| 225 | - | set radixType = ast::Radix::Binary; |
|
| 226 | - | set start += 2; |
|
| 227 | - | } |
|
| 228 | - | if start >= text.len { |
|
| 229 | - | throw failParsing(p, "integer literal prefix must be followed by digits"); |
|
| 185 | + | let literal = try fmt::parseInt(text) catch err { |
|
| 186 | + | match err { |
|
| 187 | + | case fmt::ParseError::Invalid => |
|
| 188 | + | throw failParsing(p, "invalid integer literal"), |
|
| 189 | + | case fmt::ParseError::InvalidDigit => |
|
| 190 | + | throw failParsing(p, "invalid digit in integer literal"), |
|
| 191 | + | case fmt::ParseError::Overflow => |
|
| 192 | + | throw failParsing(p, "integer literal overflow"), |
|
| 230 | 193 | } |
|
| 231 | - | } |
|
| 232 | - | let mut value: u64 = 0; |
|
| 233 | - | let radix64: u64 = radix as u64; |
|
| 234 | - | for i in start..text.len { |
|
| 235 | - | let ch = text[i]; |
|
| 236 | - | let digit = digitFromAscii(ch, radix) else { |
|
| 237 | - | throw failParsing(p, "invalid digit in integer literal"); |
|
| 238 | - | }; |
|
| 239 | - | if value > (U64_MAX / radix64) { |
|
| 240 | - | throw failParsing(p, "integer literal overflow"); |
|
| 241 | - | } |
|
| 242 | - | set value *= radix64; |
|
| 243 | - | ||
| 244 | - | if value > U64_MAX - (digit as u64) { |
|
| 245 | - | throw failParsing(p, "integer literal overflow"); |
|
| 246 | - | } |
|
| 247 | - | set value += (digit as u64); |
|
| 248 | - | } |
|
| 249 | - | return ast::IntLiteral { |
|
| 250 | - | text, magnitude: value, radix: radixType, signed, negative, |
|
| 251 | 194 | }; |
|
| 195 | + | return literal; |
|
| 252 | 196 | } |
|
| 253 | 197 | ||
| 254 | 198 | /// Emit an integer type node. |
|
| 255 | 199 | fn nodeTypeInt(p: *mut Parser, width: u8, sign: ast::Signedness) -> *ast::Node { |
|
| 256 | 200 | return node(p, ast::NodeValue::TypeSig( |
|
| 257 | 201 | ast::TypeSig::Integer { width, sign } |
|
| 258 | 202 | )); |
|
| 259 | 203 | } |
|
| 260 | 204 | ||
| 261 | 205 | /// Emit a number literal node with the provided literal metadata. |
|
| 262 | - | fn nodeNumber(p: *mut Parser, literal: ast::IntLiteral) -> *ast::Node { |
|
| 206 | + | fn nodeNumber(p: *mut Parser, literal: fmt::IntLiteral) -> *ast::Node { |
|
| 263 | 207 | return node(p, ast::NodeValue::Number(literal)); |
|
| 264 | 208 | } |
|
| 265 | 209 | ||
| 266 | 210 | /// Emit a `super` node. |
|
| 267 | 211 | fn nodeSuper(p: *mut Parser) -> *ast::Node { |
|
| 268 | 212 | return node(p, ast::NodeValue::Super); |
|
| 269 | 213 | } |
|
| 270 | 214 | ||
| 271 | - | /// Process escape sequences in a raw string, writing the result into `dst`. |
|
| 272 | - | /// Returns the number of bytes written. |
|
| 273 | - | fn unescapeString(raw: *[u8], dst: *mut [u8]) -> u32 { |
|
| 274 | - | let mut i: u32 = 0; |
|
| 275 | - | let mut j: u32 = 0; |
|
| 276 | - | ||
| 277 | - | while i < raw.len { |
|
| 278 | - | if raw[i] == '\\' and i + 1 < raw.len { |
|
| 279 | - | match raw[i + 1] { |
|
| 280 | - | case 'n' => set dst[j] = '\n', |
|
| 281 | - | case 't' => set dst[j] = '\t', |
|
| 282 | - | case 'r' => set dst[j] = '\r', |
|
| 283 | - | case '\\' => set dst[j] = '\\', |
|
| 284 | - | case '"' => set dst[j] = '"', |
|
| 285 | - | case '0' => set dst[j] = 0, |
|
| 286 | - | else => set dst[j] = raw[i + 1], |
|
| 287 | - | } |
|
| 288 | - | set i += 2; |
|
| 289 | - | } else { |
|
| 290 | - | set dst[j] = raw[i]; |
|
| 291 | - | set i += 1; |
|
| 292 | - | } |
|
| 293 | - | set j += 1; |
|
| 294 | - | } |
|
| 295 | - | return j; |
|
| 296 | - | } |
|
| 297 | - | ||
| 298 | 215 | /// Emit a single attribute node. |
|
| 299 | 216 | fn nodeAttribute(p: *mut Parser, attr: ast::Attribute) -> *ast::Node { |
|
| 300 | 217 | return node(p, ast::NodeValue::Attribute(attr)); |
|
| 301 | 218 | } |
|
| 302 | 219 |
| 696 | 613 | advance(p); |
|
| 697 | 614 | return node(p, ast::NodeValue::Undef); |
|
| 698 | 615 | } |
|
| 699 | 616 | case scanner::TokenKind::Char => { |
|
| 700 | 617 | advance(p); |
|
| 701 | - | let src = p.previous.source; |
|
| 702 | - | let mut ch: u8 = 0; |
|
| 703 | - | ||
| 704 | - | if src[1] == '\\' { // Handle escape sequences. |
|
| 705 | - | match src[2] { |
|
| 706 | - | case 'n' => { set ch = '\n'; } |
|
| 707 | - | case 't' => { set ch = '\t'; } |
|
| 708 | - | case 'r' => { set ch = '\r'; } |
|
| 709 | - | case '\'' => { set ch = '\''; } |
|
| 710 | - | case '\\' => { set ch = '\\'; } |
|
| 711 | - | else => { set ch = src[2]; } |
|
| 712 | - | } |
|
| 713 | - | } else { |
|
| 714 | - | set ch = src[1]; |
|
| 715 | - | } |
|
| 618 | + | let ch = try fmt::parseChar(p.previous.source) catch { |
|
| 619 | + | throw failParsing(p, "invalid char literal"); |
|
| 620 | + | }; |
|
| 716 | 621 | return node(p, ast::NodeValue::Char(ch)); |
|
| 717 | 622 | } |
|
| 718 | 623 | case scanner::TokenKind::String => { |
|
| 719 | 624 | advance(p); |
|
| 720 | 625 | let src = p.previous.source; |
|
| 721 | 626 | let raw = &src[1..src.len - 1]; // Strip quotes. |
|
| 722 | 627 | ||
| 723 | 628 | // Process escape sequences into arena buffer. |
|
| 724 | 629 | let buf = alloc::remainingBuf(&mut p.arena.arena); |
|
| 725 | - | let len = unescapeString(raw, buf); |
|
| 630 | + | let len = fmt::unescapeString(raw, buf); |
|
| 726 | 631 | alloc::commit(&mut p.arena.arena, len); |
|
| 727 | 632 | ||
| 728 | 633 | return node(p, ast::NodeValue::String(&buf[..len])); |
|
| 729 | 634 | } |
|
| 730 | 635 | case scanner::TokenKind::Underscore => { |
lib/std/lang/parser/tests.rad
+8 -31
| 1 | 1 | //! Parser tests. |
|
| 2 | 2 | ||
| 3 | 3 | use std::mem; |
|
| 4 | + | use std::fmt; |
|
| 4 | 5 | use std::testing; |
|
| 5 | 6 | use std::lang::ast; |
|
| 6 | 7 | use std::lang::scanner; |
|
| 7 | 8 | use std::lang::strings; |
|
| 8 | 9 |
| 113 | 114 | ||
| 114 | 115 | return root; |
|
| 115 | 116 | } |
|
| 116 | 117 | ||
| 117 | 118 | /// Parse an expression expected to be a number literal and return its payload. |
|
| 118 | - | fn parseNumberLiteral(text: *[u8]) -> ast::IntLiteral |
|
| 119 | + | fn parseNumberLiteral(text: *[u8]) -> fmt::IntLiteral |
|
| 119 | 120 | throws (testing::TestError) |
|
| 120 | 121 | { |
|
| 121 | 122 | let mut arena = ast::nodeArena(&mut ARENA_STORAGE[..]); |
|
| 122 | 123 | let mut parser = super::mkParser(scanner::SourceLoc::String, text, &mut arena, &mut STRING_POOL); |
|
| 123 | 124 | super::advance(&mut parser); |
| 275 | 276 | ||
| 276 | 277 | /// Verify that decimal literals record magnitude and base metadata. |
|
| 277 | 278 | @test fn testParseDecimalLiteralMetadata() throws (testing::TestError) { |
|
| 278 | 279 | let lit = try parseNumberLiteral("1234"); |
|
| 279 | 280 | try testing::expect(lit.magnitude == 1234); |
|
| 280 | - | try testing::expect(lit.radix == ast::Radix::Decimal); |
|
| 281 | + | try testing::expect(lit.radix == fmt::Radix::Decimal); |
|
| 281 | 282 | } |
|
| 282 | 283 | ||
| 283 | 284 | /// Verify that hexadecimal literals record metadata without marking them signed. |
|
| 284 | 285 | @test fn testParseNumberMetadata() throws (testing::TestError) { |
|
| 285 | 286 | let lit = try parseNumberLiteral("0xFF"); |
|
| 286 | 287 | try testing::expect(lit.magnitude == 0xFF); |
|
| 287 | - | try testing::expect(lit.radix == ast::Radix::Hex); |
|
| 288 | + | try testing::expect(lit.radix == fmt::Radix::Hex); |
|
| 288 | 289 | try testing::expect(not lit.signed); |
|
| 289 | 290 | try testing::expect(not lit.negative); |
|
| 290 | 291 | } |
|
| 291 | 292 | ||
| 292 | 293 | /// Verify that binary literals capture their radix. |
|
| 293 | 294 | @test fn testParseBinaryLiteralMetadata() throws (testing::TestError) { |
|
| 294 | 295 | let lit = try parseNumberLiteral("0b1010"); |
|
| 295 | 296 | try testing::expect(lit.magnitude == 0b1010); |
|
| 296 | - | try testing::expect(lit.radix == ast::Radix::Binary); |
|
| 297 | + | try testing::expect(lit.radix == fmt::Radix::Binary); |
|
| 297 | 298 | } |
|
| 298 | 299 | ||
| 299 | 300 | /// Signed literals produced by the scanner keep sign details in metadata. |
|
| 300 | 301 | @test fn testParseSignedLiteralMetadata() throws (testing::TestError) { |
|
| 301 | 302 | let literal = try parseNumberLiteral("42"); |
| 312 | 313 | /// Literals with prefixes still parse correctly when explicitly signed. |
|
| 313 | 314 | @test fn testParseSignedPrefixedLiteral() throws (testing::TestError) { |
|
| 314 | 315 | let hex = try parseNumberLiteral("+0x2A"); |
|
| 315 | 316 | try testing::expect(hex.signed); |
|
| 316 | 317 | try testing::expect(not hex.negative); |
|
| 317 | - | try testing::expect(hex.radix == ast::Radix::Hex); |
|
| 318 | + | try testing::expect(hex.radix == fmt::Radix::Hex); |
|
| 318 | 319 | try testing::expect(hex.magnitude == 0x2A); |
|
| 319 | 320 | ||
| 320 | 321 | let neg = try parseNumberLiteral("-0x2A"); |
|
| 321 | 322 | try testing::expect(neg.signed); |
|
| 322 | 323 | try testing::expect(neg.negative); |
|
| 323 | - | try testing::expect(neg.radix == ast::Radix::Hex); |
|
| 324 | + | try testing::expect(neg.radix == fmt::Radix::Hex); |
|
| 324 | 325 | try testing::expect(neg.magnitude == 0x2A); |
|
| 325 | 326 | ||
| 326 | 327 | let bin = try parseNumberLiteral("-0b11"); |
|
| 327 | 328 | try testing::expect(bin.signed); |
|
| 328 | 329 | try testing::expect(bin.negative); |
|
| 329 | - | try testing::expect(bin.radix == ast::Radix::Binary); |
|
| 330 | + | try testing::expect(bin.radix == fmt::Radix::Binary); |
|
| 330 | 331 | try testing::expect(bin.magnitude == 0b11); |
|
| 331 | 332 | } |
|
| 332 | 333 | ||
| 333 | 334 | /// Range expressions parse with explicit start and end bounds. |
|
| 334 | 335 | @test fn testParseRangeExpr() throws (testing::TestError) { |
| 356 | 357 | try expectNumberLiteralFail("0x1G"); |
|
| 357 | 358 | try expectNumberLiteralFail("0b102"); |
|
| 358 | 359 | try expectNumberLiteralFail("+0x1G"); |
|
| 359 | 360 | } |
|
| 360 | 361 | ||
| 361 | - | /// Ensure digit-to-value conversion covers decimal and hex ranges. |
|
| 362 | - | @test fn testDigitFromAscii() throws (testing::TestError) { |
|
| 363 | - | let zero = super::digitFromAscii('0', 10) else throw testing::TestError::Failed; |
|
| 364 | - | try testing::expect(zero == 0); |
|
| 365 | - | ||
| 366 | - | let nine = super::digitFromAscii('9', 10) else throw testing::TestError::Failed; |
|
| 367 | - | try testing::expect(nine == 9); |
|
| 368 | - | ||
| 369 | - | let lower = super::digitFromAscii('a', 16) else throw testing::TestError::Failed; |
|
| 370 | - | try testing::expect(lower == 10); |
|
| 371 | - | ||
| 372 | - | let lowerF = super::digitFromAscii('f', 16) else throw testing::TestError::Failed; |
|
| 373 | - | try testing::expect(lowerF == 15); |
|
| 374 | - | ||
| 375 | - | let upper = super::digitFromAscii('A', 16) else throw testing::TestError::Failed; |
|
| 376 | - | try testing::expect(upper == 10); |
|
| 377 | - | ||
| 378 | - | let upperF = super::digitFromAscii('F', 16) else throw testing::TestError::Failed; |
|
| 379 | - | try testing::expect(upperF == 15); |
|
| 380 | - | ||
| 381 | - | try testing::expect(super::digitFromAscii('g', 16) == nil); |
|
| 382 | - | try testing::expect(super::digitFromAscii('_', 10) == nil); |
|
| 383 | - | } |
|
| 384 | - | ||
| 385 | 362 | /// Test parsing nil literal. |
|
| 386 | 363 | @test fn testParseNil() throws (testing::TestError) { |
|
| 387 | 364 | let r1 = try! parseExprStr("nil"); |
|
| 388 | 365 | let case ast::NodeValue::Nil = r1.value |
|
| 389 | 366 | else throw testing::TestError::Failed; |
lib/std/lang/scanner.rad
+22 -49
| 2 | 2 | //! |
|
| 3 | 3 | //! This module implements a hand-written scanner that tokenizes Radiance |
|
| 4 | 4 | //! source code into a stream of tokens for consumption by the parser. |
|
| 5 | 5 | @test mod tests; |
|
| 6 | 6 | ||
| 7 | + | use std::char; |
|
| 7 | 8 | use std::mem; |
|
| 8 | 9 | use std::lang::strings; |
|
| 9 | 10 | ||
| 10 | 11 | /// Token kinds representing all lexical elements in Radiance. |
|
| 11 | 12 | /// |
| 298 | 299 | else => return, |
|
| 299 | 300 | } |
|
| 300 | 301 | } |
|
| 301 | 302 | } |
|
| 302 | 303 | ||
| 303 | - | /// Check if character is an ASCII digit (0-9). |
|
| 304 | - | fn isDigit(c: u8) -> bool { |
|
| 305 | - | return c >= '0' and c <= '9'; |
|
| 306 | - | } |
|
| 307 | - | ||
| 308 | - | /// Check if character is a hexadecimal digit (0-9, a-f, A-F). |
|
| 309 | - | fn isHexDigit(c: u8) -> bool { |
|
| 310 | - | return (c >= '0' and c <= '9') |
|
| 311 | - | or (c >= 'a' and c <= 'f') |
|
| 312 | - | or (c >= 'A' and c <= 'F'); |
|
| 313 | - | } |
|
| 314 | - | ||
| 315 | - | /// Check if character is a binary digit (0 or 1). |
|
| 316 | - | fn isBinDigit(c: u8) -> bool { |
|
| 317 | - | return c == '0' or c == '1'; |
|
| 318 | - | } |
|
| 319 | - | ||
| 320 | - | /// Check if character is alphabetic. |
|
| 321 | - | fn isAlpha(c: u8) -> bool { |
|
| 322 | - | return (c >= 'a' and c <= 'z') |
|
| 323 | - | or (c >= 'A' and c <= 'Z'); |
|
| 324 | - | } |
|
| 325 | - | ||
| 326 | - | /// Check if character is printable ASCII. |
|
| 327 | - | fn isPrint(c: u8) -> bool { |
|
| 328 | - | return c >= ' ' and c <= '~'; |
|
| 329 | - | } |
|
| 330 | - | ||
| 331 | 304 | /// Scan numeric literal (decimal, hex, or binary). |
|
| 332 | 305 | fn scanNumber(s: *mut Scanner) -> Token { |
|
| 333 | 306 | let first = s.source[s.cursor - 1]; |
|
| 334 | 307 | if first == '-' or first == '+' { |
|
| 335 | 308 | advance(s); |
| 337 | 310 | // Check for hex literal (`0x` or `0X` prefix). |
|
| 338 | 311 | if s.source[s.cursor - 1] == '0' { |
|
| 339 | 312 | if let ch = current(s); ch == 'x' or ch == 'X' { |
|
| 340 | 313 | advance(s); |
|
| 341 | 314 | // Must have at least one hex digit after `0x`. |
|
| 342 | - | if let ch = current(s); not isHexDigit(ch) { |
|
| 315 | + | if let ch = current(s); not char::isHexDigit(ch) { |
|
| 343 | 316 | return invalid(s.token, "invalid hex literal"); |
|
| 344 | 317 | } |
|
| 345 | - | while let ch = current(s); isHexDigit(ch) { |
|
| 318 | + | while let ch = current(s); char::isHexDigit(ch) { |
|
| 346 | 319 | advance(s); |
|
| 347 | 320 | } |
|
| 348 | 321 | return tok(s, TokenKind::Number); |
|
| 349 | 322 | } |
|
| 350 | 323 | // Check for binary literal (`0b` or `0B` prefix). |
|
| 351 | 324 | if let ch = current(s); ch == 'b' or ch == 'B' { |
|
| 352 | 325 | advance(s); |
|
| 353 | 326 | // Must have at least one binary digit after `0b`. |
|
| 354 | - | if let ch = current(s); not isBinDigit(ch) { |
|
| 327 | + | if let ch = current(s); not char::isBinDigit(ch) { |
|
| 355 | 328 | return invalid(s.token, "invalid binary literal"); |
|
| 356 | 329 | } |
|
| 357 | - | while let ch = current(s); isBinDigit(ch) { |
|
| 330 | + | while let ch = current(s); char::isBinDigit(ch) { |
|
| 358 | 331 | advance(s); |
|
| 359 | 332 | } |
|
| 360 | 333 | return tok(s, TokenKind::Number); |
|
| 361 | 334 | } |
|
| 362 | 335 | } |
|
| 363 | - | ||
| 364 | 336 | // Regular decimal number. |
|
| 365 | - | while let ch = current(s); isDigit(ch) { |
|
| 337 | + | while let ch = current(s); char::isDigit(ch) { |
|
| 366 | 338 | advance(s); |
|
| 367 | 339 | } |
|
| 368 | - | ||
| 369 | 340 | // Look for decimal part. |
|
| 370 | 341 | if let ch = current(s); ch == '.' { |
|
| 371 | - | if let p = peek(s); isDigit(p) { |
|
| 342 | + | if let p = peek(s); char::isDigit(p) { |
|
| 372 | 343 | advance(s); // Consume the "." |
|
| 373 | - | while let ch = current(s); isDigit(ch) { |
|
| 344 | + | while let ch = current(s); char::isDigit(ch) { |
|
| 374 | 345 | advance(s); |
|
| 375 | 346 | } |
|
| 376 | 347 | } |
|
| 377 | 348 | } |
|
| 378 | 349 | return tok(s, TokenKind::Number); |
|
| 379 | 350 | } |
|
| 380 | 351 | ||
| 381 | 352 | fn scanDelimited(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token { |
|
| 382 | 353 | while let ch = current(s); ch <> delim { |
|
| 383 | - | if not isPrint(ch) { |
|
| 354 | + | if not char::isPrint(ch) { |
|
| 384 | 355 | return invalid(s.token, "invalid character"); |
|
| 385 | 356 | } |
|
| 386 | 357 | if consume(s, '\\') { // Consume escapes |
|
| 387 | 358 | if isEof(s) { |
|
| 388 | 359 | return nil; |
| 431 | 402 | return TokenKind::Ident; |
|
| 432 | 403 | } |
|
| 433 | 404 | ||
| 434 | 405 | /// Scan an identifier, keyword, or label. |
|
| 435 | 406 | fn scanIdentifier(s: *mut Scanner) -> Token { |
|
| 436 | - | while let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' or ch == '#' { |
|
| 407 | + | while let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) { |
|
| 437 | 408 | advance(s); |
|
| 438 | 409 | } |
|
| 439 | 410 | let ident = &s.source[s.token..s.cursor]; |
|
| 440 | 411 | let kind = keywordOrIdent(ident); |
|
| 441 | 412 |
| 454 | 425 | if isEof(s) { |
|
| 455 | 426 | return tok(s, TokenKind::Eof); |
|
| 456 | 427 | } |
|
| 457 | 428 | let c: u8 = advance(s); |
|
| 458 | 429 | ||
| 459 | - | if isDigit(c) { |
|
| 430 | + | if char::isDigit(c) { |
|
| 460 | 431 | return scanNumber(s); |
|
| 461 | 432 | } |
|
| 462 | - | if isAlpha(c) { |
|
| 433 | + | if char::isAlpha(c) { |
|
| 463 | 434 | return scanIdentifier(s); |
|
| 464 | 435 | } |
|
| 465 | 436 | match c { |
|
| 466 | 437 | case '\'' => return scanChar(s), |
|
| 467 | 438 | case '"' => return scanString(s), |
| 491 | 462 | } |
|
| 492 | 463 | if consume(s, '=') { |
|
| 493 | 464 | return tok(s, TokenKind::MinusEqual); |
|
| 494 | 465 | } |
|
| 495 | 466 | // If followed by a digit, scan as negative number |
|
| 496 | - | if let ch = current(s); isDigit(ch) { |
|
| 467 | + | if let ch = current(s); char::isDigit(ch) { |
|
| 497 | 468 | return scanNumber(s); |
|
| 498 | 469 | } |
|
| 499 | 470 | return tok(s, TokenKind::Minus); |
|
| 500 | 471 | } |
|
| 501 | 472 | case '+' => { |
|
| 502 | 473 | if consume(s, '=') { |
|
| 503 | 474 | return tok(s, TokenKind::PlusEqual); |
|
| 504 | 475 | } |
|
| 505 | - | if let ch = current(s); isDigit(ch) { |
|
| 476 | + | if let ch = current(s); char::isDigit(ch) { |
|
| 506 | 477 | return scanNumber(s); |
|
| 507 | 478 | } |
|
| 508 | 479 | return tok(s, TokenKind::Plus); |
|
| 509 | 480 | } |
|
| 510 | 481 | case '/' => { |
| 582 | 553 | } |
|
| 583 | 554 | return tok(s, TokenKind::Gt); |
|
| 584 | 555 | } |
|
| 585 | 556 | case '@' => { |
|
| 586 | 557 | // Scan `@identifier` as a single token. |
|
| 587 | - | while let ch = current(s); isAlpha(ch) { |
|
| 588 | - | advance(s); |
|
| 589 | - | } |
|
| 590 | - | // Must have at least one character after `@`. |
|
| 591 | - | if s.cursor - s.token <= 1 { |
|
| 558 | + | let ch = current(s) else { |
|
| 592 | 559 | return invalid(s.token, "expected identifier after `@`"); |
|
| 560 | + | }; |
|
| 561 | + | if not char::isAlpha(ch) and ch <> '_' { |
|
| 562 | + | return invalid(s.token, "expected identifier after `@`"); |
|
| 563 | + | } |
|
| 564 | + | while let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) { |
|
| 565 | + | advance(s); |
|
| 593 | 566 | } |
|
| 594 | 567 | let name = &s.source[s.token..s.cursor]; |
|
| 595 | 568 | return Token { |
|
| 596 | 569 | kind: TokenKind::AtIdent, |
|
| 597 | 570 | source: strings::intern(s.pool, name), |
|
| 598 | 571 | offset: s.token, |
|
| 599 | 572 | }; |
|
| 600 | 573 | } |
|
| 601 | 574 | case '_' => { |
|
| 602 | - | if let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' { |
|
| 575 | + | if let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) { |
|
| 603 | 576 | // This is part of an identifier like `_foo` or `__start` |
|
| 604 | 577 | return scanIdentifier(s); |
|
| 605 | 578 | } |
|
| 606 | 579 | return tok(s, TokenKind::Underscore); |
|
| 607 | 580 | } |
lib/std/lang/strings.rad
+1 -1
| 8 | 8 | ||
| 9 | 9 | use std::mem; |
|
| 10 | 10 | use std::collections::dict; |
|
| 11 | 11 | ||
| 12 | 12 | /// Table size. |
|
| 13 | - | constant TABLE_SIZE: u32 = 8192; |
|
| 13 | + | constant TABLE_SIZE: u32 = 32768; |
|
| 14 | 14 | ||
| 15 | 15 | /// String interning pool using open-addressed hash table. |
|
| 16 | 16 | /// |
|
| 17 | 17 | /// Each unique string content is stored only once, allowing pointer equality |
|
| 18 | 18 | /// to be used instead of content comparison for symbol lookups and module names. |
lib/std/tests.rad
+99 -0
| 103 | 103 | let result: *[u8] = fmt::formatI64(-9223372036854775808, &mut buffer[..]); |
|
| 104 | 104 | try testing::expect(result.len == 20); |
|
| 105 | 105 | try testing::expectBytesEq(result, "-9223372036854775808"); |
|
| 106 | 106 | } |
|
| 107 | 107 | ||
| 108 | + | @test fn testParseIntLiteralText() throws (testing::TestError) { |
|
| 109 | + | let dec = try fmt::parseInt("123") catch { |
|
| 110 | + | throw testing::TestError::Failed; |
|
| 111 | + | }; |
|
| 112 | + | try testing::expect(dec.magnitude == 123); |
|
| 113 | + | try testing::expect(dec.radix == fmt::Radix::Decimal); |
|
| 114 | + | try testing::expect(not dec.signed); |
|
| 115 | + | try testing::expect(not dec.negative); |
|
| 116 | + | ||
| 117 | + | let hex = try fmt::parseInt("-0x2a") catch { |
|
| 118 | + | throw testing::TestError::Failed; |
|
| 119 | + | }; |
|
| 120 | + | try testing::expect(hex.magnitude == 42); |
|
| 121 | + | try testing::expect(hex.radix == fmt::Radix::Hex); |
|
| 122 | + | try testing::expect(hex.signed); |
|
| 123 | + | try testing::expect(hex.negative); |
|
| 124 | + | ||
| 125 | + | let bin = try fmt::parseInt("+0b101") catch { |
|
| 126 | + | throw testing::TestError::Failed; |
|
| 127 | + | }; |
|
| 128 | + | try testing::expect(bin.magnitude == 5); |
|
| 129 | + | try testing::expect(bin.radix == fmt::Radix::Binary); |
|
| 130 | + | try testing::expect(bin.signed); |
|
| 131 | + | try testing::expect(not bin.negative); |
|
| 132 | + | } |
|
| 133 | + | ||
| 134 | + | @test fn testDigitFromAscii() throws (testing::TestError) { |
|
| 135 | + | let zero = fmt::digitFromAscii('0', 10) else throw testing::TestError::Failed; |
|
| 136 | + | try testing::expect(zero == 0); |
|
| 137 | + | ||
| 138 | + | let nine = fmt::digitFromAscii('9', 10) else throw testing::TestError::Failed; |
|
| 139 | + | try testing::expect(nine == 9); |
|
| 140 | + | ||
| 141 | + | let lower = fmt::digitFromAscii('a', 16) else throw testing::TestError::Failed; |
|
| 142 | + | try testing::expect(lower == 10); |
|
| 143 | + | ||
| 144 | + | let lowerF = fmt::digitFromAscii('f', 16) else throw testing::TestError::Failed; |
|
| 145 | + | try testing::expect(lowerF == 15); |
|
| 146 | + | ||
| 147 | + | let upper = fmt::digitFromAscii('A', 16) else throw testing::TestError::Failed; |
|
| 148 | + | try testing::expect(upper == 10); |
|
| 149 | + | ||
| 150 | + | let upperF = fmt::digitFromAscii('F', 16) else throw testing::TestError::Failed; |
|
| 151 | + | try testing::expect(upperF == 15); |
|
| 152 | + | ||
| 153 | + | try testing::expect(fmt::digitFromAscii('g', 16) == nil); |
|
| 154 | + | try testing::expect(fmt::digitFromAscii('_', 10) == nil); |
|
| 155 | + | } |
|
| 156 | + | ||
| 157 | + | @test fn testParseIntLiteralTextErrors() throws (testing::TestError) { |
|
| 158 | + | try fmt::parseInt("") catch { |
|
| 159 | + | return; |
|
| 160 | + | }; |
|
| 161 | + | throw testing::TestError::Failed; |
|
| 162 | + | } |
|
| 163 | + | ||
| 164 | + | @test fn testParseIntLiteralTextInvalidDigitErrors() throws (testing::TestError) { |
|
| 165 | + | try fmt::parseInt("0b2") catch { |
|
| 166 | + | return; |
|
| 167 | + | }; |
|
| 168 | + | throw testing::TestError::Failed; |
|
| 169 | + | } |
|
| 170 | + | ||
| 171 | + | @test fn testParseIntLiteralTextOverflowErrors() throws (testing::TestError) { |
|
| 172 | + | try fmt::parseInt("18446744073709551616") catch { |
|
| 173 | + | return; |
|
| 174 | + | }; |
|
| 175 | + | throw testing::TestError::Failed; |
|
| 176 | + | } |
|
| 177 | + | ||
| 178 | + | @test fn testParseCharLiteralText() throws (testing::TestError) { |
|
| 179 | + | let x = try fmt::parseChar("'x'") catch { |
|
| 180 | + | throw testing::TestError::Failed; |
|
| 181 | + | }; |
|
| 182 | + | try testing::expect(x == 'x'); |
|
| 183 | + | ||
| 184 | + | let newline = try fmt::parseChar("'\\n'") catch { |
|
| 185 | + | throw testing::TestError::Failed; |
|
| 186 | + | }; |
|
| 187 | + | try testing::expect(newline == '\n'); |
|
| 188 | + | } |
|
| 189 | + | ||
| 190 | + | @test fn testParseCharLiteralTextErrors() throws (testing::TestError) { |
|
| 191 | + | try fmt::parseChar("''") catch { |
|
| 192 | + | return; |
|
| 193 | + | }; |
|
| 194 | + | throw testing::TestError::Failed; |
|
| 195 | + | } |
|
| 196 | + | ||
| 197 | + | @test fn testUnescapeString() throws (testing::TestError) { |
|
| 198 | + | let mut buffer: [u8; 8] = [0; 8]; |
|
| 199 | + | let len = fmt::unescapeString("a\\n\\0", &mut buffer[..]); |
|
| 200 | + | ||
| 201 | + | try testing::expect(len == 3); |
|
| 202 | + | try testing::expect(buffer[0] == 'a'); |
|
| 203 | + | try testing::expect(buffer[1] == '\n'); |
|
| 204 | + | try testing::expect(buffer[2] == 0); |
|
| 205 | + | } |
|
| 206 | + | ||
| 108 | 207 | // mem ///////////////////////////////////////////////////////////////////////// |
|
| 109 | 208 | ||
| 110 | 209 | @test fn testCopyFullSlice() throws (testing::TestError) { |
|
| 111 | 210 | let mut xs: [u8; 3] = [1, 2, 3]; |
|
| 112 | 211 | let mut ys: [u8; 3] = [4, 5, 6]; |
scripts/count-lines-no-comments.sh
+35 -11
| 1 | 1 | #!/bin/sh |
|
| 2 | - | # Count non-blank lines in all .rad files, excluding comment lines and tests. |
|
| 2 | + | # Count non-blank, non-comment lines in .rad files, skipping tests. |
|
| 3 | 3 | ||
| 4 | - | dir="${1:-.}" |
|
| 5 | - | ||
| 6 | - | if [ ! -d "$dir" ]; then |
|
| 7 | - | echo "Error: Directory '$dir' does not exist" |
|
| 8 | - | exit 1 |
|
| 4 | + | if [ "$#" -eq 0 ]; then |
|
| 5 | + | set -- . |
|
| 9 | 6 | fi |
|
| 10 | 7 | ||
| 11 | - | echo "Counting non-blank, non-comment lines in .rad files in: $dir" |
|
| 12 | - | echo "--------------------------------------------------------------" |
|
| 8 | + | tmpList=$(mktemp) |
|
| 9 | + | tmpFiles=$(mktemp) |
|
| 10 | + | trap 'rm -f "$tmpList" "$tmpFiles"' EXIT HUP INT TERM |
|
| 11 | + | ||
| 12 | + | for input in "$@"; do |
|
| 13 | + | if [ -d "$input" ]; then |
|
| 14 | + | find "$input" -type f -name "*.rad" -not -path "*/tests/*" -not -name "tests.rad" >> "$tmpList" |
|
| 15 | + | elif [ -f "$input" ]; then |
|
| 16 | + | case "$input" in |
|
| 17 | + | */tests/*|*/tests.rad|tests.rad) |
|
| 18 | + | ;; |
|
| 19 | + | *.rad) |
|
| 20 | + | printf '%s\n' "$input" >> "$tmpList" |
|
| 21 | + | ;; |
|
| 22 | + | *) |
|
| 23 | + | echo "Error: File '$input' is not a .rad file" >&2 |
|
| 24 | + | exit 1 |
|
| 25 | + | ;; |
|
| 26 | + | esac |
|
| 27 | + | else |
|
| 28 | + | echo "Error: Path '$input' does not exist" >&2 |
|
| 29 | + | exit 1 |
|
| 30 | + | fi |
|
| 31 | + | done |
|
| 32 | + | ||
| 33 | + | echo "Counting non-blank, non-comment lines in .rad files for inputs: $*" |
|
| 34 | + | echo "---------------------------------------------------------------------" |
|
| 13 | 35 | ||
| 14 | 36 | total=0 |
|
| 15 | - | for file in $(find "$dir" -name "*.rad" -type f -not -path "*/tests/*" -not -name "tests.rad" | sort); do |
|
| 37 | + | sort -u "$tmpList" > "$tmpFiles" |
|
| 38 | + | ||
| 39 | + | while IFS= read -r file; do |
|
| 16 | 40 | if [ -f "$file" ]; then |
|
| 17 | 41 | count=$(grep -v '^[[:space:]]*$' "$file" | grep -v '^[[:space:]]*//' | wc -l) |
|
| 18 | 42 | total=$((total + count)) |
|
| 19 | 43 | printf "%6d %s\n" "$count" "$file" |
|
| 20 | 44 | fi |
|
| 21 | - | done |
|
| 45 | + | done < "$tmpFiles" |
|
| 22 | 46 | ||
| 23 | - | echo "--------------------------------------------------------------" |
|
| 47 | + | echo "---------------------------------------------------------------------" |
|
| 24 | 48 | printf "%6d TOTAL\n" "$total" |
scripts/count-lines.sh
+35 -11
| 1 | 1 | #!/bin/sh |
|
| 2 | - | # Count non-blank lines in all .rad files, skipping tests. |
|
| 2 | + | # Count non-blank lines in .rad files, skipping tests. |
|
| 3 | 3 | ||
| 4 | - | dir="${1:-.}" |
|
| 5 | - | ||
| 6 | - | if [ ! -d "$dir" ]; then |
|
| 7 | - | echo "Error: Directory '$dir' does not exist" |
|
| 8 | - | exit 1 |
|
| 4 | + | if [ "$#" -eq 0 ]; then |
|
| 5 | + | set -- . |
|
| 9 | 6 | fi |
|
| 10 | 7 | ||
| 11 | - | echo "Counting non-blank lines in .rad files in: $dir" |
|
| 12 | - | echo "-------------------------------------------" |
|
| 8 | + | tmpList=$(mktemp) |
|
| 9 | + | tmpFiles=$(mktemp) |
|
| 10 | + | trap 'rm -f "$tmpList" "$tmpFiles"' EXIT HUP INT TERM |
|
| 11 | + | ||
| 12 | + | for input in "$@"; do |
|
| 13 | + | if [ -d "$input" ]; then |
|
| 14 | + | find "$input" -type f -name "*.rad" -not -path "*/tests/*" -not -name "tests.rad" >> "$tmpList" |
|
| 15 | + | elif [ -f "$input" ]; then |
|
| 16 | + | case "$input" in |
|
| 17 | + | */tests/*|*/tests.rad|tests.rad) |
|
| 18 | + | ;; |
|
| 19 | + | *.rad) |
|
| 20 | + | printf '%s\n' "$input" >> "$tmpList" |
|
| 21 | + | ;; |
|
| 22 | + | *) |
|
| 23 | + | echo "Error: File '$input' is not a .rad file" >&2 |
|
| 24 | + | exit 1 |
|
| 25 | + | ;; |
|
| 26 | + | esac |
|
| 27 | + | else |
|
| 28 | + | echo "Error: Path '$input' does not exist" >&2 |
|
| 29 | + | exit 1 |
|
| 30 | + | fi |
|
| 31 | + | done |
|
| 32 | + | ||
| 33 | + | echo "Counting non-blank lines in .rad files for inputs: $*" |
|
| 34 | + | echo "------------------------------------------------------" |
|
| 13 | 35 | ||
| 14 | 36 | total=0 |
|
| 15 | - | for file in $(find "$dir" -name "*.rad" -type f -not -path "*/tests/*" -not -name "tests.rad" | sort); do |
|
| 37 | + | sort -u "$tmpList" > "$tmpFiles" |
|
| 38 | + | ||
| 39 | + | while IFS= read -r file; do |
|
| 16 | 40 | if [ -f "$file" ]; then |
|
| 17 | 41 | count=$(grep -v '^[[:space:]]*$' "$file" | wc -l) |
|
| 18 | 42 | total=$((total + count)) |
|
| 19 | 43 | printf "%6d %s\n" "$count" "$file" |
|
| 20 | 44 | fi |
|
| 21 | - | done |
|
| 45 | + | done < "$tmpFiles" |
|
| 22 | 46 | ||
| 23 | - | echo "-------------------------------------------" |
|
| 47 | + | echo "------------------------------------------------------" |
|
| 24 | 48 | printf "%6d TOTAL\n" "$total" |
std.lib
+5 -0
| 1 | 1 | lib/std.rad |
|
| 2 | + | lib/std/char.rad |
|
| 2 | 3 | lib/std/fmt.rad |
|
| 3 | 4 | lib/std/mem.rad |
|
| 4 | 5 | lib/std/vec.rad |
|
| 5 | 6 | lib/std/io.rad |
|
| 6 | 7 | lib/std/intrinsics.rad |
| 13 | 14 | lib/std/arch/rv64/encode.rad |
|
| 14 | 15 | lib/std/arch/rv64/decode.rad |
|
| 15 | 16 | lib/std/arch/rv64/emit.rad |
|
| 16 | 17 | lib/std/arch/rv64/isel.rad |
|
| 17 | 18 | lib/std/arch/rv64/printer.rad |
|
| 19 | + | lib/std/arch/rv64/asm.rad |
|
| 20 | + | lib/std/arch/rv64/asm/scanner.rad |
|
| 21 | + | lib/std/arch/rv64/asm/parser.rad |
|
| 22 | + | lib/std/arch/rv64/asm/emit.rad |
|
| 18 | 23 | lib/std/lang.rad |
|
| 19 | 24 | lib/std/lang/alloc.rad |
|
| 20 | 25 | lib/std/lang/strings.rad |
|
| 21 | 26 | lib/std/lang/sexpr.rad |
|
| 22 | 27 | lib/std/lang/ast.rad |
std.lib.test
+3 -0
| 1 | 1 | lib/std/testing.rad |
|
| 2 | 2 | lib/std/tests.rad |
|
| 3 | + | lib/std/char/tests.rad |
|
| 3 | 4 | lib/std/arch/rv64/tests.rad |
|
| 5 | + | lib/std/arch/rv64/asm/tests.rad |
|
| 6 | + | lib/std/arch/rv64/asm/scanner/tests.rad |
|
| 4 | 7 | lib/std/lang/alloc/tests.rad |
|
| 5 | 8 | lib/std/lang/parser/tests.rad |
|
| 6 | 9 | lib/std/lang/module/tests.rad |
|
| 7 | 10 | lib/std/lang/scanner/tests.rad |
|
| 8 | 11 | lib/std/lang/resolver/tests.rad |
test/run
+13 -6
| 1 | 1 | #!/bin/sh |
|
| 2 | 2 | # Run binary tests. |
|
| 3 | - | # Usage: test/run [<test.rad>...] |
|
| 3 | + | # Usage: test/run [<test.rad|test.ras>...] |
|
| 4 | 4 | # |
|
| 5 | - | # If no arguments are provided, runs all tests in `test/tests/`. |
|
| 5 | + | # If no arguments are provided, runs all `.rad` and `.ras` tests in |
|
| 6 | + | # `test/tests/`. |
|
| 6 | 7 | # |
|
| 7 | 8 | # For each test: |
|
| 8 | 9 | # - If a `.ril` file exists alongside it, the IL output is checked |
|
| 9 | 10 | # against it via the runner binary. |
|
| 10 | 11 | # - If `//! returns: N` appears in the file, the test is compiled to |
|
| 11 | 12 | # a binary and executed; the exit code must match N. |
|
| 12 | 13 | ||
| 13 | 14 | RUNNER="test/runner.rv64" |
|
| 14 | 15 | TEST_DIR="test/tests" |
|
| 15 | 16 | EMU="${RAD_EMULATOR:-emulator} -stack-size=1024 -run" |
|
| 16 | - | EMU_RUN="${RAD_EMULATOR:-emulator} -run" |
|
| 17 | + | EMU_RUN="${RAD_EMULATOR:-emulator} -no-jit -run" |
|
| 17 | 18 | ||
| 18 | 19 | if [ ! -f "$RUNNER" ]; then |
|
| 19 | 20 | echo "error: runner binary not found: $RUNNER" >&2 |
|
| 20 | 21 | echo "hint: run 'make test' first" >&2 |
|
| 21 | 22 | exit 1 |
| 24 | 25 | # Disable core dumps for tests. |
|
| 25 | 26 | ulimit -c 0 |
|
| 26 | 27 | ||
| 27 | 28 | # Collect tests. |
|
| 28 | 29 | if [ $# -eq 0 ]; then |
|
| 29 | - | tests=$(find "$TEST_DIR" -name '*.rad' | sort) |
|
| 30 | + | tests=$(find "$TEST_DIR" \( -name '*.rad' -o -name '*.ras' \) | sort) |
|
| 30 | 31 | else |
|
| 31 | 32 | tests="$*" |
|
| 32 | 33 | fi |
|
| 33 | 34 | ||
| 34 | 35 | if [ -z "$tests" ]; then |
| 38 | 39 | ||
| 39 | 40 | passed=0 |
|
| 40 | 41 | failed=0 |
|
| 41 | 42 | ||
| 42 | 43 | for test in $tests; do |
|
| 43 | - | ril="${test%.rad}.ril" |
|
| 44 | - | bin="${test%.rad}.rv64" |
|
| 44 | + | case "$test" in |
|
| 45 | + | *.rad) base="${test%.rad}" ;; |
|
| 46 | + | *.ras) base="${test%.ras}" ;; |
|
| 47 | + | *) base="$test" ;; |
|
| 48 | + | esac |
|
| 49 | + | ||
| 50 | + | ril="${base}.ril" |
|
| 51 | + | bin="${base}.rv64" |
|
| 45 | 52 | ||
| 46 | 53 | # IL check: run the runner if a .ril file exists. |
|
| 47 | 54 | if [ -f "$ril" ]; then |
|
| 48 | 55 | if $EMU "$RUNNER" -- "$test"; then |
|
| 49 | 56 | passed=$((passed + 1)) |
test/runner.rad
+80 -3
| 1 | - | //! IL snapshot test runner. |
|
| 1 | + | //! IL snapshot test runner and `.ras` asm helper. |
|
| 2 | 2 | //! |
|
| 3 | 3 | //! Given a `.rad` source file, lowers it to IL and compares the output |
|
| 4 | - | //! against the corresponding `.ril` snapshot file. Called by `test/run` |
|
| 5 | - | //! for every test that has a `.ril` file. |
|
| 4 | + | //! against the corresponding `.ril` snapshot file. It also supports an |
|
| 5 | + | //! `assemble <input.ras> <output.rv64>` subcommand used by `bin-test`. |
|
| 6 | 6 | ||
| 7 | 7 | use std::io; |
|
| 8 | 8 | use std::mem; |
|
| 9 | 9 | use std::sys; |
|
| 10 | 10 | use std::sys::unix; |
| 14 | 14 | use std::lang::parser; |
|
| 15 | 15 | use std::lang::scanner; |
|
| 16 | 16 | use std::lang::resolver; |
|
| 17 | 17 | use std::lang::strings; |
|
| 18 | 18 | use std::lang::lower; |
|
| 19 | + | use std::arch::rv64; |
|
| 20 | + | use std::arch::rv64::asm; |
|
| 19 | 21 | ||
| 20 | 22 | /// Buffer size for reading source files (8 KB). |
|
| 21 | 23 | constant SOURCE_BUF_SIZE: u32 = 8192; |
|
| 22 | 24 | /// Buffer size for reading expected IL files (32 KB). |
|
| 23 | 25 | constant EXPECTED_BUF_SIZE: u32 = 32768; |
| 37 | 39 | ||
| 38 | 40 | /// Maximum number of AST nodes per test file. |
|
| 39 | 41 | constant MAX_NODE_DATA: u32 = 4096; |
|
| 40 | 42 | /// Maximum number of resolver errors per test file. |
|
| 41 | 43 | constant MAX_ERRORS: u32 = 16; |
|
| 44 | + | /// Maximum number of text words in a `.ras` test binary. |
|
| 45 | + | constant ASM_TEXT_CAPACITY: u32 = 256; |
|
| 46 | + | /// Maximum number of data bytes in a `.ras` test binary. |
|
| 47 | + | constant ASM_DATA_CAPACITY: u32 = 1024; |
|
| 48 | + | constant RO_DATA_EXT: *[u8] = ".ro.data"; |
|
| 42 | 49 | ||
| 43 | 50 | // Static storage for large buffers to avoid stack overflow. |
|
| 44 | 51 | // Tests run serially so sharing these is safe. |
|
| 45 | 52 | static SOURCE_BUF: [u8; SOURCE_BUF_SIZE] = undefined; |
|
| 46 | 53 | static EXPECTED_BUF: [u8; EXPECTED_BUF_SIZE] = undefined; |
| 49 | 56 | static IL_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined; |
|
| 50 | 57 | static PRINT_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined; |
|
| 51 | 58 | static RESOLVER_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined; |
|
| 52 | 59 | static NODE_DATA_STORAGE: [resolver::NodeData; MAX_NODE_DATA] = undefined; |
|
| 53 | 60 | static ERROR_STORAGE: [resolver::Error; MAX_ERRORS] = undefined; |
|
| 61 | + | static ASM_TEXT_STORAGE: [u32; ASM_TEXT_CAPACITY] = undefined; |
|
| 62 | + | static ASM_DATA_STORAGE: [u8; ASM_DATA_CAPACITY] = undefined; |
|
| 54 | 63 | ||
| 55 | 64 | /// Strip a `//` comment from a line, preserving `//` inside quoted strings. |
|
| 56 | 65 | /// Returns the content before the comment, trimmed of trailing whitespace. |
|
| 57 | 66 | fn stripLine(line: *[u8]) -> *[u8] { |
|
| 58 | 67 | let mut end = line.len; |
| 161 | 170 | set buf[len] = 0; |
|
| 162 | 171 | ||
| 163 | 172 | return &buf[..len]; |
|
| 164 | 173 | } |
|
| 165 | 174 | ||
| 175 | + | fn appendPathExt(basePath: *[u8], ext: *[u8], buf: *mut [u8]) -> ?*[u8] { |
|
| 176 | + | if basePath.len + ext.len + 1 > buf.len { |
|
| 177 | + | return nil; |
|
| 178 | + | } |
|
| 179 | + | let mut pos: u32 = 0; |
|
| 180 | + | ||
| 181 | + | set pos += try! mem::copy(&mut buf[pos..], basePath); |
|
| 182 | + | set pos += try! mem::copy(&mut buf[pos..], ext); |
|
| 183 | + | set buf[pos] = 0; |
|
| 184 | + | ||
| 185 | + | return &buf[..pos]; |
|
| 186 | + | } |
|
| 187 | + | ||
| 188 | + | fn writeCode(code: *[u32], path: *[u8]) -> bool { |
|
| 189 | + | let bytes = @sliceOf(code.ptr as *u8, code.len * 4); |
|
| 190 | + | return unix::writeFile(path, bytes); |
|
| 191 | + | } |
|
| 192 | + | ||
| 193 | + | fn assembleBinary(sourcePath: *[u8], outputPath: *[u8]) -> bool { |
|
| 194 | + | let mut roDataPathBuf: [u8; MAX_PATH_LEN] = undefined; |
|
| 195 | + | let source = unix::readFile(sourcePath, &mut SOURCE_BUF[..]) else { |
|
| 196 | + | io::printError("error: could not read source: "); |
|
| 197 | + | io::printError(sourcePath); |
|
| 198 | + | io::printError("\n"); |
|
| 199 | + | return false; |
|
| 200 | + | }; |
|
| 201 | + | let roDataPath = appendPathExt(outputPath, RO_DATA_EXT, &mut roDataPathBuf[..]) else { |
|
| 202 | + | io::printError("error: output path too long\n"); |
|
| 203 | + | return false; |
|
| 204 | + | }; |
|
| 205 | + | ||
| 206 | + | let mut arena = alloc::new(&mut AST_ARENA_STORAGE[..]); |
|
| 207 | + | let program = try asm::assemble( |
|
| 208 | + | asm::scanner::SourceKind::File { path: sourcePath }, |
|
| 209 | + | source, |
|
| 210 | + | &mut ASM_TEXT_STORAGE[..], |
|
| 211 | + | &mut ASM_DATA_STORAGE[..], |
|
| 212 | + | &mut arena, |
|
| 213 | + | &mut STRING_POOL, |
|
| 214 | + | rv64::RO_DATA_BASE |
|
| 215 | + | ) catch { |
|
| 216 | + | io::printError("error: assembly failed: "); |
|
| 217 | + | io::printError(sourcePath); |
|
| 218 | + | io::printError("\n"); |
|
| 219 | + | return false; |
|
| 220 | + | }; |
|
| 221 | + | if not writeCode(program.text, outputPath) { |
|
| 222 | + | io::printError("error: could not write output: "); |
|
| 223 | + | io::printError(outputPath); |
|
| 224 | + | io::printError("\n"); |
|
| 225 | + | return false; |
|
| 226 | + | } |
|
| 227 | + | if not unix::writeFile(roDataPath, program.data) { |
|
| 228 | + | io::printError("error: could not write data: "); |
|
| 229 | + | io::printError(roDataPath); |
|
| 230 | + | io::printError("\n"); |
|
| 231 | + | return false; |
|
| 232 | + | } |
|
| 233 | + | return true; |
|
| 234 | + | } |
|
| 235 | + | ||
| 166 | 236 | /// Run a single IL snapshot test case. Returns `true` on success. |
|
| 167 | 237 | fn runTest(sourcePath: *[u8]) -> bool { |
|
| 168 | 238 | // Path buffer. |
|
| 169 | 239 | let mut rilPathBuf: [u8; MAX_PATH_LEN] = undefined; |
|
| 170 | 240 | let mut pkgScope: resolver::Scope = undefined; |
| 241 | 311 | ||
| 242 | 312 | /// Run a single test specified as an argument. |
|
| 243 | 313 | @default fn main(env: *sys::Env) -> i32 { |
|
| 244 | 314 | let args = env.args; |
|
| 245 | 315 | ||
| 316 | + | if args.len == 4 and mem::eq(args[1], "assemble") { |
|
| 317 | + | if assembleBinary(args[2], args[3]) { |
|
| 318 | + | return 0; |
|
| 319 | + | } else { |
|
| 320 | + | return 1; |
|
| 321 | + | } |
|
| 322 | + | } |
|
| 246 | 323 | if args.len <> 2 { |
|
| 247 | 324 | io::printError("error: expected test file path as argument"); |
|
| 248 | 325 | return 1; |
|
| 249 | 326 | } |
|
| 250 | 327 | let sourcePath = args[1]; |
test/tests/asm.basic.text.program.ras
added
+13 -0
| 1 | + | //! returns: 42 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | addi %a0 %zero 42; |
|
| 6 | + | sd %a0 8(%sp); |
|
| 7 | + | beq %a0 %zero @fail; |
|
| 8 | + | li %a7 93; |
|
| 9 | + | ecall; |
|
| 10 | + | @fail |
|
| 11 | + | li %a0 1; |
|
| 12 | + | li %a7 93; |
|
| 13 | + | ecall; |
test/tests/asm.branch.comparisons.ras
added
+40 -0
| 1 | + | //! returns: 17 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | li %a0 0; |
|
| 6 | + | li %t0 5; |
|
| 7 | + | li %t1 5; |
|
| 8 | + | beq %t0 %t1 @beqOk; |
|
| 9 | + | j @fail; |
|
| 10 | + | @beqOk |
|
| 11 | + | bne %t0 %t1 @fail; |
|
| 12 | + | li %t2 -1; |
|
| 13 | + | li %t3 1; |
|
| 14 | + | blt %t2 %t3 @bltOk; |
|
| 15 | + | j @fail; |
|
| 16 | + | @bltOk |
|
| 17 | + | bgt %t3 %t2 @bgtOk; |
|
| 18 | + | j @fail; |
|
| 19 | + | @bgtOk |
|
| 20 | + | bge %t3 %t2 @bgeOk; |
|
| 21 | + | j @fail; |
|
| 22 | + | @bgeOk |
|
| 23 | + | ble %t2 %t3 @bleOk; |
|
| 24 | + | j @fail; |
|
| 25 | + | @bleOk |
|
| 26 | + | li %t4 1; |
|
| 27 | + | li %t5 2; |
|
| 28 | + | bltu %t4 %t5 @bltuOk; |
|
| 29 | + | j @fail; |
|
| 30 | + | @bltuOk |
|
| 31 | + | bgeu %t5 %t4 @done; |
|
| 32 | + | j @fail; |
|
| 33 | + | @done |
|
| 34 | + | li %a0 17; |
|
| 35 | + | li %a7 93; |
|
| 36 | + | ecall; |
|
| 37 | + | @fail |
|
| 38 | + | li %a0 1; |
|
| 39 | + | li %a7 93; |
|
| 40 | + | ecall; |
test/tests/asm.call.return.flow.ras
added
+17 -0
| 1 | + | //! returns: 29 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | call @helper; |
|
| 6 | + | j @exit; |
|
| 7 | + | @helper |
|
| 8 | + | mv %t0 %ra; |
|
| 9 | + | jal %ra @leaf; |
|
| 10 | + | mv %ra %t0; |
|
| 11 | + | ret; |
|
| 12 | + | @leaf |
|
| 13 | + | li %a0 29; |
|
| 14 | + | ret; |
|
| 15 | + | @exit |
|
| 16 | + | li %a7 93; |
|
| 17 | + | ecall; |
test/tests/asm.compare.set.logic.ras
added
+29 -0
| 1 | + | //! returns: 36 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | nop; |
|
| 6 | + | li %t0 -1; |
|
| 7 | + | li %t1 1; |
|
| 8 | + | slt %a0 %t0 %t1; |
|
| 9 | + | sltu %t2 %t1 %t0; |
|
| 10 | + | add %a0 %a0 %t2; |
|
| 11 | + | slti %t3 %t0 0; |
|
| 12 | + | add %a0 %a0 %t3; |
|
| 13 | + | sltiu %t4 %zero 1; |
|
| 14 | + | add %a0 %a0 %t4; |
|
| 15 | + | seqz %t5 %zero; |
|
| 16 | + | add %a0 %a0 %t5; |
|
| 17 | + | snez %t6 %t1; |
|
| 18 | + | add %a0 %a0 %t6; |
|
| 19 | + | neg %a1 %t1; |
|
| 20 | + | slti %a1 %a1 0; |
|
| 21 | + | add %a0 %a0 %a1; |
|
| 22 | + | xori %a2 %zero 7; |
|
| 23 | + | ori %a2 %a2 8; |
|
| 24 | + | andi %a2 %a2 15; |
|
| 25 | + | add %a0 %a0 %a2; |
|
| 26 | + | xor %a3 %a2 %t6; |
|
| 27 | + | add %a0 %a0 %a3; |
|
| 28 | + | li %a7 93; |
|
| 29 | + | ecall; |
test/tests/asm.csr.system.instructions.ras
added
+16 -0
| 1 | + | //! returns: 88 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | j @exit; |
|
| 6 | + | csrr %a0 mhartid; |
|
| 7 | + | csrw mtvec %a0; |
|
| 8 | + | csrrw %t0 mscratch %t1; |
|
| 9 | + | csrsi mstatus 8; |
|
| 10 | + | csrc mip %t0; |
|
| 11 | + | wfi; |
|
| 12 | + | mret; |
|
| 13 | + | @exit |
|
| 14 | + | li %a0 88; |
|
| 15 | + | li %a7 93; |
|
| 16 | + | ecall; |
test/tests/asm.data.directives.ras
added
+24 -0
| 1 | + | //! returns: 139 |
|
| 2 | + | ||
| 3 | + | .constant dataBase 0x10000; |
|
| 4 | + | .data; |
|
| 5 | + | .byte 1, 'A'; |
|
| 6 | + | .word 0x11223344; |
|
| 7 | + | .dword 0x1122334455667788; |
|
| 8 | + | .ascii "hi"; |
|
| 9 | + | .ascii "x"; |
|
| 10 | + | .align 4; |
|
| 11 | + | .text; |
|
| 12 | + | @entry |
|
| 13 | + | li %t0 dataBase; |
|
| 14 | + | lbu %a0 0(%t0); |
|
| 15 | + | lbu %t1 1(%t0); |
|
| 16 | + | add %a0 %a0 %t1; |
|
| 17 | + | lbu %t1 14(%t0); |
|
| 18 | + | add %a0 %a0 %t1; |
|
| 19 | + | lbu %t1 15(%t0); |
|
| 20 | + | add %a0 %a0 %t1; |
|
| 21 | + | lbu %t1 16(%t0); |
|
| 22 | + | add %a0 %a0 %t1; |
|
| 23 | + | li %a7 93; |
|
| 24 | + | ecall; |
test/tests/asm.data.symbol.fixup.ras
added
+16 -0
| 1 | + | //! returns: 2 |
|
| 2 | + | ||
| 3 | + | .constant dataBase 0x10000; |
|
| 4 | + | .data; |
|
| 5 | + | .byte 0; |
|
| 6 | + | @here |
|
| 7 | + | .byte 1; |
|
| 8 | + | .word @here; |
|
| 9 | + | .text; |
|
| 10 | + | @entry |
|
| 11 | + | li %t0 dataBase; |
|
| 12 | + | lbu %a0 1(%t0); |
|
| 13 | + | lbu %t1 2(%t0); |
|
| 14 | + | add %a0 %a0 %t1; |
|
| 15 | + | li %a7 93; |
|
| 16 | + | ecall; |
test/tests/asm.directive.boundary.values.ras
added
+40 -0
| 1 | + | //! returns: 97 |
|
| 2 | + | ||
| 3 | + | .constant dataBase 0x10000; |
|
| 4 | + | .data; |
|
| 5 | + | .word -2147483648; |
|
| 6 | + | .word 2147483647; |
|
| 7 | + | .dword 0x1122334455667788; |
|
| 8 | + | .byte 255; |
|
| 9 | + | .text; |
|
| 10 | + | @entry |
|
| 11 | + | j @exit; |
|
| 12 | + | .align 8; |
|
| 13 | + | ret; |
|
| 14 | + | @exit |
|
| 15 | + | li %t0 dataBase; |
|
| 16 | + | lbu %a0 3(%t0); |
|
| 17 | + | lbu %t1 4(%t0); |
|
| 18 | + | add %a0 %a0 %t1; |
|
| 19 | + | lbu %t1 7(%t0); |
|
| 20 | + | add %a0 %a0 %t1; |
|
| 21 | + | lbu %t1 8(%t0); |
|
| 22 | + | add %a0 %a0 %t1; |
|
| 23 | + | lbu %t1 9(%t0); |
|
| 24 | + | add %a0 %a0 %t1; |
|
| 25 | + | lbu %t1 10(%t0); |
|
| 26 | + | add %a0 %a0 %t1; |
|
| 27 | + | lbu %t1 11(%t0); |
|
| 28 | + | add %a0 %a0 %t1; |
|
| 29 | + | lbu %t1 12(%t0); |
|
| 30 | + | add %a0 %a0 %t1; |
|
| 31 | + | lbu %t1 13(%t0); |
|
| 32 | + | add %a0 %a0 %t1; |
|
| 33 | + | lbu %t1 14(%t0); |
|
| 34 | + | add %a0 %a0 %t1; |
|
| 35 | + | lbu %t1 15(%t0); |
|
| 36 | + | add %a0 %a0 %t1; |
|
| 37 | + | lbu %t1 16(%t0); |
|
| 38 | + | add %a0 %a0 %t1; |
|
| 39 | + | li %a7 93; |
|
| 40 | + | ecall; |
test/tests/asm.global.scoped.symbols.ras
added
+16 -0
| 1 | + | //! returns: 7 |
|
| 2 | + | ||
| 3 | + | .export @kernel::main; |
|
| 4 | + | .text; |
|
| 5 | + | @entry |
|
| 6 | + | j @kernel::main; |
|
| 7 | + | li %a0 1; |
|
| 8 | + | li %a7 93; |
|
| 9 | + | ecall; |
|
| 10 | + | @kernel::main |
|
| 11 | + | li %a0 7; |
|
| 12 | + | li %a7 93; |
|
| 13 | + | ecall; |
|
| 14 | + | .data; |
|
| 15 | + | @data::sym |
|
| 16 | + | .byte 1; |
test/tests/asm.instruction.matrix.alu.ras
added
+29 -0
| 1 | + | //! returns: 11 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | li %a0 0; |
|
| 6 | + | li %a1 6; |
|
| 7 | + | li %a2 3; |
|
| 8 | + | and %t0 %a1 %a2; |
|
| 9 | + | add %a0 %a0 %t0; |
|
| 10 | + | li %a3 4; |
|
| 11 | + | li %a4 1; |
|
| 12 | + | or %t1 %a3 %a4; |
|
| 13 | + | add %a0 %a0 %t1; |
|
| 14 | + | li %t2 0; |
|
| 15 | + | not %t3 %t2; |
|
| 16 | + | andi %t3 %t3 1; |
|
| 17 | + | add %a0 %a0 %t3; |
|
| 18 | + | li %s1 3; |
|
| 19 | + | mv %s2 %s1; |
|
| 20 | + | andi %s2 %s2 1; |
|
| 21 | + | add %a0 %a0 %s2; |
|
| 22 | + | li %t4 0; |
|
| 23 | + | seqz %t5 %t4; |
|
| 24 | + | add %a0 %a0 %t5; |
|
| 25 | + | li %t6 9; |
|
| 26 | + | snez %a6 %t6; |
|
| 27 | + | add %a0 %a0 %a6; |
|
| 28 | + | li %a7 93; |
|
| 29 | + | ecall; |
test/tests/asm.instruction.matrix.mem.control.ras
added
+21 -0
| 1 | + | //! returns: 12 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | addi %sp %sp -32; |
|
| 6 | + | li %t0 7; |
|
| 7 | + | sb %t0 1(%sp); |
|
| 8 | + | lb %a0 1(%sp); |
|
| 9 | + | sd %a0 8(%sp); |
|
| 10 | + | lui %t2 0; |
|
| 11 | + | auipc %t1 0; |
|
| 12 | + | jalr %ra %t1 16; |
|
| 13 | + | li %a0 1; |
|
| 14 | + | j @exit; |
|
| 15 | + | @helper |
|
| 16 | + | ld %a0 8(%sp); |
|
| 17 | + | addi %a0 %a0 5; |
|
| 18 | + | addi %sp %sp 32; |
|
| 19 | + | @exit |
|
| 20 | + | li %a7 93; |
|
| 21 | + | ecall; |
test/tests/asm.instruction.matrix.system.ras
added
+11 -0
| 1 | + | //! returns: 77 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | j @exit; |
|
| 6 | + | ecall; |
|
| 7 | + | ebreak; |
|
| 8 | + | @exit |
|
| 9 | + | li %a0 77; |
|
| 10 | + | li %a7 93; |
|
| 11 | + | ecall; |
test/tests/asm.label.fixups.ras
added
+16 -0
| 1 | + | //! returns: 3 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | li %a0 3; |
|
| 6 | + | li %a1 0; |
|
| 7 | + | @loop |
|
| 8 | + | beqz %a0 @done; |
|
| 9 | + | addi %a1 %a1 1; |
|
| 10 | + | addi %a0 %a0 -1; |
|
| 11 | + | bnez %a0 @loop; |
|
| 12 | + | j @done; |
|
| 13 | + | @done |
|
| 14 | + | mv %a0 %a1; |
|
| 15 | + | li %a7 93; |
|
| 16 | + | ecall; |
test/tests/asm.li.expressions.ras
added
+12 -0
| 1 | + | //! returns: 162 |
|
| 2 | + | ||
| 3 | + | .constant PAGE 4096; |
|
| 4 | + | .constant VALUE 0x12340000 + 0x5678; |
|
| 5 | + | .text; |
|
| 6 | + | @entry |
|
| 7 | + | li %a0 42; |
|
| 8 | + | li %a1 VALUE; |
|
| 9 | + | andi %a1 %a1 255; |
|
| 10 | + | add %a0 %a0 %a1; |
|
| 11 | + | li %a7 93; |
|
| 12 | + | ecall; |
test/tests/asm.link.rad
added
+7 -0
| 1 | + | //! returns: 42 |
|
| 2 | + | ||
| 3 | + | fn asmAddOne(x: i32) -> i32; |
|
| 4 | + | ||
| 5 | + | @default fn main() -> i32 { |
|
| 6 | + | return asmAddOne(41); |
|
| 7 | + | } |
test/tests/asm.link.ras
added
+5 -0
| 1 | + | .text; |
|
| 2 | + | .export @"asm.link::asmAddOne"; |
|
| 3 | + | @"asm.link::asmAddOne" |
|
| 4 | + | addi %a0 %a0 1; |
|
| 5 | + | ret; |
test/tests/asm.load.store.widths.ras
added
+33 -0
| 1 | + | //! returns: 72 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | addi %sp %sp -32; |
|
| 6 | + | li %t0 0x80; |
|
| 7 | + | sb %t0 0(%sp); |
|
| 8 | + | lb %t1 0(%sp); |
|
| 9 | + | slti %a0 %t1 0; |
|
| 10 | + | lbu %t2 0(%sp); |
|
| 11 | + | srli %t2 %t2 7; |
|
| 12 | + | add %a0 %a0 %t2; |
|
| 13 | + | li %t0 0x8000; |
|
| 14 | + | sh %t0 2(%sp); |
|
| 15 | + | lh %t3 2(%sp); |
|
| 16 | + | slti %t3 %t3 0; |
|
| 17 | + | add %a0 %a0 %t3; |
|
| 18 | + | lhu %t4 2(%sp); |
|
| 19 | + | srli %t4 %t4 15; |
|
| 20 | + | add %a0 %a0 %t4; |
|
| 21 | + | li %t0 17; |
|
| 22 | + | sw %t0 8(%sp); |
|
| 23 | + | lw %t5 8(%sp); |
|
| 24 | + | add %a0 %a0 %t5; |
|
| 25 | + | lwu %t6 8(%sp); |
|
| 26 | + | add %a0 %a0 %t6; |
|
| 27 | + | li %t0 34; |
|
| 28 | + | sd %t0 16(%sp); |
|
| 29 | + | ld %a1 16(%sp); |
|
| 30 | + | add %a0 %a0 %a1; |
|
| 31 | + | addi %sp %sp 32; |
|
| 32 | + | li %a7 93; |
|
| 33 | + | ecall; |
test/tests/asm.mul.div.rem.ras
added
+33 -0
| 1 | + | //! returns: 4 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | li %t0 20; |
|
| 6 | + | li %t1 6; |
|
| 7 | + | mul %a0 %t0 %t1; |
|
| 8 | + | rem %t2 %t0 %t1; |
|
| 9 | + | add %a0 %a0 %t2; |
|
| 10 | + | div %t3 %t0 %t1; |
|
| 11 | + | add %a0 %a0 %t3; |
|
| 12 | + | remu %t4 %t0 %t1; |
|
| 13 | + | add %a0 %a0 %t4; |
|
| 14 | + | divu %t5 %t0 %t1; |
|
| 15 | + | add %a0 %a0 %t5; |
|
| 16 | + | mulw %t6 %t0 %t1; |
|
| 17 | + | add %a0 %a0 %t6; |
|
| 18 | + | divw %a1 %t0 %t1; |
|
| 19 | + | add %a0 %a0 %a1; |
|
| 20 | + | divuw %a2 %t0 %t1; |
|
| 21 | + | add %a0 %a0 %a2; |
|
| 22 | + | remw %a3 %t0 %t1; |
|
| 23 | + | add %a0 %a0 %a3; |
|
| 24 | + | remuw %a4 %t0 %t1; |
|
| 25 | + | add %a0 %a0 %a4; |
|
| 26 | + | mulh %a5 %t0 %t1; |
|
| 27 | + | add %a0 %a0 %a5; |
|
| 28 | + | mulhu %a6 %t0 %t1; |
|
| 29 | + | add %a0 %a0 %a6; |
|
| 30 | + | mulhsu %a7 %t0 %t1; |
|
| 31 | + | add %a0 %a0 %a7; |
|
| 32 | + | li %a7 93; |
|
| 33 | + | ecall; |
test/tests/asm.rodata.prefix.rad
added
+14 -0
| 1 | + | //! returns: 0 |
|
| 2 | + | ||
| 3 | + | fn asmDataValue() -> i32; |
|
| 4 | + | ||
| 5 | + | @default fn main() -> i32 { |
|
| 6 | + | let s = "hello"; |
|
| 7 | + | ||
| 8 | + | assert s.len == 5; |
|
| 9 | + | assert s[0] == 'h' as u8; |
|
| 10 | + | assert s[4] == 'o' as u8; |
|
| 11 | + | assert asmDataValue() == 41; |
|
| 12 | + | ||
| 13 | + | return 0; |
|
| 14 | + | } |
test/tests/asm.rodata.prefix.ras
added
+10 -0
| 1 | + | .text; |
|
| 2 | + | .export @"asm.rodata.prefix::asmDataValue"; |
|
| 3 | + | @"asm.rodata.prefix::asmDataValue" |
|
| 4 | + | la %t0 @value; |
|
| 5 | + | lbu %a0 0(%t0); |
|
| 6 | + | ret; |
|
| 7 | + | ||
| 8 | + | .data; |
|
| 9 | + | @value |
|
| 10 | + | .byte 41; |
test/tests/asm.scoped.symbols.la.ras
added
+12 -0
| 1 | + | //! returns: 41 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | la %t0 @data::sym; |
|
| 6 | + | addi %a0 %t0 40; |
|
| 7 | + | li %a7 93; |
|
| 8 | + | ecall; |
|
| 9 | + | .data; |
|
| 10 | + | .byte 0; |
|
| 11 | + | @data::sym |
|
| 12 | + | .byte 1; |
test/tests/asm.scoped.symbols.tail.ras
added
+12 -0
| 1 | + | //! returns: 9 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | tail @kernel::main; |
|
| 6 | + | li %a0 1; |
|
| 7 | + | li %a7 93; |
|
| 8 | + | ecall; |
|
| 9 | + | @kernel::main |
|
| 10 | + | li %a0 9; |
|
| 11 | + | li %a7 93; |
|
| 12 | + | ecall; |
test/tests/asm.section.switching.ras
added
+15 -0
| 1 | + | //! returns: 3 |
|
| 2 | + | ||
| 3 | + | .data; |
|
| 4 | + | @msg |
|
| 5 | + | .ascii "ok"; |
|
| 6 | + | .text; |
|
| 7 | + | @entry |
|
| 8 | + | jal %ra @done; |
|
| 9 | + | li %a0 1; |
|
| 10 | + | li %a7 93; |
|
| 11 | + | ecall; |
|
| 12 | + | @done |
|
| 13 | + | li %a0 3; |
|
| 14 | + | li %a7 93; |
|
| 15 | + | ecall; |
test/tests/asm.space.constant.expressions.ras
added
+20 -0
| 1 | + | //! returns: 12 |
|
| 2 | + | ||
| 3 | + | .constant PAGE 4096; |
|
| 4 | + | .constant COUNT 8; |
|
| 5 | + | .constant SPACE PAGE / 1024; |
|
| 6 | + | .constant WORD (COUNT - 4) * 2; |
|
| 7 | + | .constant BYTE PAGE / 1024; |
|
| 8 | + | .constant dataBase 0x10000; |
|
| 9 | + | .data; |
|
| 10 | + | .space SPACE; |
|
| 11 | + | .word WORD; |
|
| 12 | + | .byte BYTE; |
|
| 13 | + | .text; |
|
| 14 | + | @entry |
|
| 15 | + | li %t0 dataBase; |
|
| 16 | + | lbu %a0 4(%t0); |
|
| 17 | + | lbu %t1 8(%t0); |
|
| 18 | + | add %a0 %a0 %t1; |
|
| 19 | + | li %a7 93; |
|
| 20 | + | ecall; |
test/tests/asm.string.directive.lists.ras
added
+18 -0
| 1 | + | //! returns: 180 |
|
| 2 | + | ||
| 3 | + | .constant dataBase 0x10000; |
|
| 4 | + | .data; |
|
| 5 | + | .ascii "a", "b"; |
|
| 6 | + | .ascii "x", "y"; |
|
| 7 | + | .text; |
|
| 8 | + | @entry |
|
| 9 | + | li %t0 dataBase; |
|
| 10 | + | lbu %a0 0(%t0); |
|
| 11 | + | lbu %t1 1(%t0); |
|
| 12 | + | add %a0 %a0 %t1; |
|
| 13 | + | lbu %t1 2(%t0); |
|
| 14 | + | add %a0 %a0 %t1; |
|
| 15 | + | lbu %t1 3(%t0); |
|
| 16 | + | add %a0 %a0 %t1; |
|
| 17 | + | li %a7 93; |
|
| 18 | + | ecall; |
test/tests/asm.word.dword.constants.ras
added
+20 -0
| 1 | + | //! returns: 50 |
|
| 2 | + | ||
| 3 | + | .constant WORD 0x11223344; |
|
| 4 | + | .constant DWORD 0x55667788; |
|
| 5 | + | .constant dataBase 0x10000; |
|
| 6 | + | .data; |
|
| 7 | + | .word WORD; |
|
| 8 | + | .dword DWORD; |
|
| 9 | + | .text; |
|
| 10 | + | @entry |
|
| 11 | + | li %t0 dataBase; |
|
| 12 | + | lbu %a0 0(%t0); |
|
| 13 | + | lbu %t1 3(%t0); |
|
| 14 | + | add %a0 %a0 %t1; |
|
| 15 | + | lbu %t1 4(%t0); |
|
| 16 | + | add %a0 %a0 %t1; |
|
| 17 | + | lbu %t1 7(%t0); |
|
| 18 | + | add %a0 %a0 %t1; |
|
| 19 | + | li %a7 93; |
|
| 20 | + | ecall; |
test/tests/asm.word.shift.ops.ras
added
+38 -0
| 1 | + | //! returns: 101 |
|
| 2 | + | ||
| 3 | + | .text; |
|
| 4 | + | @entry |
|
| 5 | + | li %t0 1; |
|
| 6 | + | slli %t1 %t0 5; |
|
| 7 | + | slliw %t2 %t0 4; |
|
| 8 | + | sllw %t3 %t2 %t0; |
|
| 9 | + | sll %t4 %t0 %t0; |
|
| 10 | + | li %t5 -8; |
|
| 11 | + | srai %t6 %t5 2; |
|
| 12 | + | sraiw %a1 %t5 1; |
|
| 13 | + | sraw %a2 %t5 %t0; |
|
| 14 | + | li %a3 64; |
|
| 15 | + | srli %a3 %a3 5; |
|
| 16 | + | li %a4 64; |
|
| 17 | + | srliw %a4 %a4 4; |
|
| 18 | + | srlw %a5 %a4 %t0; |
|
| 19 | + | srl %a6 %a3 %t0; |
|
| 20 | + | addiw %a0 %zero 7; |
|
| 21 | + | addw %a0 %a0 %t0; |
|
| 22 | + | subw %a0 %a0 %t0; |
|
| 23 | + | add %a0 %a0 %t4; |
|
| 24 | + | add %a0 %a0 %a3; |
|
| 25 | + | add %a0 %a0 %a4; |
|
| 26 | + | add %a0 %a0 %a5; |
|
| 27 | + | add %a0 %a0 %a6; |
|
| 28 | + | slti %t6 %t6 0; |
|
| 29 | + | add %a0 %a0 %t6; |
|
| 30 | + | slti %a1 %a1 0; |
|
| 31 | + | add %a0 %a0 %a1; |
|
| 32 | + | slti %a2 %a2 0; |
|
| 33 | + | add %a0 %a0 %a2; |
|
| 34 | + | add %a0 %a0 %t1; |
|
| 35 | + | add %a0 %a0 %t2; |
|
| 36 | + | add %a0 %a0 %t3; |
|
| 37 | + | li %a7 93; |
|
| 38 | + | ecall; |
vim/ras.vim
added
+63 -0
| 1 | + | " ras.vim |
|
| 2 | + | " Syntax for Radiance Assembly (.ras) files |
|
| 3 | + | " |
|
| 4 | + | if exists("b:current_syntax") |
|
| 5 | + | finish |
|
| 6 | + | endif |
|
| 7 | + | ||
| 8 | + | syntax clear |
|
| 9 | + | ||
| 10 | + | " Comments |
|
| 11 | + | syntax match rasComment "//.*$" |
|
| 12 | + | syntax keyword rasTodo TODO FIXME contained containedin=rasComment |
|
| 13 | + | ||
| 14 | + | " Directives (`.export` is the symbol-export directive used by the .ras test modules) |
|
| 15 | + | syntax match rasDirective "\.\%(align\|ascii\|byte\|constant\|data\|dword\|export\|global\|space\|text\|word\)\>" |
|
| 16 | + | ||
| 17 | + | " Labels: bare (@name, @scope::name) or quoted (@"module.name::symbol") |
|
| 18 | + | syntax match rasLabel +@\%("[^"]*"\|[A-Za-z_][A-Za-z0-9_]*\%(::[A-Za-z_][A-Za-z0-9_]*\)*\)+ |
|
| 19 | + | ||
| 20 | + | " Mnemonics |
|
| 21 | + | syntax keyword rasMnemonic add addi addiw addw and andi auipc |
|
| 22 | + | syntax keyword rasMnemonic beq beqz bge bgeu bgt ble blt bltu bne bnez |
|
| 23 | + | syntax keyword rasMnemonic call csrc csrr csrrw csrsi csrw |
|
| 24 | + | syntax keyword rasMnemonic div divu divuw divw |
|
| 25 | + | syntax keyword rasMnemonic ebreak ecall |
|
| 26 | + | syntax keyword rasMnemonic j jal jalr la lb lbu ld lh lhu li lui lw lwu |
|
| 27 | + | syntax keyword rasMnemonic mret mul mulh mulhsu mulhu mulw mv |
|
| 28 | + | syntax keyword rasMnemonic neg nop not or ori |
|
| 29 | + | syntax keyword rasMnemonic rem remu remuw remw ret |
|
| 30 | + | syntax keyword rasMnemonic sb sd seqz sh sll slli slliw sllw slt slti sltiu sltu snez |
|
| 31 | + | syntax keyword rasMnemonic sra srai sraiw sraw srl srli srliw srlw sub subw sw |
|
| 32 | + | syntax keyword rasMnemonic tail wfi xor xori |
|
| 33 | + | ||
| 34 | + | " Registers |
|
| 35 | + | syntax match rasRegister "%\%(a[0-7]\|fp\|gp\|ra\|s[0-9]\|s10\|s11\|sp\|t[0-6]\|tp\|zero\)\>" |
|
| 36 | + | ||
| 37 | + | " CSR names |
|
| 38 | + | syntax keyword rasCsr mcause mepc mhartid mie mip mscratch mstatus mtval mtvec |
|
| 39 | + | ||
| 40 | + | " Numbers and literals |
|
| 41 | + | syntax match rasNumber "\%([+-]\)\=\<0[xX][0-9A-Fa-f]\+\>" |
|
| 42 | + | syntax match rasNumber "\%([+-]\)\=\<\d\+\>" |
|
| 43 | + | syntax region rasString start=+"+ skip=+\\"+ end=+"+ |
|
| 44 | + | syntax region rasChar start=+'+ skip=+\\'+ end=+'+ |
|
| 45 | + | ||
| 46 | + | " Namespaced symbols and punctuation |
|
| 47 | + | syntax match rasNamespaceSep "::" |
|
| 48 | + | syntax match rasPunct "[(),;:+\-*/]" |
|
| 49 | + | ||
| 50 | + | highlight default link rasComment Comment |
|
| 51 | + | highlight default link rasTodo Todo |
|
| 52 | + | highlight default link rasDirective Special |
|
| 53 | + | highlight default link rasLabel Label |
|
| 54 | + | highlight default link rasMnemonic Keyword |
|
| 55 | + | highlight default link rasRegister Identifier |
|
| 56 | + | highlight default link rasCsr Type |
|
| 57 | + | highlight default link rasNumber Number |
|
| 58 | + | highlight default link rasString String |
|
| 59 | + | highlight default link rasChar Character |
|
| 60 | + | highlight default link rasNamespaceSep Delimiter |
|
| 61 | + | highlight default link rasPunct Delimiter |
|
| 62 | + | ||
| 63 | + | let b:current_syntax = "ras" |