Add RV64 assembly support

364f0a910e6be6ef9be91ffcf1c6f159d8a4ae6eb70c4f403278389c2f7a9c98
This allows for modules written in RV64 assembly language to be compiled
and linked with Radiance modules.
Alexis Sellier committed ago 1 parent e5efba42
Makefile +13 -2
67 67
# Binary Tests
68 68
69 69
BIN_TEST_DIR := test/tests
70 70
# Only tests with `//! returns:` are compiled to binaries and executed.
71 71
BIN_TEST_EXE_SRC := $(shell grep -rl '^//! returns:' $(BIN_TEST_DIR))
72 -
BIN_TEST_EXE_BIN := $(BIN_TEST_EXE_SRC:.rad=.rv64)
72 +
BIN_TEST_RAD_EXE_SRC := $(filter %.rad,$(BIN_TEST_EXE_SRC))
73 +
BIN_TEST_RAS_EXE_SRC := $(filter %.ras,$(BIN_TEST_EXE_SRC))
74 +
BIN_TEST_RAD_ASM_SRC := $(wildcard $(BIN_TEST_RAD_EXE_SRC:.rad=.ras))
75 +
BIN_TEST_EXE_BIN := $(patsubst %.rad,%.rv64,$(BIN_TEST_RAD_EXE_SRC)) \
76 +
	$(patsubst %.ras,%.rv64,$(BIN_TEST_RAS_EXE_SRC))
73 77
BIN_RUNNER   := test/runner.rv64
74 78
BIN_TEST_RUN := test/run
75 79
76 80
bin-test: $(BIN_RUNNER) $(BIN_TEST_EXE_BIN)
77 81
	@echo
80 84
# Runner binary: the lowering IL checker.
81 85
$(BIN_RUNNER): test/runner.rad $(STD_LIB) $(RAD_BIN)
82 86
	@echo "radiance test/runner.rad => $@"
83 87
	@$(RADIANCE) $(STD) -pkg runner -mod test/runner.rad -entry runner -o $@
84 88
89 +
# A `.rad` executable test can have a same-basename `.ras` module. When one
# exists, list it as a prerequisite so the test binary is rebuilt when it changes.
90 +
$(patsubst %.ras,%.rv64,$(BIN_TEST_RAD_ASM_SRC)): %.rv64: %.ras
91 +
85 92
# Compile each executable test to a binary.
86 93
$(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.rad $(RAD_BIN)
87 94
	@echo "radiance $< => $@"
88 -
	@$(RADIANCE) -pkg test -mod $< -o $@
95 +
	@$(RADIANCE) -pkg test -mod $< $(patsubst %,-mod %,$(wildcard $(@:.rv64=.ras))) -o $@
96 +
97 +
$(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.ras $(BIN_RUNNER)
98 +
	@echo "asm $< => $@"
99 +
	@$(EMU) $(EMU_FLAGS) -run $(BIN_RUNNER) -- assemble $< $@
89 100
90 101
clean-bin-test:
91 102
	@rm -f $(BIN_RUNNER) \
92 103
		$(BIN_RUNNER:.rv64=.rv64.debug) \
93 104
		$(BIN_RUNNER:.rv64=.rv64.s) \
compiler/radiance.rad +203 -97
11 11
use std::lang::strings;
12 12
use std::lang::package;
13 13
use std::lang::il;
14 14
use std::lang::lower;
15 15
use std::arch::rv64;
16 +
use std::arch::rv64::asm;
16 17
use std::arch::rv64::printer;
17 18
use std::lang::sexpr;
18 19
use std::lang::gen::data;
19 20
use std::lang::gen::types;
20 21
use std::sys;
29 30
constant MAX_TOTAL_MODULES: u32 = 192;
30 31
/// Source code buffer arena (2 MB).
31 32
constant MAX_SOURCES_SIZE: u32 = 2097152;
32 33
/// Maximum number of test functions we can discover.
33 34
constant MAX_TESTS: u32 = 1024;
35 +
/// Maximum number of assembly source paths we can load per package.
36 +
constant MAX_ASM_MODULES: u32 = 64;
34 37
35 38
/// Temporary arena size (32 MB) - retains all parsed AST until resolution.
36 39
/// Used for: AST during parsing, then codegen scratch space.
37 40
constant TEMP_ARENA_SIZE: u32 = 33554432;
38 41
/// Main arena size (64 MB) - lives throughout compilation.
66 69
67 70
/// Read-only data file extension.
68 71
constant RO_DATA_EXT: *[u8] = ".ro.data";
69 72
/// Read-write data file extension.
70 73
constant RW_DATA_EXT: *[u8] = ".rw.data";
71 -
/// Maximum rodata size (1MB).
72 -
constant MAX_RO_DATA_SIZE: u32 = 1048576;
73 -
/// Maximum rwdata size (1MB).
74 -
constant MAX_RW_DATA_SIZE: u32 = 1048576;
74 +
/// Maximum rodata size (4MB).
75 +
constant MAX_RO_DATA_SIZE: u32 = 4194304;
76 +
/// Maximum rwdata size (4MB).
77 +
constant MAX_RW_DATA_SIZE: u32 = 4194304;
75 78
/// Maximum path length.
76 79
constant MAX_PATH_LEN: u32 = 256;
77 80
/// Read-only data buffer.
78 81
static RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
79 82
/// Read-write data buffer.
80 83
static RW_DATA_BUF: [u8; MAX_RW_DATA_SIZE] = undefined;
84 +
/// Assembly module source buffer.
85 +
static ASM_SOURCE_BUF: [u8; MAX_SOURCES_SIZE] = undefined;
86 +
/// Temporary assembly text buffer (262144 `u32` words, 1 MB), passed to the assembler for every module.
87 +
static ASM_TEXT_BUF: [u32; 262144] = undefined;
88 +
/// Temporary assembly data buffer.
89 +
static ASM_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
90 +
/// Accumulated assembly read-only data.
91 +
static ASM_RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
92 +
93 +
/// Assembly source file extension.
94 +
constant ASM_SOURCE_EXT: *[u8] = ".ras";
81 95
82 96
/// Usage string.
83 97
constant USAGE: *[u8] =
84 98
    "usage: radiance -pkg <name> -mod <input>.. [-pkg <name> -mod <input>..] -entry <pkg> -o <output>\n";
85 99
108 122
    modPath: *[*[u8]],
109 123
    /// Test function name (eg. "testFoo").
110 124
    fnName: *[u8],
111 125
}
112 126
127 +
/// Source inputs belonging to one command-line package.
///
/// Filled in while parsing arguments: each `-mod` path ending in
/// `ASM_SOURCE_EXT` is appended to `asmPaths`, every other path to `radPaths`.
/// Only the first `radPathCount` / `asmPathCount` entries of the arrays are
/// meaningful; the rest is left `undefined`.
128 +
record PackageInput {
129 +
    /// Package name from the `-pkg` argument.
130 +
    name: *[u8],
131 +
    /// Radiance source paths for this package.
132 +
    radPaths: [*[u8]; MAX_LOADED_MODULES],
133 +
    /// Number of Radiance source paths.
134 +
    radPathCount: u32,
135 +
    /// Assembly source paths for this package.
136 +
    asmPaths: [*[u8]; MAX_ASM_MODULES],
137 +
    /// Number of assembly source paths.
138 +
    asmPathCount: u32,
139 +
}
140 +
113 141
/// Compilation context.
114 142
record CompileContext {
115 143
    /// Array of packages to compile.
116 144
    packages: [package::Package; MAX_PACKAGES],
145 +
    /// Driver inputs for each package slot.
146 +
    inputs: [PackageInput; MAX_PACKAGES],
117 147
    /// Number of packages.
118 148
    packageCount: u32,
119 149
    /// Index of entry package.
120 150
    entryPkgIdx: ?u32,
121 151
    /// Global module graph shared by all packages.
162 192
    debug: bool,
163 193
    /// How the generated program should handle entry.
164 194
    entryMode: CodegenEntryMode,
165 195
}
166 196
197 +
/// Print a driver error line and return the error value to throw.
///
/// Writes `radiance: `, then the parts of `msg` separated by single spaces,
/// then a newline, all through `io::printError`. Always returns
/// `Error::Other`, which lets call sites write `throw error(&[..])`.
198 +
fn error(msg: *[*[u8]]) -> Error {
199 +
    io::printError("radiance: ");
200 +
201 +
    for part, i in msg {
202 +
        io::printError(part);
203 +
        // Separator goes between parts only, not after the last one.
        // `msg.len - 1` is only evaluated inside the loop, so `msg.len >= 1` here.
        if i < msg.len - 1 {
204 +
            io::printError(" ");
205 +
        }
206 +
    }
207 +
    io::printError("\n");
208 +
    return Error::Other;
209 +
}
210 +
167 211
/// Print a log line for the given package.
168 212
fn pkgLog(pkg: *package::Package, msg: *[*[u8]]) {
169 213
    io::printError("radiance: ");
170 214
    io::printError(pkg.name);
171 215
    io::printError(": ");
177 221
        }
178 222
    }
179 223
    io::printError("\n");
180 224
}
181 225
226 +
/// Return `true` when `path` ends with `ext`.
///
/// The comparison is a suffix check done with `mem::eq` on the last
/// `ext.len` bytes of `path`. `ext` is compared as given, so callers include
/// the leading dot themselves (eg. `".ras"`).
227 +
fn hasExtension(path: *[u8], ext: *[u8]) -> bool {
228 +
    // A path shorter than the extension cannot match; this guard also keeps
    // the subtraction below from underflowing.
    if path.len < ext.len {
229 +
        return false;
230 +
    }
231 +
    let start = path.len - ext.len;
232 +
    return mem::eq(&path[start..], ext);
233 +
}
234 +
235 +
/// Create an empty source input set for one package.
///
/// Both path counts start at zero. The path arrays themselves are left
/// `undefined`, so entries must only be read below the matching count.
236 +
fn packageInput(name: *[u8]) -> PackageInput {
237 +
    return PackageInput {
238 +
        name,
239 +
        radPaths: undefined,
240 +
        radPathCount: 0,
241 +
        asmPaths: undefined,
242 +
        asmPathCount: 0,
243 +
    };
244 +
}
245 +
182 246
/// Register, load, and parse `path` within `pkg`.
183 247
fn processModule(
184 248
    pkg: *mut package::Package,
185 249
    graph: *mut module::ModuleGraph,
186 250
    path: *[u8],
188 252
    sourceArena: *mut alloc::Arena
189 253
) throws (Error) {
190 254
    pkgLog(pkg, &["parsing", "(", path, ")", ".."]);
191 255
192 256
    let moduleId = try package::registerModule(pkg, graph, path) catch {
193 -
        io::printError("radiance: error registering module\n");
194 -
        throw Error::Other;
257 +
        throw error(&["error registering module"]);
195 258
    };
196 259
    // Read file into remaining arena space.
197 260
    let buffer = alloc::remainingBuf(sourceArena);
198 261
    if buffer.len == 0 {
199 -
        io::printError("radiance: fatal: source arena exhausted\n");
200 -
        throw Error::Other;
262 +
        throw error(&["fatal:", "source arena exhausted"]);
201 263
    }
202 264
    let source = unix::readFile(path, buffer) else {
203 -
        io::printError("radiance: error reading file\n");
204 -
        throw Error::Other;
265 +
        throw error(&["error reading file"]);
205 266
    };
206 267
    if source.len == buffer.len {
207 -
        io::printError("radiance: fatal: source arena too small, file truncated: ");
208 -
        io::printError(path);
209 -
        io::printError("\n");
210 -
        throw Error::Other;
268 +
        throw error(&["fatal:", "source arena too small, file truncated:", path]);
211 269
    }
212 270
    // Commit only what was read.
213 271
    alloc::commit(sourceArena, source.len);
214 272
215 273
    let ast = try parser::parse(scanner::SourceLoc::File(path), source, nodeArena, &mut STRING_POOL) catch {
216 274
        throw Error::Other;
217 275
    };
218 276
    try module::setAst(graph, moduleId, ast) catch {
219 -
        io::printError("radiance: error setting AST\n");
220 -
        throw Error::Other;
277 +
        throw error(&["error setting AST"]);
221 278
    };
222 279
    try module::setSource(graph, moduleId, source) catch {
223 -
        io::printError("radiance: error setting source\n");
224 -
        throw Error::Other;
280 +
        throw error(&["error setting source"]);
225 281
    };
226 282
}
227 283
228 284
/// Consume the next argument, or print an error and throw.
///
/// Advances `*idx` by one and returns the argument at the new index. When
/// that index is past the end of `args`, `msg` is reported and an error is
/// thrown instead. `*idx` stays advanced in both cases.
229 -
fn nextArg(args: *[*[u8]], idx: *mut u32, msg: *[u8]) -> *[u8] throws (Error) {
285 +
fn nextArg(args: *[*[u8]], idx: *mut u32, msg: *[*[u8]]) -> *[u8] throws (Error) {
230 286
    set *idx += 1;
231 287
    if *idx >= args.len {
232 -
        io::printError(msg);
233 -
        throw Error::Other;
288 +
        throw error(msg);
234 289
    }
235 290
    return args[*idx];
236 291
}
237 292
238 293
/// Parse CLI arguments and return compilation context.
244 299
    let mut debugEnabled = false;
245 300
    let mut outputPath: ?*[u8] = nil;
246 301
    let mut dump = Dump::None;
247 302
    let mut entryPkgName: ?*[u8] = nil;
248 303
249 -
    // Per-package module path tracking.
250 -
    let mut moduleCounts: [u32; MAX_PACKAGES] = undefined;
251 -
    let mut modulePaths: [[*[u8]; MAX_LOADED_MODULES]; MAX_PACKAGES] = undefined;
252 -
    let mut pkgNames: [*[u8]; MAX_PACKAGES] = undefined;
304 +
    // Per-package source path tracking.
305 +
    let mut inputs: [PackageInput; MAX_PACKAGES] = undefined;
253 306
    let mut pkgCount: u32 = 0;
254 307
    let mut currentPkgIdx: ?u32 = nil;
255 308
256 -
    for i in 0..MAX_PACKAGES {
257 -
        set moduleCounts[i] = 0;
258 -
    }
259 309
    if args.len == 0 {
260 310
        io::printError(USAGE);
261 311
        throw Error::Other;
262 312
    }
263 313
    let mut idx: u32 = 0;
264 314
265 315
    while idx < args.len {
266 316
        let arg = args[idx];
267 317
        if mem::eq(arg, "-pkg") {
268 -
            try nextArg(args, &mut idx, "radiance: `-pkg` requires a package name\n");
318 +
            try nextArg(args, &mut idx, &["`-pkg` requires a package name"]);
269 319
            if pkgCount >= MAX_PACKAGES {
270 -
                io::printError("radiance: too many packages specified\n");
271 -
                throw Error::Other;
320 +
                throw error(&["too many packages specified"]);
272 321
            }
273 -
            set pkgNames[pkgCount] = args[idx];
322 +
            set inputs[pkgCount] = packageInput(args[idx]);
274 323
            set currentPkgIdx = pkgCount;
275 324
            set pkgCount += 1;
276 325
        } else if mem::eq(arg, "-mod") {
277 -
            try nextArg(args, &mut idx, "radiance: `-mod` requires a module path\n");
326 +
            try nextArg(args, &mut idx, &["`-mod` requires a module path"]);
278 327
            let pkgIdx = currentPkgIdx else {
279 -
                io::printError("radiance: `-mod` must follow a `-pkg` argument\n");
280 -
                throw Error::Other;
328 +
                throw error(&["`-mod` must follow a `-pkg` argument"]);
281 329
            };
282 -
            if moduleCounts[pkgIdx] >= MAX_LOADED_MODULES {
283 -
                io::printError("radiance: too many modules specified for package\n");
284 -
                throw Error::Other;
330 +
            let input = &mut inputs[pkgIdx];
331 +
            if hasExtension(args[idx], ASM_SOURCE_EXT) {
332 +
                if input.asmPathCount >= MAX_ASM_MODULES {
333 +
                    throw error(&["too many assembly modules specified"]);
334 +
                }
335 +
                set input.asmPaths[input.asmPathCount] = args[idx];
336 +
                set input.asmPathCount += 1;
337 +
            } else {
338 +
                if input.radPathCount >= MAX_LOADED_MODULES {
339 +
                    throw error(&["too many modules specified for package"]);
340 +
                }
341 +
                set input.radPaths[input.radPathCount] = args[idx];
342 +
                set input.radPathCount += 1;
285 343
            }
286 -
            set modulePaths[pkgIdx][moduleCounts[pkgIdx]] = args[idx];
287 -
            set moduleCounts[pkgIdx] += 1;
288 344
        } else if mem::eq(arg, "-entry") {
289 -
            try nextArg(args, &mut idx, "radiance: `-entry` requires a package name\n");
345 +
            try nextArg(args, &mut idx, &["`-entry` requires a package name"]);
290 346
            set entryPkgName = args[idx];
291 347
        } else if mem::eq(arg, "-test") {
292 348
            set buildTest = true;
293 349
        } else if mem::eq(arg, "-debug") {
294 350
            set debugEnabled = true;
295 351
        } else if mem::eq(arg, "-o") {
296 -
            try nextArg(args, &mut idx, "radiance: `-o` requires an output path\n");
352 +
            try nextArg(args, &mut idx, &["`-o` requires an output path"]);
297 353
            set outputPath = args[idx];
298 354
        } else if mem::eq(arg, "-dump") {
299 -
            try nextArg(args, &mut idx, "radiance: `-dump` requires a mode (eg. ast)\n");
355 +
            try nextArg(args, &mut idx, &["`-dump` requires a mode (eg. ast)"]);
300 356
            let mode = args[idx];
301 357
            if mem::eq(mode, "ast") {
302 358
                set dump = Dump::Ast;
303 359
            } else if mem::eq(mode, "graph") {
304 360
                set dump = Dump::Graph;
305 361
            } else if mem::eq(mode, "il") {
306 362
                set dump = Dump::Il;
307 363
            } else if mem::eq(mode, "asm") {
308 364
                set dump = Dump::Asm;
309 365
            } else {
310 -
                io::printError("radiance: unknown dump mode `");
311 -
                io::printError(mode);
312 -
                io::printError("` (expected: ast, graph, il, asm)\n");
313 -
                throw Error::Other;
366 +
                throw error(&["unknown dump mode", mode, "(expected: ast, graph, il, asm)"]);
314 367
            }
315 368
        } else {
316 -
            io::printError("radiance: unknown argument `");
317 -
            io::printError(arg);
318 -
            io::printError("`\n");
319 -
            throw Error::Other;
369 +
            throw error(&["unknown argument", arg]);
320 370
        }
321 371
        set idx += 1;
322 372
    }
323 373
    if pkgCount == 0 {
324 -
        io::printError("radiance: no package specified\n");
325 -
        throw Error::Other;
374 +
        throw error(&["no package specified"]);
375 +
    }
376 +
    for i in 0..pkgCount {
377 +
        if inputs[i].radPathCount == 0 {
378 +
            throw error(&["package", inputs[i].name, "has no Radiance modules specified"]);
379 +
        }
326 380
    }
327 381
328 382
    // Determine entry package index.
329 383
    let mut entryPkgIdx: ?u32 = nil;
330 384
    if pkgCount == 1 {
331 385
        // Single package: it is the entry.
332 386
        set entryPkgIdx = 0;
333 387
    } else {
334 388
        // Multiple packages: need -entry.
335 389
        let entryName = entryPkgName else {
336 -
            io::printError("radiance: `-entry` required when multiple packages specified\n");
337 -
            throw Error::Other;
390 +
            throw error(&["`-entry` required when multiple packages specified"]);
338 391
        };
339 392
        for i in 0..pkgCount {
340 -
            if mem::eq(pkgNames[i], entryName) {
393 +
            if mem::eq(inputs[i].name, entryName) {
341 394
                set entryPkgIdx = i;
342 395
                break;
343 396
            }
344 397
        }
345 398
        if entryPkgIdx == nil {
346 -
            io::printError("radiance: fatal: entry package `");
347 -
            io::printError(entryName);
348 -
            io::printError("` not found\n");
349 -
350 -
            throw Error::Other;
399 +
            throw error(&["fatal:", "entry package", entryName, "not found"]);
351 400
        }
352 401
    }
353 402
    let graph = module::moduleGraph(&mut MODULE_ENTRIES[..], &mut STRING_POOL, arena);
354 403
    let mut ctx = CompileContext {
355 404
        packages: undefined,
405 +
        inputs,
356 406
        packageCount: pkgCount,
357 407
        entryPkgIdx,
358 408
        graph,
359 409
        config: resolver::Config { buildTest },
360 410
        dump,
362 412
        debug: debugEnabled,
363 413
    };
364 414
    // Initialize and parse all packages.
365 415
    let mut sourceArena = alloc::new(&mut MODULE_SOURCES[..]);
366 416
    for i in 0..pkgCount {
367 -
        package::init(&mut ctx.packages[i], i as u16, pkgNames[i], &mut STRING_POOL);
417 +
        package::init(&mut ctx.packages[i], i as u16, ctx.inputs[i].name, &mut STRING_POOL);
368 418
369 -
        for j in 0..moduleCounts[i] {
370 -
            let path = modulePaths[i][j];
419 +
        for j in 0..ctx.inputs[i].radPathCount {
420 +
            let path = ctx.inputs[i].radPaths[j];
371 421
            try processModule(&mut ctx.packages[i], &mut ctx.graph, path, arena, &mut sourceArena);
372 422
        }
373 423
    }
374 424
    return ctx;
375 425
}
376 426
377 427
/// Get the entry package from the context.
///
/// Returns a pointer to `ctx.packages[ctx.entryPkgIdx]`. Reports an error and
/// throws when `ctx.entryPkgIdx` is `nil`.
378 428
fn getEntryPackage(ctx: *CompileContext) -> *package::Package throws (Error) {
379 429
    let entryIdx = ctx.entryPkgIdx else {
380 -
        io::printError("radiance: no entry package specified\n");
381 -
        throw Error::Other;
430 +
        throw error(&["no entry package specified"]);
382 431
    };
383 432
    return &ctx.packages[entryIdx];
384 433
}
385 434
386 435
/// Get root module info from a package.
///
/// Returns the root module's graph entry together with its AST. Reports an
/// error and throws in three cases: the package has no `rootModuleId`, the
/// graph has no entry for that id, or the entry has no AST attached.
387 436
fn getRootModule(pkg: *package::Package, graph: *module::ModuleGraph) -> RootModule throws (Error) {
388 437
    let rootId = pkg.rootModuleId else {
389 -
        io::printError("radiance: no root module found\n");
390 -
        throw Error::Other;
438 +
        throw error(&["no root module found"]);
391 439
    };
392 440
    let rootEntry = module::get(graph, rootId) else {
393 -
        io::printError("radiance: root module entry not found\n");
394 -
        throw Error::Other;
441 +
        throw error(&["root module entry not found"]);
395 442
    };
396 443
    let rootAst = rootEntry.ast else {
397 -
        io::printError("radiance: root module has no AST\n");
398 -
        throw Error::Other;
444 +
        throw error(&["root module has no AST"]);
399 445
    };
400 446
    return RootModule { entry: rootEntry, ast: rootAst };
401 447
}
402 448
403 449
/// Dump the module graph.
463 509
    low: *mut lower::Lowerer,
464 510
    pkg: *mut package::Package,
465 511
    isEntry: bool
466 512
) throws (Error) {
467 513
    let rootId = pkg.rootModuleId else {
468 -
        io::printError("radiance: no root module found\n");
469 -
        throw Error::Other;
514 +
        throw error(&["no root module found"]);
470 515
    };
471 516
    // Set lowerer's package context for qualified name generation.
472 517
    // TODO: We shouldn't have to call this manually.
473 518
    lower::setPackage(low, &ctx.graph, pkg.name);
474 519
483 528
    modId: u16,
484 529
    isRoot: bool,
485 530
    pkg: *package::Package
486 531
) throws (Error) {
487 532
    let entry = module::get(graph, modId) else {
488 -
        io::printError("radiance: module entry not found\n");
489 -
        throw Error::Other;
533 +
        throw error(&["module entry not found"]);
490 534
    };
491 535
    let modAst = entry.ast else {
492 -
        io::printError("radiance: module has no AST\n");
493 -
        throw Error::Other;
536 +
        throw error(&["module has no AST"]);
494 537
    };
495 538
    pkgLog(pkg, &["lowering", "(", entry.filePath, ")", ".."]);
496 539
497 540
    try lower::lowerModule(low, modId, modAst, isRoot) catch err {
498 -
        io::printError("radiance: internal error during lowering: ");
541 +
        io::printError("radiance: ");
542 +
        io::printError("internal error during lowering: ");
499 543
        lower::printError(err);
500 544
        io::printError("\n");
545 +
501 546
        throw Error::Other;
502 547
    };
503 548
    // Recurse into children.
504 549
    for i in 0..entry.childrenLen {
505 550
        let childId = module::childAt(entry, i);
623 668
        if let entry = module::get(&ctx.graph, modIdx as u16) {
624 669
            collectModuleTests(entry, &mut tests[..], &mut testCount);
625 670
        }
626 671
    }
627 672
    if testCount == 0 {
628 -
        io::printError("radiance: fatal: no test functions found\n");
629 -
        throw Error::Other;
673 +
        throw error(&["fatal:", "no test functions found"]);
630 674
    }
631 675
    let mut countBuf: [u8; 10] = undefined;
632 676
    let countStr = fmt::formatU32(testCount, &mut countBuf[..]);
633 677
    pkgLog(entryPkg, &["found", countStr, "test(s)"]);
634 678
725 769
    set pos += try! mem::copy(&mut path[pos..], basePath);
726 770
    set pos += try! mem::copy(&mut path[pos..], ext);
727 771
    set path[pos] = 0; // Null-terminate for syscall.
728 772
729 773
    if not unix::writeFile(&path[..pos], data) {
730 -
        io::printError("radiance: fatal: failed to write data file\n");
731 -
        throw Error::Other;
774 +
        throw error(&["fatal:", "failed to write data file"]);
732 775
    }
733 776
}
734 777
735 778
/// Serialize debug entries and write the `.debug` file.
736 779
/// Resolves module IDs to file paths via the module graph.
779 822
        pkgScope: &mut RESOLVER_PKG_SCOPE,
780 823
        errors: &mut RESOLVER_ERRORS[..],
781 824
    };
782 825
    let mut res = resolver::resolver(storage, ctx.config);
783 826
784 -
    // Build package inputs.
785 -
    let mut packages: [resolver::Pkg; MAX_PACKAGES] = undefined;
827 +
    // Build the semantic package list consumed by the resolver.
828 +
    let mut resolverPkgs: [resolver::Pkg; MAX_PACKAGES] = undefined;
829 +
    let mut resolverPackageCount: u32 = 0;
786 830
    for i in 0..ctx.packageCount {
787 831
        let pkg = &ctx.packages[i];
788 832
        let root = try getRootModule(pkg, &ctx.graph);
789 833
790 -
        set packages[i] = resolver::Pkg {
834 +
        set resolverPkgs[resolverPackageCount] = resolver::Pkg {
791 835
            rootEntry: root.entry,
792 836
            rootAst: root.ast,
793 837
        };
838 +
        set resolverPackageCount += 1;
794 839
    }
795 840
796 841
    // Resolve all packages.
797 842
    // TODO: Fix this error printing dance.
798 -
    let diags = try resolver::resolve(&mut res, &ctx.graph, &packages[..ctx.packageCount]) catch {
843 +
    let diags = try resolver::resolve(&mut res, &ctx.graph, &resolverPkgs[..resolverPackageCount]) catch {
799 844
        let diags = resolver::Diagnostics { errors: res.errors };
800 845
        resolver::printer::printDiagnostics(&diags, &res);
801 846
        throw Error::Other;
802 847
    };
803 848
    if not resolver::success(&diags) {
804 849
        resolver::printer::printDiagnostics(&diags, &res);
805 -
        io::print("radiance: failed: ");
806 -
        io::printU32(diags.errors.len);
807 -
        io::printLn(" errors");
808 -
        throw Error::Other;
850 +
        let mut countBuf: [u8; 10] = undefined;
851 +
        let countStr = fmt::formatU32(diags.errors.len, &mut countBuf[..]);
852 +
        throw error(&["failed:", countStr, "errors"]);
809 853
    }
810 854
    return res;
811 855
}
812 856
813 -
/// Emit one lowered function to RV64 codegen and reclaim its IL arena.
857 +
/// Emit one lowered function to machine code and reclaim its IL arena.
814 858
fn generateLoweredFn(ctxPtr: *mut opaque, func: *il::Fn, role: lower::FnRole) {
815 859
    let ctx = ctxPtr as *mut CodegenSinkContext;
816 860
817 861
    match role {
818 862
        case lower::FnRole::Default => {
827 871
    }
828 872
    rv64::generateFunction(ctx.generator, func, ctx.codegenArena);
829 873
    alloc::reset(ctx.fnArena);
830 874
}
831 875
876 +
/// Assemble one `.ras` input and merge it into the active code generator.
876 +
///
877 +
/// Text symbols are appended to `generator`. Data emitted by the assembler is
878 +
/// copied into `ASM_RO_DATA_BUF` at `*asmDataLen`, and `*asmDataLen` is advanced
879 +
/// so the next assembly module receives the correct rodata base address.
880 +
///
/// `pkg` is only used for the log line. Every failure is reported through
/// `error` with the offending `path`, then thrown: unreadable file, source
/// filling `ASM_SOURCE_BUF` completely (treated as truncated), assembler
/// failure, or accumulated rodata exceeding `ASM_RO_DATA_BUF`.
881 +
fn assembleAsmModule(
882 +
    generator: *mut rv64::Generator,
883 +
    pkg: *package::Package,
884 +
    path: *[u8],
885 +
    asmDataLen: *mut u32,
886 +
    arena: *mut alloc::Arena
887 +
) throws (Error) {
888 +
    pkgLog(pkg, &["asm:", "parsing", "(", path, ")", ".."]);
889 +
890 +
    let source = unix::readFile(path, &mut ASM_SOURCE_BUF[..]) else {
891 +
        throw error(&["error reading assembly file:", path]);
892 +
    };
893 +
    // A read that fills the whole buffer may have been cut short.
    if source.len == ASM_SOURCE_BUF.len {
894 +
        throw error(&["fatal:", "assembly source too large:", path]);
895 +
    }
896 +
    let program = try asm::assemble(
897 +
        asm::scanner::SourceKind::File { path },
898 +
        source,
899 +
        &mut ASM_TEXT_BUF[..],
900 +
        &mut ASM_DATA_BUF[..],
901 +
        arena,
902 +
        &mut STRING_POOL,
903 +
        rv64::RO_DATA_BASE + *asmDataLen
904 +
    ) catch {
905 +
        throw error(&["assembly failed:", path]);
906 +
    };
907 +
    if *asmDataLen + program.data.len > ASM_RO_DATA_BUF.len {
908 +
        throw error(&["fatal:", "assembly rodata too large:", path]);
909 +
    }
910 +
    // NOTE(review): the offset advances by the raw data length with no
    // alignment padding; confirm the assembler does not need an aligned base.
    try! mem::copy(&mut ASM_RO_DATA_BUF[*asmDataLen..], program.data);
911 +
    set *asmDataLen += program.data.len;
912 +
913 +
    rv64::addAssembly(generator, program);
914 +
}
915 +
916 +
/// Assemble every assembly input of every package in `ctx`.
///
/// Packages are visited in index order and, within a package, assembly paths
/// in the order they were stored in `asmPaths`. A single running offset is
/// shared across all modules so their data is laid out back to back.
///
/// Returns the filled prefix of the static `ASM_RO_DATA_BUF`; the slice is
/// empty when no assembly inputs were given. Stops at the first module that
/// fails to assemble and propagates its error.
917 +
fn assembleAsmInputs(
918 +
    ctx: *CompileContext,
919 +
    generator: *mut rv64::Generator,
920 +
    arena: *mut alloc::Arena
921 +
) -> *[u8] throws (Error) {
922 +
    let mut asmDataLen: u32 = 0;
923 +
924 +
    for i in 0..ctx.packageCount {
925 +
        let input = &ctx.inputs[i];
926 +
        for j in 0..input.asmPathCount {
927 +
            try assembleAsmModule(
928 +
                generator,
929 +
                &ctx.packages[i],
930 +
                input.asmPaths[j],
931 +
                &mut asmDataLen,
932 +
                arena
933 +
            );
934 +
        }
935 +
    }
936 +
    return &ASM_RO_DATA_BUF[..asmDataLen];
937 +
}
938 +
832 939
/// Lower all packages while streaming each lowered function into RV64 codegen.
833 940
fn lowerAndGenerateAllPackages(
834 941
    ctx: *mut CompileContext,
835 942
    res: *mut resolver::Resolver,
836 943
    fnArena: *mut alloc::Arena,
867 974
    set low.output = lower::FnOutput::Stream(lower::FnSink {
868 975
        ctx: &mut codegenCtx as *mut opaque,
869 976
        emitFn: generateLoweredFn,
870 977
    });
871 978
    try lowerAllPackagesInto(ctx, res, &mut low);
979 +
    let asmData = try assembleAsmInputs(ctx, &mut generator, &mut res.arena);
872 980
873 981
    match generator.entryPatch {
874 982
        case rv64::EntryPatch::Reserved(targetName) => {
875 983
            if targetName == nil {
876 -
                io::printError("radiance: fatal: no default function found\n");
877 -
                throw Error::Other;
984 +
                throw error(&["fatal:", "no default function found"]);
878 985
            }
879 986
        }
880 987
        else => {}
881 988
    }
882 989
    if let path = codegenOptions.logPath {
883 990
        pkgLog(entryPkg, &["generating code", "(", path, ")", ".."]);
884 991
    }
885 -
    return rv64::finishProgram(&mut generator, &low.data[..], storage, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]);
992 +
    return rv64::finishProgram(&mut generator, &low.data[..], storage, asmData, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]);
886 993
}
887 994
888 995
/// Lower, optionally dump, and optionally generate binary output.
889 996
fn compile(
890 997
    ctx: *mut CompileContext,
922 1029
        debug: ctx.debug,
923 1030
        entryMode: CodegenEntryMode::DefaultEntry,
924 1031
    });
925 1032
926 1033
    if not writeCode(result.code, outPath) {
927 -
        io::printError("radiance: fatal: failed to write output file\n");
928 -
        throw Error::Other;
1034 +
        throw error(&["fatal:", "failed to write output file"]);
929 1035
    }
930 1036
    // Write data files.
931 1037
    try writeDataWithExt(&RO_DATA_BUF[..result.roDataSize], outPath, RO_DATA_EXT);
932 1038
    try writeDataWithExt(&RW_DATA_BUF[..result.rwDataSize], outPath, RW_DATA_EXT);
933 1039
lib/std.rad +1 -0
1 1
//! The Radiance Standard Library.
2 2
3 3
export mod io;
4 4
export mod collections;
5 +
export mod char;
5 6
export mod lang;
6 7
export mod sys;
7 8
export mod arch;
8 9
export mod fmt;
9 10
export mod mem;
lib/std/arch/rv64.rad +43 -4
13 13
export mod encode;
14 14
export mod decode;
15 15
export mod emit;
16 16
export mod isel;
17 17
export mod printer;
18 +
export mod asm;
18 19
19 20
@test mod tests;
20 21
21 22
use std::mem;
22 23
use std::collections::dict;
238 239
239 240
    // Reclaim unused memory after instruction selection.
240 241
    alloc::restore(arena, checkpoint);
241 242
}
242 243
244 +
/// Add the text section of an assembled program to the generator.
245 +
///
246 +
/// This function snapshots the generator's current code length as the base
247 +
/// index, converts each text symbol's byte offset to an instruction index, adds
248 +
/// that base, and records the final address for printing. Only `.export` text
249 +
/// symbols are exported to the emitter's function-offset table for extern call
250 +
/// resolution. Local labels must not escape their assembly fragment because
251 +
/// separate assembly inputs may reuse the same local names.
252 +
///
253 +
/// Non-text symbols are ignored here because assembled data is not appended to
254 +
/// the generator's text stream. The driver merges assembled data into the RO data
255 +
/// prefix separately and passes that data to [`finishProgram`].
256 +
export fn addAssembly(generator: *mut Generator, program: asm::Program) {
257 +
    // Captured before any word is emitted, so symbol indices are relative to
    // where this fragment starts in the generator's code.
    let baseIndex = generator.e.codeLen;
258 +
259 +
    for symbol in program.symbols {
260 +
        if symbol.section == asm::Section::Text {
261 +
            // NOTE(review): integer division truncates; assumes `symbol.offset`
            // is a multiple of `INSTR_SIZE` -- confirm the assembler guarantees it.
            let index = baseIndex + ((symbol.offset as u32) / INSTR_SIZE as u32);
262 +
            emit::recordFuncAt(&mut generator.e, symbol.name, index);
263 +
            if symbol.isExported {
264 +
                emit::recordFuncOffsetAt(&mut generator.e, symbol.name, index);
265 +
            }
266 +
        }
267 +
    }
268 +
    // Append the assembled instruction words after the existing code.
    for word in program.text {
269 +
        emit::emit(&mut generator.e, word);
270 +
    }
271 +
}
272 +
243 273
/// Finish RV64 code generation and return the emitted program.
244 274
export fn finishProgram(
245 275
    generator: *mut Generator,
246 276
    globalData: *[il::Data],
247 277
    storage: Storage,
278 +
    roDataPrefix: *[u8],
248 279
    roDataBuf: *mut [u8],
249 280
    rwDataBuf: *mut [u8]
250 281
) -> Program {
251 282
    // Build data map after function lowering. Function-local literals can add
252 283
    // global data while functions are lowered, so final layout belongs here.
253 284
    let mut dataSymCount: u32 = 0;
254 -
    let roLayoutSize = data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, true);
285 +
    let roLayoutSize = data::layoutSectionAtOffset(
286 +
        globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, roDataPrefix.len, true
287 +
    );
255 288
    data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RW_DATA_BASE, false);
256 289
257 290
    let dataSyms = &storage.dataSyms[..dataSymCount];
258 291
    let dataSymMap = data::buildMap(dataSyms, storage.dataSymEntries);
259 292
    let codeBase = mem::alignUp(RO_DATA_BASE + roLayoutSize, DWORD_SIZE as u32);
274 307
    // Patch function calls and address loads now that all functions are emitted.
275 308
    emit::patchCalls(&mut generator.e);
276 309
    emit::patchAddrLoads(&mut generator.e, &dataSymMap);
277 310
278 311
    // Emit data sections.
279 -
    let roDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true);
280 -
    let rwDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false);
281 -
312 +
    assert roDataPrefix.len <= roDataBuf.len, "finishProgram: rodata prefix buffer overflow";
313 +
    try! mem::copy(roDataBuf, roDataPrefix);
314 +
315 +
    let roDataSize = data::emitSectionAtOffset(
316 +
        globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true, roDataPrefix.len
317 +
    );
318 +
    let rwDataSize = data::emitSection(
319 +
        globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false
320 +
    );
282 321
    return Program {
283 322
        code: emit::getCode(&generator.e),
284 323
        funcs: emit::getFuncs(&generator.e),
285 324
        roDataSize,
286 325
        rwDataSize,
lib/std/arch/rv64/asm.rad added +528 -0
1 +
//! Minimal RV64 assembler.
2 +
//!
3 +
//! This module assembles `.ras` source files into RV64 text words plus a raw
4 +
//! data prefix that can be linked into a compiler-generated program. It exists
5 +
//! so the Radiance driver can mix hand-written RV64 assembly with generated IL
6 +
//! output without invoking an external assembler or linker.
7 +
//!
8 +
//! Assembly is intentionally direct and buffer-oriented. The caller provides a
9 +
//! text buffer, a data buffer, an arena, and the runtime base address where the
10 +
//! data buffer will be loaded. The parser writes encoded instructions into the
11 +
//! text buffer as it reads them and writes directive bytes into the data buffer
12 +
//! while in `.data`. The returned [`Program`] only contains slices into those
13 +
//! caller-provided buffers, so no ownership transfer or late copy is needed.
14 +
//!
15 +
//! The scanner is assembly-specific. It produces tokens for registers (`%a0`),
16 +
//! labels (`@name`), directives, strings, characters, numbers, and
17 +
//! punctuation. The parser consumes those tokens as a small line-oriented
18 +
//! language: *directives* declare sections or emit data, *labels* define
19 +
//! symbols at the current section offset, and *instructions* are validated
20 +
//! against RV64 operand forms before being encoded.
21 +
//!
22 +
//! Labels are defined at the current byte offset of the active section (text
23 +
//! or data). The parser is single-pass because it keeps assembly cheap and lets
24 +
//! instructions and data be emitted immediately, but forward references mean
25 +
//! some operands cannot be encoded when first seen. Branches, jumps,
26 +
//! load-address operands, and data directives that reference labels therefore
27 +
//! record fixups. After parsing reaches EOF, the emitter resolves the final
28 +
//! symbol table and patches every recorded use with the correct PC-relative
29 +
//! offset, absolute data address, or encoded data value.
30 +
//!
31 +
//! Data labels are resolved relative to the data base address. The compiler
32 +
//! driver accumulates all assembly data in a RO data prefix, passes
33 +
//! [`RO_DATA_BASE`] + `currentPrefixLen` for each input, then appends the
34 +
//! input's emitted data to that prefix. When the assembled text is appended to
35 +
//! the RV64 generator, every text symbol is shifted by the generator's current
36 +
//! code length so disassembly/debug output can name those instruction addresses
37 +
//! correctly, but only `.export` text symbols are registered for call
38 +
//! resolution. Other text labels remain local to their assembly fragment.
39 +
use std::lang::alloc;
40 +
use std::lang::strings;
41 +
use std::lang::gen;
42 +
use std::collections::dict;
43 +
use std::arch::rv64::encode;
44 +
use std::arch::rv64;
45 +
46 +
/// Assembler scanner module.
47 +
export mod scanner;
48 +
/// Assembler parser module.
49 +
export mod parser;
50 +
/// Assembler emission and fixup module.
51 +
export mod emit;
52 +
/// Tests.
53 +
@test mod tests;
54 +
55 +
/// In-memory result of assembling one RV64 assembly fragment.
56 +
///
57 +
/// [`Program`] is the boundary between the textual assembler and the rest of
58 +
/// the compiler. The assembler reads an assembly source file, encodes all
59 +
/// instructions, lays out all data bytes, resolves fixups that can be resolved
60 +
/// inside the fragment, and returns these three slices as the assembled
61 +
/// program.
62 +
///
63 +
/// The value is intentionally not a standalone object file or linked
64 +
/// executable. It carries only the sections and symbol table needed by the
65 +
/// compiler driver. The slices point at caller-owned storage: `text` and
66 +
/// `data` are backed by the buffers passed to [`assemble`], while symbol names
67 +
/// are interned in the assembler's string pool.
68 +
///
69 +
/// Symbol offsets are section-local byte offsets. Text symbols name positions
70 +
/// in `text`; data symbols name positions in `data`. When the compiler
71 +
/// consumes the program, [`rv64::addAssembly`] appends the text words to the
72 +
/// generated text stream and registers text labels at their relocated offsets.
73 +
/// The driver copies `data` into the final read-only data prefix; the data
74 +
/// base supplied to [`assemble`] lets the assembler resolve data addresses as
75 +
/// they will appear in that final layout.
76 +
export record Program {
77 +
    /// Encoded instructions in the text section.
78 +
    text: *[u32],
79 +
    /// Raw bytes in the data section.
80 +
    data: *[u8],
81 +
    /// Symbols defined by the source.
82 +
    symbols: *[Symbol],
83 +
}
84 +
85 +
/// Errors reported while assembling source text.
86 +
export union Error {
87 +
    /// Invalid syntax or operand form at a source offset.
88 +
    Invalid { offset: u32, message: *[u8] },
89 +
    /// The source emitted more text words than the caller-provided buffer holds.
90 +
    TextOverflow,
91 +
    /// The source emitted more data bytes than the caller-provided buffer holds.
92 +
    DataOverflow,
93 +
}
94 +
95 +
/// Active output section.
96 +
export union Section {
97 +
    /// Instruction section.
98 +
    Text,
99 +
    /// Data byte section.
100 +
    Data,
101 +
}
102 +
103 +
/// Branch opcode that needs fixup.
104 +
export union BranchOp {
105 +
    /// Branch if equal.
106 +
    Beq,
107 +
    /// Branch if not equal.
108 +
    Bne,
109 +
    /// Branch if less than, signed.
110 +
    Blt,
111 +
    /// Branch if greater than or equal, signed.
112 +
    Bge,
113 +
    /// Branch if less than, unsigned.
114 +
    Bltu,
115 +
    /// Branch if greater than or equal, unsigned.
116 +
    Bgeu,
117 +
    /// Branch if less than or equal, signed pseudo-instruction.
118 +
    Ble,
119 +
    /// Branch if greater than, signed pseudo-instruction.
120 +
    Bgt,
121 +
}
122 +
123 +
/// Parser and encoder behavior for one instruction mnemonic.
124 +
export union InstructionEncoder {
125 +
    /// No-operand instruction encoded by a fixed encoder.
126 +
    NoOperand { enc: fn() -> u32 },
127 +
    /// Load-immediate pseudo-instruction.
128 +
    Li,
129 +
    /// Load-address pseudo-instruction.
130 +
    La,
131 +
    /// Two-register instruction or pseudo-instruction.
132 +
    RR { enc: fn(gen::Reg, gen::Reg) -> u32 },
133 +
    /// Three-register instruction.
134 +
    RRR { enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32 },
135 +
    /// Register, register, immediate instruction.
136 +
    RRI { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
137 +
    /// Shift-immediate instruction with RV64 shift bounds.
138 +
    Shift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
139 +
    /// Shift-immediate instruction with RV64 W-mode shift bounds.
140 +
    WordShift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
141 +
    /// Load instruction with memory operand syntax.
142 +
    Load { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
143 +
    /// Store instruction with memory operand syntax.
144 +
    Store { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
145 +
    /// Two-register branch instruction.
146 +
    Branch { op: BranchOp },
147 +
    /// One-register branch-to-zero pseudo-instruction.
148 +
    BranchZero { op: BranchOp },
149 +
    /// `jal` instruction with explicit destination register.
150 +
    Jal,
151 +
    /// Jump pseudo-instruction with fixed destination register.
152 +
    Jump { rd: gen::Reg },
153 +
    /// CSR read-style operand form.
154 +
    RdCsr { enc: fn(gen::Reg, u32) -> u32 },
155 +
    /// CSR write-style operand form.
156 +
    CsrRs1 { enc: fn(u32, gen::Reg) -> u32 },
157 +
    /// CSR read/write operand form.
158 +
    Csrrw,
159 +
    /// CSR immediate operand form.
160 +
    Csrsi,
161 +
    /// Upper-immediate operand form.
162 +
    Upper { enc: fn(gen::Reg, i32) -> u32 },
163 +
}
164 +
165 +
/// Classified directive name.
166 +
export union DirectiveKind {
167 +
    /// `.align` directive.
168 +
    Align,
169 +
    /// `.ascii` directive.
170 +
    Ascii,
171 +
    /// `.byte` directive.
172 +
    Byte,
173 +
    /// `.constant` directive.
174 +
    Constant,
175 +
    /// `.data` directive.
176 +
    Data,
177 +
    /// `.dword` directive.
178 +
    Dword,
179 +
    /// `.export` directive.
180 +
    Export,
181 +
    /// `.space` directive.
182 +
    Space,
183 +
    /// `.text` directive.
184 +
    Text,
185 +
    /// `.word` directive.
186 +
    Word,
187 +
}
188 +
189 +
/// Instruction descriptor table row.
190 +
record InstructionEntry {
191 +
    /// Assembly mnemonic text.
192 +
    name: *[u8],
193 +
    /// Operand parser and encoder behavior.
194 +
    encoder: InstructionEncoder,
195 +
}
196 +
197 +
/// Directive descriptor table row.
198 +
record DirectiveEntry {
199 +
    /// Directive name without the leading `.`.
200 +
    name: *[u8],
201 +
    /// Parser behavior for the directive.
202 +
    kind: DirectiveKind,
203 +
}
204 +
205 +
/// Register descriptor table row.
206 +
record RegisterEntry {
207 +
    /// Register alias text without the leading `%`.
208 +
    name: *[u8],
209 +
    /// Numeric register selected by the alias.
210 +
    reg: gen::Reg,
211 +
}
212 +
213 +
/// CSR descriptor table row.
214 +
record CsrEntry {
215 +
    /// CSR name text.
216 +
    name: *[u8],
217 +
    /// Numeric CSR address.
218 +
    csr: u32,
219 +
}
220 +
221 +
/// Width of an integer data directive.
222 +
export union DataWidth {
223 +
    /// 32-bit data value.
224 +
    Word,
225 +
    /// 64-bit data value.
226 +
    Dword,
227 +
}
228 +
229 +
/// Extra slot used when sizing source-derived symbol and fixup buffers.
230 +
export constant SOURCE_CAP_PADDING: u32 = 1;
231 +
/// Scale factor used to keep assembler hash tables sparse.
232 +
export constant TABLE_CAPACITY_SCALE: u32 = 4;
233 +
/// Minimum hash-table capacity used by the assembler.
234 +
export constant MIN_TABLE_CAPACITY: u32 = 8;
235 +
/// `@label` names exclude the leading sigil byte when interned.
236 +
export constant LABEL_SIGIL_LEN: u32 = 1;
237 +
/// `.directive` names exclude the leading sigil byte when matched.
238 +
export constant DIRECTIVE_SIGIL_LEN: u32 = 1;
239 +
/// String and character literals are delimited by one byte on each side.
240 +
export constant QUOTE_DELIM_LEN: u32 = 1;
241 +
/// Number of bits in one byte.
242 +
export constant BITS_PER_BYTE: u64 = 8;
243 +
/// Mask for extracting one encoded byte.
244 +
export constant BYTE_MASK: u64 = 0xFF;
245 +
/// Largest signed 32-bit assembler value.
246 +
export constant I32_MAX_VALUE: i64 = 2147483647;
247 +
/// Magnitude of the smallest signed 32-bit assembler value.
248 +
export constant I32_MIN_MAGNITUDE: i64 = 2147483648;
249 +
/// Largest unsigned 32-bit assembler value.
250 +
export constant U32_MAX_VALUE: i64 = 4294967295;
251 +
/// Largest unsigned 8-bit assembler value.
252 +
export constant U8_MAX_VALUE: i64 = 255;
253 +
/// Upper bound for CSR immediate operands.
254 +
export constant CSR_IMM_LIMIT: i64 = 32;
255 +
/// Upper bound for RV64 W-mode shift immediates.
256 +
export constant WORD_SHIFT_LIMIT: i32 = 32;
257 +
/// Upper bound for RV64 shift immediates.
258 +
export constant SHIFT_LIMIT: i32 = 64;
259 +
/// Largest `lui` or `auipc` immediate.
260 +
export constant UPPER_IMM_MAX_VALUE: i64 = 0xFFFFF;
261 +
262 +
/// Sorted instruction descriptor table used by the assembler parser.
263 +
export constant INSTRUCTIONS: [InstructionEntry; 87] = [
264 +
    { name: "add",    encoder: InstructionEncoder::RRR { enc: encode::add } },
265 +
    { name: "addi",   encoder: InstructionEncoder::RRI { enc: encode::addi } },
266 +
    { name: "addiw",  encoder: InstructionEncoder::RRI { enc: encode::addiw } },
267 +
    { name: "addw",   encoder: InstructionEncoder::RRR { enc: encode::addw } },
268 +
    { name: "and",    encoder: InstructionEncoder::RRR { enc: encode::and_ } },
269 +
    { name: "andi",   encoder: InstructionEncoder::RRI { enc: encode::andi } },
270 +
    { name: "auipc",  encoder: InstructionEncoder::Upper { enc: encode::auipc } },
271 +
    { name: "beq",    encoder: InstructionEncoder::Branch { op: BranchOp::Beq } },
272 +
    { name: "beqz",   encoder: InstructionEncoder::BranchZero { op: BranchOp::Beq } },
273 +
    { name: "bge",    encoder: InstructionEncoder::Branch { op: BranchOp::Bge } },
274 +
    { name: "bgeu",   encoder: InstructionEncoder::Branch { op: BranchOp::Bgeu } },
275 +
    { name: "bgt",    encoder: InstructionEncoder::Branch { op: BranchOp::Bgt } },
276 +
    { name: "ble",    encoder: InstructionEncoder::Branch { op: BranchOp::Ble } },
277 +
    { name: "blt",    encoder: InstructionEncoder::Branch { op: BranchOp::Blt } },
278 +
    { name: "bltu",   encoder: InstructionEncoder::Branch { op: BranchOp::Bltu } },
279 +
    { name: "bne",    encoder: InstructionEncoder::Branch { op: BranchOp::Bne } },
280 +
    { name: "bnez",   encoder: InstructionEncoder::BranchZero { op: BranchOp::Bne } },
281 +
    { name: "call",   encoder: InstructionEncoder::Jump { rd: rv64::RA } },
282 +
    { name: "csrc",   encoder: InstructionEncoder::CsrRs1 { enc: encode::csrc } },
283 +
    { name: "csrr",   encoder: InstructionEncoder::RdCsr { enc: encode::csrr } },
284 +
    { name: "csrrw",  encoder: InstructionEncoder::Csrrw },
285 +
    { name: "csrsi",  encoder: InstructionEncoder::Csrsi },
286 +
    { name: "csrw",   encoder: InstructionEncoder::CsrRs1 { enc: encode::csrw } },
287 +
    { name: "div",    encoder: InstructionEncoder::RRR { enc: encode::div } },
288 +
    { name: "divu",   encoder: InstructionEncoder::RRR { enc: encode::divu } },
289 +
    { name: "divuw",  encoder: InstructionEncoder::RRR { enc: encode::divuw } },
290 +
    { name: "divw",   encoder: InstructionEncoder::RRR { enc: encode::divw } },
291 +
    { name: "ebreak", encoder: InstructionEncoder::NoOperand { enc: encode::ebreak } },
292 +
    { name: "ecall",  encoder: InstructionEncoder::NoOperand { enc: encode::ecall } },
293 +
    { name: "j",      encoder: InstructionEncoder::Jump { rd: rv64::ZERO } },
294 +
    { name: "jal",    encoder: InstructionEncoder::Jal },
295 +
    { name: "jalr",   encoder: InstructionEncoder::RRI { enc: encode::jalr } },
296 +
    { name: "la",     encoder: InstructionEncoder::La },
297 +
    { name: "lb",     encoder: InstructionEncoder::Load { enc: encode::lb } },
298 +
    { name: "lbu",    encoder: InstructionEncoder::Load { enc: encode::lbu } },
299 +
    { name: "ld",     encoder: InstructionEncoder::Load { enc: encode::ld } },
300 +
    { name: "lh",     encoder: InstructionEncoder::Load { enc: encode::lh } },
301 +
    { name: "lhu",    encoder: InstructionEncoder::Load { enc: encode::lhu } },
302 +
    { name: "li",     encoder: InstructionEncoder::Li },
303 +
    { name: "lui",    encoder: InstructionEncoder::Upper { enc: encode::lui } },
304 +
    { name: "lw",     encoder: InstructionEncoder::Load { enc: encode::lw } },
305 +
    { name: "lwu",    encoder: InstructionEncoder::Load { enc: encode::lwu } },
306 +
    { name: "mret",   encoder: InstructionEncoder::NoOperand { enc: encode::mret } },
307 +
    { name: "mul",    encoder: InstructionEncoder::RRR { enc: encode::mul } },
308 +
    { name: "mulh",   encoder: InstructionEncoder::RRR { enc: encode::mulh } },
309 +
    { name: "mulhsu", encoder: InstructionEncoder::RRR { enc: encode::mulhsu } },
310 +
    { name: "mulhu",  encoder: InstructionEncoder::RRR { enc: encode::mulhu } },
311 +
    { name: "mulw",   encoder: InstructionEncoder::RRR { enc: encode::mulw } },
312 +
    { name: "mv",     encoder: InstructionEncoder::RR { enc: encode::mv } },
313 +
    { name: "neg",    encoder: InstructionEncoder::RR { enc: encode::neg } },
314 +
    { name: "nop",    encoder: InstructionEncoder::NoOperand { enc: encode::nop } },
315 +
    { name: "not",    encoder: InstructionEncoder::RR { enc: encode::not_ } },
316 +
    { name: "or",     encoder: InstructionEncoder::RRR { enc: encode::or_ } },
317 +
    { name: "ori",    encoder: InstructionEncoder::RRI { enc: encode::ori } },
318 +
    { name: "rem",    encoder: InstructionEncoder::RRR { enc: encode::rem } },
319 +
    { name: "remu",   encoder: InstructionEncoder::RRR { enc: encode::remu } },
320 +
    { name: "remuw",  encoder: InstructionEncoder::RRR { enc: encode::remuw } },
321 +
    { name: "remw",   encoder: InstructionEncoder::RRR { enc: encode::remw } },
322 +
    { name: "ret",    encoder: InstructionEncoder::NoOperand { enc: encode::ret } },
323 +
    { name: "sb",     encoder: InstructionEncoder::Store { enc: encode::sb } },
324 +
    { name: "sd",     encoder: InstructionEncoder::Store { enc: encode::sd } },
325 +
    { name: "seqz",   encoder: InstructionEncoder::RR { enc: encode::seqz } },
326 +
    { name: "sh",     encoder: InstructionEncoder::Store { enc: encode::sh } },
327 +
    { name: "sll",    encoder: InstructionEncoder::RRR { enc: encode::sll } },
328 +
    { name: "slli",   encoder: InstructionEncoder::Shift { enc: encode::slli } },
329 +
    { name: "slliw",  encoder: InstructionEncoder::WordShift { enc: encode::slliw } },
330 +
    { name: "sllw",   encoder: InstructionEncoder::RRR { enc: encode::sllw } },
331 +
    { name: "slt",    encoder: InstructionEncoder::RRR { enc: encode::slt } },
332 +
    { name: "slti",   encoder: InstructionEncoder::RRI { enc: encode::slti } },
333 +
    { name: "sltiu",  encoder: InstructionEncoder::RRI { enc: encode::sltiu } },
334 +
    { name: "sltu",   encoder: InstructionEncoder::RRR { enc: encode::sltu } },
335 +
    { name: "snez",   encoder: InstructionEncoder::RR { enc: encode::snez } },
336 +
    { name: "sra",    encoder: InstructionEncoder::RRR { enc: encode::sra } },
337 +
    { name: "srai",   encoder: InstructionEncoder::Shift { enc: encode::srai } },
338 +
    { name: "sraiw",  encoder: InstructionEncoder::WordShift { enc: encode::sraiw } },
339 +
    { name: "sraw",   encoder: InstructionEncoder::RRR { enc: encode::sraw } },
340 +
    { name: "srl",    encoder: InstructionEncoder::RRR { enc: encode::srl } },
341 +
    { name: "srli",   encoder: InstructionEncoder::Shift { enc: encode::srli } },
342 +
    { name: "srliw",  encoder: InstructionEncoder::WordShift { enc: encode::srliw } },
343 +
    { name: "srlw",   encoder: InstructionEncoder::RRR { enc: encode::srlw } },
344 +
    { name: "sub",    encoder: InstructionEncoder::RRR { enc: encode::sub } },
345 +
    { name: "subw",   encoder: InstructionEncoder::RRR { enc: encode::subw } },
346 +
    { name: "sw",     encoder: InstructionEncoder::Store { enc: encode::sw } },
347 +
    { name: "tail",   encoder: InstructionEncoder::Jump { rd: rv64::ZERO } },
348 +
    { name: "wfi",    encoder: InstructionEncoder::NoOperand { enc: encode::wfi } },
349 +
    { name: "xor",    encoder: InstructionEncoder::RRR { enc: encode::xor } },
350 +
    { name: "xori",   encoder: InstructionEncoder::RRI { enc: encode::xori } },
351 +
];
352 +
353 +
/// Sorted directive lookup table used by the assembler parser.
354 +
export constant DIRECTIVES: [DirectiveEntry; 10] = [
355 +
    { name: "align",    kind: DirectiveKind::Align },
356 +
    { name: "ascii",    kind: DirectiveKind::Ascii },
357 +
    { name: "byte",     kind: DirectiveKind::Byte },
358 +
    { name: "constant", kind: DirectiveKind::Constant },
359 +
    { name: "data",     kind: DirectiveKind::Data },
360 +
    { name: "dword",    kind: DirectiveKind::Dword },
361 +
    { name: "export",   kind: DirectiveKind::Export },
362 +
    { name: "space",    kind: DirectiveKind::Space },
363 +
    { name: "text",     kind: DirectiveKind::Text },
364 +
    { name: "word",     kind: DirectiveKind::Word },
365 +
];
366 +
367 +
/// Sorted register-name lookup table used by the assembler parser.
368 +
export constant REGISTERS: [RegisterEntry; 33] = [
369 +
    { name: "a0",   reg: rv64::A0 },
370 +
    { name: "a1",   reg: rv64::A1 },
371 +
    { name: "a2",   reg: rv64::A2 },
372 +
    { name: "a3",   reg: rv64::A3 },
373 +
    { name: "a4",   reg: rv64::A4 },
374 +
    { name: "a5",   reg: rv64::A5 },
375 +
    { name: "a6",   reg: rv64::A6 },
376 +
    { name: "a7",   reg: rv64::A7 },
377 +
    { name: "fp",   reg: rv64::FP },
378 +
    { name: "gp",   reg: rv64::GP },
379 +
    { name: "ra",   reg: rv64::RA },
380 +
    { name: "s0",   reg: rv64::S0 },
381 +
    { name: "s1",   reg: rv64::S1 },
382 +
    { name: "s10",  reg: rv64::S10 },
383 +
    { name: "s11",  reg: rv64::S11 },
384 +
    { name: "s2",   reg: rv64::S2 },
385 +
    { name: "s3",   reg: rv64::S3 },
386 +
    { name: "s4",   reg: rv64::S4 },
387 +
    { name: "s5",   reg: rv64::S5 },
388 +
    { name: "s6",   reg: rv64::S6 },
389 +
    { name: "s7",   reg: rv64::S7 },
390 +
    { name: "s8",   reg: rv64::S8 },
391 +
    { name: "s9",   reg: rv64::S9 },
392 +
    { name: "sp",   reg: rv64::SP },
393 +
    { name: "t0",   reg: rv64::T0 },
394 +
    { name: "t1",   reg: rv64::T1 },
395 +
    { name: "t2",   reg: rv64::T2 },
396 +
    { name: "t3",   reg: rv64::T3 },
397 +
    { name: "t4",   reg: rv64::T4 },
398 +
    { name: "t5",   reg: rv64::T5 },
399 +
    { name: "t6",   reg: rv64::T6 },
400 +
    { name: "tp",   reg: rv64::TP },
401 +
    { name: "zero", reg: rv64::ZERO },
402 +
];
403 +
404 +
/// Sorted CSR-name lookup table used by the assembler parser.
405 +
export constant CSRS: [CsrEntry; 9] = [
406 +
    { name: "mcause",   csr: 0x342 },
407 +
    { name: "mepc",     csr: 0x341 },
408 +
    { name: "mhartid",  csr: 0xF14 },
409 +
    { name: "mie",      csr: 0x304 },
410 +
    { name: "mip",      csr: 0x344 },
411 +
    { name: "mscratch", csr: 0x340 },
412 +
    { name: "mstatus",  csr: 0x300 },
413 +
    { name: "mtval",    csr: 0x343 },
414 +
    { name: "mtvec",    csr: 0x305 },
415 +
];
416 +
417 +
/// Recorded symbol definition.
418 +
export record Symbol {
419 +
    /// Symbol name.
420 +
    name: *[u8],
421 +
    /// Section the symbol belongs to.
422 +
    section: Section,
423 +
    /// Byte offset within the section.
424 +
    offset: i32,
425 +
    /// Whether `.export` exported this symbol outside its assembly fragment.
426 +
    isExported: bool,
427 +
}
428 +
429 +
/// Information needed to resolve a pending symbol reference.
430 +
export union FixupInfo {
431 +
    /// Branch to a text label.
432 +
    Branch { op: BranchOp, rs1: gen::Reg, rs2: gen::Reg, index: u32 },
433 +
    /// JAL-like jump to a text label.
434 +
    Jal { rd: gen::Reg, index: u32 },
435 +
    /// Absolute address materialization into a register.
436 +
    Addr { rd: gen::Reg, index: u32 },
437 +
    /// A 32-bit data word referring to a symbol offset.
438 +
    Word { offset: u32 },
439 +
    /// A 64-bit data word referring to a symbol offset.
440 +
    Dword { offset: u32 },
441 +
}
442 +
443 +
/// Pending symbol reference.
444 +
export record Fixup {
445 +
    /// Referenced symbol.
446 +
    symbol: *[u8],
447 +
    /// Fixup payload.
448 +
    info: FixupInfo,
449 +
}
450 +
451 +
/// Parser and emission state.
452 +
export record Assembler {
453 +
    /// Allocation arena for temporary assembler state.
454 +
    arena: *mut alloc::Arena,
455 +
    /// Assembler lexical scanner.
456 +
    scan: scanner::Scanner,
457 +
    /// Output text buffer.
458 +
    text: *mut [u32],
459 +
    /// Output data buffer.
460 +
    data: *mut [u8],
461 +
    /// Current output section.
462 +
    section: Section,
463 +
    /// Defined symbols.
464 +
    symbols: *mut [Symbol],
465 +
    /// Name-to-symbol index map.
466 +
    symbolMap: dict::Dict,
467 +
    /// Name-to-integer map.
468 +
    constMap: dict::Dict,
469 +
    /// Names marked by `.export`.
470 +
    exportMap: dict::Dict,
471 +
    /// Pending fixups.
472 +
    fixups: *mut [Fixup],
473 +
    /// Absolute runtime address of data-section offset zero.
474 +
    dataBase: u32,
475 +
}
476 +
477 +
/// Assemble `source` using `dataBase` as the runtime address of the data section.
///
/// Encoded instructions are written into `textBuf` and data bytes into
/// `dataBuf`; symbol, fixup, and hash-table storage is allocated from `arena`.
/// The returned [`Program`] holds slices of those buffers, not copies.
///
/// Throws [`Error`] when parsing or fixup resolution fails.
478 +
export fn assemble(
479 +
    sourceKind: scanner::SourceKind,
480 +
    source: *[u8],
481 +
    textBuf: *mut [u32],
482 +
    dataBuf: *mut [u8],
483 +
    arena: *mut alloc::Arena,
484 +
    pool: *mut strings::Pool,
485 +
    dataBase: u32
486 +
) -> Program throws (Error) {
487 +
    // Symbol and fixup buffers get one slot per source byte plus padding.
    let slotCap = source.len + SOURCE_CAP_PADDING;
488 +
    // Hash tables are sized to a power of two covering a multiple of the slot
    // count, which keeps them sparse.
    let tableCap = nextPowerOfTwo(slotCap * TABLE_CAPACITY_SCALE);
489 +
490 +
    let symbols = try! alloc::allocSlice(arena, @sizeOf(Symbol), @alignOf(Symbol), slotCap);
491 +
    let fixups = try! alloc::allocSlice(arena, @sizeOf(Fixup), @alignOf(Fixup), slotCap);
492 +
    let entries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
493 +
    let constEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
494 +
    let exportEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
495 +
496 +
    // Output, symbol, and fixup slices start with length zero; the backing
    // buffer's length becomes their capacity. Output begins in the text section.
    let mut a = Assembler {
497 +
        arena,
498 +
        scan: scanner::scanner(sourceKind, source, pool),
499 +
        text: @sliceOf(textBuf.ptr, 0, textBuf.len),
500 +
        data: @sliceOf(dataBuf.ptr, 0, dataBuf.len),
501 +
        section: Section::Text,
502 +
        symbols: @sliceOf((symbols as *mut [Symbol]).ptr, 0, (symbols as *mut [Symbol]).len),
503 +
        symbolMap: dict::init(entries as *mut [dict::Entry]),
504 +
        constMap: dict::init(constEntries as *mut [dict::Entry]),
505 +
        exportMap: dict::init(exportEntries as *mut [dict::Entry]),
506 +
        fixups: @sliceOf((fixups as *mut [Fixup]).ptr, 0, (fixups as *mut [Fixup]).len),
507 +
        dataBase,
508 +
    };
509 +
    // Parse assembly source and emit instructions.
510 +
    try parser::parseProgram(&mut a);
511 +
    // Resolve fixups and finalize program.
512 +
    try emit::finishProgram(&mut a);
513 +
514 +
    // The slices now cover exactly what was emitted into the caller's buffers.
    return Program {
515 +
        text: a.text,
516 +
        data: a.data,
517 +
        symbols: a.symbols,
518 +
    };
519 +
}
520 +
521 +
/// Return the smallest power of two that is at least `value`, never going
/// below `MIN_TABLE_CAPACITY`.
522 +
fn nextPowerOfTwo(value: u32) -> u32 {
523 +
    // Start at the minimum capacity so small inputs still get a usable table.
    let mut n: u32 = MIN_TABLE_CAPACITY;
524 +
    // NOTE(review): if `value` exceeds the largest power of two that fits in a
    // u32, doubling can never reach it; what happens then depends on how `<<=`
    // treats overflow. Confirm callers stay below that bound.
    while n < value {
525 +
        set n <<= 1;
526 +
    }
527 +
    return n;
528 +
}
lib/std/arch/rv64/asm/emit.rad added +210 -0
1 +
//! Assembler emission and fixup helpers.
2 +
use std::arch::rv64::emit;
3 +
use std::arch::rv64::encode;
4 +
use std::arch::rv64;
5 +
use std::fmt;
6 +
7 +
use std::collections::dict;
8 +
use std::lang::gen;
9 +
10 +
/// Define a symbol at the current text or data offset.
///
/// The recorded offset is in bytes for both sections. The symbol is appended
/// to the symbol buffer and indexed by name in `symbolMap`. Panics when the
/// symbol buffer is full.
11 +
export fn defineSymbol(a: *mut super::Assembler, name: *[u8]) {
12 +
    if a.symbols.len >= a.symbols.cap {
13 +
        panic "asm: symbol buffer full";
14 +
    }
15 +
    let idx = a.symbols.len;
16 +
    // Data offsets are already byte counts; text length is counted in
    // instruction words, so it is scaled to bytes.
    let offset: i32 = a.data.len as i32
17 +
        if a.section == super::Section::Data
18 +
        else a.text.len as i32 * rv64::INSTR_SIZE;
19 +
20 +
    // Grow the symbol slice by one within the same backing buffer.
    set a.symbols = @sliceOf(a.symbols.ptr, idx + 1, a.symbols.cap);
21 +
    set a.symbols[idx] = super::Symbol {
22 +
        name,
23 +
        section: a.section,
24 +
        offset,
25 +
        // NOTE(review): the export flag is sampled at definition time; whether
        // an `.export` that appears after the label is applied elsewhere is not
        // visible from this function.
        isExported: dict::get(&a.exportMap, name) <> nil,
26 +
    };
27 +
    dict::insert(&mut a.symbolMap, name, idx as i32);
28 +
}
29 +
30 +
/// Append one encoded instruction word to the text section.
///
/// Throws `Error::TextOverflow` when the text buffer is already at capacity.
31 +
export fn emitText(a: *mut super::Assembler, word: u32) throws (super::Error) {
32 +
    if a.text.len >= a.text.cap {
33 +
        throw super::Error::TextOverflow;
34 +
    }
35 +
    let idx = a.text.len;
36 +
    // Extend the slice by one within the same backing buffer, then fill the
    // newly exposed slot.
    set a.text = @sliceOf(a.text.ptr, idx + 1, a.text.cap);
37 +
    set a.text[idx] = word;
38 +
}
39 +
40 +
/// Append `words` no-op instructions to the text section.
///
/// Propagates the overflow error from [`emitText`] if the buffer fills up
/// part-way; words emitted before the failure stay in the buffer.
41 +
export fn emitTextPadding(a: *mut super::Assembler, words: u32) throws (super::Error) {
42 +
    for _ in 0..words {
43 +
        try emitText(a, encode::nop());
44 +
    }
45 +
}
46 +
47 +
/// Append one byte to the data section.
///
/// Throws `Error::DataOverflow` when the data buffer is already at capacity.
48 +
export fn emitByte(a: *mut super::Assembler, byte: u8) throws (super::Error) {
49 +
    if a.data.len >= a.data.cap {
50 +
        throw super::Error::DataOverflow;
51 +
    }
52 +
    let idx = a.data.len;
53 +
    // Extend the slice by one within the same backing buffer, then fill the
    // newly exposed slot.
    set a.data = @sliceOf(a.data.ptr, idx + 1, a.data.cap);
54 +
    set a.data[idx] = byte;
55 +
}
56 +
57 +
/// Emit a little-endian integer with `bytes` bytes.
///
/// Only the low `bytes` bytes of `bits` are written; higher bits are dropped.
/// Propagates the overflow error from [`emitByte`].
58 +
fn emitDataInt(a: *mut super::Assembler, bits: u64, bytes: u32) throws (super::Error) {
59 +
    for i in 0..bytes {
60 +
        // Byte `i` is bits [8*i, 8*i + 7], so the least significant byte goes first.
        try emitByte(a, ((bits >> ((i as u64) * super::BITS_PER_BYTE)) & super::BYTE_MASK) as u8);
61 +
    }
62 +
}
63 +
64 +
/// Patch a little-endian integer with `bytes` bytes.
///
/// Overwrites `bytes` already-emitted data bytes starting at `offset`; the
/// data section length is not changed.
65 +
fn patchDataInt(a: *mut super::Assembler, offset: u32, bits: u64, bytes: u32) {
66 +
    for i in 0..bytes {
67 +
        // Same byte order as `emitDataInt`: least significant byte first.
        set a.data[offset + i] = ((bits >> ((i as u64) * super::BITS_PER_BYTE)) & super::BYTE_MASK) as u8;
68 +
    }
69 +
}
70 +
71 +
/// Emit an integer data directive value.
///
/// `value` is reinterpreted as unsigned bits and written little-endian using
/// the byte count selected by `width`. No range check is performed here.
72 +
export fn emitDataValue(a: *mut super::Assembler, value: i64, width: super::DataWidth) throws (super::Error) {
73 +
    match width {
74 +
        case super::DataWidth::Word => try emitDataInt(a, value as u64, rv64::WORD_SIZE as u32),
75 +
        case super::DataWidth::Dword => try emitDataInt(a, value as u64, rv64::DWORD_SIZE as u32),
76 +
    }
77 +
}
78 +
79 +
/// Record a data-section symbol fixup and reserve its bytes.
///
/// The fixup remembers the data offset where the value will live; zero bytes
/// are emitted as a placeholder of the width selected by `width`.
80 +
export fn recordDataFixup(a: *mut super::Assembler, target: *[u8], width: super::DataWidth) throws (super::Error) {
81 +
    // Capture the offset before emitting so it points at the placeholder.
    let offset = a.data.len;
82 +
    match width {
83 +
        case super::DataWidth::Word => {
84 +
            recordFixup(a, target, super::FixupInfo::Word { offset });
85 +
            try emitDataInt(a, 0, rv64::WORD_SIZE as u32);
86 +
        }
87 +
        case super::DataWidth::Dword => {
88 +
            recordFixup(a, target, super::FixupInfo::Dword { offset });
89 +
            try emitDataInt(a, 0, rv64::DWORD_SIZE as u32);
90 +
        }
91 +
    }
92 +
}
93 +
94 +
/// Append a pending fixup for `symbol` to the assembler's fixup list.
/// Panics when the fixup buffer has no spare capacity.
fn recordFixup(a: *mut super::Assembler, symbol: *[u8], info: super::FixupInfo) {
    if a.fixups.len >= a.fixups.cap {
        panic "asm: fixup buffer full";
    }
    let slot = a.fixups.len as u32;
    let entry = super::Fixup { symbol, info };
    // Extend the slice by one element, then store the new entry.
    set a.fixups = @sliceOf(a.fixups.ptr, slot + 1, a.fixups.cap);
    set a.fixups[slot] = entry;
}
103 +
104 +
/// Register a fixup for `symbol`, then reserve `words` instruction words of
/// text padding that `finishProgram` will overwrite.
export fn recordTextFixup(a: *mut super::Assembler, symbol: *[u8], info: super::FixupInfo, words: u32) throws (super::Error) {
    // Record first, then reserve: same order as the data-section variant.
    recordFixup(a, symbol, info);
    try emitTextPadding(a, words);
}
109 +
110 +
/// Look up `name` in the symbol map and return the matching symbol, or nil
/// when no symbol with that name has been defined.
fn findSymbol(a: *super::Assembler, name: *[u8]) -> ?super::Symbol {
    if let slot = dict::get(&a.symbolMap, name) {
        return a.symbols[slot as u32];
    }
    return nil;
}
116 +
117 +
/// Compute the final address of a data-section symbol: its section offset
/// plus `a.dataBase`. Throws when the symbol is not in the data section.
fn dataSymbolAddr(a: *super::Assembler, symbol: super::Symbol) -> i32 throws (super::Error) {
    if symbol.section <> super::Section::Data {
        throw super::Error::Invalid { offset: 0, message: "data address target must be in data section" };
    }
    let base = a.dataBase as i32;
    return symbol.offset + base;
}
124 +
125 +
/// Resolve every recorded fixup against the symbol table and patch the
/// reserved text words or data bytes with their final encodings.
///
/// Throws `Error::Invalid` (always with offset 0) when a symbol is undefined,
/// lives in the wrong section, or is out of range for a branch or jump.
export fn finishProgram(a: *mut super::Assembler) throws (super::Error) {
    for n in 0..a.fixups.len {
        let pending = a.fixups[n];
        let symbol = findSymbol(a, pending.symbol) else {
            throw super::Error::Invalid { offset: 0, message: "undefined symbol" };
        };
        match pending.info {
            case super::FixupInfo::Branch { op, rs1, rs2, index } => {
                if symbol.section <> super::Section::Text {
                    throw super::Error::Invalid { offset: 0, message: "branch target must be in text section" };
                }
                // The displacement is measured from the branch instruction.
                let here = index as i32 * rv64::INSTR_SIZE;
                let delta = symbol.offset - here;

                if not encode::isBranchImm(delta) {
                    throw super::Error::Invalid { offset: 0, message: "branch target out of range" };
                }
                set a.text[index] = encodeBranch(op, rs1, rs2, delta);
            }
            case super::FixupInfo::Jal { rd, index } => {
                if symbol.section <> super::Section::Text {
                    throw super::Error::Invalid { offset: 0, message: "jump target must be in text section" };
                }
                // The displacement is measured from the jump instruction.
                let here = index as i32 * rv64::INSTR_SIZE;
                let delta = symbol.offset - here;

                if not encode::isJumpImm(delta) {
                    throw super::Error::Invalid { offset: 0, message: "jump target out of range" };
                }
                set a.text[index] = encode::jal(rd, delta);
            }
            case super::FixupInfo::Addr { rd, index } => {
                // Data symbols are loaded as absolute addresses (lui + addi);
                // every other symbol is loaded relative to the first reserved
                // instruction (auipc + addi).
                let inData = symbol.section == super::Section::Data;
                let mut addr = symbol.offset - (index as i32 * rv64::INSTR_SIZE);
                if inData {
                    set addr = symbol.offset + (a.dataBase as i32);
                }
                let parts = emit::splitImm(addr);
                if inData {
                    set a.text[index] = encode::lui(rd, parts.hi);
                } else {
                    set a.text[index] = encode::auipc(rd, parts.hi);
                }
                set a.text[index + 1] = encode::addi(rd, rd, parts.lo);
            }
            case super::FixupInfo::Word { offset } => {
                let target = try dataSymbolAddr(a, symbol);
                patchDataInt(a, offset, target as u64, rv64::WORD_SIZE as u32);
            }
            case super::FixupInfo::Dword { offset } => {
                let target = try dataSymbolAddr(a, symbol);
                patchDataInt(a, offset, target as u64, rv64::DWORD_SIZE as u32);
            }
        }
    }
}
181 +
182 +
/// Encode the branch selected by `op` with operands `rs1`, `rs2` and the
/// relative displacement `imm`, delegating to the matching `encode::` helper.
export fn encodeBranch(op: super::BranchOp, rs1: gen::Reg, rs2: gen::Reg, imm: i32) -> u32 {
    match op {
        case super::BranchOp::Beq => {
            return encode::beq(rs1, rs2, imm);
        }
        case super::BranchOp::Bne => {
            return encode::bne(rs1, rs2, imm);
        }
        case super::BranchOp::Blt => {
            return encode::blt(rs1, rs2, imm);
        }
        case super::BranchOp::Bge => {
            return encode::bge(rs1, rs2, imm);
        }
        case super::BranchOp::Bltu => {
            return encode::bltu(rs1, rs2, imm);
        }
        case super::BranchOp::Bgeu => {
            return encode::bgeu(rs1, rs2, imm);
        }
        case super::BranchOp::Ble => {
            return encode::ble(rs1, rs2, imm);
        }
        case super::BranchOp::Bgt => {
            return encode::bgt(rs1, rs2, imm);
        }
    }
}
195 +
196 +
/// Strip the quote delimiters from `literal`, decode backslash escapes, and
/// append the resulting bytes to the data section.
export fn emitDecodedString(a: *mut super::Assembler, literal: *[u8]) throws (super::Error) {
    let body = &literal[super::QUOTE_DELIM_LEN..literal.len - super::QUOTE_DELIM_LEN];
    let mut pos: u32 = 0;

    while pos < body.len {
        let ch = body[pos];
        let next = pos + 1;
        // A backslash only starts an escape when another byte follows it;
        // a trailing backslash is emitted verbatim.
        if ch == '\\' and next < body.len {
            try emitByte(a, fmt::decodeAsciiEscape(body[next]));
            set pos = next + 1;
        } else {
            try emitByte(a, ch);
            set pos = next;
        }
    }
}
lib/std/arch/rv64/asm/parser.rad added +858 -0
1 +
//! Assembler parser pass.
2 +
use std::mem;
3 +
use std::fmt;
4 +
use std::lang::alloc;
5 +
use std::lang::strings;
6 +
use std::lang::parser;
7 +
use std::lang::gen;
8 +
use std::collections::dict;
9 +
use std::arch::rv64::encode;
10 +
use std::arch::rv64;
11 +
12 +
use super::emit;
13 +
use super::scanner;
14 +
15 +
/// Result of parsing a memory operand: a base register plus a signed byte
/// offset. Load and store parsers pass both fields to the encoder.
record MemOperand {
    /// Register written inside the operand's parentheses.
    base: gen::Reg,
    /// Signed byte displacement written before the base register.
    offset: i32,
}
22 +
23 +
/// Parse the whole source held by `a`, one top-level item at a time, until
/// the scanner reports end of input.
export fn parseProgram(a: *mut super::Assembler) throws (super::Error) {
    // Prime the scanner so that `current` holds the first token.
    advance(a);

    loop {
        if a.scan.current.kind == scanner::TokenKind::Eof {
            return;
        }
        try parseItem(a);
    }
}
31 +
32 +
/// Align `value` upward to `alignment`.
///
/// Returns nil when the aligned result would not fit in a u32, or when
/// `alignment` is zero (there is no meaningful alignment, and `alignment - 1`
/// would underflow).
fn checkedAlignUp(value: u32, alignment: u32) -> ?u32 {
    // Guard the subtraction below against u32 underflow.
    if alignment == 0 {
        return nil;
    }
    let padding = alignment - 1;
    // Adding up to `padding` bytes must not wrap past `U32_MAX`.
    if value > parser::U32_MAX - padding {
        return nil;
    }
    return mem::alignUp(value, alignment);
}
40 +
41 +
/// Step the scanner forward by one token. The token that was current is kept
/// in `a.scan.previous` so callers can still inspect it.
fn advance(a: *mut super::Assembler) {
    // Save before scanning: `scanner::next` produces the new current token.
    set a.scan.previous = a.scan.current;
    set a.scan.current = scanner::next(&mut a.scan);
}
46 +
47 +
/// Advance past the current token if it has `kind` and return `true`;
/// otherwise leave the scanner untouched and return `false`.
fn consume(a: *mut super::Assembler, kind: scanner::TokenKind) -> bool {
    if a.scan.current.kind <> kind {
        return false;
    }
    advance(a);
    return true;
}
55 +
56 +
/// Build an `Error::Invalid` carrying `message` and the source offset of the
/// scanner's current token.
fn fail(a: *super::Assembler, message: *[u8]) -> super::Error {
    let at = a.scan.current.offset;
    return super::Error::Invalid { offset: at, message };
}
60 +
61 +
/// Build an `Error::Invalid` carrying `message` and the source offset of
/// `tok`.
fn failOnToken(tok: scanner::Token, message: *[u8]) -> super::Error {
    let at = tok.offset;
    return super::Error::Invalid { offset: at, message };
}
65 +
66 +
/// Check that the assembler is currently in the data section; otherwise
/// throw an error located at `tok`.
fn expectDataSection(a: *super::Assembler, tok: scanner::Token) throws (super::Error) {
    if a.section == super::Section::Data {
        return;
    }
    throw failOnToken(tok, "data directive is only valid in the data section");
}
72 +
73 +
/// Consume the current token when it has `kind`; otherwise throw `message`
/// located at the current token.
fn expect(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) throws (super::Error) {
    if consume(a, kind) {
        return;
    }
    throw fail(a, message);
}
79 +
80 +
/// Consume a token of `kind` and return it, or throw `message` located at
/// the current token when the kind does not match.
fn expectToken(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) -> scanner::Token throws (super::Error) {
    if not consume(a, kind) {
        throw fail(a, message);
    }
    // `consume` advanced, so the matched token is now the previous one.
    return a.scan.previous;
}
85 +
86 +
/// Check, without consuming anything, that the current token is the `;`
/// terminator; otherwise throw `message` located at the current token.
fn expectTerminator(a: *super::Assembler, message: *[u8]) throws (super::Error) {
    if a.scan.current.kind == scanner::TokenKind::Semicolon {
        return;
    }
    throw fail(a, message);
}
92 +
93 +
/// Narrow `value` to i32, throwing `message` at the current token when it
/// lies outside the i32 range.
fn expectI32Value(a: *super::Assembler, value: i64, message: *[u8]) -> i32 throws (super::Error) {
    let tooSmall = value < -super::I32_MIN_MAGNITUDE;
    let tooLarge = value > super::I32_MAX_VALUE;
    if tooSmall or tooLarge {
        throw fail(a, message);
    }
    return value as i32;
}
100 +
101 +
/// Narrow `value` to i32 after checking it with `encode::isSmallImm64`;
/// throws "immediate out of range" at the current token otherwise.
fn expectSmallImmValue(a: *super::Assembler, value: i64) -> i32 throws (super::Error) {
    let fits = encode::isSmallImm64(value);
    if not fits {
        throw fail(a, "immediate out of range");
    }
    return value as i32;
}
108 +
109 +
/// Define label `name` via `emit::defineSymbol`, rejecting a name that is
/// already present in the symbol map with an error located at `tok`.
fn defineSymbol(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
    let existing = dict::get(&a.symbolMap, name);
    if existing <> nil {
        throw failOnToken(tok, "duplicate label");
    }
    emit::defineSymbol(a, name);
}
116 +
117 +
/// Emit a parsed integer into the data section. Word values must fit in
/// i32; dword values are passed through unchanged.
fn emitDataValue(a: *mut super::Assembler, value: i64, width: super::DataWidth) throws (super::Error) {
    match width {
        case super::DataWidth::Word => {
            let narrow = try expectI32Value(a, value, "word literal out of range");
            try emit::emitDataValue(a, narrow as i64, width);
        }
        case super::DataWidth::Dword => {
            try emit::emitDataValue(a, value, width);
        }
    }
}
126 +
127 +
/// Parse a name made of a leading `kind` token followed by zero or more
/// `::`-separated identifiers, and return the interned source text.
///
/// `trimPrefix` bytes are dropped from the start of the first token;
/// `message` is thrown when the first token is not of `kind`.
fn parseScopedName(
    a: *mut super::Assembler,
    kind: scanner::TokenKind,
    message: *[u8],
    trimPrefix: u32
) -> *[u8] throws (super::Error) {
    let head = try expectToken(a, kind, message);
    let from = head.offset + trimPrefix;
    // `to` tracks the end of the last segment consumed so far.
    let mut to = head.offset + head.source.len;

    while consume(a, scanner::TokenKind::ColonColon) {
        let part = try expectToken(a, scanner::TokenKind::Ident, "expected identifier after `::`");
        set to = part.offset + part.source.len;
    }
    let text = &a.scan.source[from..to];
    return strings::intern(a.scan.pool, text);
}
144 +
145 +
/// Parse an identifier-led, possibly `::`-scoped, symbol name with no
/// prefix trimmed.
fn parseSymbolName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    let name = try parseScopedName(a, scanner::TokenKind::Ident, "expected symbol name", 0);
    return name;
}
149 +
150 +
/// Report whether `tok` is one of the two label token kinds: `Label` or
/// `QuotedLabel`.
fn isLabel(tok: scanner::TokenKind) -> bool {
    if tok == scanner::TokenKind::Label {
        return true;
    }
    return tok == scanner::TokenKind::QuotedLabel;
}
154 +
155 +
/// Consume a quoted label token, strip its sigil and quote delimiters,
/// unescape the contents into arena storage, and return the interned name.
/// Panics when the arena allocation fails.
fn parseQuotedLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    let tok = try expectToken(a, scanner::TokenKind::QuotedLabel, "expected label name");
    // Skip the label sigil and the opening quote; drop the closing quote.
    let from = super::LABEL_SIGIL_LEN + super::QUOTE_DELIM_LEN;
    let to = tok.source.len - super::QUOTE_DELIM_LEN;
    let raw = &tok.source[from..to];
    // The buffer is sized to the raw length.
    let storage = try alloc::allocSlice(a.arena, 1, 1, raw.len) catch {
        panic "asm: out of memory allocating quoted label";
    } as *mut [u8];
    let used = fmt::unescapeString(raw, storage);

    return strings::intern(a.scan.pool, &storage[..used]);
}
167 +
168 +
/// Parse a label name in either form: a quoted label, or a plain label that
/// may be `::`-scoped. The label sigil is not part of the returned name.
fn parseLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    if a.scan.current.kind <> scanner::TokenKind::QuotedLabel {
        return try parseScopedName(a, scanner::TokenKind::Label, "expected label name", super::LABEL_SIGIL_LEN);
    }
    return try parseQuotedLabelName(a);
}
175 +
176 +
/// Consume a directive token and return its text with the leading sigil
/// (`DIRECTIVE_SIGIL_LEN` bytes) removed.
fn parseDirectiveName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
    let tok = try expectToken(a, scanner::TokenKind::Directive, "expected directive name");
    let bare = &tok.source[super::DIRECTIVE_SIGIL_LEN..];
    return bare;
}
181 +
182 +
/// Parse a single top-level item, dispatching on the kind of the current
/// token: a directive, a label definition, or an instruction. Instructions
/// and directives must end in `;`; label definitions take no terminator.
fn parseItem(a: *mut super::Assembler) throws (super::Error) {
    match a.scan.current.kind {
        case scanner::TokenKind::Directive => {
            let head = a.scan.current;
            let directive = try parseDirectiveName(a);
            try parseDirective(a, directive, head);
            try expect(a, scanner::TokenKind::Semicolon, "expected `;` after directive");
        }
        case scanner::TokenKind::Label, scanner::TokenKind::QuotedLabel => {
            let head = a.scan.current;
            let label = try parseLabelName(a);
            try defineSymbol(a, label, head);
        }
        case scanner::TokenKind::Ident => {
            let head = a.scan.current;
            let mnemonic = try parseSymbolName(a);
            try parseInstruction(a, mnemonic, head);
            try expect(a, scanner::TokenKind::Semicolon, "expected `;` after instruction");
        }
        case scanner::TokenKind::Number => {
            // Step past the number, then report the error at it.
            let head = a.scan.current;
            advance(a);
            throw failOnToken(head, "unexpected number at top level");
        }
        else => throw fail(a, "expected label, instruction, or directive"),
    }
}
210 +
211 +
/// Binary-search a name-sorted descriptor table for `name`.
///
/// `len` is the number of entries and `getName(i)` returns the name of entry
/// `i`. Returns the index of the matching entry, or nil when `name` is
/// absent.
fn findSortedNameIndex(name: *[u8], len: u32, getName: fn(u32) -> *[u8]) -> ?u32 {
    let mut left: u32 = 0;
    let mut right: u32 = len;

    while left < right {
        // Midpoint computed without risking overflow of `left + right`.
        let mid = left + ((right - left) / 2);
        let cmp = mem::cmp(name, getName(mid));

        // Decide by sign rather than by the exact values -1 and 1, so that a
        // comparator returning any other negative or positive magnitude can
        // never be mistaken for a match.
        if cmp < 0 {
            set right = mid;
        } else if cmp > 0 {
            set left = mid + 1;
        } else {
            return mid;
        }
    }
    return nil;
}
228 +
229 +
/// Name accessor for `super::INSTRUCTIONS`, passed to
/// [`findSortedNameIndex`] as its `getName` callback.
fn instructionNameAt(index: u32) -> *[u8] {
    let entry = super::INSTRUCTIONS[index];
    return entry.name;
}
233 +
234 +
/// Name accessor for `super::DIRECTIVES`, passed to
/// [`findSortedNameIndex`] as its `getName` callback.
fn directiveNameAt(index: u32) -> *[u8] {
    let entry = super::DIRECTIVES[index];
    return entry.name;
}
238 +
239 +
/// Name accessor for `super::REGISTERS`, passed to
/// [`findSortedNameIndex`] as its `getName` callback.
fn registerNameAt(index: u32) -> *[u8] {
    let entry = super::REGISTERS[index];
    return entry.name;
}
243 +
244 +
/// Name accessor for `super::CSRS`, passed to [`findSortedNameIndex`] as its
/// `getName` callback.
fn csrNameAt(index: u32) -> *[u8] {
    let entry = super::CSRS[index];
    return entry.name;
}
248 +
249 +
/// Find the encoder descriptor registered for mnemonic `name` in
/// `super::INSTRUCTIONS`, or nil when the mnemonic is unknown.
fn lookupInstruction(name: *[u8]) -> ?super::InstructionEncoder {
    if let slot = findSortedNameIndex(name, super::INSTRUCTIONS.len, instructionNameAt) {
        return super::INSTRUCTIONS[slot].encoder;
    }
    return nil;
}
256 +
257 +
/// Map a directive name (without its sigil) to its `DirectiveKind` using
/// `super::DIRECTIVES`, or return nil when the name is unknown.
fn classifyDirective(name: *[u8]) -> ?super::DirectiveKind {
    if let slot = findSortedNameIndex(name, super::DIRECTIVES.len, directiveNameAt) {
        return super::DIRECTIVES[slot].kind;
    }
    return nil;
}
264 +
265 +
/// Resolve a register name (already stripped of its `%` prefix) through
/// `super::REGISTERS`, or return nil when the name is unknown.
fn lookupRegister(name: *[u8]) -> ?gen::Reg {
    if let slot = findSortedNameIndex(name, super::REGISTERS.len, registerNameAt) {
        return super::REGISTERS[slot].reg;
    }
    return nil;
}
272 +
273 +
/// Resolve a CSR name to its number through `super::CSRS`, or return nil
/// when the name is unknown.
fn lookupCsr(name: *[u8]) -> ?u32 {
    if let slot = findSortedNameIndex(name, super::CSRS.len, csrNameAt) {
        return super::CSRS[slot].csr;
    }
    return nil;
}
280 +
281 +
/// Parse the operands of the instruction whose mnemonic `name` was already
/// consumed, and emit its encoding. `tok` is the mnemonic token, used to
/// locate "unknown instruction" and wrong-section errors.
fn parseInstruction(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
    if a.section <> super::Section::Text {
        throw failOnToken(tok, "instructions are only valid in the text section");
    }
    let encoder = lookupInstruction(name) else {
        throw failOnToken(tok, "unknown instruction");
    };
    // Each encoder form has a dedicated operand parser; the match is the
    // final statement, so every arm simply falls out of the function.
    match encoder {
        case super::InstructionEncoder::NoOperand { enc } => {
            try expectTerminator(a, "unexpected operand");
            try emit::emitText(a, enc());
        }
        case super::InstructionEncoder::Li => try parseLi(a),
        case super::InstructionEncoder::La => try parseLa(a),
        case super::InstructionEncoder::RR { enc } => try parseRR(a, enc),
        case super::InstructionEncoder::RRR { enc } => try parseRRR(a, enc),
        case super::InstructionEncoder::RRI { enc } => try parseRRI(a, enc),
        case super::InstructionEncoder::Shift { enc } =>
            try parseShift(a, enc, super::SHIFT_LIMIT, "shift amount out of range"),
        case super::InstructionEncoder::WordShift { enc } =>
            try parseShift(a, enc, super::WORD_SHIFT_LIMIT, "word shift amount out of range"),
        case super::InstructionEncoder::Load { enc } => try parseLoad(a, enc),
        case super::InstructionEncoder::Store { enc } => try parseStore(a, enc),
        case super::InstructionEncoder::Branch { op } => try parseBranch(a, op),
        case super::InstructionEncoder::BranchZero { op } => try parseBranchZero(a, op),
        case super::InstructionEncoder::Jal => try parseJal(a),
        case super::InstructionEncoder::Jump { rd } => try parseJ(a, rd),
        case super::InstructionEncoder::RdCsr { enc } => try parseRdCsr(a, enc),
        case super::InstructionEncoder::CsrRs1 { enc } => try parseCsrRs1(a, enc),
        case super::InstructionEncoder::Csrrw => try parseCsrrw(a),
        case super::InstructionEncoder::Csrsi => try parseCsrsi(a),
        case super::InstructionEncoder::Upper { enc } => try parseUpper(a, enc),
    }
}
319 +
320 +
/// Parse the `li` pseudo-instruction: a destination register followed by an
/// immediate value.
///
/// Values accepted by `encode::isSmallImm64` become a single `addi` from the
/// zero register. Any other value must fit in i32 (otherwise "li immediate
/// out of range" is thrown) and is materialized as `lui` + `addiw`.
fn parseLi(a: *mut super::Assembler) throws (super::Error) {
    let rd = try parseRegister(a);
    let value = try parseValue(a);
    if encode::isSmallImm64(value) {
        try emit::emitText(a, encode::addi(rd, rv64::ZERO, value as i32));
        return;
    }
    let imm = try expectI32Value(a, value, "li immediate out of range");
    let split = rv64::emit::splitImm(imm);

    try emit::emitText(a, encode::lui(rd, split.hi));
    // On RV64, `lui` sign-extends bit 31 of its result into the upper 32
    // bits. With a 64-bit `addi`, immediates in 0x7FFFF800..0x7FFFFFFF
    // cannot be produced: the upper part must round up to 0x80000000, which
    // `lui` turns negative. `addiw` adds in 32 bits and sign-extends the
    // sum, which yields the requested i32 for every input.
    // NOTE(review): assumes `encode::addiw(rd, rs1, imm)` exists alongside
    // `encode::addi` - confirm against the encode module.
    try emit::emitText(a, encode::addiw(rd, rd, split.lo));
}
334 +
335 +
/// Parse the `la` pseudo-instruction: a destination register followed by a
/// label. Two instruction words are reserved and an `Addr` fixup is recorded
/// so the address can be filled in once the label is resolved.
fn parseLa(a: *mut super::Assembler) throws (super::Error) {
    let rd = try parseRegister(a);
    let label = try parseLabelName(a);
    // Index of the first reserved word.
    let index = a.text.len;
    let info = super::FixupInfo::Addr { rd, index };

    try emit::recordTextFixup(a, label, info, 2);
}
343 +
344 +
/// Parse a destination register followed by a CSR, then emit the word
/// produced by `enc(rd, csr)`.
fn parseRdCsr(a: *mut super::Assembler, enc: fn(gen::Reg, u32) -> u32) throws (super::Error) {
    let rd = try parseRegister(a);
    let csr = try parseCsr(a);
    let word = enc(rd, csr);

    try emit::emitText(a, word);
}
351 +
352 +
/// Parse a CSR followed by a source register, then emit the word produced
/// by `enc(csr, rs1)`.
fn parseCsrRs1(a: *mut super::Assembler, enc: fn(u32, gen::Reg) -> u32) throws (super::Error) {
    let csr = try parseCsr(a);
    let rs1 = try parseRegister(a);
    let word = enc(csr, rs1);

    try emit::emitText(a, word);
}
359 +
360 +
/// Parse `csrrw`: destination register, CSR, then source register, and emit
/// the encoded instruction.
fn parseCsrrw(a: *mut super::Assembler) throws (super::Error) {
    let rd = try parseRegister(a);
    let csr = try parseCsr(a);
    let rs1 = try parseRegister(a);
    let word = encode::csrrw(rd, csr, rs1);

    try emit::emitText(a, word);
}
368 +
369 +
/// Parse a CSR followed by an immediate and emit `csrsi`. The immediate must
/// satisfy `0 <= imm < CSR_IMM_LIMIT`.
fn parseCsrsi(a: *mut super::Assembler) throws (super::Error) {
    let csr = try parseCsr(a);
    let imm = try parseValue(a);
    if imm >= 0 and imm < super::CSR_IMM_LIMIT {
        try emit::emitText(a, encode::csrsi(csr, imm as u32));
        return;
    }
    throw fail(a, "CSR immediate out of range");
}
378 +
379 +
/// Parse two register operands and emit the word produced by `enc(rd, rs)`.
fn parseRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg) -> u32) throws (super::Error) {
    let rd = try parseRegister(a);
    let rs = try parseRegister(a);
    let word = enc(rd, rs);

    try emit::emitText(a, word);
}
386 +
387 +
/// Parse three register operands and emit the word produced by
/// `enc(rd, rs1, rs2)`.
fn parseRRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32) throws (super::Error) {
    let rd = try parseRegister(a);
    let rs1 = try parseRegister(a);
    let rs2 = try parseRegister(a);
    let word = enc(rd, rs1, rs2);

    try emit::emitText(a, word);
}
395 +
396 +
/// Parse two registers followed by an immediate read with `parseSmallImm`,
/// then emit the word produced by `enc(rd, rs1, imm)`.
fn parseRRI(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
    let rd = try parseRegister(a);
    let rs1 = try parseRegister(a);
    let imm = try parseSmallImm(a);
    let word = enc(rd, rs1, imm);

    try emit::emitText(a, word);
}
404 +
405 +
/// Parse a shift-immediate instruction: two registers and a shift amount.
///
/// The amount must satisfy `0 <= amount < limit`. A negative amount throws a
/// fixed message; an amount at or above `limit` throws `message`.
fn parseShift(
    a: *mut super::Assembler,
    enc: fn(gen::Reg, gen::Reg, i32) -> u32,
    limit: i32,
    message: *[u8]
) throws (super::Error) {
    let rd = try parseRegister(a);
    let rs1 = try parseRegister(a);
    let amount = try parseValue(a);

    if amount < 0 {
        throw fail(a, "shift amount must be non-negative");
    }
    let bound = limit as i64;
    if amount >= bound {
        throw fail(a, message);
    }
    let word = enc(rd, rs1, amount as i32);

    try emit::emitText(a, word);
}
426 +
427 +
/// Parse a load: destination register then memory operand, and emit the
/// word produced by `enc(rd, base, offset)`.
fn parseLoad(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
    let rd = try parseRegister(a);
    let operand = try parseMemory(a);
    let word = enc(rd, operand.base, operand.offset);

    try emit::emitText(a, word);
}
434 +
435 +
/// Parse a store: source register then memory operand, and emit the word
/// produced by `enc(rs2, base, offset)`.
fn parseStore(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
    let rs2 = try parseRegister(a);
    let operand = try parseMemory(a);
    let word = enc(rs2, operand.base, operand.offset);

    try emit::emitText(a, word);
}
442 +
443 +
/// Parse a branch that compares two registers, then hand the target operand
/// to `parseBranchLabel`.
fn parseBranch(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) {
    let lhs = try parseRegister(a);
    let rhs = try parseRegister(a);

    try parseBranchLabel(a, op, lhs, rhs);
}
450 +
451 +
/// Parse a label name when the current token is a label of either form;
/// otherwise consume nothing and return nil.
fn parseOptionalLabel(a: *mut super::Assembler) -> ?*[u8] throws (super::Error) {
    if isLabel(a.scan.current.kind) {
        return try parseLabelName(a);
    }
    return nil;
}
458 +
459 +
/// Parse the target of a branch. A label reserves one instruction word and
/// records a `Branch` fixup; anything else is read with `parseBranchImm` and
/// encoded immediately.
fn parseBranchLabel(a: *mut super::Assembler, op: super::BranchOp, rs1: gen::Reg, rs2: gen::Reg) throws (super::Error) {
    // Index of the word this branch will occupy.
    let index = a.text.len;
    if let label = try parseOptionalLabel(a) {
        let info = super::FixupInfo::Branch { op, rs1, rs2, index };
        try emit::recordTextFixup(a, label, info, 1);
        return;
    }
    let disp = try parseBranchImm(a);
    try emit::emitText(a, emit::encodeBranch(op, rs1, rs2, disp));
}
469 +
470 +
/// Parse a single-register branch pseudo-instruction; the second comparison
/// operand is always the zero register.
fn parseBranchZero(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) {
    let reg = try parseRegister(a);
    try parseBranchLabel(a, op, reg, rv64::ZERO);
}
475 +
476 +
/// Parse `jal`: an explicit destination register followed by a jump target
/// handled by `parseJ`.
fn parseJal(a: *mut super::Assembler) throws (super::Error) {
    let link = try parseRegister(a);
    try parseJ(a, link);
}
481 +
482 +
/// Parse the target of a jump that links into `rd`. A label reserves one
/// instruction word and records a `Jal` fixup; anything else is read with
/// `parseJumpImm` and encoded immediately.
fn parseJ(a: *mut super::Assembler, rd: gen::Reg) throws (super::Error) {
    // Index of the word this jump will occupy.
    let index = a.text.len;
    if let label = try parseOptionalLabel(a) {
        let info = super::FixupInfo::Jal { rd, index };
        try emit::recordTextFixup(a, label, info, 1);
        return;
    }
    let disp = try parseJumpImm(a);
    try emit::emitText(a, encode::jal(rd, disp));
}
492 +
493 +
/// Parse an upper-immediate instruction: destination register and an
/// immediate in `0..=UPPER_IMM_MAX_VALUE`, emitted through `enc(rd, imm)`.
fn parseUpper(a: *mut super::Assembler, enc: fn(gen::Reg, i32) -> u32) throws (super::Error) {
    let rd = try parseRegister(a);
    let raw = try parseValue(a);
    let negative = raw < 0;
    let tooLarge = raw > super::UPPER_IMM_MAX_VALUE;
    if negative or tooLarge {
        throw fail(a, "upper immediate out of range");
    }
    let word = enc(rd, raw as i32);
    try emit::emitText(a, word);
}
502 +
503 +
/// Parse the operands of the directive whose name was already consumed.
///
/// `tok` is the directive token and locates "unknown directive" and
/// wrong-section errors. Section switches take no operands; data-emitting
/// directives are only accepted while in the data section.
fn parseDirective(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
    let kind = classifyDirective(name) else {
        throw failOnToken(tok, "unknown directive");
    };
    // The match is the final statement, so each arm simply falls out of the
    // function once its handler returns.
    match kind {
        case super::DirectiveKind::Text => {
            try expectTerminator(a, "unexpected operand");
            set a.section = super::Section::Text;
        }
        case super::DirectiveKind::Data => {
            try expectTerminator(a, "unexpected operand");
            set a.section = super::Section::Data;
        }
        case super::DirectiveKind::Align =>
            try parseAlignDirective(a),
        case super::DirectiveKind::Ascii => {
            try expectDataSection(a, tok);
            try parseStringDirective(a);
        }
        case super::DirectiveKind::Byte => {
            try expectDataSection(a, tok);
            try parseByteDirective(a);
        }
        case super::DirectiveKind::Constant =>
            try parseConstantDirective(a),
        case super::DirectiveKind::Dword => {
            try expectDataSection(a, tok);
            try parseIntDirective(a, super::DataWidth::Dword);
        }
        case super::DirectiveKind::Export =>
            try parseExportDirective(a),
        case super::DirectiveKind::Space => {
            try expectDataSection(a, tok);
            try parseSpaceDirective(a);
        }
        case super::DirectiveKind::Word => {
            try expectDataSection(a, tok);
            try parseIntDirective(a, super::DataWidth::Word);
        }
    }
}
547 +
548 +
/// Parse `.constant`: a symbol name followed by an expression whose value
/// must fit in i32. The pair is stored in `a.constMap`.
fn parseConstantDirective(a: *mut super::Assembler) throws (super::Error) {
    let name = try parseSymbolName(a);
    let wide = try parseExpr(a);
    let value = try expectI32Value(a, wide, "constant out of range");

    dict::insert(&mut a.constMap, name, value);
}
555 +
556 +
/// Parse `.export`: record the label in `a.exportMap` and, when the label is
/// already present in the symbol map, flag that symbol as exported.
fn parseExportDirective(a: *mut super::Assembler) throws (super::Error) {
    let label = try parseLabelName(a);
    dict::insert(&mut a.exportMap, label, 1);
    if let slot = dict::get(&a.symbolMap, label) {
        set a.symbols[slot as u32].isExported = true;
    }
}
564 +
565 +
/// Parse `.space`: a non-negative byte count, emitted as that many zero
/// bytes. Throws `DataOverflow` when the count exceeds the free data
/// capacity.
fn parseSpaceDirective(a: *mut super::Assembler) throws (super::Error) {
    let count = try parseValue(a);
    if count < 0 {
        throw fail(a, "space size must be non-negative");
    }
    // Reject the whole request up front rather than filling partway.
    let free = a.data.cap - a.data.len;
    if count > free as i64 {
        throw super::Error::DataOverflow;
    }
    let total = count as u32;
    for _ in 0..total {
        try emit::emitByte(a, 0);
    }
}
579 +
580 +
/// Parse `.align`: pad the current section up to a power-of-two boundary.
///
/// The amount must be positive, fit in u32, and be a power of two. In the
/// text section it must also be a multiple of the instruction size, and the
/// padding is emitted as whole instruction words; in the data section the
/// padding is zero bytes. Throws the section's overflow error when the
/// padded size would exceed capacity.
fn parseAlignDirective(a: *mut super::Assembler) throws (super::Error) {
    let requested = try parseValue(a);
    if requested <= 0 {
        throw fail(a, "alignment must be positive");
    }
    if requested > super::U32_MAX_VALUE {
        throw fail(a, "alignment out of range");
    }
    let amount = requested as u32;
    // A power of two has exactly one bit set.
    if (amount & (amount - 1)) <> 0 {
        throw fail(a, "alignment must be a power of two");
    }
    match a.section {
        case super::Section::Text => {
            let instr = rv64::INSTR_SIZE as u32;
            if amount % instr <> 0 {
                throw fail(a, "text alignment must be a multiple of 4");
            }
            // Text length is counted in words; alignment works in bytes.
            let bytes = a.text.len * instr;
            let aligned = checkedAlignUp(bytes, amount) else {
                throw super::Error::TextOverflow;
            };
            let words = (aligned - bytes) / instr;
            if words > a.text.cap - a.text.len {
                throw super::Error::TextOverflow;
            }
            try emit::emitTextPadding(a, words);
        }
        case super::Section::Data => {
            let start = a.data.len;
            let aligned = checkedAlignUp(start, amount) else {
                throw super::Error::DataOverflow;
            };
            if aligned > a.data.cap {
                throw super::Error::DataOverflow;
            }
            for _ in start..aligned {
                try emit::emitByte(a, 0);
            }
        }
    }
}
621 +
622 +
/// Parse the comma-separated operands of a `.byte` directive, emitting one byte each.
///
/// An operand is either a character literal or a value in the range
/// `0..=U8_MAX_VALUE`; anything else is rejected.
fn parseByteDirective(a: *mut super::Assembler) throws (super::Error) {
    loop {
        if a.scan.current.kind <> scanner::TokenKind::Char {
            let n = try parseValue(a);
            if n < 0 or n > super::U8_MAX_VALUE {
                throw fail(a, "byte literal out of range");
            }
            try emit::emitByte(a, n as u8);
        } else {
            let byte = parseCharLiteral(a.scan.current) else {
                throw fail(a, "invalid char literal");
            };
            try emit::emitByte(a, byte);
            advance(a);
        }
        // The list ends at the first operand that is not followed by a comma.
        if not consume(a, scanner::TokenKind::Comma) {
            return;
        }
    }
}
643 +
644 +
/// Parse the comma-separated operands of an integer data directive of the given `width`.
///
/// Each operand is a label (recorded as a data fixup), a character literal,
/// or a plain value.
fn parseIntDirective(a: *mut super::Assembler, width: super::DataWidth) throws (super::Error) {
    let mut more: bool = true;

    while more {
        if isLabel(a.scan.current.kind) {
            // The label's address is filled in later through the fixup.
            let label = try parseLabelName(a);
            try emit::recordDataFixup(a, label, width);
        } else if a.scan.current.kind == scanner::TokenKind::Char {
            let byte = parseCharLiteral(a.scan.current) else {
                throw fail(a, "invalid char literal");
            };
            advance(a);
            try emitDataValue(a, byte as i64, width);
        } else {
            let n = try parseValue(a);
            try emitDataValue(a, n, width);
        }
        // Keep going only while operands are separated by commas.
        set more = consume(a, scanner::TokenKind::Comma);
    }
}
664 +
665 +
/// Parse the comma-separated string literals of an `.ascii` directive and
/// hand each one to `emit::emitDecodedString`.
fn parseStringDirective(a: *mut super::Assembler) throws (super::Error) {
    let mut more: bool = true;

    while more {
        let tok = try expectToken(a, scanner::TokenKind::String, "expected string literal");
        try emit::emitDecodedString(a, tok.source);
        // Another literal follows only after a comma.
        set more = consume(a, scanner::TokenKind::Comma);
    }
}
675 +
676 +
/// Consume a register token and map its name to a register.
///
/// Throws `Error::Invalid` at the token's offset when the name is unknown.
fn parseRegister(a: *mut super::Assembler) -> gen::Reg throws (super::Error) {
    let tok = try expectToken(a, scanner::TokenKind::Register, "expected register");
    // Register tokens carry their leading `%`; look up the name without it.
    let name = &tok.source[1..];

    if let reg = lookupRegister(name) {
        return reg;
    }
    throw super::Error::Invalid { offset: tok.offset, message: "unknown register" };
}
684 +
685 +
/// Parse a simple value: a number or named constant, optionally preceded by
/// a `-` token that negates it.
fn parseValue(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let negate = consume(a, scanner::TokenKind::Minus);
    let primary = try parseValuePrimary(a);

    if negate {
        return -primary;
    }
    return primary;
}
692 +
693 +
/// Parse the unsigned core of a simple value: an integer literal or a named constant.
fn parseValuePrimary(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let kind = a.scan.current.kind;

    if kind == scanner::TokenKind::Ident {
        return try parseConstantValue(a);
    }
    if kind == scanner::TokenKind::Number {
        return try parseInteger(a);
    }
    throw fail(a, "expected number or constant");
}
703 +
704 +
/// Parse a constant expression at the additive level: a left-to-right chain
/// of multiplicative terms joined by `+` and `-`.
fn parseExpr(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let mut acc = try parseExprMul(a);

    while a.scan.current.kind == scanner::TokenKind::Plus or a.scan.current.kind == scanner::TokenKind::Minus {
        // Remember the operator before stepping past it.
        let isAdd = a.scan.current.kind == scanner::TokenKind::Plus;
        advance(a);

        let term = try parseExprMul(a);
        if isAdd {
            set acc += term;
        } else {
            set acc -= term;
        }
    }
    return acc;
}
721 +
722 +
/// Parse a constant expression at the multiplicative level: a left-to-right
/// chain of unary operands joined by `*` and `/`.
///
/// Dividing by zero is reported as an assembler error.
fn parseExprMul(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let mut acc = try parseExprUnary(a);

    while a.scan.current.kind == scanner::TokenKind::Star or a.scan.current.kind == scanner::TokenKind::Slash {
        // Remember the operator before stepping past it.
        let isDiv = a.scan.current.kind == scanner::TokenKind::Slash;
        advance(a);

        let factor = try parseExprUnary(a);
        if isDiv {
            if factor == 0 {
                throw fail(a, "division by zero");
            }
            set acc /= factor;
        } else {
            set acc *= factor;
        }
    }
    return acc;
}
742 +
743 +
/// Parse any number of prefix `+` / `-` operators followed by an expression atom.
fn parseExprUnary(a: *mut super::Assembler) -> i64 throws (super::Error) {
    // Unary plus is a no-op.
    if consume(a, scanner::TokenKind::Plus) {
        return try parseExprUnary(a);
    }
    if consume(a, scanner::TokenKind::Minus) {
        let operand = try parseExprUnary(a);
        return -operand;
    }
    return try parseExprPrimary(a);
}
753 +
754 +
/// Parse an expression atom: a parenthesized expression, an integer literal,
/// or a named constant.
fn parseExprPrimary(a: *mut super::Assembler) -> i64 throws (super::Error) {
    if consume(a, scanner::TokenKind::LParen) {
        let inner = try parseExpr(a);
        try expect(a, scanner::TokenKind::RParen, "expected `)`");
        return inner;
    }
    let kind = a.scan.current.kind;

    if kind == scanner::TokenKind::Ident {
        return try parseConstantValue(a);
    }
    if kind == scanner::TokenKind::Number {
        return try parseInteger(a);
    }
    throw fail(a, "expected expression");
}
769 +
770 +
/// Parse a symbol name and return the value stored for it in the assembler's constant map.
///
/// Throws `Error::Invalid` at the previously consumed token when no such
/// constant exists.
fn parseConstantValue(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let name = try parseSymbolName(a);

    if let found = dict::get(&a.constMap, name) {
        return found as i64;
    }
    throw super::Error::Invalid { offset: a.scan.previous.offset, message: "undefined constant" };
}
778 +
779 +
/// Parse a symbol name and resolve it through `lookupCsr`.
///
/// Throws `Error::Invalid` at the previously consumed token when the name is
/// not a known CSR.
fn parseCsr(a: *mut super::Assembler) -> u32 throws (super::Error) {
    let name = try parseSymbolName(a);

    if let number = lookupCsr(name) {
        return number;
    }
    throw super::Error::Invalid { offset: a.scan.previous.offset, message: "unknown CSR" };
}
787 +
788 +
/// Parse a memory operand of the form `offset(base)`.
///
/// The offset may be left out, in which case it is zero; when present it goes
/// through `expectSmallImmValue`.
fn parseMemory(a: *mut super::Assembler) -> MemOperand throws (super::Error) {
    // An operand that opens directly with `(` has no explicit offset.
    let hasOffset = a.scan.current.kind <> scanner::TokenKind::LParen;
    let mut offset: i32 = 0;

    if hasOffset {
        let raw = try parseValue(a);
        set offset = try expectSmallImmValue(a, raw);
    }
    try expect(a, scanner::TokenKind::LParen, "expected `(`");
    let baseReg = try parseRegister(a);
    try expect(a, scanner::TokenKind::RParen, "expected `)`");

    return MemOperand { base: baseReg, offset };
}
800 +
801 +
/// Parse a simple value and validate it with `expectSmallImmValue`
/// (the signed 12-bit immediate check).
fn parseSmallImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    return try expectSmallImmValue(a, raw);
}
805 +
806 +
/// Parse a simple value and check that it is usable as a branch immediate.
///
/// The value must pass `expectI32Value` and then `encode::isBranchImm`.
fn parseBranchImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let imm = try expectI32Value(a, raw, "branch immediate out of range");

    if encode::isBranchImm(imm) {
        return imm;
    }
    throw fail(a, "branch immediate out of range");
}
814 +
815 +
/// Parse a simple value and check that it is usable as a jump immediate.
///
/// The value must pass `expectI32Value` and then `encode::isJumpImm`.
fn parseJumpImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let imm = try expectI32Value(a, raw, "jump immediate out of range");

    if encode::isJumpImm(imm) {
        return imm;
    }
    throw fail(a, "jump immediate out of range");
}
823 +
824 +
/// Consume a number token and convert its text to an `i64`.
///
/// A literal that `parseIntegerText` rejects is reported against the token itself.
fn parseInteger(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let tok = try expectToken(a, scanner::TokenKind::Number, "expected number");

    if let n = parseIntegerText(tok.source) {
        return n;
    }
    throw failOnToken(tok, "invalid integer literal");
}
832 +
833 +
/// Convert integer literal text to an `i64`.
///
/// Returns `nil` when `fmt::parseInt` rejects the text or when the literal
/// does not fit in an `i64`.
fn parseIntegerText(text: *[u8]) -> ?i64 {
    let parsed = try fmt::parseInt(text) catch {
        return nil;
    };
    let magnitude = parsed.magnitude;

    if not parsed.negative {
        if magnitude > parser::I64_MAX_MAGNITUDE {
            return nil;
        }
        return magnitude as i64;
    }
    // The most negative value is returned directly rather than by negating
    // its magnitude.
    if magnitude == parser::I64_MIN_MAGNITUDE {
        return parser::I64_MIN;
    }
    if magnitude > parser::I64_MIN_MAGNITUDE {
        return nil;
    }
    return -(magnitude as i64);
}
852 +
853 +
/// Decode a character literal token into a single byte via `fmt::parseChar`.
///
/// Returns `nil` when the literal cannot be decoded.
fn parseCharLiteral(tok: scanner::Token) -> ?u8 {
    let byte = try fmt::parseChar(tok.source) catch {
        return nil;
    };
    return byte;
}
lib/std/arch/rv64/asm/scanner.rad added +315 -0
1 +
//! Assembly-specific lexical scanner.
2 +
@test mod tests;
3 +
4 +
use std::char;
5 +
use std::lang::strings;
6 +
7 +
/// Kinds of token the assembler scanner can produce.
export union TokenKind {
    /// End of input, returned once the source has been fully consumed.
    Eof,
    /// Scan error; [`Token::source`] carries the error message.
    Invalid,

    // Punctuation and operators.
    LParen,     // (
    RParen,     // )
    Comma,      // ,
    Colon,      // :
    ColonColon, // ::
    Semicolon,  // ;
    Minus,      // -
    Plus,       // +
    Slash,      // /
    Star,       // *

    /// Plain identifier: mnemonics, constants, CSR names, and symbol segments.
    Ident,
    /// Identifier-shaped label; the token text includes the leading `@`.
    Label,
    /// Quoted label; the token text includes the leading `@` and both quotes.
    QuotedLabel,
    /// Directive; the token text includes the leading `.`.
    Directive,
    /// Register; the token text includes the leading `%`.
    Register,

    /// String literal; the token text includes the delimiters.
    String,
    /// Character literal; the token text includes the delimiters.
    Char,
    /// Integer literal.
    Number,
}
43 +
44 +
/// Origin of the assembler source text handed to the scanner.
export union SourceKind {
    /// The source was read from the file at `path`.
    File { path: *[u8] },
    /// The source was supplied directly as an in-memory string.
    String,
}
51 +
52 +
/// State of the assembler's lexical scanner.
export record Scanner {
    /// Where the source text came from.
    sourceKind: SourceKind,
    /// The text being scanned.
    source: *[u8],
    /// Offset in `source` at which the token being scanned starts.
    token: u32,
    /// Offset in `source` of the next character to read.
    cursor: u32,
    /// Token the parser is currently looking at.
    current: Token,
    /// Token the parser consumed most recently.
    previous: Token,
    /// Pool in which identifier-shaped token text is interned.
    pool: *mut strings::Pool,
}
69 +
70 +
/// A single scanned token: its kind, its text, and where it starts.
export record Token {
    /// What kind of token this is.
    kind: TokenKind,
    /// Text of the token; for `Invalid` tokens this is the error message.
    source: *[u8],
    /// Byte offset of the token in the input buffer.
    offset: u32,
}
79 +
80 +
/// Build a scanner positioned at the start of `source`.
///
/// Both `current` and `previous` start out as invalid tokens with an empty
/// message.
export fn scanner(sourceKind: SourceKind, source: *[u8], pool: *mut strings::Pool) -> Scanner {
    return Scanner {
        sourceKind,
        source,
        token: 0,
        cursor: 0,
        current: invalid(0, ""),
        previous: invalid(0, ""),
        pool,
    };
}
93 +
94 +
/// Build an `Invalid` token at `offset` whose text is the error `message`.
export fn invalid(offset: u32, message: *[u8]) -> Token {
    let token = Token { kind: TokenKind::Invalid, source: message, offset };
    return token;
}
98 +
99 +
/// Report whether the cursor has reached or passed the end of the source.
export fn isEof(s: *Scanner) -> bool {
    return s.source.len <= s.cursor;
}
103 +
104 +
/// Look at the character under the cursor without consuming it.
///
/// Returns `nil` at end of input.
fn current(s: *Scanner) -> ?u8 {
    if s.cursor < s.source.len {
        return s.source[s.cursor];
    }
    return nil;
}
111 +
112 +
/// Look at the character one past the cursor without consuming anything.
///
/// Returns `nil` when there is no such character.
fn peek(s: *Scanner) -> ?u8 {
    let ahead = s.cursor + 1;

    if ahead < s.source.len {
        return s.source[ahead];
    }
    return nil;
}
119 +
120 +
/// Consume the character under the cursor and return it.
///
/// No end-of-input check is made here; callers must ensure a character is available.
fn advance(s: *mut Scanner) -> u8 {
    let ch = s.source[s.cursor];
    set s.cursor += 1;
    return ch;
}
125 +
126 +
/// Consume the character under the cursor if it equals `expected`.
///
/// Returns whether a character was consumed.
fn consume(s: *mut Scanner, expected: u8) -> bool {
    let ch = current(s) else {
        return false;
    };
    if ch <> expected {
        return false;
    }
    advance(s);
    return true;
}
134 +
135 +
/// Skip spaces, tabs, carriage returns, newlines, and `//` line comments.
///
/// Stops at the first character that starts a token, including a `/` that
/// does not begin a comment.
fn skipWhitespace(s: *mut Scanner) {
    while let ch = current(s) {
        match ch {
            case ' ', '\n', '\r', '\t' => advance(s),
            case '/' => {
                let following = peek(s) else {
                    return;
                };
                if following <> '/' {
                    return;
                }
                // Drop the comment up to, but not including, the newline; the
                // newline itself is skipped by the next iteration.
                while let c = current(s); c <> '\n' {
                    advance(s);
                }
            }
            else => return,
        }
    }
}
153 +
154 +
/// Scan and return the next token, skipping leading whitespace and comments.
///
/// Returns an `Eof` token at end of input and an `Invalid` token for
/// characters that cannot start a token. A lone `:` is reported as invalid;
/// only `::` is accepted.
export fn next(s: *mut Scanner) -> Token {
    skipWhitespace(s);
    // Every token starts where the whitespace ended.
    set s.token = s.cursor;

    if isEof(s) {
        return tok(s, TokenKind::Eof);
    }
    let first = advance(s);

    if char::isAlpha(first) or first == '_' {
        return scanIdentToken(s, TokenKind::Ident);
    }
    if char::isDigit(first) {
        return scanNumber(s);
    }

    match first {
        // Sigil-prefixed and delimited tokens.
        case '.' => return scanPrefixedToken(s, TokenKind::Directive, "expected directive name after `.`"),
        case '%' => return scanPrefixedToken(s, TokenKind::Register, "expected register after `%`"),
        case '@' => return scanLabelToken(s),
        case '"' => return scanString(s),
        case '\'' => return scanChar(s),
        // Signs may begin a signed number.
        case '+' => return scanSignedNumberOrToken(s, TokenKind::Plus),
        case '-' => return scanSignedNumberOrToken(s, TokenKind::Minus),
        // Single-character punctuation.
        case '(' => return tok(s, TokenKind::LParen),
        case ')' => return tok(s, TokenKind::RParen),
        case ',' => return tok(s, TokenKind::Comma),
        case ';' => return tok(s, TokenKind::Semicolon),
        case '*' => return tok(s, TokenKind::Star),
        case '/' => return tok(s, TokenKind::Slash),
        case ':' => {
            if not consume(s, ':') {
                return invalid(s.token, "unexpected `:`");
            }
            return tok(s, TokenKind::ColonColon);
        }
        else => return invalid(s.token, "unexpected character"),
    }
}
194 +
195 +
/// Build a token of `kind` covering the source from the token start up to the cursor.
fn tok(s: *Scanner, kind: TokenKind) -> Token {
    let text = &s.source[s.token..s.cursor];
    return Token { kind, source: text, offset: s.token };
}
199 +
200 +
/// Consume identifier continuation characters (letters, digits, `_`) from the cursor onward.
fn scanIdentifierBody(s: *mut Scanner) {
    while let ch = current(s); ch == '_' or char::isDigit(ch) or char::isAlpha(ch) {
        advance(s);
    }
}
206 +
207 +
/// Decide what an already-consumed `+` or `-` means.
///
/// If a digit follows immediately, the sign is part of a number literal;
/// otherwise it is the punctuation token `kind`.
fn scanSignedNumberOrToken(s: *mut Scanner, kind: TokenKind) -> Token {
    let following = current(s) else {
        return tok(s, kind);
    };
    if not char::isDigit(following) {
        return tok(s, kind);
    }
    return scanNumber(s);
}
214 +
215 +
/// Scan a numeric literal: decimal digits, or `0x`/`0X` followed by hex digits.
///
/// On entry the cursor sits just past the literal's first character, which is
/// either its first digit or a leading `+`/`-` sign that is known to be
/// followed by a digit.
///
/// Returns a `Number` token, or an `Invalid` token when a hex prefix is not
/// followed by at least one hex digit. That includes a prefix at the very end
/// of the input.
fn scanNumber(s: *mut Scanner) -> Token {
    let first = s.source[s.cursor - 1];
    if first == '-' or first == '+' {
        // Step over the first digit so the check below sees it as the
        // previous character.
        advance(s);
    }
    if s.source[s.cursor - 1] == '0' {
        if let ch = current(s); ch == 'x' or ch == 'X' {
            advance(s);
            // A hex prefix needs at least one digit. Running out of input
            // right after the prefix is just as invalid as a non-hex
            // character.
            let firstHex = current(s) else {
                return invalid(s.token, "invalid hex literal");
            };
            if not char::isHexDigit(firstHex) {
                return invalid(s.token, "invalid hex literal");
            }
            while let hex = current(s); char::isHexDigit(hex) {
                advance(s);
            }
            return tok(s, TokenKind::Number);
        }
    }
    while let digit = current(s); char::isDigit(digit) {
        advance(s);
    }
    return tok(s, TokenKind::Number);
}
238 +
239 +
/// Scan up to and including the closing `delim`, producing a token of `kind`.
///
/// A backslash skips the character after it, so an escaped delimiter does not
/// end the token. Returns an `Invalid` token when a non-printable character is
/// met, and `nil` when the input ends before the closing delimiter.
fn scanCharsUntil(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
    while let ch = current(s); ch <> delim {
        if not char::isPrint(ch) {
            return invalid(s.token, "invalid character");
        }
        advance(s);

        if ch == '\\' {
            // The escaped character is taken as-is, whatever it is.
            if isEof(s) {
                return nil;
            }
            advance(s);
        }
    }
    // The loop also stops at end of input, so the delimiter may be missing.
    if not consume(s, delim) {
        return nil;
    }
    return tok(s, kind);
}
257 +
258 +
/// Scan the rest of a string literal whose opening `"` has been consumed.
fn scanString(s: *mut Scanner) -> Token {
    let token = scanCharsUntil(s, '"', TokenKind::String) else {
        return invalid(s.token, "unterminated string");
    };
    return token;
}
265 +
266 +
/// Scan the rest of a character literal whose opening `'` has been consumed.
fn scanChar(s: *mut Scanner) -> Token {
    let token = scanCharsUntil(s, '\'', TokenKind::Char) else {
        return invalid(s.token, "unterminated character");
    };
    return token;
}
273 +
274 +
/// Finish scanning an identifier-shaped token of `kind`.
///
/// The token text is interned in the scanner's string pool.
fn scanIdentToken(s: *mut Scanner, kind: TokenKind) -> Token {
    scanIdentifierBody(s);

    let text = strings::intern(s.pool, &s.source[s.token..s.cursor]);
    return Token { kind, source: text, offset: s.token };
}
284 +
285 +
/// Scan an identifier-shaped token whose sigil has already been consumed.
///
/// The name must start right after the sigil with a letter or `_`; otherwise
/// an `Invalid` token carrying `message` is returned.
fn scanPrefixedToken(s: *mut Scanner, kind: TokenKind, message: *[u8]) -> Token {
    if let ch = current(s); char::isAlpha(ch) or ch == '_' {
        // The rest is an ordinary identifier scan; the token text keeps the sigil.
        return scanIdentToken(s, kind);
    }
    return invalid(s.token, message);
}
301 +
302 +
/// Scan a label after its `@` has been consumed: either `@name` or `@"quoted"`.
fn scanLabelToken(s: *mut Scanner) -> Token {
    let ch = current(s) else {
        return invalid(s.token, "expected label after `@`");
    };
    if ch <> '"' {
        return scanPrefixedToken(s, TokenKind::Label, "expected label after `@`");
    }
    // Step over the opening quote, then scan through the closing one.
    advance(s);

    let token = scanCharsUntil(s, '"', TokenKind::QuotedLabel) else {
        return invalid(s.token, "unterminated quoted label");
    };
    return token;
}
lib/std/arch/rv64/asm/scanner/tests.rad added +140 -0
1 +
use std::mem;
2 +
use std::testing;
3 +
use std::lang::strings;
4 +
5 +
/// Intern pool shared by every scanner created in these tests.
static TEST_STRING_POOL: strings::Pool = strings::Pool { table: undefined, count: 0 };
7 +
8 +
/// Build a scanner over the in-memory `source`, backed by the shared test pool.
fn testScanner(source: *[u8]) -> super::Scanner {
    let kind = super::SourceKind::String;
    return super::scanner(kind, source, &mut TEST_STRING_POOL);
}
12 +
13 +
/// Directives, registers, and labels keep their sigil in the token text, and
/// scoped names scan as identifiers joined by `::`.
@test fn testScanRegisterDirectiveAndLabelTokens() throws (testing::TestError) {
    let mut s = testScanner(
        ".text %sp @entry name::tail 42"
    );

    let dirTok = super::next(&mut s);
    try testing::expect(dirTok.kind == super::TokenKind::Directive);
    try testing::expect(mem::eq(dirTok.source, ".text"));

    let regTok = super::next(&mut s);
    try testing::expect(regTok.kind == super::TokenKind::Register);
    try testing::expect(mem::eq(regTok.source, "%sp"));

    let labelTok = super::next(&mut s);
    try testing::expect(labelTok.kind == super::TokenKind::Label);
    try testing::expect(mem::eq(labelTok.source, "@entry"));

    // `name::tail` is three tokens.
    try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident);
    try testing::expect(super::next(&mut s).kind == super::TokenKind::ColonColon);
    try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident);

    try testing::expect(super::next(&mut s).kind == super::TokenKind::Number);
    try testing::expect(super::next(&mut s).kind == super::TokenKind::Eof);
}
36 +
37 +
/// Words such as `and`, `or`, `not`, and `align` scan as plain identifiers.
@test fn testScanKeywordShapedAsmNamesRemainAsmTokens() throws (testing::TestError) {
    let mut s = testScanner(
        "and or not align addi .text @label"
    );
    // `and`, `or`, `not`, `align`, `addi`.
    for _ in 0..5 {
        try testing::expect(super::next(&mut s).kind == super::TokenKind::Ident);
    }
    try testing::expect(super::next(&mut s).kind == super::TokenKind::Directive);
    try testing::expect(super::next(&mut s).kind == super::TokenKind::Label);
    try testing::expect(super::next(&mut s).kind == super::TokenKind::Eof);
}
51 +
52 +
/// A quoted label may contain characters, such as `.`, that an
/// identifier-shaped label cannot; its token text keeps the `@` and quotes.
@test fn testScanQuotedLabelToken() throws (testing::TestError) {
    let mut s = testScanner(
        "@\"foo.bar.baz\""
    );
    let quoted = super::next(&mut s);
    try testing::expect(quoted.kind == super::TokenKind::QuotedLabel);
    try testing::expect(mem::eq(quoted.source, "@\"foo.bar.baz\""));

    try testing::expect(super::next(&mut s).kind == super::TokenKind::Eof);
}
62 +
63 +
/// A space between a sigil and its name makes the first token invalid.
@test fn testScanSigilsRequireAdjacency() throws (testing::TestError) {
    // `.` directive sigil.
    let mut dirScan = testScanner(". text");
    try testing::expect(super::next(&mut dirScan).kind == super::TokenKind::Invalid);

    // `@` label sigil.
    let mut lblScan = testScanner("@ entry");
    try testing::expect(super::next(&mut lblScan).kind == super::TokenKind::Invalid);

    // `%` register sigil.
    let mut regScan = testScanner("% a0");
    try testing::expect(super::next(&mut regScan).kind == super::TokenKind::Invalid);
}
74 +
75 +
/// A well-formed program that ends with a newline scans cleanly to an empty
/// `Eof` token, with a `//` comment in the middle of it skipped.
@test fn testScanProgramEndingWithNewline() throws (testing::TestError) {
    let mut s = testScanner(
        ".text;\n@start\naddi %a0 %zero 42;\nsd %a0 8(%sp);\n// comment\nbeq %a0 %zero @done;\n@done\nret;\n"
    );
    loop {
        let tok = super::next(&mut s);
        // Every token in this program is valid. Without this check a scanner
        // that produced `Invalid` tokens along the way would still pass.
        try testing::expect(tok.kind <> super::TokenKind::Invalid);

        if tok.kind == super::TokenKind::Eof {
            try testing::expect(tok.source.len == 0);
            return;
        }
    }
}
88 +
89 +
/// Signed hex literals scan as one number; binary prefixes and fractional
/// parts are not number syntax and split into separate tokens.
@test fn testScanSignedHexAndUnsupportedNumericForms() throws (testing::TestError) {
    let mut s = testScanner(
        "+0x2a -0b10 45.5"
    );

    // `+0x2a`: the sign belongs to the hex literal.
    let hexTok = super::next(&mut s);
    try testing::expect(hexTok.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(hexTok.source, "+0x2a"));

    // `-0b10`: the number stops after `-0`, and `b10` is an identifier.
    let zeroTok = super::next(&mut s);
    try testing::expect(zeroTok.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(zeroTok.source, "-0"));

    let identTok = super::next(&mut s);
    try testing::expect(identTok.kind == super::TokenKind::Ident);
    try testing::expect(mem::eq(identTok.source, "b10"));

    // `45.5`: the `.` is taken as a directive sigil with no name after it.
    let wholeTok = super::next(&mut s);
    try testing::expect(wholeTok.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(wholeTok.source, "45"));

    let dotTok = super::next(&mut s);
    try testing::expect(dotTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(dotTok.source, "expected directive name after `.`"));

    let fracTok = super::next(&mut s);
    try testing::expect(fracTok.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(fracTok.source, "5"));
}
118 +
119 +
/// String and character literals that reach end of input without a closing
/// delimiter, including right after a backslash, scan as invalid tokens.
@test fn testScanUnterminatedDelimitedLiterals() throws (testing::TestError) {
    // String with no closing quote.
    let mut strScan = testScanner("\"unterminated");
    let strTok = super::next(&mut strScan);
    try testing::expect(strTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(strTok.source, "unterminated string"));

    // String that ends right after a backslash.
    let mut strEscScan = testScanner("\"unterminated\\");
    let strEscTok = super::next(&mut strEscScan);
    try testing::expect(strEscTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(strEscTok.source, "unterminated string"));

    // Character literal with no closing quote.
    let mut chScan = testScanner("'x");
    let chTok = super::next(&mut chScan);
    try testing::expect(chTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(chTok.source, "unterminated character"));

    // Character literal that ends right after a backslash.
    let mut chEscScan = testScanner("'\\");
    let chEscTok = super::next(&mut chEscScan);
    try testing::expect(chEscTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(chEscTok.source, "unterminated character"));
}
lib/std/arch/rv64/asm/tests.rad added +211 -0
1 +
//! RV64 assembler tests.
2 +
3 +
use std::testing;
4 +
use std::mem;
5 +
use std::lang::alloc;
6 +
use std::lang::sexpr;
7 +
use std::lang::strings;
8 +
use std::arch::rv64;
9 +
use std::arch::rv64::encode;
10 +
use std::arch::rv64::printer;
11 +
12 +
use super::scanner;
13 +
14 +
/// Backing memory for the arena handed to `super::assemble`.
static ASM_ARENA_STORAGE: [u8; 65536] = undefined;
/// Text buffer handed to `super::assemble`.
static ASM_TEXT_STORAGE: [u32; 256] = undefined;
/// Data buffer handed to `super::assemble`.
static ASM_DATA_STORAGE: [u8; 1024] = undefined;
/// String pool handed to `super::assemble`.
static ASM_STRING_POOL: strings::Pool = strings::Pool { table: undefined, count: 0 };
/// Backing memory for the arena used while printing an instruction.
static PRINT_ARENA_STORAGE: [u8; 1024] = undefined;
/// Buffer that receives printed instruction text.
static PRINT_BUFFER: [u8; 128] = undefined;
20 +
21 +
/// Assemble `source` using the static test buffers.
///
/// Any assembler error is turned into `TestError::Failed`.
fn assembleSource(source: *[u8]) -> super::Program throws (testing::TestError) {
    let mut scratch = alloc::new(&mut ASM_ARENA_STORAGE[..]);
    let program = try super::assemble(
        scanner::SourceKind::String,
        source,
        &mut ASM_TEXT_STORAGE[..],
        &mut ASM_DATA_STORAGE[..],
        &mut scratch,
        &mut ASM_STRING_POOL,
        rv64::RO_DATA_BASE
    ) catch {
        throw testing::TestError::Failed;
    };
    return program;
}
35 +
36 +
/// Assert that assembling `source` fails.
///
/// Any assembler error counts as success; a successful assembly throws
/// `TestError::Failed`.
fn expectAssembleFail(source: *[u8]) throws (testing::TestError) {
    let mut scratch = alloc::new(&mut ASM_ARENA_STORAGE[..]);
    try super::assemble(
        scanner::SourceKind::String,
        source,
        &mut ASM_TEXT_STORAGE[..],
        &mut ASM_DATA_STORAGE[..],
        &mut scratch,
        &mut ASM_STRING_POOL,
        rv64::RO_DATA_BASE
    ) catch {
        // The assembler rejected the input, which is what the caller wants.
        return;
    };
    throw testing::TestError::Failed;
}
51 +
52 +
/// Print the encoded instruction `instr` into `PRINT_BUFFER` and return the
/// written prefix.
///
/// The returned slice points into the static buffer, so a later call overwrites it.
fn printInstrText(instr: u32) -> *[u8] {
    let mut scratch = alloc::new(&mut PRINT_ARENA_STORAGE[..]);
    let mut written: u32 = 0;
    let mut sink = sexpr::Output::Buffer { buf: &mut PRINT_BUFFER[..], pos: &mut written };
    printer::printInstr(&mut sink, &mut scratch, instr);

    return &PRINT_BUFFER[..written];
}
59 +
60 +
/// Registers written with a `%` prefix assemble to the expected encodings,
/// both as plain operands and as the base of a memory operand.
@test fn testAssemblePercentPrefixedRegisters() throws (testing::TestError) {
    let prog = try assembleSource(
        ".text;\naddi %a0 %zero 42;\nsd %a0 8(%sp);\n"
    );
    let wantAddi = encode::addi(rv64::A0, rv64::ZERO, 42);
    let wantSd = encode::sd(rv64::A0, rv64::SP, 8);

    try testing::expect(prog.text.len == 2);
    try testing::expect(prog.text[0] == wantAddi);
    try testing::expect(prog.text[1] == wantSd);
}
68 +
69 +
/// `la` of a data label expands to `lui` + `addi`; the label sits one byte
/// into the data section, which shows up as the `addi` immediate.
@test fn testAssembleDataAddressUsesRoDataBase() throws (testing::TestError) {
    let prog = try assembleSource(
        ".text;\nla %t0 @value;\n.data;\n.byte 0;\n@value\n.byte 1;\n"
    );
    let wantUpper = encode::lui(rv64::T0, 0x10);
    let wantLower = encode::addi(rv64::T0, rv64::T0, 1);

    try testing::expect(prog.text.len == 2);
    try testing::expect(prog.text[0] == wantUpper);
    try testing::expect(prog.text[1] == wantLower);
}
77 +
78 +
/// `la` of a text label expands to a PC-relative `auipc` + `addi` pair.
///
/// The label follows the two-word expansion, so it lies 8 bytes past the `auipc`.
@test fn testAssembleTextAddressUsesPcRelative() throws (testing::TestError) {
    let program = try assembleSource(
        ".text;\nla %t0 @target;\n@target\nret;\n"
    );
    try testing::expect(program.text.len == 3);
    try testing::expect(program.text[0] == encode::auipc(rv64::T0, 0));
    try testing::expect(program.text[1] == encode::addi(rv64::T0, rv64::T0, 8));
    // The word at the label must be the `ret` itself, so the expansion did
    // not overwrite or shift the instruction that follows it.
    try testing::expect(program.text[2] == encode::jalr(rv64::ZERO, rv64::RA, 0));
}
86 +
87 +
/// A quoted label can be both defined and referenced; the jump resolves to
/// the instruction right after it.
@test fn testAssembleQuotedLabelNames() throws (testing::TestError) {
    let prog = try assembleSource(
        ".text;\nj @\"foo.bar.baz\";\n@\"foo.bar.baz\"\nret;\n"
    );
    let wantJump = encode::jal(rv64::ZERO, 4);
    let wantRet = encode::jalr(rv64::ZERO, rv64::RA, 0);

    try testing::expect(prog.text.len == 2);
    try testing::expect(prog.text[0] == wantJump);
    try testing::expect(prog.text[1] == wantRet);
}
95 +
96 +
/// Only labels named in an `.export` directive are marked exported, whether
/// the directive comes before or after the label's definition.
@test fn testAssembleGlobalMarksOnlyDeclaredSymbols() throws (testing::TestError) {
    let prog = try assembleSource(
        ".text;\n.export @exported;\n@local\nret;\n@exported\nret;\n@late\n.export @late;\nret;\n"
    );
    // Symbols are presumably listed in definition order: `@local`,
    // `@exported`, `@late`.
    try testing::expect(prog.symbols.len == 3);
    try testing::expect(not prog.symbols[0].isExported);
    try testing::expect(prog.symbols[1].isExported);
    try testing::expect(prog.symbols[2].isExported);
}
105 +
106 +
/// Malformed or unresolved operands must make assembly fail.
@test fn testAssembleInvalidOperandsFail() throws (testing::TestError) {
107 +
    // Branch to a label that is never defined.
    try expectAssembleFail(
108 +
        ".text;\nbeq %a0 %a1 @missing;\n"
109 +
    );
110 +
    // Register names written without the `%` prefix.
    try expectAssembleFail(
111 +
        ".text;\naddi a0 zero 1;\n"
112 +
    );
113 +
    // `%` separated from the register name by whitespace.
    try expectAssembleFail(
114 +
        ".text;\naddi % a0 %zero 1;\n"
115 +
    );
116 +
    // Immediate given as a bare name (`UNKNOWN`) that nothing in the source defines.
    try expectAssembleFail(
117 +
        ".text;\nli %a0 UNKNOWN;\n"
118 +
    );
119 +
    // Jump target written without the `@` label prefix.
    try expectAssembleFail(
120 +
        ".text;\n@start\nj start;\n"
121 +
    );
122 +
}
123 +
124 +
/// Syntactically invalid sources must make assembly fail.
@test fn testAssembleInvalidSyntaxFails() throws (testing::TestError) {
125 +
    // The same label defined twice.
    try expectAssembleFail(
126 +
        ".text;\n@dup\n@dup\nret;\n"
127 +
    );
128 +
    // Comma-separated operands and no terminating `;`.
    try expectAssembleFail(
129 +
        ".text;\naddi %a0, %zero, 1\n"
130 +
    );
131 +
    // `.constant` directive with a comma between name and value.
    // NOTE(review): unclear from here whether the directive itself or the comma is rejected.
    try expectAssembleFail(
132 +
        ".constant PAGE, 4096;\n"
133 +
    );
134 +
    // Comma-separated operands, correctly terminated.
    try expectAssembleFail(
135 +
        ".text;\naddi %a0, %zero, 1;\n"
136 +
    );
137 +
    // `.export` given a comma-separated list of `::`-qualified names.
    try expectAssembleFail(
138 +
        ".export @kernel::main, @data::sym;\n"
139 +
    );
140 +
}
141 +
142 +
/// Statements used in the wrong section, and cross-section references,
/// must make assembly fail.
@test fn testAssembleInvalidSectionsFail() throws (testing::TestError) {
143 +
    // `.dword` in `.data` referencing a label defined in `.text`.
    try expectAssembleFail(
144 +
        ".data;\n.dword @target;\n.text;\n@target\nret;\n"
145 +
    );
146 +
    // An instruction inside `.data`.
    try expectAssembleFail(
147 +
        ".data;\naddi %a0 %zero 1;\n"
148 +
    );
149 +
    // Data directives (`.byte`, `.word`, `.dword`, `.ascii`) inside `.text`.
    try expectAssembleFail(
150 +
        ".text;\n.byte 1;\n"
151 +
    );
152 +
    try expectAssembleFail(
153 +
        ".text;\n.word 1;\n"
154 +
    );
155 +
    try expectAssembleFail(
156 +
        ".text;\n.dword 1;\n"
157 +
    );
158 +
    try expectAssembleFail(
159 +
        ".text;\n.ascii \"x\";\n"
160 +
    );
161 +
    // Jump in `.text` to a label defined in `.data`.
    try expectAssembleFail(
162 +
        ".data;\n@value\n.byte 1;\n.text;\nj @value;\n"
163 +
    );
164 +
}
165 +
166 +
/// Directives with malformed or out-of-range arguments must make assembly fail.
@test fn testAssembleInvalidDirectivesFail() throws (testing::TestError) {
167 +
    // `.ascii` with a single-quoted operand instead of a double-quoted string.
    try expectAssembleFail(
168 +
        ".data;\n.ascii 'x';\n"
169 +
    );
170 +
    // Arithmetic expression where a single literal is expected.
    try expectAssembleFail(
171 +
        ".data;\n.byte 1 + 2;\n"
172 +
    );
173 +
    // 256 does not fit in a byte.
    try expectAssembleFail(
174 +
        ".data;\n.byte 256;\n"
175 +
    );
176 +
    // 2^31 as a `.word` value.
    try expectAssembleFail(
177 +
        ".data;\n.word 2147483648;\n"
178 +
    );
179 +
    // 2^32 as a `.space` size.
    try expectAssembleFail(
180 +
        ".data;\n.space 4294967296;\n"
181 +
    );
182 +
    // Alignment of 3 -- presumably rejected for not being a power of two; TODO confirm.
    try expectAssembleFail(
183 +
        ".data;\n.align 3;\n"
184 +
    );
185 +
    // `.align 12` inside `.text` -- NOTE(review): the rejected property is not visible here.
    try expectAssembleFail(
186 +
        ".text;\n.align 12;\n"
187 +
    );
188 +
    // 2^32 as an alignment.
    try expectAssembleFail(
189 +
        ".data;\n.align 4294967296;\n"
190 +
    );
191 +
}
192 +
193 +
/// Immediates outside an instruction's encodable range must make assembly fail.
@test fn testAssembleInvalidImmediateRangesFail() throws (testing::TestError) {
194 +
    // Shift amount 64 (RV64 `slli` encodes a 6-bit shift amount, 0..63).
    try expectAssembleFail(
195 +
        ".text;\nslli %a0 %a1 64;\n"
196 +
    );
197 +
    // Shift amount 2^32.
    try expectAssembleFail(
198 +
        ".text;\nslli %a0 %a1 4294967296;\n"
199 +
    );
200 +
    // Shift amount 2^31 on the word variant.
    try expectAssembleFail(
201 +
        ".text;\nslliw %a0 %a1 2147483648;\n"
202 +
    );
203 +
    // CSR immediate 32: the immediate field is 5 bits wide, so the maximum is 31.
    try expectAssembleFail(
204 +
        ".text;\ncsrsi mstatus 32;\n"
205 +
    );
206 +
}
207 +
208 +
/// The instruction printer must render register operands with a `%` prefix
/// (`%a0`, `%sp`). The expected text is compared byte-for-byte, including
/// the padding after the mnemonic.
@test fn testPrintInstrUsesPercentPrefixedRegisters() throws (testing::TestError) {
209 +
    let text = printInstrText(encode::addi(rv64::A0, rv64::SP, 42));
210 +
    try testing::expect(mem::eq(text, "addi    %a0, %sp, 42"));
211 +
}
lib/std/arch/rv64/emit.rad +13 -3
235 235
    labels::recordBlock(&mut e.labels, blockIdx, e.codeLen as i32 * super::INSTR_SIZE);
236 236
}
237 237
238 238
/// Record a function's code offset for call resolution.
239 239
export fn recordFuncOffset(e: *mut Emitter, name: *[u8]) {
240 -
    assert e.codeLen <= MAX_CODE_LEN;
241 -
    dict::insert(&mut e.labels.funcs, name, e.codeLen as i32 * super::INSTR_SIZE);
240 +
    recordFuncOffsetAt(e, name, e.codeLen);
241 +
}
242 +
243 +
/// Record a function's code offset at `index` for call resolution.
/// The value stored in `e.labels.funcs` under `name` is
/// `index * super::INSTR_SIZE`. Asserts that `index` does not exceed
/// `MAX_CODE_LEN`.
244 +
export fn recordFuncOffsetAt(e: *mut Emitter, name: *[u8], index: u32) {
245 +
    assert index <= MAX_CODE_LEN;
246 +
    dict::insert(&mut e.labels.funcs, name, index as i32 * super::INSTR_SIZE);
242 247
}
243 248
244 249
/// Record a function's start position for printing.
/// The start position is the emitter's current code length (`e.codeLen`).
245 250
export fn recordFunc(e: *mut Emitter, name: *[u8]) {
251 +
    recordFuncAt(e, name, e.codeLen);
252 +
}
253 +
254 +
/// Record a function's start position at `index` for printing.
255 +
export fn recordFuncAt(e: *mut Emitter, name: *[u8], index: u32) {
246 256
    assert e.funcsLen < e.funcs.len, "recordFunc: funcs buffer full";
247 -
    set e.funcs[e.funcsLen] = types::FuncAddr { name, index: e.codeLen };
257 +
    set e.funcs[e.funcsLen] = types::FuncAddr { name, index };
248 258
    set e.funcsLen += 1;
249 259
}
250 260
251 261
/// Record a local branch needing later patching.
252 262
/// Unconditional jumps use a single slot (J-type, +-1MB range).
lib/std/arch/rv64/encode.rad +63 -0
53 53
export constant F3_BLT:  u32 = 0x4;
54 54
export constant F3_BGE:  u32 = 0x5;
55 55
export constant F3_BLTU: u32 = 0x6;
56 56
export constant F3_BGEU: u32 = 0x7;
57 57
58 +
// CSR/system operations: funct3 selectors for the CSR instructions encoded
// under `OP_SYSTEM`. Per the RISC-V Zicsr encoding, the `*I` forms carry a
// 5-bit immediate in the `rs1` field instead of a register.
59 +
60 +
export constant F3_CSRRW: u32 = 0x1;
61 +
export constant F3_CSRRS: u32 = 0x2;
62 +
export constant F3_CSRRC: u32 = 0x3;
63 +
export constant F3_CSRRWI: u32 = 0x5;
64 +
export constant F3_CSRRSI: u32 = 0x6;
65 +
export constant F3_CSRRCI: u32 = 0x7;
66 +
58 67
//////////////////////
59 68
// Funct7 Constants //
60 69
//////////////////////
61 70
62 71
export constant F7_NORMAL: u32 = 0b0000000;
535 544
/// Environment break (debugger breakpoint).
536 545
export fn ebreak() -> u32 {
537 546
    return encodeI(OP_SYSTEM, super::ZERO, super::ZERO, 0, 1);
538 547
}
539 548
549 +
/// Encode a CSR instruction with a register source.
/// Field layout: opcode in bits 0..6, `rd` in bits 7..11, `funct3` in bits
/// 12..14, `rs1` in bits 15..19 and the CSR number in bits 20..31.
/// Every field is masked to its width, so out-of-range values are silently
/// truncated rather than rejected.
550 +
fn encodeCsr(op: u32, rd: gen::Reg, csr: u32, funct3: u32, rs1: gen::Reg) -> u32 {
551 +
    return (op             & 0x7F)
552 +
         | ((*rd as u32    & 0x1F) << 7)
553 +
         | ((funct3        & 0x07) << 12)
554 +
         | ((*rs1 as u32   & 0x1F) << 15)
555 +
         | ((csr           & 0xFFF) << 20);
556 +
}
557 +
558 +
/// Encode a CSR instruction with an immediate source.
/// Same layout as the register form, except that bits 15..19 hold the 5-bit
/// immediate `imm`. Asserts `imm < 32`; the other fields are masked to their
/// width without a check.
559 +
fn encodeCsrImm(op: u32, rd: gen::Reg, csr: u32, funct3: u32, imm: u32) -> u32 {
560 +
    assert imm < 32;
561 +
    return (op             & 0x7F)
562 +
         | ((*rd as u32    & 0x1F) << 7)
563 +
         | ((funct3        & 0x07) << 12)
564 +
         | ((imm           & 0x1F) << 15)
565 +
         | ((csr           & 0xFFF) << 20);
566 +
}
567 +
568 +
/// Read CSR into `rd`.
/// Encoded as `csrrs rd, csr, zero`: a set-bits operation with the zero
/// register as source.
569 +
export fn csrr(rd: gen::Reg, csr: u32) -> u32 {
570 +
    return encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRS, super::ZERO);
571 +
}
572 +
573 +
/// Read/write CSR: old CSR to `rd`, write `rs1`.
/// `csr` is the CSR number; only its low 12 bits are encoded.
574 +
export fn csrrw(rd: gen::Reg, csr: u32, rs1: gen::Reg) -> u32 {
575 +
    return encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRW, rs1);
576 +
}
577 +
578 +
/// Write `rs1` into CSR and discard old value.
/// Encoded as `csrrw zero, csr, rs1`.
579 +
export fn csrw(csr: u32, rs1: gen::Reg) -> u32 {
580 +
    return encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRW, rs1);
581 +
}
582 +
583 +
/// Clear CSR bits from `rs1` and discard old value.
/// Encoded as `csrrc zero, csr, rs1`.
584 +
export fn csrc(csr: u32, rs1: gen::Reg) -> u32 {
585 +
    return encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRC, rs1);
586 +
}
587 +
588 +
/// Set CSR bits from a 5-bit immediate and discard old value.
/// Encoded as `csrrsi zero, csr, imm`; `imm` must be below 32 (asserted by
/// `encodeCsrImm`).
589 +
export fn csrsi(csr: u32, imm: u32) -> u32 {
590 +
    return encodeCsrImm(OP_SYSTEM, super::ZERO, csr, F3_CSRRSI, imm);
591 +
}
592 +
593 +
/// Wait for interrupt.
/// The instruction has no operands, so a fixed encoding is returned.
594 +
export fn wfi() -> u32 {
595 +
    return 0x10500073;
596 +
}
597 +
598 +
/// Return from machine mode trap.
/// The instruction has no operands, so a fixed encoding is returned.
599 +
export fn mret() -> u32 {
600 +
    return 0x30200073;
601 +
}
602 +
540 603
/////////////////////////
541 604
// Pseudo-instructions //
542 605
/////////////////////////
543 606
544 607
/// No operation: `addi zero, zero, 0`.
lib/std/arch/rv64/printer.rad +4 -4
16 16
// Register Names  //
17 17
/////////////////////
18 18
19 19
/// ABI register names.
20 20
constant REG_NAMES: [*[u8]; 32] = [
21 -
    "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
22 -
    "fp", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
23 -
    "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",
24 -
    "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"
21 +
    "%zero", "%ra", "%sp", "%gp", "%tp", "%t0", "%t1", "%t2",
22 +
    "%fp", "%s1", "%a0", "%a1", "%a2", "%a3", "%a4", "%a5",
23 +
    "%a6", "%a7", "%s2", "%s3", "%s4", "%s5", "%s6", "%s7",
24 +
    "%s8", "%s9", "%s10", "%s11", "%t3", "%t4", "%t5", "%t6"
25 25
];
26 26
27 27
/// Get register name from number.
28 28
fn regName(n: u8) -> *[u8] {
29 29
    return "?" if n >= 32 else REG_NAMES[n as u32];
lib/std/arch/rv64/tests.rad +42 -0
2 2
//!
3 3
//! These tests verify that instruction encodings match the RISC-V specification
4 4
//! by comparing against known-good values.
5 5
6 6
use std::testing;
7 +
use std::lang::alloc;
8 +
use std::collections::dict;
9 +
7 10
use super::encode;
11 +
use super::asm;
12 +
13 +
/// Backing storage (16 MiB) for the arena used by the assembly test.
static ASSEMBLY_ARENA_STORAGE: [u8; 16777216] = undefined;
14 +
/// Two-instruction text buffer passed to `addAssembly` as the program text.
/// It is left `undefined`; the test only inspects recorded symbol offsets.
static ASSEMBLY_TEXT_STORAGE: [u32; 2] = undefined;
8 15
9 16
/// Helper to check encoding equals expected value.
10 17
fn expectEncoding(actual: u32, expected: u32) throws (testing::TestError) {
11 18
    try testing::expect(actual == expected);
12 19
}
13 20
21 +
/// `addAssembly` must register only exported text symbols as functions.
/// Two text symbols are handed over, one local at offset 0 and one exported
/// at offset `INSTR_SIZE`; afterwards only "exported" may be present in the
/// generator's function label table, at its original offset.
@test fn testAddAssemblyExportsOnlyGlobalTextSymbols() throws (testing::TestError) {
22 +
    let mut arena = alloc::new(&mut ASSEMBLY_ARENA_STORAGE[..]);
23 +
    let symbols = try alloc::allocSlice(&mut arena, @sizeOf(asm::Symbol), @alignOf(asm::Symbol), 2) catch {
24 +
        throw testing::TestError::Failed;
25 +
    };
26 +
    let mut symbolSlice = @sliceOf((symbols as *mut [asm::Symbol]).ptr, 2, 2);
27 +
    set symbolSlice[0] = asm::Symbol {
28 +
        name: "local",
29 +
        section: asm::Section::Text,
30 +
        offset: 0,
31 +
        isExported: false,
32 +
    };
33 +
    set symbolSlice[1] = asm::Symbol {
34 +
        name: "exported",
35 +
        section: asm::Section::Text,
36 +
        offset: super::INSTR_SIZE,
37 +
        isExported: true,
38 +
    };
39 +
40 +
    let mut generator = super::beginProgram(
41 +
        super::ProgramOptions { entryPatch: super::EntryPatch::None, debug: false },
42 +
        &mut arena
43 +
    );
44 +
    super::addAssembly(
45 +
        &mut generator,
46 +
        asm::Program { text: &ASSEMBLY_TEXT_STORAGE[..], data: &[], symbols: symbolSlice }
47 +
    );
48 +
49 +
    // The local symbol must not be resolvable as a function.
    try testing::expect(dict::get(&generator.e.labels.funcs, "local") == nil);
50 +
    let exportedOffset = dict::get(&generator.e.labels.funcs, "exported") else {
51 +
        throw testing::TestError::Failed;
52 +
    };
53 +
    try testing::expect(exportedOffset == super::INSTR_SIZE);
54 +
}
55 +
14 56
///////////////////////
15 57
// R-type ALU tests  //
16 58
///////////////////////
17 59
18 60
@test fn testEncodeAdd() throws (testing::TestError) {
lib/std/char.rad added +31 -0
1 +
//! ASCII character classification helpers shared across the standard library.
2 +
3 +
@test mod tests;
4 +
5 +
/// Return `true` when `ch` is an ASCII digit (`'0'` through `'9'`).
6 +
export fn isDigit(ch: u8) -> bool {
7 +
    return ch >= '0' and ch <= '9';
8 +
}
9 +
10 +
/// Return `true` when `ch` is an ASCII hexadecimal digit:
/// `'0'`-`'9'`, `'a'`-`'f'` or `'A'`-`'F'`.
11 +
export fn isHexDigit(ch: u8) -> bool {
12 +
    return (ch >= '0' and ch <= '9')
13 +
        or (ch >= 'a' and ch <= 'f')
14 +
        or (ch >= 'A' and ch <= 'F');
15 +
}
16 +
17 +
/// Return `true` when `ch` is a binary digit (`'0'` or `'1'`).
18 +
export fn isBinDigit(ch: u8) -> bool {
19 +
    return ch == '0' or ch == '1';
20 +
}
21 +
22 +
/// Return `true` when `ch` is an ASCII alphabetic character
/// (`'a'`-`'z'` or `'A'`-`'Z'`). Digits and `'_'` are not alphabetic.
23 +
export fn isAlpha(ch: u8) -> bool {
24 +
    return (ch >= 'a' and ch <= 'z')
25 +
        or (ch >= 'A' and ch <= 'Z');
26 +
}
27 +
28 +
/// Return `true` when `ch` is printable ASCII, i.e. in the inclusive range
/// from space (`' '`) to tilde (`'~'`).
29 +
export fn isPrint(ch: u8) -> bool {
30 +
    return ch >= ' ' and ch <= '~';
31 +
}
lib/std/char/tests.rad added +42 -0
1 +
use std::testing;
2 +
3 +
/// Checks both ends of the digit range and the ASCII characters directly
/// outside it (`'/'` precedes `'0'`, `':'` follows `'9'`).
@test fn testIsDigit() throws (testing::TestError) {
4 +
    try testing::expect(super::isDigit('0'));
5 +
    try testing::expect(super::isDigit('9'));
6 +
    try testing::expectNot(super::isDigit('/'));
7 +
    try testing::expectNot(super::isDigit(':'));
8 +
}
9 +
10 +
/// Checks the ends of all three accepted ranges, and that the first letter
/// past `f`/`F` is rejected in both cases.
@test fn testIsHexDigit() throws (testing::TestError) {
11 +
    try testing::expect(super::isHexDigit('0'));
12 +
    try testing::expect(super::isHexDigit('9'));
13 +
    try testing::expect(super::isHexDigit('a'));
14 +
    try testing::expect(super::isHexDigit('f'));
15 +
    try testing::expect(super::isHexDigit('A'));
16 +
    try testing::expect(super::isHexDigit('F'));
17 +
    try testing::expectNot(super::isHexDigit('g'));
18 +
    try testing::expectNot(super::isHexDigit('G'));
19 +
}
20 +
21 +
/// Only `'0'` and `'1'` are binary digits; another digit and a letter are rejected.
@test fn testIsBinDigit() throws (testing::TestError) {
22 +
    try testing::expect(super::isBinDigit('0'));
23 +
    try testing::expect(super::isBinDigit('1'));
24 +
    try testing::expectNot(super::isBinDigit('2'));
25 +
    try testing::expectNot(super::isBinDigit('a'));
26 +
}
27 +
28 +
/// Checks the ends of both letter ranges, and that a digit and `'_'` are
/// not treated as alphabetic.
@test fn testIsAlpha() throws (testing::TestError) {
29 +
    try testing::expect(super::isAlpha('a'));
30 +
    try testing::expect(super::isAlpha('z'));
31 +
    try testing::expect(super::isAlpha('A'));
32 +
    try testing::expect(super::isAlpha('Z'));
33 +
    try testing::expectNot(super::isAlpha('0'));
34 +
    try testing::expectNot(super::isAlpha('_'));
35 +
}
36 +
37 +
/// Checks both ends of the printable range and the byte values directly
/// outside it (31 is just below space, 127 just above `'~'`).
@test fn testIsPrint() throws (testing::TestError) {
38 +
    try testing::expect(super::isPrint(' '));
39 +
    try testing::expect(super::isPrint('~'));
40 +
    try testing::expectNot(super::isPrint(31));
41 +
    try testing::expectNot(super::isPrint(127));
42 +
}
lib/std/fmt.rad +168 -0
1 1
//! Formatting utilities for converting values to strings.
2 2
use super::mem;
3 3
4 +
/// Maximum `u64` value (2^64 - 1); `parseInt` uses it for its overflow checks.
5 +
export constant U64_MAX: u64 = 0xFFFFFFFFFFFFFFFF;
4 6
/// Maximum string length for a formatted u32 (eg. "4294967295").
5 7
export constant U32_STR_LEN: u32 = 10;
6 8
/// Maximum string length for a formatted i32 (eg. "-2147483648").
7 9
export constant I32_STR_LEN: u32 = U32_STR_LEN + 1;
8 10
/// Maximum string length for a formatted u64 (eg. "18446744073709551615").
10 12
/// Maximum string length for a formatted i64 (eg. "-9223372036854775808").
11 13
export constant I64_STR_LEN: u32 = 20;
12 14
/// Maximum string length for a formatted bool (eg. "false").
13 15
export constant BOOL_STR_LEN: u32 = 5;
14 16
17 +
/// Radix/base of a parsed integer literal, as detected by `parseInt` from
/// the literal's prefix.
18 +
export union Radix {
19 +
    /// Binary literal (`0b...` or `0B...`).
20 +
    Binary,
21 +
    /// Decimal literal (no prefix).
22 +
    Decimal,
23 +
    /// Hexadecimal literal (`0x...` or `0X...`).
24 +
    Hex,
25 +
}
26 +
27 +
/// Errors reported while parsing literal text (`parseInt`, `parseChar`).
28 +
export union ParseError {
29 +
    /// Literal text was empty or missing required digits; also thrown by
    /// `parseChar` for any malformed character literal.
30 +
    Invalid,
31 +
    /// Literal contained an invalid digit for its radix.
32 +
    InvalidDigit,
33 +
    /// Literal value exceeded the supported range (the magnitude does not fit in a `u64`).
34 +
    Overflow,
35 +
}
36 +
37 +
/// Parsed integer literal metadata, as produced by `parseInt`.
38 +
export record IntLiteral {
39 +
    /// Raw characters that comprised the literal (the full input text,
    /// including any sign and radix prefix).
40 +
    text: *[u8],
41 +
    /// Absolute magnitude parsed from the literal. The sign is never applied
    /// to this value; it is reported separately in `negative`.
42 +
    magnitude: u64,
43 +
    /// Radix used by the literal.
44 +
    radix: Radix,
45 +
    /// Whether the literal spelled an explicit sign (`+` or `-`).
46 +
    signed: bool,
47 +
    /// Whether the literal used a negative sign.
48 +
    negative: bool,
49 +
}
50 +
15 51
/// Format a u32 by writing it to the provided buffer.
16 52
export fn formatU32(val: u32, buffer: *mut [u8]) -> *[u8] {
17 53
    assert buffer.len >= U32_STR_LEN;
18 54
19 55
    let mut x: u32 = val;
135 171
    } else {
136 172
        try! mem::copy(buffer, "false");
137 173
        return &buffer[..5];
138 174
    }
139 175
}
176 +
177 +
/// Convert a single ASCII digit into its numeric value for the given radix.
///
/// `'0'`-`'9'` map to 0-9. For radices above 10, letters map to 10-35
/// regardless of case. Returns `nil` when `ch` is not a digit or its value
/// is not below `radix`. Asserts that `radix` is between 2 and 36.
178 +
export fn digitFromAscii(ch: u8, radix: u32) -> ?u32 {
179 +
    assert radix >= 2 and radix <= 36;
180 +
181 +
    // Default to an out-of-range value so non-digits fall through to `nil`.
182 +
    let mut value: u32 = 36;
183 +
184 +
    if ch >= '0' and ch <= '9' {
185 +
        set value = (ch - '0') as u32;
186 +
    } else if radix > 10 {
187 +
        // Mask to convert ASCII letters to uppercase.
        // Clearing bit 0x20 folds 'a'..'z' onto 'A'..'Z'; other bytes land
        // outside 'A'..'Z' and are rejected by the range check below.
188 +
        let upper = ch & 0xDF;
189 +
        if upper >= 'A' and upper <= 'Z' {
190 +
            set value = (upper - 'A') as u32 + 10;
191 +
        }
192 +
    }
193 +
    if value < radix {
194 +
        return value;
195 +
    }
196 +
    return nil;
197 +
}
198 +
199 +
/// Decode a single-byte ASCII escape.
///
/// `ch` is the character that follows the backslash. Recognized escapes are
/// `n`, `t`, `r`, `\`, `"`, `'` and `0` (NUL). Any other character is
/// returned unchanged, so unknown escapes are not an error.
200 +
export fn decodeAsciiEscape(ch: u8) -> u8 {
201 +
    match ch {
202 +
        case 'n'  => return '\n',
203 +
        case 't'  => return '\t',
204 +
        case 'r'  => return '\r',
205 +
        case '\\' => return '\\',
206 +
        case '"'  => return '"',
207 +
        case '\'' => return '\'',
208 +
        case '0'  => return 0,
209 +
        else      => return ch,
210 +
    }
211 +
}
212 +
213 +
/// Parse an integer literal (binary, decimal, or hexadecimal) including an optional sign.
///
/// Accepted form: an optional `+` or `-`, an optional `0x`/`0X` or `0b`/`0B`
/// prefix, then one or more digits of the selected radix. Without a prefix
/// the literal is decimal, even with leading zeros.
///
/// The result carries the unsigned magnitude; the sign is only recorded in
/// `signed`/`negative` and no signed-range check is done here.
///
/// Throws `ParseError::Invalid` for empty text, a lone sign, or a prefix with
/// no digits; `ParseError::InvalidDigit` for a character that is not a digit
/// of the radix; `ParseError::Overflow` when the magnitude exceeds `U64_MAX`.
214 +
export fn parseInt(text: *[u8]) -> IntLiteral throws (ParseError) {
215 +
    if text.len == 0 {
216 +
        throw ParseError::Invalid;
217 +
    }
218 +
    let first = text[0];
219 +
    let negative = first == '-';
220 +
    let signed: bool = negative or (first == '+');
221 +
222 +
    let mut start: u32 = 0;
223 +
    let mut radix: u32 = 10;
224 +
    let mut radixType = Radix::Decimal;
225 +
226 +
    if signed {
227 +
        set start = 1;
228 +
        if start >= text.len {
229 +
            throw ParseError::Invalid;
230 +
        }
231 +
    }
232 +
    // A radix prefix needs at least two characters after the optional sign.
    if start + 1 < text.len and text[start] == '0' {
233 +
        let prefix = text[start + 1];
234 +
        if prefix == 'x' or prefix == 'X' {
235 +
            set radix = 16;
236 +
            set radixType = Radix::Hex;
237 +
            set start += 2;
238 +
        } else if prefix == 'b' or prefix == 'B' {
239 +
            set radix = 2;
240 +
            set radixType = Radix::Binary;
241 +
            set start += 2;
242 +
        }
243 +
        // Rejects a bare prefix such as `0x` with nothing after it.
        if start >= text.len {
244 +
            throw ParseError::Invalid;
245 +
        }
246 +
    }
247 +
    let mut value: u64 = 0;
248 +
    let radix64: u64 = radix as u64;
249 +
    for i in start..text.len {
250 +
        let ch = text[i];
251 +
        let digit = digitFromAscii(ch, radix) else {
252 +
            throw ParseError::InvalidDigit;
253 +
        };
254 +
        // Check before multiplying so the product cannot wrap.
        if value > (U64_MAX / radix64) {
255 +
            throw ParseError::Overflow;
256 +
        }
257 +
        set value *= radix64;
258 +
259 +
        // Likewise check before adding the digit.
        if value > U64_MAX - (digit as u64) {
260 +
            throw ParseError::Overflow;
261 +
        }
262 +
        set value += (digit as u64);
263 +
    }
264 +
    return IntLiteral {
265 +
        text, magnitude: value, radix: radixType, signed, negative,
266 +
    };
267 +
}
268 +
269 +
/// Process escape sequences in a raw string, writing the result into `dst`.
270 +
/// Returns the number of bytes written.
///
/// Each backslash followed by another byte is replaced by
/// `decodeAsciiEscape` of that byte; a backslash that is the last byte of
/// `raw` is copied through unchanged. The output is never longer than
/// `raw`, so a `dst` of at least `raw.len` bytes is sufficient.
/// NOTE(review): no explicit length check on `dst` is made here.
271 +
export fn unescapeString(raw: *[u8], dst: *mut [u8]) -> u32 {
272 +
    let mut i: u32 = 0;
273 +
    let mut j: u32 = 0;
274 +
275 +
    while i < raw.len {
276 +
        if raw[i] == '\\' and i + 1 < raw.len {
277 +
            set dst[j] = decodeAsciiEscape(raw[i + 1]);
278 +
            set i += 2;
279 +
        } else {
280 +
            set dst[j] = raw[i];
281 +
            set i += 1;
282 +
        }
283 +
        set j += 1;
284 +
    }
285 +
    return j;
286 +
}
287 +
288 +
/// Parse a single-byte character literal, including the single quotes.
///
/// `text` must be the complete literal: an opening `'`, then either exactly
/// one raw byte or a backslash followed by one escape character (decoded
/// with `decodeAsciiEscape`), then a closing `'`.
///
/// Throws `ParseError::Invalid` when the text is shorter than two bytes, is
/// not delimited by single quotes, has an empty body, or has a body of the
/// wrong length.
289 +
export fn parseChar(text: *[u8]) -> u8 throws (ParseError) {
290 +
    if text.len < 2 {
291 +
        throw ParseError::Invalid;
292 +
    }
    // Both delimiters must really be single quotes; otherwise the slice below
    // would silently strip arbitrary bytes (e.g. from `"a"` or `xay`).
    if text[0] <> '\'' or text[text.len - 1] <> '\'' {
        throw ParseError::Invalid;
    }
293 +
    let raw = &text[1..text.len - 1];
294 +
    if raw.len == 0 {
295 +
        throw ParseError::Invalid;
296 +
    }
297 +
    if raw[0] == '\\' {
298 +
        // An escape is exactly two bytes: the backslash and one selector.
        if raw.len <> 2 {
299 +
            throw ParseError::Invalid;
300 +
        }
301 +
        return decodeAsciiEscape(raw[1]);
302 +
    }
303 +
    if raw.len <> 1 {
304 +
        throw ParseError::Invalid;
305 +
    }
306 +
    return raw[0];
307 +
}
lib/std/lang/ast.rad +2 -25
1 1
//! Radiance AST modules.
2 2
export mod printer;
3 3
4 4
use std::io;
5 +
use std::fmt;
5 6
use std::lang::alloc;
6 7
7 8
/// Maximum number of trait methods.
8 9
export constant MAX_TRAIT_METHODS: u32 = 8;
9 10
75 76
    Signed,
76 77
    /// Unsigned, eg. `u32`.
77 78
    Unsigned,
78 79
}
79 80
80 -
/// Radix/base of a number.
81 -
export union Radix {
82 -
    /// Binary literal (0b...).
83 -
    Binary,
84 -
    /// Decimal literal.
85 -
    Decimal,
86 -
    /// Hexadecimal literal (0x...).
87 -
    Hex,
88 -
}
89 -
90 -
/// Parsed integer literal metadata.
91 -
export record IntLiteral {
92 -
    /// Raw characters that comprised the literal.
93 -
    text: *[u8],
94 -
    /// Absolute magnitude parsed from the literal.
95 -
    magnitude: u64,
96 -
    /// Radix used by the literal.
97 -
    radix: Radix,
98 -
    /// Whether the literal spelled an explicit sign.
99 -
    signed: bool,
100 -
    /// Whether the literal used a negative sign.
101 -
    negative: bool,
102 -
}
103 -
104 81
/// Binary operator kinds used in numeric expressions.
105 82
export union BinaryOp {
106 83
    /// Addition (`+`).
107 84
    Add,
108 85
    /// Subtraction (`-`).
616 593
    /// String literal like `"Hello World!"`.
617 594
    String(*[u8]),
618 595
    /// Identifier expression.
619 596
    Ident(*[u8]),
620 597
    /// Numeric literal such as `42` or `0xFF`.
621 -
    Number(IntLiteral),
598 +
    Number(fmt::IntLiteral),
622 599
    /// Range expression such as `0..10` or `..`.
623 600
    Range(Range),
624 601
    /// Array literal expression.
625 602
    ArrayLit(*mut [*Node]),
626 603
    /// Array repeat literal expression.
lib/std/lang/gen/data.rad +32 -6
40 40
    syms: *mut [DataSym],
41 41
    count: *mut u32,
42 42
    base: u32,
43 43
    readOnly: bool
44 44
) -> u32 {
45 -
    let mut offset: u32 = 0;
45 +
    return layoutSectionAtOffset(items, syms, count, base, 0, readOnly);
46 +
}
47 +
48 +
/// Lay out data symbols for a single section starting at [`startOffset`].
49 +
export fn layoutSectionAtOffset(
50 +
    items: *[il::Data],
51 +
    syms: *mut [DataSym],
52 +
    count: *mut u32,
53 +
    base: u32,
54 +
    startOffset: u32,
55 +
    readOnly: bool
56 +
) -> u32 {
57 +
    let mut offset: u32 = startOffset;
46 58
47 59
    // Initialized data first.
48 60
    for i in 0..items.len {
49 61
        let data = &items[i];
50 62
        if data.readOnly == readOnly and not data.isUndefined {
76 88
    fnLabels: *labels::Labels,
77 89
    codeBase: u32,
78 90
    buf: *mut [u8],
79 91
    readOnly: bool
80 92
) -> u32 {
81 -
    let mut offset: u32 = 0;
93 +
    return emitSectionAtOffset(items, dataSymMap, fnLabels, codeBase, buf, readOnly, 0);
94 +
}
95 +
96 +
/// Emit data bytes for a single section starting at `startOffset`.
97 +
export fn emitSectionAtOffset(
98 +
    items: *[il::Data],
99 +
    dataSymMap: *DataSymMap,
100 +
    fnLabels: *labels::Labels,
101 +
    codeBase: u32,
102 +
    buf: *mut [u8],
103 +
    readOnly: bool,
104 +
    startOffset: u32
105 +
) -> u32 {
106 +
    let mut offset: u32 = startOffset;
82 107
83 108
    for i in 0..items.len {
84 109
        let data = &items[i];
85 110
        if data.readOnly == readOnly and not data.isUndefined {
86 111
            set offset = mem::alignUp(offset, data.alignment);
87 -
            assert offset + data.size <= buf.len, "emitSection: buffer overflow";
112 +
            assert offset + data.size <= buf.len, "emitSectionAtOffset: buffer overflow";
88 113
            for j in 0..data.values.len {
89 114
                let v = &data.values[j];
90 115
                for _ in 0..v.count {
91 116
                    match v.item {
92 117
                        case il::DataItem::Val { typ, val } => {
93 118
                            let size = il::typeSize(typ);
94 119
                            let valPtr = &val as *u8;
95 120
                            try! mem::copy(&mut buf[offset..], @sliceOf(valPtr, size));
121 +
96 122
                            set offset += size;
97 123
                        },
98 124
                        case il::DataItem::Sym(name) => {
99 125
                            let addr = lookupAddr(dataSymMap, name) else {
100 -
                                panic "emitSection: data symbol not found";
126 +
                                panic "emitSectionAtOffset: data symbol not found";
101 127
                            };
102 128
                            let addr64: u64 = addr as u64;
103 129
                            let addrPtr = &addr64 as *u8;
104 130
105 131
                            try! mem::copy(&mut buf[offset..], @sliceOf(addrPtr, 8));
106 132
107 -
                            set offset += 8;
133 +
                            set offset += @sizeOf(u64);
108 134
                        },
109 135
                        case il::DataItem::Fn(name) => {
110 136
                            let addr = codeBase + labels::funcOffset(fnLabels, name) as u32;
111 137
                            let addr64: u64 = addr as u64;
112 138
                            let addrPtr = &addr64 as *u8;
113 139
114 140
                            try! mem::copy(&mut buf[offset..], @sliceOf(addrPtr, 8));
115 141
116 -
                            set offset += 8;
142 +
                            set offset += @sizeOf(*u8);
117 143
                        },
118 144
                        case il::DataItem::Str(s) => {
119 145
                            try! mem::copy(&mut buf[offset..], s);
120 146
                            set offset += s.len;
121 147
                        },
lib/std/lang/parser.rad +23 -118
1 1
//! Recursive descent parser for the Radiance programming language.
2 2
@test export mod tests;
3 3
4 4
use std::mem;
5 5
use std::io;
6 +
use std::fmt;
6 7
use std::lang::alloc;
7 8
use std::lang::ast;
8 9
use std::lang::strings;
9 10
use std::lang::scanner;
10 11
11 12
/// Maximum `u32` value.
12 13
export constant U32_MAX: u32 = 0xFFFFFFFF;
14 +
/// Minimum `i64` value (-2^63).
15 +
export constant I64_MIN: i64 = -0x8000000000000000;
16 +
/// Largest magnitude representable by a negative `i64` (2^63, the magnitude of `I64_MIN`).
17 +
export constant I64_MIN_MAGNITUDE: u64 = 0x8000000000000000;
18 +
/// Maximum representable `i64` magnitude for a non-negative value (2^63 - 1).
19 +
export constant I64_MAX_MAGNITUDE: u64 = 0x7FFFFFFFFFFFFFFF;
13 20
/// Maximum representable `u64` value.
14 21
export constant U64_MAX: u64 = 0xFFFFFFFFFFFFFFFF;
15 22
/// Maximum number of fields in a record.
16 23
export constant MAX_RECORD_FIELDS: u32 = 32;
17 24
169 176
/// Emit a `true` or `false` literal node.
170 177
fn nodeBool(p: *mut Parser, value: bool) -> *ast::Node {
171 178
    return node(p, ast::NodeValue::Bool(value));
172 179
}
173 180
174 -
/// Convert a single ASCII digit into its numeric value for the given radix.
175 -
export fn digitFromAscii(ch: u8, radix: u32) -> ?u32 {
176 -
    assert radix >= 2 and radix <= 36;
177 -
178 -
    // Default to an out-of-range value so non-digits fall through to `nil`.
179 -
    let mut value: u32 = 36;
180 -
181 -
    if ch >= '0' and ch <= '9' {
182 -
        set value = (ch - '0') as u32;
183 -
    } else if radix > 10 {
184 -
        // Mask to convert ASCII letters to uppercase.
185 -
        let upper = ch & 0xDF;
186 -
        if upper >= 'A' and upper <= 'Z' {
187 -
            set value = (upper - 'A') as u32 + 10;
188 -
        }
189 -
    }
190 -
    if value < radix {
191 -
        return value;
192 -
    }
193 -
    return nil;
194 -
}
195 -
196 -
/// Parse an integer literal (binary, decimal, or hexadecimal) including an optional sign.
197 -
fn parseIntLiteral(p: *mut Parser, text: *[u8]) -> ast::IntLiteral
181 +
/// Parse an integer literal while mapping shared errors into parser diagnostics.
182 +
fn parseIntLiteral(p: *mut Parser, text: *[u8]) -> fmt::IntLiteral
198 183
    throws (ParseError)
199 184
{
200 -
    if text.len == 0 {
201 -
        throw failParsing(p, "integer literal is empty");
202 -
    }
203 -
    let first = text[0];
204 -
    let negative = first == '-';
205 -
    let signed: bool = negative or (first == '+');
206 -
207 -
    let mut start: u32 = 0;
208 -
    let mut radix: u32 = 10;
209 -
    let mut radixType = ast::Radix::Decimal;
210 -
211 -
    if signed {
212 -
        set start = 1;
213 -
        if start >= text.len {
214 -
            throw failParsing(p, "integer literal requires digits after sign");
215 -
        }
216 -
    }
217 -
    if start + 1 < text.len and text[start] == '0' {
218 -
        let prefix = text[start + 1];
219 -
        if prefix == 'x' or prefix == 'X' {
220 -
            set radix = 16;
221 -
            set radixType = ast::Radix::Hex;
222 -
            set start += 2;
223 -
        } else if prefix == 'b' or prefix == 'B' {
224 -
            set radix = 2;
225 -
            set radixType = ast::Radix::Binary;
226 -
            set start += 2;
227 -
        }
228 -
        if start >= text.len {
229 -
            throw failParsing(p, "integer literal prefix must be followed by digits");
185 +
    let literal = try fmt::parseInt(text) catch err {
186 +
        match err {
187 +
            case fmt::ParseError::Invalid =>
188 +
                throw failParsing(p, "invalid integer literal"),
189 +
            case fmt::ParseError::InvalidDigit =>
190 +
                throw failParsing(p, "invalid digit in integer literal"),
191 +
            case fmt::ParseError::Overflow =>
192 +
                throw failParsing(p, "integer literal overflow"),
230 193
        }
231 -
    }
232 -
    let mut value: u64 = 0;
233 -
    let radix64: u64 = radix as u64;
234 -
    for i in start..text.len {
235 -
        let ch = text[i];
236 -
        let digit = digitFromAscii(ch, radix) else {
237 -
            throw failParsing(p, "invalid digit in integer literal");
238 -
        };
239 -
        if value > (U64_MAX / radix64) {
240 -
            throw failParsing(p, "integer literal overflow");
241 -
        }
242 -
        set value *= radix64;
243 -
244 -
        if value > U64_MAX - (digit as u64) {
245 -
            throw failParsing(p, "integer literal overflow");
246 -
        }
247 -
        set value += (digit as u64);
248 -
    }
249 -
    return ast::IntLiteral {
250 -
        text, magnitude: value, radix: radixType, signed, negative,
251 194
    };
195 +
    return literal;
252 196
}
253 197
254 198
/// Emit an integer type node.
255 199
fn nodeTypeInt(p: *mut Parser, width: u8, sign: ast::Signedness) -> *ast::Node {
256 200
    return node(p, ast::NodeValue::TypeSig(
257 201
        ast::TypeSig::Integer { width, sign }
258 202
    ));
259 203
}
260 204
261 205
/// Emit a number literal node with the provided literal metadata.
262 -
fn nodeNumber(p: *mut Parser, literal: ast::IntLiteral) -> *ast::Node {
206 +
fn nodeNumber(p: *mut Parser, literal: fmt::IntLiteral) -> *ast::Node {
263 207
    return node(p, ast::NodeValue::Number(literal));
264 208
}
265 209
266 210
/// Emit a `super` node.
267 211
fn nodeSuper(p: *mut Parser) -> *ast::Node {
268 212
    return node(p, ast::NodeValue::Super);
269 213
}
270 214
271 -
/// Process escape sequences in a raw string, writing the result into `dst`.
272 -
/// Returns the number of bytes written.
273 -
fn unescapeString(raw: *[u8], dst: *mut [u8]) -> u32 {
274 -
    let mut i: u32 = 0;
275 -
    let mut j: u32 = 0;
276 -
277 -
    while i < raw.len {
278 -
        if raw[i] == '\\' and i + 1 < raw.len {
279 -
            match raw[i + 1] {
280 -
                case 'n'  => set dst[j] = '\n',
281 -
                case 't'  => set dst[j] = '\t',
282 -
                case 'r'  => set dst[j] = '\r',
283 -
                case '\\' => set dst[j] = '\\',
284 -
                case '"'  => set dst[j] = '"',
285 -
                case '0'  => set dst[j] = 0,
286 -
                else      => set dst[j] = raw[i + 1],
287 -
            }
288 -
            set i += 2;
289 -
        } else {
290 -
            set dst[j] = raw[i];
291 -
            set i += 1;
292 -
        }
293 -
        set j += 1;
294 -
    }
295 -
    return j;
296 -
}
297 -
298 215
/// Emit a single attribute node.
299 216
fn nodeAttribute(p: *mut Parser, attr: ast::Attribute) -> *ast::Node {
300 217
    return node(p, ast::NodeValue::Attribute(attr));
301 218
}
302 219
696 613
            advance(p);
697 614
            return node(p, ast::NodeValue::Undef);
698 615
        }
699 616
        case scanner::TokenKind::Char => {
700 617
            advance(p);
701 -
            let src = p.previous.source;
702 -
            let mut ch: u8 = 0;
703 -
704 -
            if src[1] == '\\' { // Handle escape sequences.
705 -
                match src[2] {
706 -
                    case 'n'  => { set ch = '\n'; }
707 -
                    case 't'  => { set ch = '\t'; }
708 -
                    case 'r'  => { set ch = '\r'; }
709 -
                    case '\'' => { set ch = '\''; }
710 -
                    case '\\' => { set ch = '\\'; }
711 -
                    else      => { set ch = src[2]; }
712 -
                }
713 -
            } else {
714 -
                set ch = src[1];
715 -
            }
618 +
            let ch = try fmt::parseChar(p.previous.source) catch {
619 +
                throw failParsing(p, "invalid char literal");
620 +
            };
716 621
            return node(p, ast::NodeValue::Char(ch));
717 622
        }
718 623
        case scanner::TokenKind::String => {
719 624
            advance(p);
720 625
            let src = p.previous.source;
721 626
            let raw = &src[1..src.len - 1]; // Strip quotes.
722 627
723 628
            // Process escape sequences into arena buffer.
724 629
            let buf = alloc::remainingBuf(&mut p.arena.arena);
725 -
            let len = unescapeString(raw, buf);
630 +
            let len = fmt::unescapeString(raw, buf);
726 631
            alloc::commit(&mut p.arena.arena, len);
727 632
728 633
            return node(p, ast::NodeValue::String(&buf[..len]));
729 634
        }
730 635
        case scanner::TokenKind::Underscore => {
lib/std/lang/parser/tests.rad +8 -31
1 1
//! Parser tests.
2 2
3 3
use std::mem;
4 +
use std::fmt;
4 5
use std::testing;
5 6
use std::lang::ast;
6 7
use std::lang::scanner;
7 8
use std::lang::strings;
8 9
113 114
114 115
    return root;
115 116
}
116 117
117 118
/// Parse an expression expected to be a number literal and return its payload.
118 -
fn parseNumberLiteral(text: *[u8]) -> ast::IntLiteral
119 +
fn parseNumberLiteral(text: *[u8]) -> fmt::IntLiteral
119 120
    throws (testing::TestError)
120 121
{
121 122
    let mut arena = ast::nodeArena(&mut ARENA_STORAGE[..]);
122 123
    let mut parser = super::mkParser(scanner::SourceLoc::String, text, &mut arena, &mut STRING_POOL);
123 124
    super::advance(&mut parser);
275 276
276 277
/// Verify that decimal literals record magnitude and base metadata.
277 278
@test fn testParseDecimalLiteralMetadata() throws (testing::TestError) {
278 279
    let lit = try parseNumberLiteral("1234");
279 280
    try testing::expect(lit.magnitude == 1234);
280 -
    try testing::expect(lit.radix == ast::Radix::Decimal);
281 +
    try testing::expect(lit.radix == fmt::Radix::Decimal);
281 282
}
282 283
283 284
/// Verify that hexadecimal literals record metadata without marking them signed.
284 285
@test fn testParseNumberMetadata() throws (testing::TestError) {
285 286
    let lit = try parseNumberLiteral("0xFF");
286 287
    try testing::expect(lit.magnitude == 0xFF);
287 -
    try testing::expect(lit.radix == ast::Radix::Hex);
288 +
    try testing::expect(lit.radix == fmt::Radix::Hex);
288 289
    try testing::expect(not lit.signed);
289 290
    try testing::expect(not lit.negative);
290 291
}
291 292
292 293
/// Verify that binary literals capture their radix.
293 294
@test fn testParseBinaryLiteralMetadata() throws (testing::TestError) {
294 295
    let lit = try parseNumberLiteral("0b1010");
295 296
    try testing::expect(lit.magnitude == 0b1010);
296 -
    try testing::expect(lit.radix == ast::Radix::Binary);
297 +
    try testing::expect(lit.radix == fmt::Radix::Binary);
297 298
}
298 299
299 300
/// Signed literals produced by the scanner keep sign details in metadata.
300 301
@test fn testParseSignedLiteralMetadata() throws (testing::TestError) {
301 302
    let literal = try parseNumberLiteral("42");
312 313
/// Literals with prefixes still parse correctly when explicitly signed.
313 314
@test fn testParseSignedPrefixedLiteral() throws (testing::TestError) {
314 315
    let hex = try parseNumberLiteral("+0x2A");
315 316
    try testing::expect(hex.signed);
316 317
    try testing::expect(not hex.negative);
317 -
    try testing::expect(hex.radix == ast::Radix::Hex);
318 +
    try testing::expect(hex.radix == fmt::Radix::Hex);
318 319
    try testing::expect(hex.magnitude == 0x2A);
319 320
320 321
    let neg = try parseNumberLiteral("-0x2A");
321 322
    try testing::expect(neg.signed);
322 323
    try testing::expect(neg.negative);
323 -
    try testing::expect(neg.radix == ast::Radix::Hex);
324 +
    try testing::expect(neg.radix == fmt::Radix::Hex);
324 325
    try testing::expect(neg.magnitude == 0x2A);
325 326
326 327
    let bin = try parseNumberLiteral("-0b11");
327 328
    try testing::expect(bin.signed);
328 329
    try testing::expect(bin.negative);
329 -
    try testing::expect(bin.radix == ast::Radix::Binary);
330 +
    try testing::expect(bin.radix == fmt::Radix::Binary);
330 331
    try testing::expect(bin.magnitude == 0b11);
331 332
}
332 333
333 334
/// Range expressions parse with explicit start and end bounds.
334 335
@test fn testParseRangeExpr() throws (testing::TestError) {
356 357
    try expectNumberLiteralFail("0x1G");
357 358
    try expectNumberLiteralFail("0b102");
358 359
    try expectNumberLiteralFail("+0x1G");
359 360
}
360 361
361 -
/// Ensure digit-to-value conversion covers decimal and hex ranges.
362 -
@test fn testDigitFromAscii() throws (testing::TestError) {
363 -
    let zero = super::digitFromAscii('0', 10) else throw testing::TestError::Failed;
364 -
    try testing::expect(zero == 0);
365 -
366 -
    let nine = super::digitFromAscii('9', 10) else throw testing::TestError::Failed;
367 -
    try testing::expect(nine == 9);
368 -
369 -
    let lower = super::digitFromAscii('a', 16) else throw testing::TestError::Failed;
370 -
    try testing::expect(lower == 10);
371 -
372 -
    let lowerF = super::digitFromAscii('f', 16) else throw testing::TestError::Failed;
373 -
    try testing::expect(lowerF == 15);
374 -
375 -
    let upper = super::digitFromAscii('A', 16) else throw testing::TestError::Failed;
376 -
    try testing::expect(upper == 10);
377 -
378 -
    let upperF = super::digitFromAscii('F', 16) else throw testing::TestError::Failed;
379 -
    try testing::expect(upperF == 15);
380 -
381 -
    try testing::expect(super::digitFromAscii('g', 16) == nil);
382 -
    try testing::expect(super::digitFromAscii('_', 10) == nil);
383 -
}
384 -
385 362
/// Test parsing nil literal.
386 363
@test fn testParseNil() throws (testing::TestError) {
387 364
    let r1 = try! parseExprStr("nil");
388 365
    let case ast::NodeValue::Nil = r1.value
389 366
        else throw testing::TestError::Failed;
lib/std/lang/scanner.rad +22 -49
2 2
//!
3 3
//! This module implements a hand-written scanner that tokenizes Radiance
4 4
//! source code into a stream of tokens for consumption by the parser.
5 5
@test mod tests;
6 6
7 +
use std::char;
7 8
use std::mem;
8 9
use std::lang::strings;
9 10
10 11
/// Token kinds representing all lexical elements in Radiance.
11 12
///
298 299
            else => return,
299 300
        }
300 301
    }
301 302
}
302 303
303 -
/// Check if character is an ASCII digit (0-9).
304 -
fn isDigit(c: u8) -> bool {
305 -
    return c >= '0' and c <= '9';
306 -
}
307 -
308 -
/// Check if character is a hexadecimal digit (0-9, a-f, A-F).
309 -
fn isHexDigit(c: u8) -> bool {
310 -
    return (c >= '0' and c <= '9')
311 -
        or (c >= 'a' and c <= 'f')
312 -
        or (c >= 'A' and c <= 'F');
313 -
}
314 -
315 -
/// Check if character is a binary digit (0 or 1).
316 -
fn isBinDigit(c: u8) -> bool {
317 -
    return c == '0' or c == '1';
318 -
}
319 -
320 -
/// Check if character is alphabetic.
321 -
fn isAlpha(c: u8) -> bool {
322 -
    return (c >= 'a' and c <= 'z')
323 -
        or (c >= 'A' and c <= 'Z');
324 -
}
325 -
326 -
/// Check if character is printable ASCII.
327 -
fn isPrint(c: u8) -> bool {
328 -
    return c >= ' ' and c <= '~';
329 -
}
330 -
331 304
/// Scan numeric literal (decimal, hex, or binary).
332 305
fn scanNumber(s: *mut Scanner) -> Token {
333 306
    let first = s.source[s.cursor - 1];
334 307
    if first == '-' or first == '+' {
335 308
        advance(s);
337 310
    // Check for hex literal (`0x` or `0X` prefix).
338 311
    if s.source[s.cursor - 1] == '0' {
339 312
        if let ch = current(s); ch == 'x' or ch == 'X' {
340 313
            advance(s);
341 314
            // Must have at least one hex digit after `0x`.
342 -
            if let ch = current(s); not isHexDigit(ch) {
315 +
            if let ch = current(s); not char::isHexDigit(ch) {
343 316
                return invalid(s.token, "invalid hex literal");
344 317
            }
345 -
            while let ch = current(s); isHexDigit(ch) {
318 +
            while let ch = current(s); char::isHexDigit(ch) {
346 319
                advance(s);
347 320
            }
348 321
            return tok(s, TokenKind::Number);
349 322
        }
350 323
        // Check for binary literal (`0b` or `0B` prefix).
351 324
        if let ch = current(s); ch == 'b' or ch == 'B' {
352 325
            advance(s);
353 326
            // Must have at least one binary digit after `0b`.
354 -
            if let ch = current(s); not isBinDigit(ch) {
327 +
            if let ch = current(s); not char::isBinDigit(ch) {
355 328
                return invalid(s.token, "invalid binary literal");
356 329
            }
357 -
            while let ch = current(s); isBinDigit(ch) {
330 +
            while let ch = current(s); char::isBinDigit(ch) {
358 331
                advance(s);
359 332
            }
360 333
            return tok(s, TokenKind::Number);
361 334
        }
362 335
    }
363 -
364 336
    // Regular decimal number.
365 -
    while let ch = current(s); isDigit(ch) {
337 +
    while let ch = current(s); char::isDigit(ch) {
366 338
        advance(s);
367 339
    }
368 -
369 340
    // Look for decimal part.
370 341
    if let ch = current(s); ch == '.' {
371 -
        if let p = peek(s); isDigit(p) {
342 +
        if let p = peek(s); char::isDigit(p) {
372 343
            advance(s); // Consume the "."
373 -
            while let ch = current(s); isDigit(ch) {
344 +
            while let ch = current(s); char::isDigit(ch) {
374 345
                advance(s);
375 346
            }
376 347
        }
377 348
    }
378 349
    return tok(s, TokenKind::Number);
379 350
}
380 351
381 352
fn scanDelimited(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
382 353
    while let ch = current(s); ch <> delim {
383 -
        if not isPrint(ch) {
354 +
        if not char::isPrint(ch) {
384 355
            return invalid(s.token, "invalid character");
385 356
        }
386 357
        if consume(s, '\\') { // Consume escapes
387 358
            if isEof(s) {
388 359
                return nil;
431 402
    return TokenKind::Ident;
432 403
}
433 404
434 405
/// Scan an identifier, keyword, or label.
435 406
fn scanIdentifier(s: *mut Scanner) -> Token {
436 -
    while let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' or ch == '#' {
407 +
    while let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) {
437 408
        advance(s);
438 409
    }
439 410
    let ident = &s.source[s.token..s.cursor];
440 411
    let kind = keywordOrIdent(ident);
441 412
454 425
    if isEof(s) {
455 426
        return tok(s, TokenKind::Eof);
456 427
    }
457 428
    let c: u8 = advance(s);
458 429
459 -
    if isDigit(c) {
430 +
    if char::isDigit(c) {
460 431
        return scanNumber(s);
461 432
    }
462 -
    if isAlpha(c) {
433 +
    if char::isAlpha(c) {
463 434
        return scanIdentifier(s);
464 435
    }
465 436
    match c {
466 437
        case '\'' => return scanChar(s),
467 438
        case '"'  => return scanString(s),
491 462
            }
492 463
            if consume(s, '=') {
493 464
                return tok(s, TokenKind::MinusEqual);
494 465
            }
495 466
            // If followed by a digit, scan as a negative number.
496 -
            if let ch = current(s); isDigit(ch) {
467 +
            if let ch = current(s); char::isDigit(ch) {
497 468
                return scanNumber(s);
498 469
            }
499 470
            return tok(s, TokenKind::Minus);
500 471
        }
501 472
        case '+' => {
502 473
            if consume(s, '=') {
503 474
                return tok(s, TokenKind::PlusEqual);
504 475
            }
505 -
            if let ch = current(s); isDigit(ch) {
476 +
            if let ch = current(s); char::isDigit(ch) {
506 477
                return scanNumber(s);
507 478
            }
508 479
            return tok(s, TokenKind::Plus);
509 480
        }
510 481
        case '/' => {
582 553
            }
583 554
            return tok(s, TokenKind::Gt);
584 555
        }
585 556
        case '@' => {
586 557
            // Scan `@identifier` as a single token.
587 -
            while let ch = current(s); isAlpha(ch) {
588 -
                advance(s);
589 -
            }
590 -
            // Must have at least one character after `@`.
591 -
            if s.cursor - s.token <= 1 {
558 +
            let ch = current(s) else {
592 559
                return invalid(s.token, "expected identifier after `@`");
560 +
            };
561 +
            if not char::isAlpha(ch) and ch <> '_' {
562 +
                return invalid(s.token, "expected identifier after `@`");
563 +
            }
564 +
            while let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) {
565 +
                advance(s);
593 566
            }
594 567
            let name = &s.source[s.token..s.cursor];
595 568
            return Token {
596 569
                kind: TokenKind::AtIdent,
597 570
                source: strings::intern(s.pool, name),
598 571
                offset: s.token,
599 572
            };
600 573
        }
601 574
        case '_' => {
602 -
            if let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' {
575 +
            if let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) {
603 576
                // This is part of an identifier like `_foo` or `__start`
604 577
                return scanIdentifier(s);
605 578
            }
606 579
            return tok(s, TokenKind::Underscore);
607 580
        }
lib/std/lang/strings.rad +1 -1
8 8
9 9
use std::mem;
10 10
use std::collections::dict;
11 11
12 12
/// Table size.
13 -
constant TABLE_SIZE: u32 = 8192;
13 +
constant TABLE_SIZE: u32 = 32768;
14 14
15 15
/// String interning pool using open-addressed hash table.
16 16
///
17 17
/// Each unique string content is stored only once, allowing pointer equality
18 18
/// to be used instead of content comparison for symbol lookups and module names.
lib/std/tests.rad +99 -0
103 103
    let result: *[u8] = fmt::formatI64(-9223372036854775808, &mut buffer[..]);
104 104
    try testing::expect(result.len == 20);
105 105
    try testing::expectBytesEq(result, "-9223372036854775808");
106 106
}
107 107
108 +
@test fn testParseIntLiteralText() throws (testing::TestError) {
109 +
    let dec = try fmt::parseInt("123") catch {
110 +
        throw testing::TestError::Failed;
111 +
    };
112 +
    try testing::expect(dec.magnitude == 123);
113 +
    try testing::expect(dec.radix == fmt::Radix::Decimal);
114 +
    try testing::expect(not dec.signed);
115 +
    try testing::expect(not dec.negative);
116 +
117 +
    let hex = try fmt::parseInt("-0x2a") catch {
118 +
        throw testing::TestError::Failed;
119 +
    };
120 +
    try testing::expect(hex.magnitude == 42);
121 +
    try testing::expect(hex.radix == fmt::Radix::Hex);
122 +
    try testing::expect(hex.signed);
123 +
    try testing::expect(hex.negative);
124 +
125 +
    let bin = try fmt::parseInt("+0b101") catch {
126 +
        throw testing::TestError::Failed;
127 +
    };
128 +
    try testing::expect(bin.magnitude == 5);
129 +
    try testing::expect(bin.radix == fmt::Radix::Binary);
130 +
    try testing::expect(bin.signed);
131 +
    try testing::expect(not bin.negative);
132 +
}
133 +
134 +
@test fn testDigitFromAscii() throws (testing::TestError) {
135 +
    let zero = fmt::digitFromAscii('0', 10) else throw testing::TestError::Failed;
136 +
    try testing::expect(zero == 0);
137 +
138 +
    let nine = fmt::digitFromAscii('9', 10) else throw testing::TestError::Failed;
139 +
    try testing::expect(nine == 9);
140 +
141 +
    let lower = fmt::digitFromAscii('a', 16) else throw testing::TestError::Failed;
142 +
    try testing::expect(lower == 10);
143 +
144 +
    let lowerF = fmt::digitFromAscii('f', 16) else throw testing::TestError::Failed;
145 +
    try testing::expect(lowerF == 15);
146 +
147 +
    let upper = fmt::digitFromAscii('A', 16) else throw testing::TestError::Failed;
148 +
    try testing::expect(upper == 10);
149 +
150 +
    let upperF = fmt::digitFromAscii('F', 16) else throw testing::TestError::Failed;
151 +
    try testing::expect(upperF == 15);
152 +
153 +
    try testing::expect(fmt::digitFromAscii('g', 16) == nil);
154 +
    try testing::expect(fmt::digitFromAscii('_', 10) == nil);
155 +
}
156 +
157 +
@test fn testParseIntLiteralTextErrors() throws (testing::TestError) {
158 +
    try fmt::parseInt("") catch {
159 +
        return;
160 +
    };
161 +
    throw testing::TestError::Failed;
162 +
}
163 +
164 +
@test fn testParseIntLiteralTextInvalidDigitErrors() throws (testing::TestError) {
165 +
    try fmt::parseInt("0b2") catch {
166 +
        return;
167 +
    };
168 +
    throw testing::TestError::Failed;
169 +
}
170 +
171 +
/// Verify that `fmt::parseInt` reports an error for a decimal literal that
/// does not fit in 64 bits. The input is 2^64, i.e. one more than the largest
/// unsigned 64-bit value (18446744073709551615).
@test fn testParseIntLiteralTextOverflowErrors() throws (testing::TestError) {
172 +
    try fmt::parseInt("18446744073709551616") catch {
173 +
        // Any thrown error counts as success for this test.
        return;
174 +
    };
175 +
    // Reaching this point means the literal was accepted without an error.
    throw testing::TestError::Failed;
176 +
}
177 +
178 +
@test fn testParseCharLiteralText() throws (testing::TestError) {
179 +
    let x = try fmt::parseChar("'x'") catch {
180 +
        throw testing::TestError::Failed;
181 +
    };
182 +
    try testing::expect(x == 'x');
183 +
184 +
    let newline = try fmt::parseChar("'\\n'") catch {
185 +
        throw testing::TestError::Failed;
186 +
    };
187 +
    try testing::expect(newline == '\n');
188 +
}
189 +
190 +
@test fn testParseCharLiteralTextErrors() throws (testing::TestError) {
191 +
    try fmt::parseChar("''") catch {
192 +
        return;
193 +
    };
194 +
    throw testing::TestError::Failed;
195 +
}
196 +
197 +
/// Verify that `fmt::unescapeString` collapses two-character escape sequences
/// into single bytes and returns the number of bytes written.
///
/// The raw input is the five bytes `a`, `\`, `n`, `\`, `0`; the expected
/// output is the three bytes `a`, newline, and NUL.
@test fn testUnescapeString() throws (testing::TestError) {
198 +
    let mut buffer: [u8; 8] = [0; 8];
199 +
    let len = fmt::unescapeString("a\\n\\0", &mut buffer[..]);
200 +
201 +
    try testing::expect(len == 3);
202 +
    try testing::expect(buffer[0] == 'a');
203 +
    try testing::expect(buffer[1] == '\n');
204 +
    try testing::expect(buffer[2] == 0);
205 +
}
206 +
108 207
// mem /////////////////////////////////////////////////////////////////////////
109 208
110 209
@test fn testCopyFullSlice() throws (testing::TestError) {
111 210
    let mut xs: [u8; 3] = [1, 2, 3];
112 211
    let mut ys: [u8; 3] = [4, 5, 6];
scripts/count-lines-no-comments.sh +35 -11
1 1
#!/bin/sh
2 -
# Count non-blank lines in all .rad files, excluding comment lines and tests.
2 +
# Count non-blank, non-comment lines in .rad files, skipping tests.
3 3
4 -
dir="${1:-.}"
5 -
6 -
if [ ! -d "$dir" ]; then
7 -
  echo "Error: Directory '$dir' does not exist"
8 -
  exit 1
4 +
if [ "$#" -eq 0 ]; then
5 +
  set -- .
9 6
fi
10 7
11 -
echo "Counting non-blank, non-comment lines in .rad files in: $dir"
12 -
echo "--------------------------------------------------------------"
8 +
tmpList=$(mktemp)
9 +
tmpFiles=$(mktemp)
10 +
trap 'rm -f "$tmpList" "$tmpFiles"' EXIT HUP INT TERM
11 +
12 +
for input in "$@"; do
13 +
  if [ -d "$input" ]; then
14 +
    find "$input" -type f -name "*.rad" -not -path "*/tests/*" -not -name "tests.rad" >> "$tmpList"
15 +
  elif [ -f "$input" ]; then
16 +
    case "$input" in
17 +
      */tests/*|*/tests.rad|tests.rad)
18 +
        ;;
19 +
      *.rad)
20 +
        printf '%s\n' "$input" >> "$tmpList"
21 +
        ;;
22 +
      *)
23 +
        echo "Error: File '$input' is not a .rad file" >&2
24 +
        exit 1
25 +
        ;;
26 +
    esac
27 +
  else
28 +
    echo "Error: Path '$input' does not exist" >&2
29 +
    exit 1
30 +
  fi
31 +
done
32 +
33 +
echo "Counting non-blank, non-comment lines in .rad files for inputs: $*"
34 +
echo "---------------------------------------------------------------------"
13 35
14 36
total=0
15 -
for file in $(find "$dir" -name "*.rad" -type f -not -path "*/tests/*" -not -name "tests.rad" | sort); do
37 +
sort -u "$tmpList" > "$tmpFiles"
38 +
39 +
while IFS= read -r file; do
16 40
  if [ -f "$file" ]; then
17 41
    count=$(grep -v '^[[:space:]]*$' "$file" | grep -v '^[[:space:]]*//' | wc -l)
18 42
    total=$((total + count))
19 43
    printf "%6d  %s\n" "$count" "$file"
20 44
  fi
21 -
done
45 +
done < "$tmpFiles"
22 46
23 -
echo "--------------------------------------------------------------"
47 +
echo "---------------------------------------------------------------------"
24 48
printf "%6d  TOTAL\n" "$total"
scripts/count-lines.sh +35 -11
1 1
#!/bin/sh
2 -
# Count non-blank lines in all .rad files, skipping tests.
2 +
# Count non-blank lines in .rad files, skipping tests.
3 3
4 -
dir="${1:-.}"
5 -
6 -
if [ ! -d "$dir" ]; then
7 -
  echo "Error: Directory '$dir' does not exist"
8 -
  exit 1
4 +
if [ "$#" -eq 0 ]; then
5 +
  set -- .
9 6
fi
10 7
11 -
echo "Counting non-blank lines in .rad files in: $dir"
12 -
echo "-------------------------------------------"
8 +
tmpList=$(mktemp)
9 +
tmpFiles=$(mktemp)
10 +
trap 'rm -f "$tmpList" "$tmpFiles"' EXIT HUP INT TERM
11 +
12 +
for input in "$@"; do
13 +
  if [ -d "$input" ]; then
14 +
    find "$input" -type f -name "*.rad" -not -path "*/tests/*" -not -name "tests.rad" >> "$tmpList"
15 +
  elif [ -f "$input" ]; then
16 +
    case "$input" in
17 +
      */tests/*|*/tests.rad|tests.rad)
18 +
        ;;
19 +
      *.rad)
20 +
        printf '%s\n' "$input" >> "$tmpList"
21 +
        ;;
22 +
      *)
23 +
        echo "Error: File '$input' is not a .rad file" >&2
24 +
        exit 1
25 +
        ;;
26 +
    esac
27 +
  else
28 +
    echo "Error: Path '$input' does not exist" >&2
29 +
    exit 1
30 +
  fi
31 +
done
32 +
33 +
echo "Counting non-blank lines in .rad files for inputs: $*"
34 +
echo "------------------------------------------------------"
13 35
14 36
total=0
15 -
for file in $(find "$dir" -name "*.rad" -type f -not -path "*/tests/*" -not -name "tests.rad" | sort); do
37 +
sort -u "$tmpList" > "$tmpFiles"
38 +
39 +
while IFS= read -r file; do
16 40
  if [ -f "$file" ]; then
17 41
    count=$(grep -v '^[[:space:]]*$' "$file" | wc -l)
18 42
    total=$((total + count))
19 43
    printf "%6d  %s\n" "$count" "$file"
20 44
  fi
21 -
done
45 +
done < "$tmpFiles"
22 46
23 -
echo "-------------------------------------------"
47 +
echo "------------------------------------------------------"
24 48
printf "%6d  TOTAL\n" "$total"
std.lib +5 -0
1 1
lib/std.rad
2 +
lib/std/char.rad
2 3
lib/std/fmt.rad
3 4
lib/std/mem.rad
4 5
lib/std/vec.rad
5 6
lib/std/io.rad
6 7
lib/std/intrinsics.rad
13 14
lib/std/arch/rv64/encode.rad
14 15
lib/std/arch/rv64/decode.rad
15 16
lib/std/arch/rv64/emit.rad
16 17
lib/std/arch/rv64/isel.rad
17 18
lib/std/arch/rv64/printer.rad
19 +
lib/std/arch/rv64/asm.rad
20 +
lib/std/arch/rv64/asm/scanner.rad
21 +
lib/std/arch/rv64/asm/parser.rad
22 +
lib/std/arch/rv64/asm/emit.rad
18 23
lib/std/lang.rad
19 24
lib/std/lang/alloc.rad
20 25
lib/std/lang/strings.rad
21 26
lib/std/lang/sexpr.rad
22 27
lib/std/lang/ast.rad
std.lib.test +3 -0
1 1
lib/std/testing.rad
2 2
lib/std/tests.rad
3 +
lib/std/char/tests.rad
3 4
lib/std/arch/rv64/tests.rad
5 +
lib/std/arch/rv64/asm/tests.rad
6 +
lib/std/arch/rv64/asm/scanner/tests.rad
4 7
lib/std/lang/alloc/tests.rad
5 8
lib/std/lang/parser/tests.rad
6 9
lib/std/lang/module/tests.rad
7 10
lib/std/lang/scanner/tests.rad
8 11
lib/std/lang/resolver/tests.rad
test/run +13 -6
1 1
#!/bin/sh
2 2
# Run binary tests.
3 -
# Usage: test/run [<test.rad>...]
3 +
# Usage: test/run [<test.rad|test.ras>...]
4 4
#
5 -
# If no arguments are provided, runs all tests in `test/tests/`.
5 +
# If no arguments are provided, runs all `.rad` and `.ras` tests in
6 +
# `test/tests/`.
6 7
#
7 8
# For each test:
8 9
#   - If a `.ril` file exists alongside it, the IL output is checked
9 10
#     against it via the runner binary.
10 11
#   - If `//! returns: N` appears in the file, the test is compiled to
11 12
#     a binary and executed; the exit code must match N.
12 13
13 14
RUNNER="test/runner.rv64"
14 15
TEST_DIR="test/tests"
15 16
EMU="${RAD_EMULATOR:-emulator} -stack-size=1024 -run"
16 -
EMU_RUN="${RAD_EMULATOR:-emulator} -run"
17 +
EMU_RUN="${RAD_EMULATOR:-emulator} -no-jit -run"
17 18
18 19
if [ ! -f "$RUNNER" ]; then
19 20
  echo "error: runner binary not found: $RUNNER" >&2
20 21
  echo "hint: run 'make test' first" >&2
21 22
  exit 1
24 25
# Disable core dumps for tests.
25 26
ulimit -c 0
26 27
27 28
# Collect tests.
28 29
if [ $# -eq 0 ]; then
29 -
  tests=$(find "$TEST_DIR" -name '*.rad' | sort)
30 +
  tests=$(find "$TEST_DIR" \( -name '*.rad' -o -name '*.ras' \) | sort)
30 31
else
31 32
  tests="$*"
32 33
fi
33 34
34 35
if [ -z "$tests" ]; then
38 39
39 40
passed=0
40 41
failed=0
41 42
42 43
for test in $tests; do
43 -
  ril="${test%.rad}.ril"
44 -
  bin="${test%.rad}.rv64"
44 +
  case "$test" in
45 +
    *.rad) base="${test%.rad}" ;;
46 +
    *.ras) base="${test%.ras}" ;;
47 +
    *) base="$test" ;;
48 +
  esac
49 +
50 +
  ril="${base}.ril"
51 +
  bin="${base}.rv64"
45 52
46 53
  # IL check: run the runner if a .ril file exists.
47 54
  if [ -f "$ril" ]; then
48 55
    if $EMU "$RUNNER" -- "$test"; then
49 56
      passed=$((passed + 1))
test/runner.rad +80 -3
1 -
//! IL snapshot test runner.
1 +
//! IL snapshot test runner and `.ras` asm helper.
2 2
//!
3 3
//! Given a `.rad` source file, lowers it to IL and compares the output
4 -
//! against the corresponding `.ril` snapshot file. Called by `test/run`
5 -
//! for every test that has a `.ril` file.
4 +
//! against the corresponding `.ril` snapshot file. It also supports an
5 +
//! `assemble <input.ras> <output.rv64>` subcommand used by `bin-test`.
6 6
7 7
use std::io;
8 8
use std::mem;
9 9
use std::sys;
10 10
use std::sys::unix;
14 14
use std::lang::parser;
15 15
use std::lang::scanner;
16 16
use std::lang::resolver;
17 17
use std::lang::strings;
18 18
use std::lang::lower;
19 +
use std::arch::rv64;
20 +
use std::arch::rv64::asm;
19 21
20 22
/// Buffer size for reading source files (8 KB).
21 23
constant SOURCE_BUF_SIZE: u32 = 8192;
22 24
/// Buffer size for reading expected IL files (32 KB).
23 25
constant EXPECTED_BUF_SIZE: u32 = 32768;
37 39
38 40
/// Maximum number of AST nodes per test file.
39 41
constant MAX_NODE_DATA: u32 = 4096;
40 42
/// Maximum number of resolver errors per test file.
41 43
constant MAX_ERRORS: u32 = 16;
44 +
/// Maximum number of text words in a `.ras` test binary.
45 +
constant ASM_TEXT_CAPACITY: u32 = 256;
46 +
/// Maximum number of data bytes in a `.ras` test binary.
47 +
constant ASM_DATA_CAPACITY: u32 = 1024;
48 +
constant RO_DATA_EXT: *[u8] = ".ro.data";
42 49
43 50
// Static storage for large buffers to avoid stack overflow.
44 51
// Tests run serially so sharing these is safe.
45 52
static SOURCE_BUF: [u8; SOURCE_BUF_SIZE] = undefined;
46 53
static EXPECTED_BUF: [u8; EXPECTED_BUF_SIZE] = undefined;
49 56
static IL_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined;
50 57
static PRINT_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined;
51 58
static RESOLVER_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined;
52 59
static NODE_DATA_STORAGE: [resolver::NodeData; MAX_NODE_DATA] = undefined;
53 60
static ERROR_STORAGE: [resolver::Error; MAX_ERRORS] = undefined;
61 +
static ASM_TEXT_STORAGE: [u32; ASM_TEXT_CAPACITY] = undefined;
62 +
static ASM_DATA_STORAGE: [u8; ASM_DATA_CAPACITY] = undefined;
54 63
55 64
/// Strip a `//` comment from a line, preserving `//` inside quoted strings.
56 65
/// Returns the content before the comment, trimmed of trailing whitespace.
57 66
fn stripLine(line: *[u8]) -> *[u8] {
58 67
    let mut end = line.len;
161 170
    set buf[len] = 0;
162 171
163 172
    return &buf[..len];
164 173
}
165 174
175 +
/// Build the path `basePath` followed by `ext` inside `buf`.
///
/// A zero byte is written after the concatenated path, and the returned slice
/// covers only the path bytes (the terminator is not included in its length).
/// Returns `nil` when `buf` cannot hold both parts plus the terminator.
fn appendPathExt(basePath: *[u8], ext: *[u8], buf: *mut [u8]) -> ?*[u8] {
176 +
    // The extra `1` reserves room for the trailing zero byte written below.
    if basePath.len + ext.len + 1 > buf.len {
177 +
        return nil;
178 +
    }
179 +
    let mut pos: u32 = 0;
180 +
181 +
    // `pos` advances by whatever `mem::copy` returns (presumably the number
    // of bytes copied -- confirm against `std::mem`).
    set pos += try! mem::copy(&mut buf[pos..], basePath);
182 +
    set pos += try! mem::copy(&mut buf[pos..], ext);
183 +
    set buf[pos] = 0;
184 +
185 +
    return &buf[..pos];
186 +
}
187 +
188 +
/// Write a slice of 32-bit words to the file at `path`.
///
/// The words are reinterpreted in place as a byte slice of length
/// `code.len * 4` and handed to `unix::writeFile`; the result of that call is
/// returned unchanged.
fn writeCode(code: *[u32], path: *[u8]) -> bool {
189 +
    let bytes = @sliceOf(code.ptr as *u8, code.len * 4);
190 +
    return unix::writeFile(path, bytes);
191 +
}
192 +
193 +
/// Assemble the `.ras` file at `sourcePath` and write the result to disk.
///
/// Steps, in order:
///   1. Read the source into the shared `SOURCE_BUF`.
///   2. Derive the data file path by appending `RO_DATA_EXT` to `outputPath`.
///   3. Run `asm::assemble` using the static text/data storage, an arena over
///      `AST_ARENA_STORAGE`, the shared `STRING_POOL`, and `rv64::RO_DATA_BASE`.
///   4. Write `program.text` to `outputPath` and `program.data` to the derived
///      data path.
///
/// Returns `true` when every step succeeds. On the first failure an error
/// message is printed via `io::printError` and `false` is returned; files
/// already written by an earlier step are left in place.
fn assembleBinary(sourcePath: *[u8], outputPath: *[u8]) -> bool {
194 +
    let mut roDataPathBuf: [u8; MAX_PATH_LEN] = undefined;
195 +
    let source = unix::readFile(sourcePath, &mut SOURCE_BUF[..]) else {
196 +
        io::printError("error: could not read source: ");
197 +
        io::printError(sourcePath);
198 +
        io::printError("\n");
199 +
        return false;
200 +
    };
201 +
    let roDataPath = appendPathExt(outputPath, RO_DATA_EXT, &mut roDataPathBuf[..]) else {
202 +
        io::printError("error: output path too long\n");
203 +
        return false;
204 +
    };
205 +
206 +
    let mut arena = alloc::new(&mut AST_ARENA_STORAGE[..]);
207 +
    let program = try asm::assemble(
208 +
        asm::scanner::SourceKind::File { path: sourcePath },
209 +
        source,
210 +
        &mut ASM_TEXT_STORAGE[..],
211 +
        &mut ASM_DATA_STORAGE[..],
212 +
        &mut arena,
213 +
        &mut STRING_POOL,
214 +
        rv64::RO_DATA_BASE
215 +
    ) catch {
216 +
        io::printError("error: assembly failed: ");
217 +
        io::printError(sourcePath);
218 +
        io::printError("\n");
219 +
        return false;
220 +
    };
221 +
    if not writeCode(program.text, outputPath) {
222 +
        io::printError("error: could not write output: ");
223 +
        io::printError(outputPath);
224 +
        io::printError("\n");
225 +
        return false;
226 +
    }
227 +
    if not unix::writeFile(roDataPath, program.data) {
228 +
        io::printError("error: could not write data: ");
229 +
        io::printError(roDataPath);
230 +
        io::printError("\n");
231 +
        return false;
232 +
    }
233 +
    return true;
234 +
}
235 +
166 236
/// Run a single IL snapshot test case. Returns `true` on success.
167 237
fn runTest(sourcePath: *[u8]) -> bool {
168 238
    // Path buffer.
169 239
    let mut rilPathBuf: [u8; MAX_PATH_LEN] = undefined;
170 240
    let mut pkgScope: resolver::Scope = undefined;
241 311
242 312
/// Run a single test specified as an argument.
243 313
@default fn main(env: *sys::Env) -> i32 {
244 314
    let args = env.args;
245 315
316 +
    if args.len == 4 and mem::eq(args[1], "assemble") {
317 +
        if assembleBinary(args[2], args[3]) {
318 +
            return 0;
319 +
        } else {
320 +
            return 1;
321 +
        }
322 +
    }
246 323
    if args.len <> 2 {
247 324
        io::printError("error: expected test file path as argument");
248 325
        return 1;
249 326
    }
250 327
    let sourcePath = args[1];
test/tests/asm.basic.text.program.ras added +13 -0
1 +
//! returns: 42
2 +
3 +
.text;
4 +
@entry
5 +
	addi   %a0    %zero  42;
6 +
	sd     %a0    8(%sp);
7 +
	beq    %a0    %zero  @fail;
8 +
	li     %a7    93;
9 +
	ecall;
10 +
@fail
11 +
	li     %a0    1;
12 +
	li     %a7    93;
13 +
	ecall;
test/tests/asm.branch.comparisons.ras added +40 -0
1 +
//! returns: 17
2 +
3 +
.text;
4 +
@entry
5 +
	li     %a0    0;
6 +
	li     %t0    5;
7 +
	li     %t1    5;
8 +
	beq    %t0    %t1    @beqOk;
9 +
	j      @fail;
10 +
@beqOk
11 +
	bne    %t0    %t1    @fail;
12 +
	li     %t2    -1;
13 +
	li     %t3    1;
14 +
	blt    %t2    %t3    @bltOk;
15 +
	j      @fail;
16 +
@bltOk
17 +
	bgt    %t3    %t2    @bgtOk;
18 +
	j      @fail;
19 +
@bgtOk
20 +
	bge    %t3    %t2    @bgeOk;
21 +
	j      @fail;
22 +
@bgeOk
23 +
	ble    %t2    %t3    @bleOk;
24 +
	j      @fail;
25 +
@bleOk
26 +
	li     %t4    1;
27 +
	li     %t5    2;
28 +
	bltu   %t4    %t5    @bltuOk;
29 +
	j      @fail;
30 +
@bltuOk
31 +
	bgeu   %t5    %t4    @done;
32 +
	j      @fail;
33 +
@done
34 +
	li     %a0    17;
35 +
	li     %a7    93;
36 +
	ecall;
37 +
@fail
38 +
	li     %a0    1;
39 +
	li     %a7    93;
40 +
	ecall;
test/tests/asm.call.return.flow.ras added +17 -0
1 +
//! returns: 29
2 +
3 +
.text;
4 +
@entry
5 +
	call   @helper;
6 +
	j      @exit;
7 +
@helper
8 +
	mv     %t0    %ra;
9 +
	jal    %ra    @leaf;
10 +
	mv     %ra    %t0;
11 +
	ret;
12 +
@leaf
13 +
	li     %a0    29;
14 +
	ret;
15 +
@exit
16 +
	li     %a7    93;
17 +
	ecall;
test/tests/asm.compare.set.logic.ras added +29 -0
1 +
//! returns: 36
2 +
3 +
.text;
4 +
@entry
5 +
	nop;
6 +
	li     %t0    -1;
7 +
	li     %t1    1;
8 +
	slt    %a0    %t0    %t1;
9 +
	sltu   %t2    %t1    %t0;
10 +
	add    %a0    %a0    %t2;
11 +
	slti   %t3    %t0    0;
12 +
	add    %a0    %a0    %t3;
13 +
	sltiu  %t4    %zero  1;
14 +
	add    %a0    %a0    %t4;
15 +
	seqz   %t5    %zero;
16 +
	add    %a0    %a0    %t5;
17 +
	snez   %t6    %t1;
18 +
	add    %a0    %a0    %t6;
19 +
	neg    %a1    %t1;
20 +
	slti   %a1    %a1    0;
21 +
	add    %a0    %a0    %a1;
22 +
	xori   %a2    %zero  7;
23 +
	ori    %a2    %a2    8;
24 +
	andi   %a2    %a2    15;
25 +
	add    %a0    %a0    %a2;
26 +
	xor    %a3    %a2    %t6;
27 +
	add    %a0    %a0    %a3;
28 +
	li     %a7    93;
29 +
	ecall;
test/tests/asm.csr.system.instructions.ras added +16 -0
1 +
//! returns: 88
2 +
3 +
.text;
4 +
@entry
5 +
	j      @exit;
6 +
	csrr   %a0    mhartid;
7 +
	csrw   mtvec %a0;
8 +
	csrrw  %t0    mscratch %t1;
9 +
	csrsi  mstatus 8;
10 +
	csrc   mip   %t0;
11 +
	wfi;
12 +
	mret;
13 +
@exit
14 +
	li     %a0    88;
15 +
	li     %a7    93;
16 +
	ecall;
test/tests/asm.data.directives.ras added +24 -0
1 +
//! returns: 139
2 +
3 +
.constant dataBase 0x10000;
4 +
.data;
5 +
.byte 1, 'A';
6 +
.word 0x11223344;
7 +
.dword 0x1122334455667788;
8 +
.ascii "hi";
9 +
.ascii "x";
10 +
.align 4;
11 +
.text;
12 +
@entry
13 +
	li     %t0    dataBase;
14 +
	lbu    %a0    0(%t0);
15 +
	lbu    %t1    1(%t0);
16 +
	add    %a0    %a0    %t1;
17 +
	lbu    %t1    14(%t0);
18 +
	add    %a0    %a0    %t1;
19 +
	lbu    %t1    15(%t0);
20 +
	add    %a0    %a0    %t1;
21 +
	lbu    %t1    16(%t0);
22 +
	add    %a0    %a0    %t1;
23 +
	li     %a7    93;
24 +
	ecall;
test/tests/asm.data.symbol.fixup.ras added +16 -0
1 +
//! returns: 2
2 +
3 +
.constant dataBase 0x10000;
4 +
.data;
5 +
.byte 0;
6 +
@here
7 +
.byte 1;
8 +
.word @here;
9 +
.text;
10 +
@entry
11 +
	li     %t0    dataBase;
12 +
	lbu    %a0    1(%t0);
13 +
	lbu    %t1    2(%t0);
14 +
	add    %a0    %a0    %t1;
15 +
	li     %a7    93;
16 +
	ecall;
test/tests/asm.directive.boundary.values.ras added +40 -0
1 +
//! returns: 97
2 +
3 +
.constant dataBase 0x10000;
4 +
.data;
5 +
.word -2147483648;
6 +
.word 2147483647;
7 +
.dword 0x1122334455667788;
8 +
.byte 255;
9 +
.text;
10 +
@entry
11 +
	j      @exit;
12 +
.align 8;
13 +
ret;
14 +
@exit
15 +
	li     %t0    dataBase;
16 +
	lbu    %a0    3(%t0);
17 +
	lbu    %t1    4(%t0);
18 +
	add    %a0    %a0    %t1;
19 +
	lbu    %t1    7(%t0);
20 +
	add    %a0    %a0    %t1;
21 +
	lbu    %t1    8(%t0);
22 +
	add    %a0    %a0    %t1;
23 +
	lbu    %t1    9(%t0);
24 +
	add    %a0    %a0    %t1;
25 +
	lbu    %t1    10(%t0);
26 +
	add    %a0    %a0    %t1;
27 +
	lbu    %t1    11(%t0);
28 +
	add    %a0    %a0    %t1;
29 +
	lbu    %t1    12(%t0);
30 +
	add    %a0    %a0    %t1;
31 +
	lbu    %t1    13(%t0);
32 +
	add    %a0    %a0    %t1;
33 +
	lbu    %t1    14(%t0);
34 +
	add    %a0    %a0    %t1;
35 +
	lbu    %t1    15(%t0);
36 +
	add    %a0    %a0    %t1;
37 +
	lbu    %t1    16(%t0);
38 +
	add    %a0    %a0    %t1;
39 +
	li     %a7    93;
40 +
	ecall;
test/tests/asm.global.scoped.symbols.ras added +16 -0
1 +
//! returns: 7
2 +
3 +
.export @kernel::main;
4 +
.text;
5 +
@entry
6 +
	j      @kernel::main;
7 +
	li     %a0    1;
8 +
	li     %a7    93;
9 +
	ecall;
10 +
@kernel::main
11 +
	li     %a0    7;
12 +
	li     %a7    93;
13 +
	ecall;
14 +
.data;
15 +
@data::sym
16 +
.byte 1;
test/tests/asm.instruction.matrix.alu.ras added +29 -0
1 +
//! returns: 11
2 +
3 +
.text;
4 +
@entry
5 +
	li     %a0    0;
6 +
	li     %a1    6;
7 +
	li     %a2    3;
8 +
	and    %t0    %a1    %a2;
9 +
	add    %a0    %a0    %t0;
10 +
	li     %a3    4;
11 +
	li     %a4    1;
12 +
	or     %t1    %a3    %a4;
13 +
	add    %a0    %a0    %t1;
14 +
	li     %t2    0;
15 +
	not    %t3    %t2;
16 +
	andi   %t3    %t3    1;
17 +
	add    %a0    %a0    %t3;
18 +
	li     %s1    3;
19 +
	mv     %s2    %s1;
20 +
	andi   %s2    %s2    1;
21 +
	add    %a0    %a0    %s2;
22 +
	li     %t4    0;
23 +
	seqz   %t5    %t4;
24 +
	add    %a0    %a0    %t5;
25 +
	li     %t6    9;
26 +
	snez   %a6    %t6;
27 +
	add    %a0    %a0    %a6;
28 +
	li     %a7    93;
29 +
	ecall;
test/tests/asm.instruction.matrix.mem.control.ras added +21 -0
1 +
//! returns: 12
2 +
3 +
.text;
4 +
@entry
5 +
	addi   %sp    %sp    -32;
6 +
	li     %t0    7;
7 +
	sb     %t0    1(%sp);
8 +
	lb     %a0    1(%sp);
9 +
	sd     %a0    8(%sp);
10 +
	lui    %t2    0;
11 +
	auipc  %t1    0;
12 +
	jalr   %ra    %t1    16;
13 +
	li     %a0    1;
14 +
	j      @exit;
15 +
@helper
16 +
	ld     %a0    8(%sp);
17 +
	addi   %a0    %a0    5;
18 +
	addi   %sp    %sp    32;
19 +
@exit
20 +
	li     %a7    93;
21 +
	ecall;
test/tests/asm.instruction.matrix.system.ras added +11 -0
1 +
//! returns: 77
2 +
3 +
.text;
4 +
@entry
5 +
	j      @exit;
6 +
	ecall;
7 +
	ebreak;
8 +
@exit
9 +
	li     %a0    77;
10 +
	li     %a7    93;
11 +
	ecall;
test/tests/asm.label.fixups.ras added +16 -0
1 +
//! returns: 3
2 +
3 +
.text;
4 +
@entry
5 +
	li     %a0    3;
6 +
	li     %a1    0;
7 +
@loop
8 +
	beqz   %a0    @done;
9 +
	addi   %a1    %a1    1;
10 +
	addi   %a0    %a0    -1;
11 +
	bnez   %a0    @loop;
12 +
	j      @done;
13 +
@done
14 +
	mv     %a0    %a1;
15 +
	li     %a7    93;
16 +
	ecall;
test/tests/asm.li.expressions.ras added +12 -0
1 +
//! returns: 162
2 +
3 +
.constant PAGE 4096;
4 +
.constant VALUE 0x12340000 + 0x5678;
5 +
.text;
6 +
@entry
7 +
	li     %a0    42;
8 +
	li     %a1    VALUE;
9 +
	andi   %a1    %a1    255;
10 +
	add    %a0    %a0    %a1;
11 +
	li     %a7    93;
12 +
	ecall;
test/tests/asm.link.rad added +7 -0
1 +
//! returns: 42
2 +
3 +
fn asmAddOne(x: i32) -> i32;
4 +
5 +
@default fn main() -> i32 {
6 +
    return asmAddOne(41);
7 +
}
test/tests/asm.link.ras added +5 -0
1 +
.text;
2 +
.export @"asm.link::asmAddOne";
3 +
@"asm.link::asmAddOne"
4 +
	addi %a0 %a0 1;
5 +
	ret;
test/tests/asm.load.store.widths.ras added +33 -0
1 +
//! returns: 72
2 +
3 +
.text;
4 +
@entry
5 +
	addi   %sp    %sp    -32;
6 +
	li     %t0    0x80;
7 +
	sb     %t0    0(%sp);
8 +
	lb     %t1    0(%sp);
9 +
	slti   %a0    %t1    0;
10 +
	lbu    %t2    0(%sp);
11 +
	srli   %t2    %t2    7;
12 +
	add    %a0    %a0    %t2;
13 +
	li     %t0    0x8000;
14 +
	sh     %t0    2(%sp);
15 +
	lh     %t3    2(%sp);
16 +
	slti   %t3    %t3    0;
17 +
	add    %a0    %a0    %t3;
18 +
	lhu    %t4    2(%sp);
19 +
	srli   %t4    %t4    15;
20 +
	add    %a0    %a0    %t4;
21 +
	li     %t0    17;
22 +
	sw     %t0    8(%sp);
23 +
	lw     %t5    8(%sp);
24 +
	add    %a0    %a0    %t5;
25 +
	lwu    %t6    8(%sp);
26 +
	add    %a0    %a0    %t6;
27 +
	li     %t0    34;
28 +
	sd     %t0    16(%sp);
29 +
	ld     %a1    16(%sp);
30 +
	add    %a0    %a0    %a1;
31 +
	addi   %sp    %sp    32;
32 +
	li     %a7    93;
33 +
	ecall;
test/tests/asm.mul.div.rem.ras added +33 -0
1 +
//! returns: 4
2 +
3 +
.text;
4 +
@entry
5 +
	li     %t0    20;
6 +
	li     %t1    6;
7 +
	mul    %a0    %t0    %t1;
8 +
	rem    %t2    %t0    %t1;
9 +
	add    %a0    %a0    %t2;
10 +
	div    %t3    %t0    %t1;
11 +
	add    %a0    %a0    %t3;
12 +
	remu   %t4    %t0    %t1;
13 +
	add    %a0    %a0    %t4;
14 +
	divu   %t5    %t0    %t1;
15 +
	add    %a0    %a0    %t5;
16 +
	mulw   %t6    %t0    %t1;
17 +
	add    %a0    %a0    %t6;
18 +
	divw   %a1    %t0    %t1;
19 +
	add    %a0    %a0    %a1;
20 +
	divuw  %a2    %t0    %t1;
21 +
	add    %a0    %a0    %a2;
22 +
	remw   %a3    %t0    %t1;
23 +
	add    %a0    %a0    %a3;
24 +
	remuw  %a4    %t0    %t1;
25 +
	add    %a0    %a0    %a4;
26 +
	mulh   %a5    %t0    %t1;
27 +
	add    %a0    %a0    %a5;
28 +
	mulhu  %a6    %t0    %t1;
29 +
	add    %a0    %a0    %a6;
30 +
	mulhsu %a7    %t0    %t1;
31 +
	add    %a0    %a0    %a7;
32 +
	li     %a7    93;
33 +
	ecall;
test/tests/asm.rodata.prefix.rad added +14 -0
1 +
//! returns: 0
2 +
3 +
fn asmDataValue() -> i32;
4 +
5 +
@default fn main() -> i32 {
6 +
    let s = "hello";
7 +
8 +
    assert s.len == 5;
9 +
    assert s[0] == 'h' as u8;
10 +
    assert s[4] == 'o' as u8;
11 +
    assert asmDataValue() == 41;
12 +
13 +
    return 0;
14 +
}
test/tests/asm.rodata.prefix.ras added +10 -0
1 +
.text;
2 +
.export @"asm.rodata.prefix::asmDataValue";
3 +
@"asm.rodata.prefix::asmDataValue"
4 +
    la %t0 @value;
5 +
    lbu %a0 0(%t0);
6 +
    ret;
7 +
8 +
.data;
9 +
@value
10 +
    .byte 41;
test/tests/asm.scoped.symbols.la.ras added +12 -0
1 +
//! returns: 41
2 +
3 +
.text;
4 +
@entry
5 +
	la     %t0    @data::sym;
6 +
	addi   %a0    %t0    40;
7 +
	li     %a7    93;
8 +
	ecall;
9 +
.data;
10 +
.byte 0;
11 +
@data::sym
12 +
.byte 1;
test/tests/asm.scoped.symbols.tail.ras added +12 -0
1 +
//! returns: 9
2 +
3 +
.text;
4 +
@entry
5 +
	tail   @kernel::main;
6 +
	li     %a0    1;
7 +
	li     %a7    93;
8 +
	ecall;
9 +
@kernel::main
10 +
	li     %a0    9;
11 +
	li     %a7    93;
12 +
	ecall;
test/tests/asm.section.switching.ras added +15 -0
1 +
//! returns: 3
2 +
3 +
.data;
4 +
@msg
5 +
.ascii "ok";
6 +
.text;
7 +
@entry
8 +
	jal    %ra    @done;
9 +
	li     %a0    1;
10 +
	li     %a7    93;
11 +
	ecall;
12 +
@done
13 +
	li     %a0    3;
14 +
	li     %a7    93;
15 +
	ecall;
test/tests/asm.space.constant.expressions.ras added +20 -0
1 +
//! returns: 12
2 +
3 +
.constant PAGE 4096;
4 +
.constant COUNT 8;
5 +
.constant SPACE PAGE / 1024;
6 +
.constant WORD (COUNT - 4) * 2;
7 +
.constant BYTE PAGE / 1024;
8 +
.constant dataBase 0x10000;
9 +
.data;
10 +
.space SPACE;
11 +
.word WORD;
12 +
.byte BYTE;
13 +
.text;
14 +
@entry
15 +
	li     %t0    dataBase;
16 +
	lbu    %a0    4(%t0);
17 +
	lbu    %t1    8(%t0);
18 +
	add    %a0    %a0    %t1;
19 +
	li     %a7    93;
20 +
	ecall;
test/tests/asm.string.directive.lists.ras added +18 -0
1 +
//! returns: 180
2 +
3 +
.constant dataBase 0x10000;
4 +
.data;
5 +
.ascii "a", "b";
6 +
.ascii "x", "y";
7 +
.text;
8 +
@entry
9 +
	li     %t0    dataBase;
10 +
	lbu    %a0    0(%t0);
11 +
	lbu    %t1    1(%t0);
12 +
	add    %a0    %a0    %t1;
13 +
	lbu    %t1    2(%t0);
14 +
	add    %a0    %a0    %t1;
15 +
	lbu    %t1    3(%t0);
16 +
	add    %a0    %a0    %t1;
17 +
	li     %a7    93;
18 +
	ecall;
test/tests/asm.word.dword.constants.ras added +20 -0
1 +
//! returns: 50
2 +
3 +
.constant WORD 0x11223344;
4 +
.constant DWORD 0x55667788;
5 +
.constant dataBase 0x10000;
6 +
.data;
7 +
.word WORD;
8 +
.dword DWORD;
9 +
.text;
10 +
@entry
11 +
	li     %t0    dataBase;
12 +
	lbu    %a0    0(%t0);
13 +
	lbu    %t1    3(%t0);
14 +
	add    %a0    %a0    %t1;
15 +
	lbu    %t1    4(%t0);
16 +
	add    %a0    %a0    %t1;
17 +
	lbu    %t1    7(%t0);
18 +
	add    %a0    %a0    %t1;
19 +
	li     %a7    93;
20 +
	ecall;
test/tests/asm.word.shift.ops.ras added +38 -0
1 +
//! returns: 101
2 +
3 +
.text;
4 +
@entry
5 +
	li     %t0    1;
6 +
	slli   %t1    %t0    5;
7 +
	slliw  %t2    %t0    4;
8 +
	sllw   %t3    %t2    %t0;
9 +
	sll    %t4    %t0    %t0;
10 +
	li     %t5    -8;
11 +
	srai   %t6    %t5    2;
12 +
	sraiw  %a1    %t5    1;
13 +
	sraw   %a2    %t5    %t0;
14 +
	li     %a3    64;
15 +
	srli   %a3    %a3    5;
16 +
	li     %a4    64;
17 +
	srliw  %a4    %a4    4;
18 +
	srlw   %a5    %a4    %t0;
19 +
	srl    %a6    %a3    %t0;
20 +
	addiw  %a0    %zero  7;
21 +
	addw   %a0    %a0    %t0;
22 +
	subw   %a0    %a0    %t0;
23 +
	add    %a0    %a0    %t4;
24 +
	add    %a0    %a0    %a3;
25 +
	add    %a0    %a0    %a4;
26 +
	add    %a0    %a0    %a5;
27 +
	add    %a0    %a0    %a6;
28 +
	slti   %t6    %t6    0;
29 +
	add    %a0    %a0    %t6;
30 +
	slti   %a1    %a1    0;
31 +
	add    %a0    %a0    %a1;
32 +
	slti   %a2    %a2    0;
33 +
	add    %a0    %a0    %a2;
34 +
	add    %a0    %a0    %t1;
35 +
	add    %a0    %a0    %t2;
36 +
	add    %a0    %a0    %t3;
37 +
	li     %a7    93;
38 +
	ecall;
vim/ras.vim added +63 -0
1 +
" ras.vim
2 +
" Syntax for Radiance Assembly (.ras) files
3 +
"
4 +
if exists("b:current_syntax")
5 +
  finish
6 +
endif
7 +
8 +
syntax clear
9 +
10 +
" Comments
11 +
syntax match rasComment "//.*$"
12 +
syntax keyword rasTodo TODO FIXME contained containedin=rasComment
13 +
14 +
" Directives
15 +
" `.export` is the directive used by the assembly sources; `.global` stays in the list so nothing that was highlighted before stops being highlighted.
syntax match rasDirective "\.\%(align\|ascii\|byte\|constant\|data\|dword\|export\|global\|space\|text\|word\)\>"
16 +
17 +
" Labels
18 +
" A label is `@` followed by either a bare, optionally `::`-scoped identifier (`@kernel::main`) or a quoted name (`@"asm.link::asmAddOne"`). The match starts at `@`, so it wins over the string region that would otherwise start at the quote.
syntax match rasLabel +@\%("[^"]*"\|[A-Za-z_][A-Za-z0-9_]*\%(::[A-Za-z_][A-Za-z0-9_]*\)*\)+
19 +
20 +
" Mnemonics
21 +
syntax keyword rasMnemonic add addi addiw addw and andi auipc
22 +
syntax keyword rasMnemonic beq beqz bge bgeu bgt ble blt bltu bne bnez
23 +
syntax keyword rasMnemonic call csrc csrr csrrw csrsi csrw
24 +
syntax keyword rasMnemonic div divu divuw divw
25 +
syntax keyword rasMnemonic ebreak ecall
26 +
syntax keyword rasMnemonic j jal jalr la lb lbu ld lh lhu li lui lw lwu
27 +
syntax keyword rasMnemonic mret mul mulh mulhsu mulhu mulw mv
28 +
syntax keyword rasMnemonic neg nop not or ori
29 +
syntax keyword rasMnemonic rem remu remuw remw ret
30 +
syntax keyword rasMnemonic sb sd seqz sh sll slli slliw sllw slt slti sltiu sltu snez
31 +
syntax keyword rasMnemonic sra srai sraiw sraw srl srli srliw srlw sub subw sw
32 +
syntax keyword rasMnemonic tail wfi xor xori
33 +
34 +
" Registers
35 +
syntax match rasRegister "%\%(a[0-7]\|fp\|gp\|ra\|s[0-9]\|s10\|s11\|sp\|t[0-6]\|tp\|zero\)\>"
36 +
37 +
" CSR names
38 +
syntax keyword rasCsr mcause mepc mhartid mie mip mscratch mstatus mtval mtvec
39 +
40 +
" Numbers and literals
41 +
syntax match rasNumber "\%([+-]\)\=\<0[xX][0-9A-Fa-f]\+\>"
42 +
syntax match rasNumber "\%([+-]\)\=\<\d\+\>"
43 +
syntax region rasString start=+"+ skip=+\\"+ end=+"+
44 +
syntax region rasChar start=+'+ skip=+\\'+ end=+'+
45 +
46 +
" Namespaced symbols and punctuation
47 +
syntax match rasNamespaceSep "::"
48 +
syntax match rasPunct "[(),;:+\-*/]"
49 +
50 +
highlight default link rasComment Comment
51 +
highlight default link rasTodo Todo
52 +
highlight default link rasDirective Special
53 +
highlight default link rasLabel Label
54 +
highlight default link rasMnemonic Keyword
55 +
highlight default link rasRegister Identifier
56 +
highlight default link rasCsr Type
57 +
highlight default link rasNumber Number
58 +
highlight default link rasString String
59 +
highlight default link rasChar Character
60 +
highlight default link rasNamespaceSep Delimiter
61 +
highlight default link rasPunct Delimiter
62 +
63 +
let b:current_syntax = "ras"