radiance · commit

Add RV64 assembly support

364f0a910e6be6ef9be91ffcf1c6f159d8a4ae6eb70c4f403278389c2f7a9c98

This allows modules written in RV64 assembly language to be compiled
and linked together with Radiance modules.

Alexis Sellier committed 2 months ago 1 parent e5efba42

Makefile +13 -2

# Binary Tests

BIN_TEST_DIR := test/tests
# Only tests with `//! returns:` are compiled to binaries and executed.
BIN_TEST_EXE_SRC := $(shell grep -rl '^//! returns:' $(BIN_TEST_DIR))
BIN_TEST_EXE_BIN := $(BIN_TEST_EXE_SRC:.rad=.rv64)
BIN_TEST_RAD_EXE_SRC := $(filter %.rad,$(BIN_TEST_EXE_SRC))
BIN_TEST_RAS_EXE_SRC := $(filter %.ras,$(BIN_TEST_EXE_SRC))
BIN_TEST_RAD_ASM_SRC := $(wildcard $(BIN_TEST_RAD_EXE_SRC:.rad=.ras))
BIN_TEST_EXE_BIN := $(patsubst %.rad,%.rv64,$(BIN_TEST_RAD_EXE_SRC)) \
	$(patsubst %.ras,%.rv64,$(BIN_TEST_RAS_EXE_SRC))
BIN_RUNNER   := test/runner.rv64
BIN_TEST_RUN := test/run

bin-test: $(BIN_RUNNER) $(BIN_TEST_EXE_BIN)
	@echo

# Runner binary: the lowering IL checker.
$(BIN_RUNNER): test/runner.rad $(STD_LIB) $(RAD_BIN)
	@echo "radiance test/runner.rad => $@"
	@$(RADIANCE) $(STD) -pkg runner -mod test/runner.rad -entry runner -o $@

# A `.rad` executable test can have a same-basename `.ras` module.
$(patsubst %.ras,%.rv64,$(BIN_TEST_RAD_ASM_SRC)): %.rv64: %.ras

# Compile each executable test to a binary.
$(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.rad $(RAD_BIN)
	@echo "radiance $< => $@"
	@$(RADIANCE) -pkg test -mod $< -o $@
	@$(RADIANCE) -pkg test -mod $< $(patsubst %,-mod %,$(wildcard $(@:.rv64=.ras))) -o $@

$(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.ras $(BIN_RUNNER)
	@echo "asm $< => $@"
	@$(EMU) $(EMU_FLAGS) -run $(BIN_RUNNER) -- assemble $< $@

clean-bin-test:
	@rm -f $(BIN_RUNNER) \
		$(BIN_RUNNER:.rv64=.rv64.debug) \
		$(BIN_RUNNER:.rv64=.rv64.s) \

compiler/radiance.rad +203 -97

use std::lang::strings;
use std::lang::package;
use std::lang::il;
use std::lang::lower;
use std::arch::rv64;
use std::arch::rv64::asm;
use std::arch::rv64::printer;
use std::lang::sexpr;
use std::lang::gen::data;
use std::lang::gen::types;
use std::sys;

constant MAX_TOTAL_MODULES: u32 = 192;
/// Source code buffer arena (2 MB).
constant MAX_SOURCES_SIZE: u32 = 2097152;
/// Maximum number of test functions we can discover.
constant MAX_TESTS: u32 = 1024;
/// Maximum number of assembly source paths we can load per package.
constant MAX_ASM_MODULES: u32 = 64;

/// Temporary arena size (32 MB) - retains all parsed AST until resolution.
/// Used for: AST during parsing, then codegen scratch space.
constant TEMP_ARENA_SIZE: u32 = 33554432;
/// Main arena size (64 MB) - lives throughout compilation.


/// Read-only data file extension.
constant RO_DATA_EXT: *[u8] = ".ro.data";
/// Read-write data file extension.
constant RW_DATA_EXT: *[u8] = ".rw.data";
/// Maximum rodata size (1MB).
constant MAX_RO_DATA_SIZE: u32 = 1048576;
/// Maximum rwdata size (1MB).
constant MAX_RW_DATA_SIZE: u32 = 1048576;
/// Maximum rodata size (4MB).
constant MAX_RO_DATA_SIZE: u32 = 4194304;
/// Maximum rwdata size (4MB).
constant MAX_RW_DATA_SIZE: u32 = 4194304;
/// Maximum path length.
constant MAX_PATH_LEN: u32 = 256;
/// Read-only data buffer.
static RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
/// Read-write data buffer.
static RW_DATA_BUF: [u8; MAX_RW_DATA_SIZE] = undefined;
/// Assembly module source buffer.
static ASM_SOURCE_BUF: [u8; MAX_SOURCES_SIZE] = undefined;
/// Temporary assembly text buffer.
static ASM_TEXT_BUF: [u32; 262144] = undefined;
/// Temporary assembly data buffer.
static ASM_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
/// Accumulated assembly read-only data.
static ASM_RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;

/// Assembly source file extension.
constant ASM_SOURCE_EXT: *[u8] = ".ras";

/// Usage string.
constant USAGE: *[u8] =
    "usage: radiance -pkg <name> -mod <input>.. [-pkg <name> -mod <input>..] -entry <pkg> -o <output>\n";


    modPath: *[*[u8]],
    /// Test function name (eg. "testFoo").
    fnName: *[u8],
}

/// Source inputs belonging to one command-line package.
///
/// Radiance and assembly source paths are tracked in separate fixed-capacity
/// arrays, each paired with its own element count. Only the first
/// `radPathCount` / `asmPathCount` slots are meaningful; `packageInput`
/// leaves the arrays themselves `undefined`.
record PackageInput {
    /// Package name from the `-pkg` argument.
    name: *[u8],
    /// Radiance source paths for this package (capacity `MAX_LOADED_MODULES`).
    radPaths: [*[u8]; MAX_LOADED_MODULES],
    /// Number of Radiance source paths stored in `radPaths`.
    radPathCount: u32,
    /// Assembly source paths for this package (capacity `MAX_ASM_MODULES`).
    asmPaths: [*[u8]; MAX_ASM_MODULES],
    /// Number of assembly source paths stored in `asmPaths`.
    asmPathCount: u32,
}

/// Compilation context.
record CompileContext {
    /// Array of packages to compile.
    packages: [package::Package; MAX_PACKAGES],
    /// Driver inputs for each package slot.
    inputs: [PackageInput; MAX_PACKAGES],
    /// Number of packages.
    packageCount: u32,
    /// Index of entry package.
    entryPkgIdx: ?u32,
    /// Global module graph shared by all packages.

    debug: bool,
    /// How the generated program should handle entry.
    entryMode: CodegenEntryMode,
}

/// Report a driver error through `io::printError`.
///
/// Writes the `radiance: ` prefix, then every element of `msg` with a single
/// space between consecutive elements, then a newline. Always returns
/// `Error::Other`, so call sites can write `throw error(&[...])`.
fn error(msg: *[*[u8]]) -> Error {
    io::printError("radiance: ");

    for part, i in msg {
        // The separator goes in front of every part except the first.
        if i > 0 {
            io::printError(" ");
        }
        io::printError(part);
    }
    io::printError("\n");
    return Error::Other;
}

/// Print a log line for the given package.
fn pkgLog(pkg: *package::Package, msg: *[*[u8]]) {
    io::printError("radiance: ");
    io::printError(pkg.name);
    io::printError(": ");

        }
    }
    io::printError("\n");
}

/// Report whether the trailing bytes of `path` are exactly `ext`.
///
/// A `path` shorter than `ext` never matches.
fn hasExtension(path: *[u8], ext: *[u8]) -> bool {
    if ext.len > path.len {
        return false;
    }
    // Compare only the last `ext.len` bytes of the path.
    let tailStart = path.len - ext.len;
    let tail = &path[tailStart..];
    return mem::eq(tail, ext);
}

/// Build a `PackageInput` for `name` that holds no source paths yet.
///
/// Both path arrays are left `undefined`; the zeroed counts mark them empty.
fn packageInput(name: *[u8]) -> PackageInput {
    let empty = PackageInput {
        name: name,
        radPaths: undefined,
        radPathCount: 0,
        asmPaths: undefined,
        asmPathCount: 0,
    };
    return empty;
}

/// Register, load, and parse `path` within `pkg`.
fn processModule(
    pkg: *mut package::Package,
    graph: *mut module::ModuleGraph,
    path: *[u8],

    sourceArena: *mut alloc::Arena
) throws (Error) {
    pkgLog(pkg, &["parsing", "(", path, ")", ".."]);

    let moduleId = try package::registerModule(pkg, graph, path) catch {
        io::printError("radiance: error registering module\n");
        throw Error::Other;
        throw error(&["error registering module"]);
    };
    // Read file into remaining arena space.
    let buffer = alloc::remainingBuf(sourceArena);
    if buffer.len == 0 {
        io::printError("radiance: fatal: source arena exhausted\n");
        throw Error::Other;
        throw error(&["fatal:", "source arena exhausted"]);
    }
    let source = unix::readFile(path, buffer) else {
        io::printError("radiance: error reading file\n");
        throw Error::Other;
        throw error(&["error reading file"]);
    };
    if source.len == buffer.len {
        io::printError("radiance: fatal: source arena too small, file truncated: ");
        io::printError(path);
        io::printError("\n");
        throw Error::Other;
        throw error(&["fatal:", "source arena too small, file truncated:", path]);
    }
    // Commit only what was read.
    alloc::commit(sourceArena, source.len);

    let ast = try parser::parse(scanner::SourceLoc::File(path), source, nodeArena, &mut STRING_POOL) catch {
        throw Error::Other;
    };
    try module::setAst(graph, moduleId, ast) catch {
        io::printError("radiance: error setting AST\n");
        throw Error::Other;
        throw error(&["error setting AST"]);
    };
    try module::setSource(graph, moduleId, source) catch {
        io::printError("radiance: error setting source\n");
        throw Error::Other;
        throw error(&["error setting source"]);
    };
}

/// Consume the next argument, or print an error and throw.
fn nextArg(args: *[*[u8]], idx: *mut u32, msg: *[u8]) -> *[u8] throws (Error) {
fn nextArg(args: *[*[u8]], idx: *mut u32, msg: *[*[u8]]) -> *[u8] throws (Error) {
    set *idx += 1;
    if *idx >= args.len {
        io::printError(msg);
        throw Error::Other;
        throw error(msg);
    }
    return args[*idx];
}

/// Parse CLI arguments and return compilation context.

    let mut debugEnabled = false;
    let mut outputPath: ?*[u8] = nil;
    let mut dump = Dump::None;
    let mut entryPkgName: ?*[u8] = nil;

    // Per-package module path tracking.
    let mut moduleCounts: [u32; MAX_PACKAGES] = undefined;
    let mut modulePaths: [[*[u8]; MAX_LOADED_MODULES]; MAX_PACKAGES] = undefined;
    let mut pkgNames: [*[u8]; MAX_PACKAGES] = undefined;
    // Per-package source path tracking.
    let mut inputs: [PackageInput; MAX_PACKAGES] = undefined;
    let mut pkgCount: u32 = 0;
    let mut currentPkgIdx: ?u32 = nil;

    for i in 0..MAX_PACKAGES {
        set moduleCounts[i] = 0;
    }
    if args.len == 0 {
        io::printError(USAGE);
        throw Error::Other;
    }
    let mut idx: u32 = 0;

    while idx < args.len {
        let arg = args[idx];
        if mem::eq(arg, "-pkg") {
            try nextArg(args, &mut idx, "radiance: `-pkg` requires a package name\n");
            try nextArg(args, &mut idx, &["`-pkg` requires a package name"]);
            if pkgCount >= MAX_PACKAGES {
                io::printError("radiance: too many packages specified\n");
                throw Error::Other;
                throw error(&["too many packages specified"]);
            }
            set pkgNames[pkgCount] = args[idx];
            set inputs[pkgCount] = packageInput(args[idx]);
            set currentPkgIdx = pkgCount;
            set pkgCount += 1;
        } else if mem::eq(arg, "-mod") {
            try nextArg(args, &mut idx, "radiance: `-mod` requires a module path\n");
            try nextArg(args, &mut idx, &["`-mod` requires a module path"]);
            let pkgIdx = currentPkgIdx else {
                io::printError("radiance: `-mod` must follow a `-pkg` argument\n");
                throw Error::Other;
                throw error(&["`-mod` must follow a `-pkg` argument"]);
            };
            if moduleCounts[pkgIdx] >= MAX_LOADED_MODULES {
                io::printError("radiance: too many modules specified for package\n");
                throw Error::Other;
            let input = &mut inputs[pkgIdx];
            if hasExtension(args[idx], ASM_SOURCE_EXT) {
                if input.asmPathCount >= MAX_ASM_MODULES {
                    throw error(&["too many assembly modules specified"]);
                }
                set input.asmPaths[input.asmPathCount] = args[idx];
                set input.asmPathCount += 1;
            } else {
                if input.radPathCount >= MAX_LOADED_MODULES {
                    throw error(&["too many modules specified for package"]);
                }
                set input.radPaths[input.radPathCount] = args[idx];
                set input.radPathCount += 1;
            }
            set modulePaths[pkgIdx][moduleCounts[pkgIdx]] = args[idx];
            set moduleCounts[pkgIdx] += 1;
        } else if mem::eq(arg, "-entry") {
            try nextArg(args, &mut idx, "radiance: `-entry` requires a package name\n");
            try nextArg(args, &mut idx, &["`-entry` requires a package name"]);
            set entryPkgName = args[idx];
        } else if mem::eq(arg, "-test") {
            set buildTest = true;
        } else if mem::eq(arg, "-debug") {
            set debugEnabled = true;
        } else if mem::eq(arg, "-o") {
            try nextArg(args, &mut idx, "radiance: `-o` requires an output path\n");
            try nextArg(args, &mut idx, &["`-o` requires an output path"]);
            set outputPath = args[idx];
        } else if mem::eq(arg, "-dump") {
            try nextArg(args, &mut idx, "radiance: `-dump` requires a mode (eg. ast)\n");
            try nextArg(args, &mut idx, &["`-dump` requires a mode (eg. ast)"]);
            let mode = args[idx];
            if mem::eq(mode, "ast") {
                set dump = Dump::Ast;
            } else if mem::eq(mode, "graph") {
                set dump = Dump::Graph;
            } else if mem::eq(mode, "il") {
                set dump = Dump::Il;
            } else if mem::eq(mode, "asm") {
                set dump = Dump::Asm;
            } else {
                io::printError("radiance: unknown dump mode `");
                io::printError(mode);
                io::printError("` (expected: ast, graph, il, asm)\n");
                throw Error::Other;
                throw error(&["unknown dump mode", mode, "(expected: ast, graph, il, asm)"]);
            }
        } else {
            io::printError("radiance: unknown argument `");
            io::printError(arg);
            io::printError("`\n");
            throw Error::Other;
            throw error(&["unknown argument", arg]);
        }
        set idx += 1;
    }
    if pkgCount == 0 {
        io::printError("radiance: no package specified\n");
        throw Error::Other;
        throw error(&["no package specified"]);
    }
    for i in 0..pkgCount {
        if inputs[i].radPathCount == 0 {
            throw error(&["package", inputs[i].name, "has no Radiance modules specified"]);
        }
    }

    // Determine entry package index.
    let mut entryPkgIdx: ?u32 = nil;
    if pkgCount == 1 {
        // Single package: it is the entry.
        set entryPkgIdx = 0;
    } else {
        // Multiple packages: need -entry.
        let entryName = entryPkgName else {
            io::printError("radiance: `-entry` required when multiple packages specified\n");
            throw Error::Other;
            throw error(&["`-entry` required when multiple packages specified"]);
        };
        for i in 0..pkgCount {
            if mem::eq(pkgNames[i], entryName) {
            if mem::eq(inputs[i].name, entryName) {
                set entryPkgIdx = i;
                break;
            }
        }
        if entryPkgIdx == nil {
            io::printError("radiance: fatal: entry package `");
            io::printError(entryName);
            io::printError("` not found\n");

            throw Error::Other;
            throw error(&["fatal:", "entry package", entryName, "not found"]);
        }
    }
    let graph = module::moduleGraph(&mut MODULE_ENTRIES[..], &mut STRING_POOL, arena);
    let mut ctx = CompileContext {
        packages: undefined,
        inputs,
        packageCount: pkgCount,
        entryPkgIdx,
        graph,
        config: resolver::Config { buildTest },
        dump,

        debug: debugEnabled,
    };
    // Initialize and parse all packages.
    let mut sourceArena = alloc::new(&mut MODULE_SOURCES[..]);
    for i in 0..pkgCount {
        package::init(&mut ctx.packages[i], i as u16, pkgNames[i], &mut STRING_POOL);
        package::init(&mut ctx.packages[i], i as u16, ctx.inputs[i].name, &mut STRING_POOL);

        for j in 0..moduleCounts[i] {
            let path = modulePaths[i][j];
        for j in 0..ctx.inputs[i].radPathCount {
            let path = ctx.inputs[i].radPaths[j];
            try processModule(&mut ctx.packages[i], &mut ctx.graph, path, arena, &mut sourceArena);
        }
    }
    return ctx;
}

/// Get the entry package from the context.
///
/// Returns a pointer to the package at `ctx.entryPkgIdx`. When no entry index
/// is set, prints `radiance: no entry package specified` through the `error`
/// helper and throws the error it returns.
fn getEntryPackage(ctx: *CompileContext) -> *package::Package throws (Error) {
    let entryIdx = ctx.entryPkgIdx else {
        throw error(&["no entry package specified"]);
    };
    return &ctx.packages[entryIdx];
}

/// Get root module info from a package.
///
/// Looks up the package's root module in `graph` and returns its graph entry
/// together with its AST. Each missing piece (no root module id, no graph
/// entry, no AST) is reported as a driver error line through the `error`
/// helper, and the error it returns is thrown.
fn getRootModule(pkg: *package::Package, graph: *module::ModuleGraph) -> RootModule throws (Error) {
    let rootId = pkg.rootModuleId else {
        throw error(&["no root module found"]);
    };
    let rootEntry = module::get(graph, rootId) else {
        throw error(&["root module entry not found"]);
    };
    let rootAst = rootEntry.ast else {
        throw error(&["root module has no AST"]);
    };
    return RootModule { entry: rootEntry, ast: rootAst };
}

/// Dump the module graph.

    low: *mut lower::Lowerer,
    pkg: *mut package::Package,
    isEntry: bool
) throws (Error) {
    let rootId = pkg.rootModuleId else {
        io::printError("radiance: no root module found\n");
        throw Error::Other;
        throw error(&["no root module found"]);
    };
    // Set lowerer's package context for qualified name generation.
    // TODO: We shouldn't have to call this manually.
    lower::setPackage(low, &ctx.graph, pkg.name);


    modId: u16,
    isRoot: bool,
    pkg: *package::Package
) throws (Error) {
    let entry = module::get(graph, modId) else {
        io::printError("radiance: module entry not found\n");
        throw Error::Other;
        throw error(&["module entry not found"]);
    };
    let modAst = entry.ast else {
        io::printError("radiance: module has no AST\n");
        throw Error::Other;
        throw error(&["module has no AST"]);
    };
    pkgLog(pkg, &["lowering", "(", entry.filePath, ")", ".."]);

    try lower::lowerModule(low, modId, modAst, isRoot) catch err {
        io::printError("radiance: internal error during lowering: ");
        io::printError("radiance: ");
        io::printError("internal error during lowering: ");
        lower::printError(err);
        io::printError("\n");

        throw Error::Other;
    };
    // Recurse into children.
    for i in 0..entry.childrenLen {
        let childId = module::childAt(entry, i);

        if let entry = module::get(&ctx.graph, modIdx as u16) {
            collectModuleTests(entry, &mut tests[..], &mut testCount);
        }
    }
    if testCount == 0 {
        io::printError("radiance: fatal: no test functions found\n");
        throw Error::Other;
        throw error(&["fatal:", "no test functions found"]);
    }
    let mut countBuf: [u8; 10] = undefined;
    let countStr = fmt::formatU32(testCount, &mut countBuf[..]);
    pkgLog(entryPkg, &["found", countStr, "test(s)"]);


    set pos += try! mem::copy(&mut path[pos..], basePath);
    set pos += try! mem::copy(&mut path[pos..], ext);
    set path[pos] = 0; // Null-terminate for syscall.

    if not unix::writeFile(&path[..pos], data) {
        io::printError("radiance: fatal: failed to write data file\n");
        throw Error::Other;
        throw error(&["fatal:", "failed to write data file"]);
    }
}

/// Serialize debug entries and write the `.debug` file.
/// Resolves module IDs to file paths via the module graph.

        pkgScope: &mut RESOLVER_PKG_SCOPE,
        errors: &mut RESOLVER_ERRORS[..],
    };
    let mut res = resolver::resolver(storage, ctx.config);

    // Build package inputs.
    let mut packages: [resolver::Pkg; MAX_PACKAGES] = undefined;
    // Build the semantic package list consumed by the resolver.
    let mut resolverPkgs: [resolver::Pkg; MAX_PACKAGES] = undefined;
    let mut resolverPackageCount: u32 = 0;
    for i in 0..ctx.packageCount {
        let pkg = &ctx.packages[i];
        let root = try getRootModule(pkg, &ctx.graph);

        set packages[i] = resolver::Pkg {
        set resolverPkgs[resolverPackageCount] = resolver::Pkg {
            rootEntry: root.entry,
            rootAst: root.ast,
        };
        set resolverPackageCount += 1;
    }

    // Resolve all packages.
    // TODO: Fix this error printing dance.
    let diags = try resolver::resolve(&mut res, &ctx.graph, &packages[..ctx.packageCount]) catch {
    let diags = try resolver::resolve(&mut res, &ctx.graph, &resolverPkgs[..resolverPackageCount]) catch {
        let diags = resolver::Diagnostics { errors: res.errors };
        resolver::printer::printDiagnostics(&diags, &res);
        throw Error::Other;
    };
    if not resolver::success(&diags) {
        resolver::printer::printDiagnostics(&diags, &res);
        io::print("radiance: failed: ");
        io::printU32(diags.errors.len);
        io::printLn(" errors");
        throw Error::Other;
        let mut countBuf: [u8; 10] = undefined;
        let countStr = fmt::formatU32(diags.errors.len, &mut countBuf[..]);
        throw error(&["failed:", countStr, "errors"]);
    }
    return res;
}

/// Emit one lowered function to RV64 codegen and reclaim its IL arena.
/// Emit one lowered function to machine code and reclaim its IL arena.
fn generateLoweredFn(ctxPtr: *mut opaque, func: *il::Fn, role: lower::FnRole) {
    let ctx = ctxPtr as *mut CodegenSinkContext;

    match role {
        case lower::FnRole::Default => {

    }
    rv64::generateFunction(ctx.generator, func, ctx.codegenArena);
    alloc::reset(ctx.fnArena);
}

/// Assemble a single `.ras` file and fold the result into `generator`.
///
/// The file is read into `ASM_SOURCE_BUF` and assembled with a data base
/// address of `rv64::RO_DATA_BASE + *asmDataLen`. The data the assembler
/// produced is then copied into `ASM_RO_DATA_BUF` starting at `*asmDataLen`,
/// `*asmDataLen` is advanced past it, and the assembled program is handed to
/// `rv64::addAssembly`.
///
/// Throws when the file cannot be read, when it fills the source buffer
/// completely, when assembly fails, or when the accumulated data would not
/// fit in `ASM_RO_DATA_BUF`.
fn assembleAsmModule(
    generator: *mut rv64::Generator,
    pkg: *package::Package,
    path: *[u8],
    asmDataLen: *mut u32,
    arena: *mut alloc::Arena
) throws (Error) {
    pkgLog(pkg, &["asm:", "parsing", "(", path, ")", ".."]);

    let source = unix::readFile(path, &mut ASM_SOURCE_BUF[..]) else {
        throw error(&["error reading assembly file"]);
    };
    // A read that fills the whole buffer is rejected as too large.
    if source.len == ASM_SOURCE_BUF.len {
        throw error(&["fatal:", "assembly source too large:", path]);
    }
    // Offset at which this module's data lands in the accumulated buffer.
    let dataStart = *asmDataLen;
    let program = try asm::assemble(
        asm::scanner::SourceKind::File { path },
        source,
        &mut ASM_TEXT_BUF[..],
        &mut ASM_DATA_BUF[..],
        arena,
        &mut STRING_POOL,
        rv64::RO_DATA_BASE + dataStart
    ) catch {
        throw error(&["assembly failed:", path]);
    };
    let dataEnd = dataStart + program.data.len;
    if dataEnd > ASM_RO_DATA_BUF.len {
        throw error(&["fatal:", "assembly rodata too large"]);
    }
    try! mem::copy(&mut ASM_RO_DATA_BUF[dataStart..], program.data);
    set *asmDataLen = dataEnd;

    rv64::addAssembly(generator, program);
}

/// Assemble every assembly path recorded in the context's package inputs.
///
/// Packages are visited in index order and, within a package, paths in the
/// order they were stored. Returns the slice of `ASM_RO_DATA_BUF` covering the
/// data accumulated across all assembled modules.
fn assembleAsmInputs(
    ctx: *CompileContext,
    generator: *mut rv64::Generator,
    arena: *mut alloc::Arena
) -> *[u8] throws (Error) {
    // Running length of the data accumulated so far.
    let mut totalData: u32 = 0;

    for pkgIdx in 0..ctx.packageCount {
        let input = &ctx.inputs[pkgIdx];
        let pkg = &ctx.packages[pkgIdx];

        for pathIdx in 0..input.asmPathCount {
            let path = input.asmPaths[pathIdx];
            try assembleAsmModule(generator, pkg, path, &mut totalData, arena);
        }
    }
    return &ASM_RO_DATA_BUF[..totalData];
}

/// Lower all packages while streaming each lowered function into RV64 codegen.
fn lowerAndGenerateAllPackages(
    ctx: *mut CompileContext,
    res: *mut resolver::Resolver,
    fnArena: *mut alloc::Arena,

    set low.output = lower::FnOutput::Stream(lower::FnSink {
        ctx: &mut codegenCtx as *mut opaque,
        emitFn: generateLoweredFn,
    });
    try lowerAllPackagesInto(ctx, res, &mut low);
    let asmData = try assembleAsmInputs(ctx, &mut generator, &mut res.arena);

    match generator.entryPatch {
        case rv64::EntryPatch::Reserved(targetName) => {
            if targetName == nil {
                io::printError("radiance: fatal: no default function found\n");
                throw Error::Other;
                throw error(&["fatal:", "no default function found"]);
            }
        }
        else => {}
    }
    if let path = codegenOptions.logPath {
        pkgLog(entryPkg, &["generating code", "(", path, ")", ".."]);
    }
    return rv64::finishProgram(&mut generator, &low.data[..], storage, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]);
    return rv64::finishProgram(&mut generator, &low.data[..], storage, asmData, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]);
}

/// Lower, optionally dump, and optionally generate binary output.
fn compile(
    ctx: *mut CompileContext,

        debug: ctx.debug,
        entryMode: CodegenEntryMode::DefaultEntry,
    });

    if not writeCode(result.code, outPath) {
        io::printError("radiance: fatal: failed to write output file\n");
        throw Error::Other;
        throw error(&["fatal:", "failed to write output file"]);
    }
    // Write data files.
    try writeDataWithExt(&RO_DATA_BUF[..result.roDataSize], outPath, RO_DATA_EXT);
    try writeDataWithExt(&RW_DATA_BUF[..result.rwDataSize], outPath, RW_DATA_EXT);


lib/std.rad +1 -0

//! The Radiance Standard Library.

export mod io;
export mod collections;
export mod char;
export mod lang;
export mod sys;
export mod arch;
export mod fmt;
export mod mem;

lib/std/arch/rv64.rad +43 -4

export mod encode;
export mod decode;
export mod emit;
export mod isel;
export mod printer;
export mod asm;

@test mod tests;

use std::mem;
use std::collections::dict;


    // Reclaim unused memory after instruction selection.
    alloc::restore(arena, checkpoint);
}

/// Append an assembled program's text to the generator's code stream.
///
/// The generator's code length at entry is taken as the base instruction
/// index. For every symbol in the text section, its byte offset is divided by
/// `INSTR_SIZE` and added to that base; the resulting index is recorded with
/// `emit::recordFuncAt`. Symbols flagged `isExported` are additionally
/// recorded with `emit::recordFuncOffsetAt`; per the driver's design this is
/// what makes them resolvable from other code, while local labels stay
/// private so separate assembly inputs may reuse the same names.
///
/// Symbols from other sections are skipped: assembled data is not part of the
/// text stream and reaches [`finishProgram`] separately as an RO data prefix.
export fn addAssembly(generator: *mut Generator, program: asm::Program) {
    // Snapshot before emitting anything so symbol indices are relative to
    // where this program's first instruction will land.
    let firstInstr = generator.e.codeLen;

    for sym in program.symbols {
        if sym.section == asm::Section::Text {
            let instrOffset = (sym.offset as u32) / INSTR_SIZE as u32;
            let instrIndex = firstInstr + instrOffset;

            emit::recordFuncAt(&mut generator.e, sym.name, instrIndex);
            if sym.isExported {
                emit::recordFuncOffsetAt(&mut generator.e, sym.name, instrIndex);
            }
        }
    }
    for instr in program.text {
        emit::emit(&mut generator.e, instr);
    }
}

/// Finish RV64 code generation and return the emitted program.
export fn finishProgram(
    generator: *mut Generator,
    globalData: *[il::Data],
    storage: Storage,
    roDataPrefix: *[u8],
    roDataBuf: *mut [u8],
    rwDataBuf: *mut [u8]
) -> Program {
    // Build data map after function lowering. Function-local literals can add
    // global data while functions are lowered, so final layout belongs here.
    let mut dataSymCount: u32 = 0;
    let roLayoutSize = data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, true);
    let roLayoutSize = data::layoutSectionAtOffset(
        globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, roDataPrefix.len, true
    );
    data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RW_DATA_BASE, false);

    let dataSyms = &storage.dataSyms[..dataSymCount];
    let dataSymMap = data::buildMap(dataSyms, storage.dataSymEntries);
    let codeBase = mem::alignUp(RO_DATA_BASE + roLayoutSize, DWORD_SIZE as u32);

    // Patch function calls and address loads now that all functions are emitted.
    emit::patchCalls(&mut generator.e);
    emit::patchAddrLoads(&mut generator.e, &dataSymMap);

    // Emit data sections.
    let roDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true);
    let rwDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false);

    assert roDataPrefix.len <= roDataBuf.len, "finishProgram: rodata prefix buffer overflow";
    try! mem::copy(roDataBuf, roDataPrefix);

    let roDataSize = data::emitSectionAtOffset(
        globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true, roDataPrefix.len
    );
    let rwDataSize = data::emitSection(
        globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false
    );
    return Program {
        code: emit::getCode(&generator.e),
        funcs: emit::getFuncs(&generator.e),
        roDataSize,
        rwDataSize,

lib/std/arch/rv64/asm.rad added +528 -0

1	+	//! Minimal RV64 assembler.
2	+	//!
3	+	//! This module assembles `.ras` source files into RV64 text words plus a raw
4	+	//! data prefix that can be linked into a compiler-generated program. It exists
5	+	//! so the Radiance driver can mix hand-written RV64 assembly with generated IL
6	+	//! output without invoking an external assembler or linker.
7	+	//!
8	+	//! Assembly is intentionally direct and buffer-oriented. The caller provides a
9	+	//! text buffer, a data buffer, an arena, and the runtime base address where the
10	+	//! data buffer will be loaded. The parser writes encoded instructions into the
11	+	//! text buffer as it reads them and writes directive bytes into the data buffer
12	+	//! while in `.data`. The returned [`Program`] only contains slices into those
13	+	//! caller-provided buffers, so no ownership transfer or late copy is needed.
14	+	//!
15	+	//! The scanner is assembly-specific. It produces tokens for registers (`%a0`),
16	+	//! labels (`@name`), directives, strings, characters, numbers, and
17	+	//! punctuation. The parser consumes those tokens as a small line-oriented
18	+	//! language: directives declare sections or emit data, labels define
19	+	//! symbols at the current section offset, and instructions are validated
20	+	//! against RV64 operand forms before being encoded.
21	+	//!
22	+	//! Labels are defined at the current byte offset of the active section (text
23	+	//! or data). The parser is single-pass because it keeps assembly cheap and lets
24	+	//! instructions and data be emitted immediately, but forward references mean
25	+	//! some operands cannot be encoded when first seen. Branches, jumps,
26	+	//! load-address operands, and data directives that reference labels therefore
27	+	//! record fixups. After parsing reaches EOF, the emitter resolves the final
28	+	//! symbol table and patches every recorded use with the correct PC-relative
29	+	//! offset, absolute data address, or encoded data value.
30	+	//!
31	+	//! Data labels are resolved relative to the data base address. The compiler
32	+	//! driver accumulates all assembly data in a RO data prefix, passes
33	+	//! [`RO_DATA_BASE`] + `currentPrefixLen` for each input, then appends the
34	+	//! input's emitted data to that prefix. Text symbols marked with `.export` are
35	+	//! registered for call resolution when the assembled text is appended to the
36	+	//! RV64 generator, shifted by the generator's current code length so
37	+	//! disassembly/debug output can name those instruction addresses correctly.
38	+	//! Text labels without `.export` remain local to their assembly fragment.
39	+	use std::lang::alloc;
40	+	use std::lang::strings;
41	+	use std::lang::gen;
42	+	use std::collections::dict;
43	+	use std::arch::rv64::encode;
44	+	use std::arch::rv64;
45	+
46	+	/// Assembler scanner module.
47	+	export mod scanner;
48	+	/// Assembler parser module.
49	+	export mod parser;
50	+	/// Assembler emission and fixup module.
51	+	export mod emit;
52	+	/// Tests.
53	+	@test mod tests;
54	+
55	+	/// In-memory result of assembling one RV64 assembly fragment.
56	+	///
57	+	/// [`Program`] is the boundary between the textual assembler and the rest of
58	+	/// the compiler. The assembler reads an assembly source file, encodes all
59	+	/// instructions, lays out all data bytes, resolves fixups that can be resolved
60	+	/// inside the fragment, and returns these three slices as the assembled
61	+	/// program.
62	+	///
63	+	/// The value is intentionally not a standalone object file or linked
64	+	/// executable. It carries only the sections and symbol table needed by the
65	+	/// compiler driver. The slices point at caller-owned storage: `text` and
66	+	/// `data` are backed by the buffers passed to [`assemble`], while symbol names
67	+	/// are interned in the assembler's string pool.
68	+	///
69	+	/// Symbol offsets are section-local byte offsets. Text symbols name positions
70	+	/// in `text`; data symbols name positions in `data`. When the compiler
71	+	/// consumes the program, [`rv64::addAssembly`] appends the text words to the
72	+	/// generated text stream and registers text labels at their relocated offsets.
73	+	/// The driver copies `data` into the final read-only data prefix; the data
74	+	/// base supplied to [`assemble`] lets the assembler resolve data addresses as
75	+	/// they will appear in that final layout.
76	+	export record Program {
77	+	/// Encoded instructions in the text section.
78	+	text: *[u32],
79	+	/// Raw bytes in the data section.
80	+	data: *[u8],
81	+	/// Symbols defined by the source.
82	+	symbols: *[Symbol],
83	+	}
84	+
85	+	/// Errors reported while assembling source text.
86	+	export union Error {
87	+	/// Invalid syntax or operand form at a source offset.
88	+	Invalid { offset: u32, message: *[u8] },
89	+	/// The source emitted more text words than the caller-provided buffer holds.
90	+	TextOverflow,
91	+	/// The source emitted more data bytes than the caller-provided buffer holds.
92	+	DataOverflow,
93	+	}
94	+
95	+	/// Active output section.
96	+	export union Section {
97	+	/// Instruction section.
98	+	Text,
99	+	/// Data byte section.
100	+	Data,
101	+	}
102	+
103	+	/// Branch opcode that needs fixup.
104	+	export union BranchOp {
105	+	/// Branch if equal.
106	+	Beq,
107	+	/// Branch if not equal.
108	+	Bne,
109	+	/// Branch if less than, signed.
110	+	Blt,
111	+	/// Branch if greater than or equal, signed.
112	+	Bge,
113	+	/// Branch if less than, unsigned.
114	+	Bltu,
115	+	/// Branch if greater than or equal, unsigned.
116	+	Bgeu,
117	+	/// Branch if less than or equal, signed pseudo-instruction.
118	+	Ble,
119	+	/// Branch if greater than, signed pseudo-instruction.
120	+	Bgt,
121	+	}
122	+
123	+	/// Parser and encoder behavior for one instruction mnemonic.
124	+	export union InstructionEncoder {
125	+	/// No-operand instruction encoded by a fixed encoder.
126	+	NoOperand { enc: fn() -> u32 },
127	+	/// Load-immediate pseudo-instruction.
128	+	Li,
129	+	/// Load-address pseudo-instruction.
130	+	La,
131	+	/// Two-register instruction or pseudo-instruction.
132	+	RR { enc: fn(gen::Reg, gen::Reg) -> u32 },
133	+	/// Three-register instruction.
134	+	RRR { enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32 },
135	+	/// Register, register, immediate instruction.
136	+	RRI { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
137	+	/// Shift-immediate instruction with RV64 shift bounds.
138	+	Shift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
139	+	/// Shift-immediate instruction with RV64 W-mode shift bounds.
140	+	WordShift { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
141	+	/// Load instruction with memory operand syntax.
142	+	Load { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
143	+	/// Store instruction with memory operand syntax.
144	+	Store { enc: fn(gen::Reg, gen::Reg, i32) -> u32 },
145	+	/// Two-register branch instruction.
146	+	Branch { op: BranchOp },
147	+	/// One-register branch-to-zero pseudo-instruction.
148	+	BranchZero { op: BranchOp },
149	+	/// `jal` instruction with explicit destination register.
150	+	Jal,
151	+	/// Jump pseudo-instruction with fixed destination register.
152	+	Jump { rd: gen::Reg },
153	+	/// CSR read-style operand form.
154	+	RdCsr { enc: fn(gen::Reg, u32) -> u32 },
155	+	/// CSR write-style operand form.
156	+	CsrRs1 { enc: fn(u32, gen::Reg) -> u32 },
157	+	/// CSR read/write operand form.
158	+	Csrrw,
159	+	/// CSR immediate operand form.
160	+	Csrsi,
161	+	/// Upper-immediate operand form.
162	+	Upper { enc: fn(gen::Reg, i32) -> u32 },
163	+	}
164	+
165	+	/// Classified directive name.
166	+	export union DirectiveKind {
167	+	/// `.align` directive.
168	+	Align,
169	+	/// `.ascii` directive.
170	+	Ascii,
171	+	/// `.byte` directive.
172	+	Byte,
173	+	/// `.constant` directive.
174	+	Constant,
175	+	/// `.data` directive.
176	+	Data,
177	+	/// `.dword` directive.
178	+	Dword,
179	+	/// `.export` directive.
180	+	Export,
181	+	/// `.space` directive.
182	+	Space,
183	+	/// `.text` directive.
184	+	Text,
185	+	/// `.word` directive.
186	+	Word,
187	+	}
188	+
189	+	/// Instruction descriptor table row.
190	+	record InstructionEntry {
191	+	/// Assembly mnemonic text.
192	+	name: *[u8],
193	+	/// Operand parser and encoder behavior.
194	+	encoder: InstructionEncoder,
195	+	}
196	+
197	+	/// Directive descriptor table row.
198	+	record DirectiveEntry {
199	+	/// Directive name without the leading `.`.
200	+	name: *[u8],
201	+	/// Parser behavior for the directive.
202	+	kind: DirectiveKind,
203	+	}
204	+
205	+	/// Register descriptor table row.
206	+	record RegisterEntry {
207	+	/// Register alias text without the leading `%`.
208	+	name: *[u8],
209	+	/// Numeric register selected by the alias.
210	+	reg: gen::Reg,
211	+	}
212	+
213	+	/// CSR descriptor table row.
214	+	record CsrEntry {
215	+	/// CSR name text.
216	+	name: *[u8],
217	+	/// Numeric CSR address.
218	+	csr: u32,
219	+	}
220	+
221	+	/// Width of an integer data directive.
222	+	export union DataWidth {
223	+	/// 32-bit data value.
224	+	Word,
225	+	/// 64-bit data value.
226	+	Dword,
227	+	}
228	+
229	+	/// Extra slot used when sizing source-derived symbol and fixup buffers.
230	+	export constant SOURCE_CAP_PADDING: u32 = 1;
231	+	/// Scale factor used to keep assembler hash tables sparse.
232	+	export constant TABLE_CAPACITY_SCALE: u32 = 4;
233	+	/// Minimum hash-table capacity used by the assembler.
234	+	export constant MIN_TABLE_CAPACITY: u32 = 8;
235	+	/// `@label` names exclude the leading sigil byte when interned.
236	+	export constant LABEL_SIGIL_LEN: u32 = 1;
237	+	/// `.directive` names exclude the leading sigil byte when matched.
238	+	export constant DIRECTIVE_SIGIL_LEN: u32 = 1;
239	+	/// String and character literals are delimited by one byte on each side.
240	+	export constant QUOTE_DELIM_LEN: u32 = 1;
241	+	/// Number of bits in one byte.
242	+	export constant BITS_PER_BYTE: u64 = 8;
243	+	/// Mask for extracting one encoded byte.
244	+	export constant BYTE_MASK: u64 = 0xFF;
245	+	/// Largest signed 32-bit assembler value.
246	+	export constant I32_MAX_VALUE: i64 = 2147483647;
247	+	/// Magnitude of the smallest signed 32-bit assembler value.
248	+	export constant I32_MIN_MAGNITUDE: i64 = 2147483648;
249	+	/// Largest unsigned 32-bit assembler value.
250	+	export constant U32_MAX_VALUE: i64 = 4294967295;
251	+	/// Largest unsigned 8-bit assembler value.
252	+	export constant U8_MAX_VALUE: i64 = 255;
253	+	/// Upper bound for CSR immediate operands.
254	+	export constant CSR_IMM_LIMIT: i64 = 32;
255	+	/// Upper bound for RV64 W-mode shift immediates.
256	+	export constant WORD_SHIFT_LIMIT: i32 = 32;
257	+	/// Upper bound for RV64 shift immediates.
258	+	export constant SHIFT_LIMIT: i32 = 64;
259	+	/// Largest `lui` or `auipc` immediate.
260	+	export constant UPPER_IMM_MAX_VALUE: i64 = 0xFFFFF;
261	+
262	+	/// Sorted instruction descriptor table used by the assembler parser.
263	+	export constant INSTRUCTIONS: [InstructionEntry; 87] = [
264	+	{ name: "add", encoder: InstructionEncoder::RRR { enc: encode::add } },
265	+	{ name: "addi", encoder: InstructionEncoder::RRI { enc: encode::addi } },
266	+	{ name: "addiw", encoder: InstructionEncoder::RRI { enc: encode::addiw } },
267	+	{ name: "addw", encoder: InstructionEncoder::RRR { enc: encode::addw } },
268	+	{ name: "and", encoder: InstructionEncoder::RRR { enc: encode::and_ } },
269	+	{ name: "andi", encoder: InstructionEncoder::RRI { enc: encode::andi } },
270	+	{ name: "auipc", encoder: InstructionEncoder::Upper { enc: encode::auipc } },
271	+	{ name: "beq", encoder: InstructionEncoder::Branch { op: BranchOp::Beq } },
272	+	{ name: "beqz", encoder: InstructionEncoder::BranchZero { op: BranchOp::Beq } },
273	+	{ name: "bge", encoder: InstructionEncoder::Branch { op: BranchOp::Bge } },
274	+	{ name: "bgeu", encoder: InstructionEncoder::Branch { op: BranchOp::Bgeu } },
275	+	{ name: "bgt", encoder: InstructionEncoder::Branch { op: BranchOp::Bgt } },
276	+	{ name: "ble", encoder: InstructionEncoder::Branch { op: BranchOp::Ble } },
277	+	{ name: "blt", encoder: InstructionEncoder::Branch { op: BranchOp::Blt } },
278	+	{ name: "bltu", encoder: InstructionEncoder::Branch { op: BranchOp::Bltu } },
279	+	{ name: "bne", encoder: InstructionEncoder::Branch { op: BranchOp::Bne } },
280	+	{ name: "bnez", encoder: InstructionEncoder::BranchZero { op: BranchOp::Bne } },
281	+	{ name: "call", encoder: InstructionEncoder::Jump { rd: rv64::RA } },
282	+	{ name: "csrc", encoder: InstructionEncoder::CsrRs1 { enc: encode::csrc } },
283	+	{ name: "csrr", encoder: InstructionEncoder::RdCsr { enc: encode::csrr } },
284	+	{ name: "csrrw", encoder: InstructionEncoder::Csrrw },
285	+	{ name: "csrsi", encoder: InstructionEncoder::Csrsi },
286	+	{ name: "csrw", encoder: InstructionEncoder::CsrRs1 { enc: encode::csrw } },
287	+	{ name: "div", encoder: InstructionEncoder::RRR { enc: encode::div } },
288	+	{ name: "divu", encoder: InstructionEncoder::RRR { enc: encode::divu } },
289	+	{ name: "divuw", encoder: InstructionEncoder::RRR { enc: encode::divuw } },
290	+	{ name: "divw", encoder: InstructionEncoder::RRR { enc: encode::divw } },
291	+	{ name: "ebreak", encoder: InstructionEncoder::NoOperand { enc: encode::ebreak } },
292	+	{ name: "ecall", encoder: InstructionEncoder::NoOperand { enc: encode::ecall } },
293	+	{ name: "j", encoder: InstructionEncoder::Jump { rd: rv64::ZERO } },
294	+	{ name: "jal", encoder: InstructionEncoder::Jal },
295	+	{ name: "jalr", encoder: InstructionEncoder::RRI { enc: encode::jalr } },
296	+	{ name: "la", encoder: InstructionEncoder::La },
297	+	{ name: "lb", encoder: InstructionEncoder::Load { enc: encode::lb } },
298	+	{ name: "lbu", encoder: InstructionEncoder::Load { enc: encode::lbu } },
299	+	{ name: "ld", encoder: InstructionEncoder::Load { enc: encode::ld } },
300	+	{ name: "lh", encoder: InstructionEncoder::Load { enc: encode::lh } },
301	+	{ name: "lhu", encoder: InstructionEncoder::Load { enc: encode::lhu } },
302	+	{ name: "li", encoder: InstructionEncoder::Li },
303	+	{ name: "lui", encoder: InstructionEncoder::Upper { enc: encode::lui } },
304	+	{ name: "lw", encoder: InstructionEncoder::Load { enc: encode::lw } },
305	+	{ name: "lwu", encoder: InstructionEncoder::Load { enc: encode::lwu } },
306	+	{ name: "mret", encoder: InstructionEncoder::NoOperand { enc: encode::mret } },
307	+	{ name: "mul", encoder: InstructionEncoder::RRR { enc: encode::mul } },
308	+	{ name: "mulh", encoder: InstructionEncoder::RRR { enc: encode::mulh } },
309	+	{ name: "mulhsu", encoder: InstructionEncoder::RRR { enc: encode::mulhsu } },
310	+	{ name: "mulhu", encoder: InstructionEncoder::RRR { enc: encode::mulhu } },
311	+	{ name: "mulw", encoder: InstructionEncoder::RRR { enc: encode::mulw } },
312	+	{ name: "mv", encoder: InstructionEncoder::RR { enc: encode::mv } },
313	+	{ name: "neg", encoder: InstructionEncoder::RR { enc: encode::neg } },
314	+	{ name: "nop", encoder: InstructionEncoder::NoOperand { enc: encode::nop } },
315	+	{ name: "not", encoder: InstructionEncoder::RR { enc: encode::not_ } },
316	+	{ name: "or", encoder: InstructionEncoder::RRR { enc: encode::or_ } },
317	+	{ name: "ori", encoder: InstructionEncoder::RRI { enc: encode::ori } },
318	+	{ name: "rem", encoder: InstructionEncoder::RRR { enc: encode::rem } },
319	+	{ name: "remu", encoder: InstructionEncoder::RRR { enc: encode::remu } },
320	+	{ name: "remuw", encoder: InstructionEncoder::RRR { enc: encode::remuw } },
321	+	{ name: "remw", encoder: InstructionEncoder::RRR { enc: encode::remw } },
322	+	{ name: "ret", encoder: InstructionEncoder::NoOperand { enc: encode::ret } },
323	+	{ name: "sb", encoder: InstructionEncoder::Store { enc: encode::sb } },
324	+	{ name: "sd", encoder: InstructionEncoder::Store { enc: encode::sd } },
325	+	{ name: "seqz", encoder: InstructionEncoder::RR { enc: encode::seqz } },
326	+	{ name: "sh", encoder: InstructionEncoder::Store { enc: encode::sh } },
327	+	{ name: "sll", encoder: InstructionEncoder::RRR { enc: encode::sll } },
328	+	{ name: "slli", encoder: InstructionEncoder::Shift { enc: encode::slli } },
329	+	{ name: "slliw", encoder: InstructionEncoder::WordShift { enc: encode::slliw } },
330	+	{ name: "sllw", encoder: InstructionEncoder::RRR { enc: encode::sllw } },
331	+	{ name: "slt", encoder: InstructionEncoder::RRR { enc: encode::slt } },
332	+	{ name: "slti", encoder: InstructionEncoder::RRI { enc: encode::slti } },
333	+	{ name: "sltiu", encoder: InstructionEncoder::RRI { enc: encode::sltiu } },
334	+	{ name: "sltu", encoder: InstructionEncoder::RRR { enc: encode::sltu } },
335	+	{ name: "snez", encoder: InstructionEncoder::RR { enc: encode::snez } },
336	+	{ name: "sra", encoder: InstructionEncoder::RRR { enc: encode::sra } },
337	+	{ name: "srai", encoder: InstructionEncoder::Shift { enc: encode::srai } },
338	+	{ name: "sraiw", encoder: InstructionEncoder::WordShift { enc: encode::sraiw } },
339	+	{ name: "sraw", encoder: InstructionEncoder::RRR { enc: encode::sraw } },
340	+	{ name: "srl", encoder: InstructionEncoder::RRR { enc: encode::srl } },
341	+	{ name: "srli", encoder: InstructionEncoder::Shift { enc: encode::srli } },
342	+	{ name: "srliw", encoder: InstructionEncoder::WordShift { enc: encode::srliw } },
343	+	{ name: "srlw", encoder: InstructionEncoder::RRR { enc: encode::srlw } },
344	+	{ name: "sub", encoder: InstructionEncoder::RRR { enc: encode::sub } },
345	+	{ name: "subw", encoder: InstructionEncoder::RRR { enc: encode::subw } },
346	+	{ name: "sw", encoder: InstructionEncoder::Store { enc: encode::sw } },
347	+	{ name: "tail", encoder: InstructionEncoder::Jump { rd: rv64::ZERO } },
348	+	{ name: "wfi", encoder: InstructionEncoder::NoOperand { enc: encode::wfi } },
349	+	{ name: "xor", encoder: InstructionEncoder::RRR { enc: encode::xor } },
350	+	{ name: "xori", encoder: InstructionEncoder::RRI { enc: encode::xori } },
351	+	];
352	+
353	+	/// Sorted directive lookup table used by the assembler parser.
354	+	export constant DIRECTIVES: [DirectiveEntry; 10] = [
355	+	{ name: "align", kind: DirectiveKind::Align },
356	+	{ name: "ascii", kind: DirectiveKind::Ascii },
357	+	{ name: "byte", kind: DirectiveKind::Byte },
358	+	{ name: "constant", kind: DirectiveKind::Constant },
359	+	{ name: "data", kind: DirectiveKind::Data },
360	+	{ name: "dword", kind: DirectiveKind::Dword },
361	+	{ name: "export", kind: DirectiveKind::Export },
362	+	{ name: "space", kind: DirectiveKind::Space },
363	+	{ name: "text", kind: DirectiveKind::Text },
364	+	{ name: "word", kind: DirectiveKind::Word },
365	+	];
366	+
367	+	/// Sorted register-name lookup table used by the assembler parser.
368	+	export constant REGISTERS: [RegisterEntry; 33] = [
369	+	{ name: "a0", reg: rv64::A0 },
370	+	{ name: "a1", reg: rv64::A1 },
371	+	{ name: "a2", reg: rv64::A2 },
372	+	{ name: "a3", reg: rv64::A3 },
373	+	{ name: "a4", reg: rv64::A4 },
374	+	{ name: "a5", reg: rv64::A5 },
375	+	{ name: "a6", reg: rv64::A6 },
376	+	{ name: "a7", reg: rv64::A7 },
377	+	{ name: "fp", reg: rv64::FP },
378	+	{ name: "gp", reg: rv64::GP },
379	+	{ name: "ra", reg: rv64::RA },
380	+	{ name: "s0", reg: rv64::S0 },
381	+	{ name: "s1", reg: rv64::S1 },
382	+	{ name: "s10", reg: rv64::S10 },
383	+	{ name: "s11", reg: rv64::S11 },
384	+	{ name: "s2", reg: rv64::S2 },
385	+	{ name: "s3", reg: rv64::S3 },
386	+	{ name: "s4", reg: rv64::S4 },
387	+	{ name: "s5", reg: rv64::S5 },
388	+	{ name: "s6", reg: rv64::S6 },
389	+	{ name: "s7", reg: rv64::S7 },
390	+	{ name: "s8", reg: rv64::S8 },
391	+	{ name: "s9", reg: rv64::S9 },
392	+	{ name: "sp", reg: rv64::SP },
393	+	{ name: "t0", reg: rv64::T0 },
394	+	{ name: "t1", reg: rv64::T1 },
395	+	{ name: "t2", reg: rv64::T2 },
396	+	{ name: "t3", reg: rv64::T3 },
397	+	{ name: "t4", reg: rv64::T4 },
398	+	{ name: "t5", reg: rv64::T5 },
399	+	{ name: "t6", reg: rv64::T6 },
400	+	{ name: "tp", reg: rv64::TP },
401	+	{ name: "zero", reg: rv64::ZERO },
402	+	];
403	+
404	+	/// Sorted CSR-name lookup table used by the assembler parser.
405	+	export constant CSRS: [CsrEntry; 9] = [
406	+	{ name: "mcause", csr: 0x342 },
407	+	{ name: "mepc", csr: 0x341 },
408	+	{ name: "mhartid", csr: 0xF14 },
409	+	{ name: "mie", csr: 0x304 },
410	+	{ name: "mip", csr: 0x344 },
411	+	{ name: "mscratch", csr: 0x340 },
412	+	{ name: "mstatus", csr: 0x300 },
413	+	{ name: "mtval", csr: 0x343 },
414	+	{ name: "mtvec", csr: 0x305 },
415	+	];
416	+
417	+	/// Recorded symbol definition.
418	+	export record Symbol {
419	+	/// Symbol name.
420	+	name: *[u8],
421	+	/// Section the symbol belongs to.
422	+	section: Section,
423	+	/// Byte offset within the section.
424	+	offset: i32,
425	+	/// Whether `.export` exported this symbol outside its assembly fragment.
426	+	isExported: bool,
427	+	}
428	+
429	+	/// Information needed to resolve a pending symbol reference.
430	+	export union FixupInfo {
431	+	/// Branch to a text label.
432	+	Branch { op: BranchOp, rs1: gen::Reg, rs2: gen::Reg, index: u32 },
433	+	/// JAL-like jump to a text label.
434	+	Jal { rd: gen::Reg, index: u32 },
435	+	/// Absolute address materialization into a register.
436	+	Addr { rd: gen::Reg, index: u32 },
437	+	/// A 32-bit data word referring to a symbol offset.
438	+	Word { offset: u32 },
439	+	/// A 64-bit data word referring to a symbol offset.
440	+	Dword { offset: u32 },
441	+	}
442	+
443	+	/// Pending symbol reference.
444	+	export record Fixup {
445	+	/// Referenced symbol.
446	+	symbol: *[u8],
447	+	/// Fixup payload.
448	+	info: FixupInfo,
449	+	}
450	+
451	+	/// Parser and emission state.
452	+	export record Assembler {
453	+	/// Allocation arena for temporary assembler state.
454	+	arena: *mut alloc::Arena,
455	+	/// Assembler lexical scanner.
456	+	scan: scanner::Scanner,
457	+	/// Output text buffer.
458	+	text: *mut [u32],
459	+	/// Output data buffer.
460	+	data: *mut [u8],
461	+	/// Current output section.
462	+	section: Section,
463	+	/// Defined symbols.
464	+	symbols: *mut [Symbol],
465	+	/// Name-to-symbol index map.
466	+	symbolMap: dict::Dict,
467	+	/// Name-to-integer map.
468	+	constMap: dict::Dict,
469	+	/// Names marked by `.export`.
470	+	exportMap: dict::Dict,
471	+	/// Pending fixups.
472	+	fixups: *mut [Fixup],
473	+	/// Absolute runtime address of data-section offset zero.
474	+	dataBase: u32,
475	+	}
476	+
/// Assemble `source` into the caller-provided text and data buffers.
///
/// `dataBase` is the runtime address of data-section offset zero; it is stored
/// on the assembler and added to data symbol offsets when fixups are resolved.
///
/// Symbol and fixup storage is allocated from `arena` with one slot per source
/// byte plus [`SOURCE_CAP_PADDING`]. Each of the three dictionaries gets
/// [`TABLE_CAPACITY_SCALE`] times that many entries, rounded up to a power of
/// two. All output slices start with length zero and the full buffer as
/// capacity, so the returned [`Program`] covers only what was actually emitted.
///
/// Throws [`Error`] for anything reported by the parser or by fixup resolution.
/// NOTE(review): the arena allocations use `try!` rather than `try`, so an
/// allocation failure is presumably fatal instead of being thrown - confirm.
export fn assemble(
    sourceKind: scanner::SourceKind,
    source: *[u8],
    textBuf: *mut [u32],
    dataBuf: *mut [u8],
    arena: *mut alloc::Arena,
    pool: *mut strings::Pool,
    dataBase: u32
) -> Program throws (Error) {
    let slotCap = source.len + SOURCE_CAP_PADDING;
    let tableCap = nextPowerOfTwo(slotCap * TABLE_CAPACITY_SCALE);

    let symbols = try! alloc::allocSlice(arena, @sizeOf(Symbol), @alignOf(Symbol), slotCap);
    let fixups = try! alloc::allocSlice(arena, @sizeOf(Fixup), @alignOf(Fixup), slotCap);
    let entries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
    let constEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);
    let exportEntries = try! alloc::allocSlice(arena, @sizeOf(dict::Entry), @alignOf(dict::Entry), tableCap);

    let mut a = Assembler {
        arena,
        scan: scanner::scanner(sourceKind, source, pool),
        // Empty slices over the caller's buffers; capacity is the buffer length.
        text: @sliceOf(textBuf.ptr, 0, textBuf.len),
        data: @sliceOf(dataBuf.ptr, 0, dataBuf.len),
        section: Section::Text,
        symbols: @sliceOf((symbols as *mut [Symbol]).ptr, 0, (symbols as *mut [Symbol]).len),
        symbolMap: dict::init(entries as *mut [dict::Entry]),
        constMap: dict::init(constEntries as *mut [dict::Entry]),
        exportMap: dict::init(exportEntries as *mut [dict::Entry]),
        fixups: @sliceOf((fixups as *mut [Fixup]).ptr, 0, (fixups as *mut [Fixup]).len),
        dataBase,
    };
    // Parse assembly source and emit instructions.
    try parser::parseProgram(&mut a);
    // Resolve fixups and finalize program.
    try emit::finishProgram(&mut a);

    return Program {
        text: a.text,
        data: a.data,
        symbols: a.symbols,
    };
}
520	+
/// Return the smallest power of two that is at least `value`, and never less
/// than [`MIN_TABLE_CAPACITY`].
fn nextPowerOfTwo(value: u32) -> u32 {
    // Start at the minimum table size and keep doubling until `value` fits.
    let mut capacity: u32 = MIN_TABLE_CAPACITY;
    while capacity < value {
        set capacity = capacity << 1;
    }
    return capacity;
}

lib/std/arch/rv64/asm/emit.rad added +210 -0

1	+	//! Assembler emission and fixup helpers.
2	+	use std::arch::rv64::emit;
3	+	use std::arch::rv64::encode;
4	+	use std::arch::rv64;
5	+	use std::fmt;
6	+
7	+	use std::collections::dict;
8	+	use std::lang::gen;
9	+
/// Define a symbol at the current offset of the active section.
///
/// The recorded offset is in bytes: the data length when the active section is
/// the data section, otherwise the number of emitted text words times
/// [`rv64::INSTR_SIZE`]. The symbol is flagged as exported when `name` is
/// already present in the export map at the time of definition.
///
/// Panics if the symbol buffer is full.
export fn defineSymbol(a: *mut super::Assembler, name: *[u8]) {
    if a.symbols.len >= a.symbols.cap {
        panic "asm: symbol buffer full";
    }
    let idx = a.symbols.len;
    let offset: i32 = a.data.len as i32
        if a.section == super::Section::Data
        else a.text.len as i32 * rv64::INSTR_SIZE;

    // Grow the slice by one element in place, then fill the new slot.
    set a.symbols = @sliceOf(a.symbols.ptr, idx + 1, a.symbols.cap);
    set a.symbols[idx] = super::Symbol {
        name,
        section: a.section,
        offset,
        isExported: dict::get(&a.exportMap, name) <> nil,
    };
    dict::insert(&mut a.symbolMap, name, idx as i32);
}
29	+
/// Push one encoded instruction word onto the end of the text section.
///
/// Throws [`super::Error::TextOverflow`] when the text buffer has no room left.
export fn emitText(a: *mut super::Assembler, word: u32) throws (super::Error) {
    let used = a.text.len;
    if used >= a.text.cap {
        throw super::Error::TextOverflow;
    }
    // Extend the slice by one word, then store into the new last slot.
    set a.text = @sliceOf(a.text.ptr, used + 1, a.text.cap);
    set a.text[used] = word;
}
39	+
/// Emit `words` consecutive `nop` instructions into the text section.
///
/// Stops and rethrows as soon as [`emitText`] reports an overflow.
export fn emitTextPadding(a: *mut super::Assembler, words: u32) throws (super::Error) {
    let mut remaining = words;
    while 0 < remaining {
        try emitText(a, encode::nop());
        set remaining = remaining - 1;
    }
}
46	+
/// Push one byte onto the end of the data section.
///
/// Throws [`super::Error::DataOverflow`] when the data buffer has no room left.
export fn emitByte(a: *mut super::Assembler, byte: u8) throws (super::Error) {
    let used = a.data.len;
    if used >= a.data.cap {
        throw super::Error::DataOverflow;
    }
    // Extend the slice by one byte, then store into the new last slot.
    set a.data = @sliceOf(a.data.ptr, used + 1, a.data.cap);
    set a.data[used] = byte;
}
56	+
/// Append the low `bytes` bytes of `bits` to the data section, least
/// significant byte first.
fn emitDataInt(a: *mut super::Assembler, bits: u64, bytes: u32) throws (super::Error) {
    for i in 0..bytes {
        // Byte `i` lives `i * 8` bits above the least significant bit.
        let shift = (i as u64) * super::BITS_PER_BYTE;
        let byte = (bits >> shift) & super::BYTE_MASK;
        try emitByte(a, byte as u8);
    }
}
63	+
/// Overwrite `bytes` already-emitted data bytes starting at `offset` with the
/// low bytes of `bits`, least significant byte first.
fn patchDataInt(a: *mut super::Assembler, offset: u32, bits: u64, bytes: u32) {
    for i in 0..bytes {
        let shift = (i as u64) * super::BITS_PER_BYTE;
        set a.data[offset + i] = ((bits >> shift) & super::BYTE_MASK) as u8;
    }
}
70	+
/// Emit `value` as a little-endian integer whose size is chosen by `width`:
/// [`rv64::WORD_SIZE`] bytes for a word, [`rv64::DWORD_SIZE`] bytes for a dword.
export fn emitDataValue(a: *mut super::Assembler, value: i64, width: super::DataWidth) throws (super::Error) {
    let bits = value as u64;
    match width {
        case super::DataWidth::Word => {
            try emitDataInt(a, bits, rv64::WORD_SIZE as u32);
        }
        case super::DataWidth::Dword => {
            try emitDataInt(a, bits, rv64::DWORD_SIZE as u32);
        }
    }
}
78	+
/// Record a data-section symbol fixup and reserve its bytes.
///
/// The fixup remembers the current data offset, then zero bytes of the
/// directive's width are emitted as a placeholder. [`finishProgram`] later
/// patches the placeholder with the address of `target`.
///
/// Throws [`super::Error::DataOverflow`] if the placeholder does not fit.
export fn recordDataFixup(a: *mut super::Assembler, target: *[u8], width: super::DataWidth) throws (super::Error) {
    // Capture the offset before emitting: the fixup points at the placeholder.
    let offset = a.data.len;
    match width {
        case super::DataWidth::Word => {
            recordFixup(a, target, super::FixupInfo::Word { offset });
            try emitDataInt(a, 0, rv64::WORD_SIZE as u32);
        }
        case super::DataWidth::Dword => {
            recordFixup(a, target, super::FixupInfo::Dword { offset });
            try emitDataInt(a, 0, rv64::DWORD_SIZE as u32);
        }
    }
}
93	+
/// Record a pending symbol fixup.
///
/// Appends a [`super::Fixup`] naming `symbol` with the given payload to the
/// assembler's fixup list. Panics if the fixup buffer is full.
fn recordFixup(a: *mut super::Assembler, symbol: *[u8], info: super::FixupInfo) {
    if a.fixups.len >= a.fixups.cap {
        panic "asm: fixup buffer full";
    }
    let idx = a.fixups.len as u32;
    // Grow the slice by one element in place, then fill the new slot.
    set a.fixups = @sliceOf(a.fixups.ptr, idx + 1, a.fixups.cap);
    set a.fixups[idx] = super::Fixup { symbol, info };
}
103	+
/// Record a text-section symbol fixup and reserve its instruction words.
///
/// The fixup is recorded first, then `words` `nop` placeholders are emitted.
/// [`finishProgram`] later overwrites the words at the index carried by `info`.
///
/// Throws [`super::Error::TextOverflow`] if the placeholders do not fit.
export fn recordTextFixup(a: *mut super::Assembler, symbol: *[u8], info: super::FixupInfo, words: u32) throws (super::Error) {
    recordFixup(a, symbol, info);
    try emitTextPadding(a, words);
}
109	+
/// Find a previously defined symbol by name.
///
/// Looks `name` up in the assembler's symbol map and returns the matching
/// symbol record, or `nil` when no symbol with that name has been defined.
fn findSymbol(a: *super::Assembler, name: *[u8]) -> ?super::Symbol {
    let idx = dict::get(&a.symbolMap, name)
        else return nil;
    return a.symbols[idx as u32];
}
116	+
/// Compute the final address of a data symbol: its section offset plus the
/// assembler's data base.
///
/// Throws [`super::Error::Invalid`] when `symbol` is not in the data section.
fn dataSymbolAddr(a: *super::Assembler, symbol: super::Symbol) -> i32 throws (super::Error) {
    if symbol.section <> super::Section::Data {
        throw super::Error::Invalid { offset: 0, message: "data address target must be in data section" };
    }
    let base = a.dataBase as i32;
    return symbol.offset + base;
}
124	+
/// Resolve every recorded fixup against the final symbol table and patch the
/// placeholder words or bytes that were reserved for it.
///
/// Throws [`super::Error::Invalid`] when a fixup names an undefined symbol, when
/// a branch or jump targets a non-text symbol or is out of range, or when a
/// data word refers to a non-data symbol. All of these errors carry offset 0.
export fn finishProgram(a: *mut super::Assembler) throws (super::Error) {
    for i in 0..a.fixups.len {
        let pending = a.fixups[i];
        let target = findSymbol(a, pending.symbol) else {
            throw super::Error::Invalid { offset: 0, message: "undefined symbol" };
        };
        match pending.info {
            case super::FixupInfo::Branch { op, rs1, rs2, index } => {
                if target.section <> super::Section::Text {
                    throw super::Error::Invalid { offset: 0, message: "branch target must be in text section" };
                }
                // Displacement is measured from the branch instruction itself.
                let siteOffset = index as i32 * rv64::INSTR_SIZE;
                let delta = target.offset - siteOffset;

                if not encode::isBranchImm(delta) {
                    throw super::Error::Invalid { offset: 0, message: "branch target out of range" };
                }
                set a.text[index] = encodeBranch(op, rs1, rs2, delta);
            }
            case super::FixupInfo::Jal { rd, index } => {
                if target.section <> super::Section::Text {
                    throw super::Error::Invalid { offset: 0, message: "jump target must be in text section" };
                }
                let siteOffset = index as i32 * rv64::INSTR_SIZE;
                let delta = target.offset - siteOffset;

                if not encode::isJumpImm(delta) {
                    throw super::Error::Invalid { offset: 0, message: "jump target out of range" };
                }
                set a.text[index] = encode::jal(rd, delta);
            }
            case super::FixupInfo::Addr { rd, index } => {
                let inData = target.section == super::Section::Data;

                // Text targets are PC-relative (`auipc`); data targets are
                // absolute addresses built from the data base (`lui`).
                let mut value = target.offset - (index as i32 * rv64::INSTR_SIZE);
                if inData {
                    set value = target.offset + (a.dataBase as i32);
                }
                let parts = emit::splitImm(value);
                set a.text[index] = encode::lui(rd, parts.hi)
                    if inData
                    else encode::auipc(rd, parts.hi);
                set a.text[index + 1] = encode::addi(rd, rd, parts.lo);
            }
            case super::FixupInfo::Word { offset } => {
                let resolved = try dataSymbolAddr(a, target);
                patchDataInt(a, offset, resolved as u64, rv64::WORD_SIZE as u32);
            }
            case super::FixupInfo::Dword { offset } => {
                let resolved = try dataSymbolAddr(a, target);
                patchDataInt(a, offset, resolved as u64, rv64::DWORD_SIZE as u32);
            }
        }
    }
}
181	+
182	+	/// Encode the branch selected by `op` by dispatching to the matching `encode::b*` function with `rs1`, `rs2` and `imm`; returns its result word.
183	+	export fn encodeBranch(op: super::BranchOp, rs1: gen::Reg, rs2: gen::Reg, imm: i32) -> u32 {
184	+	match op {
185	+	case super::BranchOp::Beq => return encode::beq(rs1, rs2, imm),
186	+	case super::BranchOp::Bne => return encode::bne(rs1, rs2, imm),
187	+	case super::BranchOp::Blt => return encode::blt(rs1, rs2, imm),
188	+	case super::BranchOp::Bge => return encode::bge(rs1, rs2, imm),
189	+	case super::BranchOp::Bltu => return encode::bltu(rs1, rs2, imm),
190	+	case super::BranchOp::Bgeu => return encode::bgeu(rs1, rs2, imm),
191	+	case super::BranchOp::Ble => return encode::ble(rs1, rs2, imm),
192	+	case super::BranchOp::Bgt => return encode::bgt(rs1, rs2, imm),
193	+	}
194	+	}
195	+
196	+	/// Strip the quote delimiters from `literal` and emit its bytes with `emitByte`, replacing each backslash pair with `fmt::decodeAsciiEscape` of the escaped byte. A backslash that is the final byte is emitted unchanged.
197	+	export fn emitDecodedString(a: *mut super::Assembler, literal: *[u8]) throws (super::Error) {
198	+	let raw = &literal[super::QUOTE_DELIM_LEN..literal.len - super::QUOTE_DELIM_LEN];
199	+	let mut i: u32 = 0;
200	+
201	+	while i < raw.len {
202	+	if raw[i] == '\\' and i + 1 < raw.len {
203	+	try emitByte(a, fmt::decodeAsciiEscape(raw[i + 1]));
204	+	set i += 2;
205	+	} else {
206	+	try emitByte(a, raw[i]);
207	+	set i += 1;
208	+	}
209	+	}
210	+	}

lib/std/arch/rv64/asm/parser.rad added +858 -0

1	+	//! Assembler parser pass.
2	+	use std::mem;
3	+	use std::fmt;
4	+	use std::lang::alloc;
5	+	use std::lang::strings;
6	+	use std::lang::parser;
7	+	use std::lang::gen;
8	+	use std::collections::dict;
9	+	use std::arch::rv64::encode;
10	+	use std::arch::rv64;
11	+
12	+	use super::emit;
13	+	use super::scanner;
14	+
15	+	/// Memory operand of a load or store as returned by `parseMemory`: a base register and a signed byte offset.
16	+	record MemOperand {
17	+	/// Base register inside the memory operand parentheses.
18	+	base: gen::Reg,
19	+	/// Signed byte offset preceding the base register; 0 when the source omits it.
20	+	offset: i32,
21	+	}
22	+
23	+	/// Parse the whole assembler source into `a`: load the first token, then parse top-level items until the end-of-file token. Stops at the first error thrown by `parseItem`.
24	+	export fn parseProgram(a: *mut super::Assembler) throws (super::Error) {
25	+	advance(a);
26	+
27	+	while a.scan.current.kind <> scanner::TokenKind::Eof {
28	+	try parseItem(a);
29	+	}
30	+	}
31	+
32	+	/// Round `value` up to `alignment` with `mem::alignUp`, or return nil when adding the padding would exceed `parser::U32_MAX`.
33	+	fn checkedAlignUp(value: u32, alignment: u32) -> ?u32 {
34	+	let limit = parser::U32_MAX - (alignment - 1);
35	+	if value <= limit {
36	+	return mem::alignUp(value, alignment);
37	+	}
38	+	return nil;
39	+	}
40	+
41	+	/// Move to the next token: the current token becomes `a.scan.previous` and `scanner::next` supplies the new current token.
42	+	fn advance(a: *mut super::Assembler) {
43	+	set a.scan.previous = a.scan.current;
44	+	set a.scan.current = scanner::next(&mut a.scan);
45	+	}
46	+
47	+	/// Advance past the current token if it is of `kind`; the result reports whether a token was consumed.
48	+	fn consume(a: *mut super::Assembler, kind: scanner::TokenKind) -> bool {
49	+	if a.scan.current.kind <> kind {
50	+	return false;
51	+	}
52	+	advance(a);
53	+	return true;
54	+	}
55	+
56	+	/// Build an `Error::Invalid` carrying `message` and the source offset of the current token.
57	+	fn fail(a: *super::Assembler, message: *[u8]) -> super::Error {
58	+	return super::Error::Invalid { offset: a.scan.current.offset, message };
59	+	}
60	+
61	+	/// Build an `Error::Invalid` carrying `message` and the source offset of `tok`.
62	+	fn failOnToken(tok: scanner::Token, message: *[u8]) -> super::Error {
63	+	return super::Error::Invalid { offset: tok.offset, message };
64	+	}
65	+
66	+	/// Throw an error located at `tok` unless the assembler is currently in the data section.
67	+	fn expectDataSection(a: *super::Assembler, tok: scanner::Token) throws (super::Error) {
68	+	if a.section <> super::Section::Data {
69	+	throw failOnToken(tok, "data directive is only valid in the data section");
70	+	}
71	+	}
72	+
73	+	/// Consume the current token if it is of `kind`; otherwise throw `message` located at the current (unconsumed) token.
74	+	fn expect(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) throws (super::Error) {
75	+	if not consume(a, kind) {
76	+	throw fail(a, message);
77	+	}
78	+	}
79	+
80	+	/// Consume a token of `kind` and return it (read back from `a.scan.previous`); throws `message` when the current token has another kind.
81	+	fn expectToken(a: *mut super::Assembler, kind: scanner::TokenKind, message: *[u8]) -> scanner::Token throws (super::Error) {
82	+	try expect(a, kind, message);
83	+	return a.scan.previous;
84	+	}
85	+
86	+	/// Throw `message` unless the current token is a semicolon. The semicolon itself is not consumed.
87	+	fn expectTerminator(a: *super::Assembler, message: *[u8]) throws (super::Error) {
88	+	if a.scan.current.kind <> scanner::TokenKind::Semicolon {
89	+	throw fail(a, message);
90	+	}
91	+	}
92	+
93	+	/// Narrow `value` to i32, throwing `message` at the current token when it lies outside `-I32_MIN_MAGNITUDE..=I32_MAX_VALUE`.
94	+	fn expectI32Value(a: *super::Assembler, value: i64, message: *[u8]) -> i32 throws (super::Error) {
95	+	if value < -super::I32_MIN_MAGNITUDE or value > super::I32_MAX_VALUE {
96	+	throw fail(a, message);
97	+	}
98	+	return value as i32;
99	+	}
100	+
101	+	/// Narrow `value` to i32 when `encode::isSmallImm64` accepts it; otherwise throw "immediate out of range" at the current token.
102	+	fn expectSmallImmValue(a: *super::Assembler, value: i64) -> i32 throws (super::Error) {
103	+	if encode::isSmallImm64(value) {
104	+	return value as i32;
105	+	}
106	+	throw fail(a, "immediate out of range");
107	+	}
108	+
109	+	/// Define label `name` through `emit::defineSymbol`, throwing "duplicate label" at `tok` when the symbol map already contains it.
110	+	fn defineSymbol(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
111	+	if dict::get(&a.symbolMap, name) <> nil {
112	+	throw failOnToken(tok, "duplicate label");
113	+	}
114	+	emit::defineSymbol(a, name);
115	+	}
116	+
117	+	/// Emit a parsed integer as data of the given `width`. `Word` values must fit in i32 (else "word literal out of range"); `Dword` values are passed through unchecked.
118	+	fn emitDataValue(a: *mut super::Assembler, value: i64, width: super::DataWidth) throws (super::Error) {
119	+	match width {
120	+	case super::DataWidth::Word =>
121	+	try emit::emitDataValue(a, (try expectI32Value(a, value, "word literal out of range")) as i64, width),
122	+	case super::DataWidth::Dword =>
123	+	try emit::emitDataValue(a, value, width),
124	+	}
125	+	}
126	+
127	+	/// Parse a name made of a first token of `kind` followed by any number of `::`-prefixed identifier segments. The result is the source text from `trimPrefix` bytes into the first token through the end of the last segment, interned in the scanner's string pool. Throws `message` when the first token is not of `kind`.
128	+	fn parseScopedName(
129	+	a: *mut super::Assembler,
130	+	kind: scanner::TokenKind,
131	+	message: *[u8],
132	+	trimPrefix: u32
133	+	) -> *[u8] throws (super::Error) {
134	+	let first = try expectToken(a, kind, message);
135	+	let start = first.offset + trimPrefix;
136	+	let mut end = first.offset + first.source.len;
137	+
138	+	while consume(a, scanner::TokenKind::ColonColon) {
139	+	let segment = try expectToken(a, scanner::TokenKind::Ident, "expected identifier after `::`");
140	+	set end = segment.offset + segment.source.len;
141	+	}
142	+	return strings::intern(a.scan.pool, &a.scan.source[start..end]);
143	+	}
144	+
145	+	/// Parse a bare, possibly `::`-scoped, symbol name that starts with an identifier token and has no sigil to trim.
146	+	fn parseSymbolName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
147	+	return try parseScopedName(a, scanner::TokenKind::Ident, "expected symbol name", 0);
148	+	}
149	+
150	+	/// Return `true` when `tok` is the `Label` or the `QuotedLabel` token kind.
151	+	fn isLabel(tok: scanner::TokenKind) -> bool {
152	+	return tok == scanner::TokenKind::Label or tok == scanner::TokenKind::QuotedLabel;
153	+	}
154	+
155	+	/// Consume a quoted label token, drop its label sigil and quote delimiters, unescape the contents into arena storage with `fmt::unescapeString`, and return the interned result. Panics when the arena allocation fails.
156	+	fn parseQuotedLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
157	+	let tok = try expectToken(a, scanner::TokenKind::QuotedLabel, "expected label name");
158	+	let rawStart = super::LABEL_SIGIL_LEN + super::QUOTE_DELIM_LEN;
159	+	let raw = &tok.source[rawStart..tok.source.len - super::QUOTE_DELIM_LEN];
160	+	let storage = try alloc::allocSlice(a.arena, 1, 1, raw.len) catch {
161	+	panic "asm: out of memory allocating quoted label";
162	+	} as *mut [u8];
163	+	let len = fmt::unescapeString(raw, storage);
164	+
165	+	return strings::intern(a.scan.pool, &storage[..len]);
166	+	}
167	+
168	+	/// Parse a label name used as a reference or definition: a quoted label goes through `parseQuotedLabelName`, any other form is parsed as a scoped name with the label sigil trimmed.
169	+	fn parseLabelName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
170	+	if a.scan.current.kind == scanner::TokenKind::QuotedLabel {
171	+	return try parseQuotedLabelName(a);
172	+	}
173	+	return try parseScopedName(a, scanner::TokenKind::Label, "expected label name", super::LABEL_SIGIL_LEN);
174	+	}
175	+
176	+	/// Consume a directive token and return its source text with the leading directive sigil removed.
177	+	fn parseDirectiveName(a: *mut super::Assembler) -> *[u8] throws (super::Error) {
178	+	let name = try expectToken(a, scanner::TokenKind::Directive, "expected directive name");
179	+	return &name.source[super::DIRECTIVE_SIGIL_LEN..];
180	+	}
181	+
182	+	/// Parse one top-level item chosen by the current token kind: an instruction (identifier) or directive, each followed by a required `;`, or a label definition, which takes no terminator. A number or any other token is an error.
183	+	fn parseItem(a: *mut super::Assembler) throws (super::Error) {
184	+	match a.scan.current.kind {
185	+	case scanner::TokenKind::Ident => {
186	+	let tok = a.scan.current;
187	+	let name = try parseSymbolName(a);
188	+	try parseInstruction(a, name, tok);
189	+	try expect(a, scanner::TokenKind::Semicolon, "expected `;` after instruction");
190	+	}
191	+	case scanner::TokenKind::Number => {
192	+	let tok = a.scan.current;
193	+	advance(a);
194	+	throw failOnToken(tok, "unexpected number at top level");
195	+	}
196	+	case scanner::TokenKind::Label, scanner::TokenKind::QuotedLabel => {
197	+	let tok = a.scan.current;
198	+	let name = try parseLabelName(a);
199	+	try defineSymbol(a, name, tok);
200	+	}
201	+	case scanner::TokenKind::Directive => {
202	+	let tok = a.scan.current;
203	+	let name = try parseDirectiveName(a);
204	+	try parseDirective(a, name, tok);
205	+	try expect(a, scanner::TokenKind::Semicolon, "expected `;` after directive");
206	+	}
207	+	else => throw fail(a, "expected label, instruction, or directive"),
208	+	}
209	+	}
210	+
211	+	/// Binary-search a table of `len` entries, whose names are read through `getName`, for `name`; returns the matching index or nil. Relies on `mem::cmp` yielding exactly -1 or 1 for unequal inputs: any other result is treated as a match.
212	+	fn findSortedNameIndex(name: *[u8], len: u32, getName: fn(u32) -> *[u8]) -> ?u32 {
213	+	let mut left: u32 = 0;
214	+	let mut right: u32 = len;
215	+
216	+	while left < right {
217	+	let mid = left + ((right - left) / 2);
218	+	let cmp = mem::cmp(name, getName(mid));
219	+
220	+	match cmp {
221	+	case -1 => set right = mid,
222	+	case 1 => set left = mid + 1,
223	+	else => return mid,
224	+	}
225	+	}
226	+	return nil;
227	+	}
228	+
229	+	/// Adapter for [`findSortedNameIndex`]: return the mnemonic of entry `index` in `super::INSTRUCTIONS`.
230	+	fn instructionNameAt(index: u32) -> *[u8] {
231	+	return super::INSTRUCTIONS[index].name;
232	+	}
233	+
234	+	/// Adapter for [`findSortedNameIndex`]: return the name of entry `index` in `super::DIRECTIVES`.
235	+	fn directiveNameAt(index: u32) -> *[u8] {
236	+	return super::DIRECTIVES[index].name;
237	+	}
238	+
239	+	/// Adapter for [`findSortedNameIndex`]: return the name of entry `index` in `super::REGISTERS`.
240	+	fn registerNameAt(index: u32) -> *[u8] {
241	+	return super::REGISTERS[index].name;
242	+	}
243	+
244	+	/// Adapter for [`findSortedNameIndex`]: return the name of entry `index` in `super::CSRS`.
245	+	fn csrNameAt(index: u32) -> *[u8] {
246	+	return super::CSRS[index].name;
247	+	}
248	+
249	+	/// Search `super::INSTRUCTIONS` for mnemonic `name` and return that entry's encoder descriptor, or nil when the mnemonic is unknown.
250	+	fn lookupInstruction(name: *[u8]) -> ?super::InstructionEncoder {
251	+	let index = findSortedNameIndex(name, super::INSTRUCTIONS.len, instructionNameAt) else {
252	+	return nil;
253	+	};
254	+	return super::INSTRUCTIONS[index].encoder;
255	+	}
256	+
257	+	/// Search `super::DIRECTIVES` for `name` and return that entry's directive kind, or nil when the directive is unknown.
258	+	fn classifyDirective(name: *[u8]) -> ?super::DirectiveKind {
259	+	let index = findSortedNameIndex(name, super::DIRECTIVES.len, directiveNameAt) else {
260	+	return nil;
261	+	};
262	+	return super::DIRECTIVES[index].kind;
263	+	}
264	+
265	+	/// Search `super::REGISTERS` for a register name (with its `%` prefix already removed) and return the register, or nil when unknown.
266	+	fn lookupRegister(name: *[u8]) -> ?gen::Reg {
267	+	let index = findSortedNameIndex(name, super::REGISTERS.len, registerNameAt) else {
268	+	return nil;
269	+	};
270	+	return super::REGISTERS[index].reg;
271	+	}
272	+
273	+	/// Search `super::CSRS` for `name` and return that entry's CSR number, or nil when unknown.
274	+	fn lookupCsr(name: *[u8]) -> ?u32 {
275	+	let index = findSortedNameIndex(name, super::CSRS.len, csrNameAt) else {
276	+	return nil;
277	+	};
278	+	return super::CSRS[index].csr;
279	+	}
280	+
281	+	/// Parse the operands of instruction `name`, whose mnemonic token `tok` has already been consumed, and emit it. Throws at `tok` when not in the text section or when the mnemonic is unknown; otherwise dispatches on the instruction's encoder form. The terminating `;` is left for the caller.
282	+	fn parseInstruction(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
283	+	if a.section <> super::Section::Text {
284	+	throw failOnToken(tok, "instructions are only valid in the text section");
285	+	}
286	+	let form = lookupInstruction(name) else {
287	+	throw failOnToken(tok, "unknown instruction");
288	+	};
289	+	match form {
290	+	case super::InstructionEncoder::NoOperand { enc } => {
291	+	if a.scan.current.kind <> scanner::TokenKind::Semicolon {
292	+	throw fail(a, "unexpected operand");
293	+	}
294	+	try emit::emitText(a, enc());
295	+	return;
296	+	}
297	+	case super::InstructionEncoder::Li => return try parseLi(a),
298	+	case super::InstructionEncoder::La => return try parseLa(a),
299	+	case super::InstructionEncoder::RR { enc } => return try parseRR(a, enc),
300	+	case super::InstructionEncoder::RRR { enc } => return try parseRRR(a, enc),
301	+	case super::InstructionEncoder::RRI { enc } => return try parseRRI(a, enc),
302	+	case super::InstructionEncoder::Shift { enc } =>
303	+	return try parseShift(a, enc, super::SHIFT_LIMIT, "shift amount out of range"),
304	+	case super::InstructionEncoder::WordShift { enc } =>
305	+	return try parseShift(a, enc, super::WORD_SHIFT_LIMIT, "word shift amount out of range"),
306	+	case super::InstructionEncoder::Load { enc } => return try parseLoad(a, enc),
307	+	case super::InstructionEncoder::Store { enc } => return try parseStore(a, enc),
308	+	case super::InstructionEncoder::Branch { op } => return try parseBranch(a, op),
309	+	case super::InstructionEncoder::BranchZero { op } => return try parseBranchZero(a, op),
310	+	case super::InstructionEncoder::Jal => return try parseJal(a),
311	+	case super::InstructionEncoder::Jump { rd } => return try parseJ(a, rd),
312	+	case super::InstructionEncoder::RdCsr { enc } => return try parseRdCsr(a, enc),
313	+	case super::InstructionEncoder::CsrRs1 { enc } => return try parseCsrRs1(a, enc),
314	+	case super::InstructionEncoder::Csrrw => return try parseCsrrw(a),
315	+	case super::InstructionEncoder::Csrsi => return try parseCsrsi(a),
316	+	case super::InstructionEncoder::Upper { enc } => return try parseUpper(a, enc),
317	+	}
318	+	}
319	+
320	+	/// Parse the `li` pseudo-instruction (register, value). A value accepted by `encode::isSmallImm64` becomes one `addi` from the zero register; any other value must fit in i32 and becomes `lui` followed by `addi`.
321	+	fn parseLi(a: *mut super::Assembler) throws (super::Error) {
322	+	let rd = try parseRegister(a);
323	+	let value = try parseValue(a);
324	+	if encode::isSmallImm64(value) {
325	+	try emit::emitText(a, encode::addi(rd, rv64::ZERO, value as i32));
326	+	return;
327	+	}
328	+	let imm = try expectI32Value(a, value, "li immediate out of range");
329	+	let split = rv64::emit::splitImm(imm);
330	+
331	+	try emit::emitText(a, encode::lui(rd, split.hi));
332	+	try emit::emitText(a, encode::addi(rd, rd, split.lo));
333	+	}
334	+
335	+	/// Parse the `la` pseudo-instruction (register, label): record an `Addr` fixup at the current text index and reserve two instruction words for it.
336	+	fn parseLa(a: *mut super::Assembler) throws (super::Error) {
337	+	let rd = try parseRegister(a);
338	+	let target = try parseLabelName(a);
339	+	let index = a.text.len;
340	+
341	+	try emit::recordTextFixup(a, target, super::FixupInfo::Addr { rd, index }, 2);
342	+	}
343	+
344	+	/// Parse a CSR instruction whose operands are a destination register followed by a CSR name, and emit the word produced by `enc`.
345	+	fn parseRdCsr(a: *mut super::Assembler, enc: fn(gen::Reg, u32) -> u32) throws (super::Error) {
346	+	let rd = try parseRegister(a);
347	+	let csr = try parseCsr(a);
348	+
349	+	try emit::emitText(a, enc(rd, csr));
350	+	}
351	+
352	+	/// Parse a CSR instruction whose operands are a CSR name followed by a source register, and emit the word produced by `enc`.
353	+	fn parseCsrRs1(a: *mut super::Assembler, enc: fn(u32, gen::Reg) -> u32) throws (super::Error) {
354	+	let csr = try parseCsr(a);
355	+	let rs1 = try parseRegister(a);
356	+
357	+	try emit::emitText(a, enc(csr, rs1));
358	+	}
359	+
360	+	/// Parse `csrrw`: destination register, CSR name, then source register, and emit the encoded instruction.
361	+	fn parseCsrrw(a: *mut super::Assembler) throws (super::Error) {
362	+	let rd = try parseRegister(a);
363	+	let csr = try parseCsr(a);
364	+	let rs1 = try parseRegister(a);
365	+
366	+	try emit::emitText(a, encode::csrrw(rd, csr, rs1));
367	+	}
368	+
369	+	/// Parse a CSR immediate instruction: a CSR name then an immediate that must satisfy `0 <= imm < CSR_IMM_LIMIT`; emits `csrsi`.
370	+	fn parseCsrsi(a: *mut super::Assembler) throws (super::Error) {
371	+	let csr = try parseCsr(a);
372	+	let imm = try parseValue(a);
373	+	if imm < 0 or imm >= super::CSR_IMM_LIMIT {
374	+	throw fail(a, "CSR immediate out of range");
375	+	}
376	+	try emit::emitText(a, encode::csrsi(csr, imm as u32));
377	+	}
378	+
379	+	/// Parse a two-register instruction (destination, then source) and emit the word produced by `enc`.
380	+	fn parseRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg) -> u32) throws (super::Error) {
381	+	let rd = try parseRegister(a);
382	+	let rs = try parseRegister(a);
383	+
384	+	try emit::emitText(a, enc(rd, rs));
385	+	}
386	+
387	+	/// Parse a three-register instruction (destination, first source, second source) and emit the word produced by `enc`.
388	+	fn parseRRR(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, gen::Reg) -> u32) throws (super::Error) {
389	+	let rd = try parseRegister(a);
390	+	let rs1 = try parseRegister(a);
391	+	let rs2 = try parseRegister(a);
392	+
393	+	try emit::emitText(a, enc(rd, rs1, rs2));
394	+	}
395	+
396	+	/// Parse a register-register-immediate instruction; the immediate is validated by `parseSmallImm` before the word produced by `enc` is emitted.
397	+	fn parseRRI(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
398	+	let rd = try parseRegister(a);
399	+	let rs1 = try parseRegister(a);
400	+	let imm = try parseSmallImm(a);
401	+
402	+	try emit::emitText(a, enc(rd, rs1, imm));
403	+	}
404	+
405	+	/// Parse a shift-immediate instruction (destination, source, amount). A negative amount is rejected with a fixed message; an amount at or above `limit` is rejected with `message`.
406	+	fn parseShift(
407	+	a: *mut super::Assembler,
408	+	enc: fn(gen::Reg, gen::Reg, i32) -> u32,
409	+	limit: i32,
410	+	message: *[u8]
411	+	) throws (super::Error) {
412	+	let rd = try parseRegister(a);
413	+	let rs1 = try parseRegister(a);
414	+	let shamt64 = try parseValue(a);
415	+
416	+	if shamt64 < 0 {
417	+	throw fail(a, "shift amount must be non-negative");
418	+	}
419	+	if shamt64 >= limit as i64 {
420	+	throw fail(a, message);
421	+	}
422	+	let shamt = shamt64 as i32;
423	+
424	+	try emit::emitText(a, enc(rd, rs1, shamt));
425	+	}
426	+
427	+	/// Parse a load: destination register then an `offset(base)` memory operand; emits `enc(rd, base, offset)`.
428	+	fn parseLoad(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
429	+	let rd = try parseRegister(a);
430	+	let memop = try parseMemory(a);
431	+
432	+	try emit::emitText(a, enc(rd, memop.base, memop.offset));
433	+	}
434	+
435	+	/// Parse a store: source register then an `offset(base)` memory operand; emits `enc(rs2, base, offset)`.
436	+	fn parseStore(a: *mut super::Assembler, enc: fn(gen::Reg, gen::Reg, i32) -> u32) throws (super::Error) {
437	+	let rs2 = try parseRegister(a);
438	+	let memop = try parseMemory(a);
439	+
440	+	try emit::emitText(a, enc(rs2, memop.base, memop.offset));
441	+	}
442	+
443	+	/// Parse a two-register branch: both comparison registers, then the target handled by `parseBranchLabel`.
444	+	fn parseBranch(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) {
445	+	let rs1 = try parseRegister(a);
446	+	let rs2 = try parseRegister(a);
447	+
448	+	try parseBranchLabel(a, op, rs1, rs2);
449	+	}
450	+
451	+	/// Parse a label name if the current token is a label form; return nil without consuming anything otherwise.
452	+	fn parseOptionalLabel(a: *mut super::Assembler) -> ?*[u8] throws (super::Error) {
453	+	if not isLabel(a.scan.current.kind) {
454	+	return nil;
455	+	}
456	+	return try parseLabelName(a);
457	+	}
458	+
459	+	/// Parse a branch target. A label records a `Branch` fixup at the current text index and reserves one word; otherwise a validated immediate offset is encoded and emitted directly.
460	+	fn parseBranchLabel(a: *mut super::Assembler, op: super::BranchOp, rs1: gen::Reg, rs2: gen::Reg) throws (super::Error) {
461	+	let index = a.text.len;
462	+	if let target = try parseOptionalLabel(a) {
463	+	try emit::recordTextFixup(a, target, super::FixupInfo::Branch { op, rs1, rs2, index }, 1);
464	+	return;
465	+	}
466	+	let imm = try parseBranchImm(a);
467	+	try emit::emitText(a, emit::encodeBranch(op, rs1, rs2, imm));
468	+	}
469	+
470	+	/// Parse a branch-to-zero pseudo-instruction: one register, compared against the zero register, then the branch target.
471	+	fn parseBranchZero(a: *mut super::Assembler, op: super::BranchOp) throws (super::Error) {
472	+	let rs = try parseRegister(a);
473	+	try parseBranchLabel(a, op, rs, rv64::ZERO);
474	+	}
475	+
476	+	/// Parse `jal` with an explicit destination register, then hand the target to `parseJ`.
477	+	fn parseJal(a: *mut super::Assembler) throws (super::Error) {
478	+	let rd = try parseRegister(a);
479	+	try parseJ(a, rd);
480	+	}
481	+
482	+	/// Parse a jump target for `jal` with link register `rd`. A label records a `Jal` fixup at the current text index and reserves one word; otherwise a validated immediate offset is encoded and emitted directly.
483	+	fn parseJ(a: *mut super::Assembler, rd: gen::Reg) throws (super::Error) {
484	+	let index = a.text.len;
485	+	if let target = try parseOptionalLabel(a) {
486	+	try emit::recordTextFixup(a, target, super::FixupInfo::Jal { rd, index }, 1);
487	+	return;
488	+	}
489	+	let imm = try parseJumpImm(a);
490	+	try emit::emitText(a, encode::jal(rd, imm));
491	+	}
492	+
493	+	/// Parse an upper-immediate instruction: destination register then an immediate that must lie in `0..=UPPER_IMM_MAX_VALUE`; emits the word produced by `enc`.
494	+	fn parseUpper(a: *mut super::Assembler, enc: fn(gen::Reg, i32) -> u32) throws (super::Error) {
495	+	let rd = try parseRegister(a);
496	+	let imm64 = try parseValue(a);
497	+	if imm64 < 0 or imm64 > super::UPPER_IMM_MAX_VALUE {
498	+	throw fail(a, "upper immediate out of range");
499	+	}
500	+	try emit::emitText(a, enc(rd, imm64 as i32));
501	+	}
502	+
503	+	/// Parse the operands of directive `name`, whose token `tok` has already been consumed. `.text`/`.data` switch the current section and take no operand; data-emitting directives (`Ascii`, `Byte`, `Dword`, `Space`, `Word`) are rejected at `tok` outside the data section. The terminating `;` is left for the caller.
504	+	fn parseDirective(a: *mut super::Assembler, name: *[u8], tok: scanner::Token) throws (super::Error) {
505	+	let directive = classifyDirective(name) else {
506	+	throw failOnToken(tok, "unknown directive");
507	+	};
508	+	match directive {
509	+	case super::DirectiveKind::Text => {
510	+	try expectTerminator(a, "unexpected operand");
511	+	set a.section = super::Section::Text;
512	+	return;
513	+	}
514	+	case super::DirectiveKind::Data => {
515	+	try expectTerminator(a, "unexpected operand");
516	+	set a.section = super::Section::Data;
517	+	return;
518	+	}
519	+	case super::DirectiveKind::Align =>
520	+	return try parseAlignDirective(a),
521	+	case super::DirectiveKind::Ascii => {
522	+	try expectDataSection(a, tok);
523	+	return try parseStringDirective(a);
524	+	}
525	+	case super::DirectiveKind::Byte => {
526	+	try expectDataSection(a, tok);
527	+	return try parseByteDirective(a);
528	+	}
529	+	case super::DirectiveKind::Constant =>
530	+	return try parseConstantDirective(a),
531	+	case super::DirectiveKind::Dword => {
532	+	try expectDataSection(a, tok);
533	+	return try parseIntDirective(a, super::DataWidth::Dword);
534	+	}
535	+	case super::DirectiveKind::Export =>
536	+	return try parseExportDirective(a),
537	+	case super::DirectiveKind::Space => {
538	+	try expectDataSection(a, tok);
539	+	return try parseSpaceDirective(a);
540	+	}
541	+	case super::DirectiveKind::Word => {
542	+	try expectDataSection(a, tok);
543	+	return try parseIntDirective(a, super::DataWidth::Word);
544	+	}
545	+	}
546	+	}
547	+
548	+	/// Parse a `.constant` directive: a symbol name followed by a constant expression whose value must fit in i32; the pair is inserted into the assembler's constant map.
549	+	fn parseConstantDirective(a: *mut super::Assembler) throws (super::Error) {
550	+	let name = try parseSymbolName(a);
551	+	let value = try expectI32Value(a, try parseExpr(a), "constant out of range");
552	+
553	+	dict::insert(&mut a.constMap, name, value);
554	+	}
555	+
556	+	/// Parse a `.export` directive: record the label name in the export map and, when the symbol is already defined, mark it exported straight away.
557	+	fn parseExportDirective(a: *mut super::Assembler) throws (super::Error) {
558	+	let name = try parseLabelName(a);
559	+	dict::insert(&mut a.exportMap, name, 1);
560	+	if let idx = dict::get(&a.symbolMap, name) {
561	+	set a.symbols[idx as u32].isExported = true;
562	+	}
563	+	}
564	+
565	+	/// Parse a `.space` directive and emit that many zero bytes. A negative count is an error; a count larger than the remaining data capacity throws `DataOverflow` before anything is emitted.
566	+	fn parseSpaceDirective(a: *mut super::Assembler) throws (super::Error) {
567	+	let count = try parseValue(a);
568	+	if count < 0 {
569	+	throw fail(a, "space size must be non-negative");
570	+	}
571	+	let remaining = a.data.cap - a.data.len;
572	+	if count > remaining as i64 {
573	+	throw super::Error::DataOverflow;
574	+	}
575	+	for _ in 0..count as u32 {
576	+	try emit::emitByte(a, 0);
577	+	}
578	+	}
579	+
580	+	/// Parse an `.align` directive and pad the current section up to the requested alignment. The amount must be a positive power of two that fits in u32; in the text section it must also be a multiple of the instruction size. Throws `TextOverflow`/`DataOverflow` when the padded size would exceed the section capacity.
581	+	fn parseAlignDirective(a: *mut super::Assembler) throws (super::Error) {
582	+	let amount64 = try parseValue(a);
583	+	if amount64 <= 0 {
584	+	throw fail(a, "alignment must be positive");
585	+	}
586	+	if amount64 > super::U32_MAX_VALUE {
587	+	throw fail(a, "alignment out of range");
588	+	}
589	+	let amount = amount64 as u32;
590	+	if (amount & (amount - 1)) <> 0 {
591	+	throw fail(a, "alignment must be a power of two");
592	+	}
593	+	match a.section {
594	+	case super::Section::Text => {
595	+	if amount % rv64::INSTR_SIZE as u32 <> 0 {
596	+	throw fail(a, "text alignment must be a multiple of 4");
597	+	}
598	+	let bytes = a.text.len * rv64::INSTR_SIZE as u32;
599	+	let aligned = checkedAlignUp(bytes, amount) else {
600	+	throw super::Error::TextOverflow;
601	+	};
602	+	let words = (aligned - bytes) / rv64::INSTR_SIZE as u32;
603	+	if words > a.text.cap - a.text.len {
604	+	throw super::Error::TextOverflow;
605	+	}
606	+	try emit::emitTextPadding(a, words);
607	+	}
608	+	case super::Section::Data => {
609	+	let aligned = checkedAlignUp(a.data.len, amount) else {
610	+	throw super::Error::DataOverflow;
611	+	};
612	+	if aligned > a.data.cap {
613	+	throw super::Error::DataOverflow;
614	+	}
615	+	for _ in a.data.len..aligned {
616	+	try emit::emitByte(a, 0);
617	+	}
618	+	}
619	+	}
620	+	}
621	+
622	+	/// Parse a `.byte` directive: a comma-separated list whose items are char literals or values in `0..=U8_MAX_VALUE`, each emitted as one byte.
623	+	fn parseByteDirective(a: *mut super::Assembler) throws (super::Error) {
624	+	loop {
625	+	if a.scan.current.kind == scanner::TokenKind::Char {
626	+	let ch = parseCharLiteral(a.scan.current) else {
627	+	throw fail(a, "invalid char literal");
628	+	};
629	+	try emit::emitByte(a, ch);
630	+	advance(a);
631	+	} else {
632	+	let value = try parseValue(a);
633	+	if value < 0 or value > super::U8_MAX_VALUE {
634	+	throw fail(a, "byte literal out of range");
635	+	}
636	+	try emit::emitByte(a, value as u8);
637	+	}
638	+	if not consume(a, scanner::TokenKind::Comma) {
639	+	return;
640	+	}
641	+	}
642	+	}
643	+
644	+	/// Parse a fixed-width integer data directive: a comma-separated list whose items are labels (recorded as data fixups of `width`), char literals, or integer values.
645	+	fn parseIntDirective(a: *mut super::Assembler, width: super::DataWidth) throws (super::Error) {
646	+	loop {
647	+	if isLabel(a.scan.current.kind) {
648	+	let target = try parseLabelName(a);
649	+	try emit::recordDataFixup(a, target, width);
650	+	} else if a.scan.current.kind == scanner::TokenKind::Char {
651	+	let ch = parseCharLiteral(a.scan.current) else {
652	+	throw fail(a, "invalid char literal");
653	+	};
654	+	advance(a);
655	+	try emitDataValue(a, ch as i64, width);
656	+	} else {
657	+	try emitDataValue(a, try parseValue(a), width);
658	+	}
659	+	if not consume(a, scanner::TokenKind::Comma) {
660	+	return;
661	+	}
662	+	}
663	+	}
664	+
665	+	/// Parse an `.ascii` directive: one or more comma-separated string literals, each decoded and emitted by `emit::emitDecodedString`.
666	+	fn parseStringDirective(a: *mut super::Assembler) throws (super::Error) {
667	+	loop {
668	+	let literal = try expectToken(a, scanner::TokenKind::String, "expected string literal");
669	+	try emit::emitDecodedString(a, literal.source);
670	+	if not consume(a, scanner::TokenKind::Comma) {
671	+	return;
672	+	}
673	+	}
674	+	}
675	+
676	+	/// Consume a register token and resolve it to a register by looking up its text after the first byte; throws "unknown register" at the token when the lookup fails.
677	+	fn parseRegister(a: *mut super::Assembler) -> gen::Reg throws (super::Error) {
678	+	let tok = try expectToken(a, scanner::TokenKind::Register, "expected register");
679	+	let reg = lookupRegister(&tok.source[1..]) else {
680	+	throw super::Error::Invalid { offset: tok.offset, message: "unknown register" };
681	+	};
682	+	return reg;
683	+	}
684	+
685	+	/// Parse a simple immediate: an optional single leading `-` followed by a number or a named constant. No other operators are accepted here.
686	+	fn parseValue(a: *mut super::Assembler) -> i64 throws (super::Error) {
687	+	if consume(a, scanner::TokenKind::Minus) {
688	+	return -(try parseValuePrimary(a));
689	+	}
690	+	return try parseValuePrimary(a);
691	+	}
692	+
693	+	/// Parse the unsigned part of a simple immediate: an integer literal or a named constant; any other token is an error.
694	+	fn parseValuePrimary(a: *mut super::Assembler) -> i64 throws (super::Error) {
695	+	if a.scan.current.kind == scanner::TokenKind::Number {
696	+	return try parseInteger(a);
697	+	}
698	+	if a.scan.current.kind == scanner::TokenKind::Ident {
699	+	return try parseConstantValue(a);
700	+	}
701	+	throw fail(a, "expected number or constant");
702	+	}
703	+
704	+	/// Parse an additive constant expression: multiplicative terms joined by `+` or `-`, folded left to right into an i64.
705	+	fn parseExpr(a: *mut super::Assembler) -> i64 throws (super::Error) {
706	+	let mut value = try parseExprMul(a);
707	+
708	+	while a.scan.current.kind == scanner::TokenKind::Plus or a.scan.current.kind == scanner::TokenKind::Minus {
709	+	let op = a.scan.current.kind;
710	+	advance(a);
711	+
712	+	let rhs = try parseExprMul(a);
713	+	if op == scanner::TokenKind::Plus {
714	+	set value += rhs;
715	+	} else {
716	+	set value -= rhs;
717	+	}
718	+	}
719	+	return value;
720	+	}
721	+
722	+	/// Parse a multiplicative expression: unary terms joined by `*` or `/`, folded left to right into an i64. Division by zero throws "division by zero".
723	+	fn parseExprMul(a: *mut super::Assembler) -> i64 throws (super::Error) {
724	+	let mut value = try parseExprUnary(a);
725	+
726	+	while a.scan.current.kind == scanner::TokenKind::Star or a.scan.current.kind == scanner::TokenKind::Slash {
727	+	let op = a.scan.current.kind;
728	+	advance(a);
729	+
730	+	let rhs = try parseExprUnary(a);
731	+	if op == scanner::TokenKind::Star {
732	+	set value *= rhs;
733	+	} else {
734	+	if rhs == 0 {
735	+	throw fail(a, "division by zero");
736	+	}
737	+	set value /= rhs;
738	+	}
739	+	}
740	+	return value;
741	+	}
742	+
743	+	/// Parse a unary expression: any number of prefix `-` (negate) or `+` (no-op) operators applied to an expression atom.
744	+	fn parseExprUnary(a: *mut super::Assembler) -> i64 throws (super::Error) {
745	+	if consume(a, scanner::TokenKind::Minus) {
746	+	return -(try parseExprUnary(a));
747	+	}
748	+	if consume(a, scanner::TokenKind::Plus) {
749	+	return try parseExprUnary(a);
750	+	}
751	+	return try parseExprPrimary(a);
752	+	}
753	+
754	+	/// Parse an expression atom: a parenthesized expression, an integer literal, or a named constant; any other token is an error.
755	+	fn parseExprPrimary(a: *mut super::Assembler) -> i64 throws (super::Error) {
756	+	if consume(a, scanner::TokenKind::LParen) {
757	+	let value = try parseExpr(a);
758	+	try expect(a, scanner::TokenKind::RParen, "expected `)`");
759	+	return value;
760	+	}
761	+	if a.scan.current.kind == scanner::TokenKind::Number {
762	+	return try parseInteger(a);
763	+	}
764	+	if a.scan.current.kind == scanner::TokenKind::Ident {
765	+	return try parseConstantValue(a);
766	+	}
767	+	throw fail(a, "expected expression");
768	+	}
769	+
770	+	/// Parse a symbol name and return the value stored for it in the constant map, widened to i64. An unknown name throws "undefined constant" at the offset of the last consumed token.
771	+	fn parseConstantValue(a: *mut super::Assembler) -> i64 throws (super::Error) {
772	+	let name = try parseSymbolName(a);
773	+	let value = dict::get(&a.constMap, name) else {
774	+	throw super::Error::Invalid { offset: a.scan.previous.offset, message: "undefined constant" };
775	+	};
776	+	return value as i64;
777	+	}
778	+
779	+	/// Parse a symbol name and resolve it to a CSR number through `lookupCsr`. An unknown name throws "unknown CSR" at the offset of the last consumed token.
780	+	fn parseCsr(a: *mut super::Assembler) -> u32 throws (super::Error) {
781	+	let name = try parseSymbolName(a);
782	+	let csr = lookupCsr(name) else {
783	+	throw super::Error::Invalid { offset: a.scan.previous.offset, message: "unknown CSR" };
784	+	};
785	+	return csr;
786	+	}
787	+
/// Parse an `offset(base)` memory operand.
///
/// The offset is optional: when the operand starts directly with `(`,
/// the offset is zero. A present offset goes through the same check as
/// small immediates (`expectSmallImmValue`).
fn parseMemory(a: *mut super::Assembler) -> MemOperand throws (super::Error) {
    let mut displacement: i32 = 0;
    let hasOffset = a.scan.current.kind <> scanner::TokenKind::LParen;
    if hasOffset {
        let raw = try parseValue(a);
        set displacement = try expectSmallImmValue(a, raw);
    }
    try expect(a, scanner::TokenKind::LParen, "expected `(`");
    let baseReg = try parseRegister(a);
    try expect(a, scanner::TokenKind::RParen, "expected `)`");

    return MemOperand { base: baseReg, offset: displacement };
}
800	+
/// Parse a value and validate it as a small immediate
/// (a signed 12-bit field, per `expectSmallImmValue`).
fn parseSmallImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let imm = try expectSmallImmValue(a, raw);
    return imm;
}
805	+
/// Parse a branch immediate: the value must fit in an `i32` and then
/// satisfy `encode::isBranchImm`. Both failures report
/// "branch immediate out of range".
fn parseBranchImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let value = try expectI32Value(a, raw, "branch immediate out of range");
    if encode::isBranchImm(value) {
        return value;
    }
    throw fail(a, "branch immediate out of range");
}
814	+
/// Parse a jump immediate: the value must fit in an `i32` and then
/// satisfy `encode::isJumpImm`. Both failures report
/// "jump immediate out of range".
fn parseJumpImm(a: *mut super::Assembler) -> i32 throws (super::Error) {
    let raw = try parseValue(a);
    let value = try expectI32Value(a, raw, "jump immediate out of range");
    if encode::isJumpImm(value) {
        return value;
    }
    throw fail(a, "jump immediate out of range");
}
823	+
/// Consume a `Number` token and convert its text to an `i64`.
/// Throws "invalid integer literal" on the token when the text does not
/// convert.
fn parseInteger(a: *mut super::Assembler) -> i64 throws (super::Error) {
    let tok = try expectToken(a, scanner::TokenKind::Number, "expected number");
    if let value = parseIntegerText(tok.source) {
        return value;
    }
    throw failOnToken(tok, "invalid integer literal");
}
832	+
/// Convert integer literal text to an `i64`.
///
/// Returns `nil` when `fmt::parseInt` rejects the text or when the
/// sign/magnitude pair does not fit in an `i64`.
fn parseIntegerText(text: *[u8]) -> ?i64 {
    let literal = try fmt::parseInt(text) catch {
        return nil;
    };
    let magnitude = literal.magnitude;

    if not literal.negative {
        if magnitude > parser::I64_MAX_MAGNITUDE {
            return nil;
        }
        return magnitude as i64;
    }
    if magnitude > parser::I64_MIN_MAGNITUDE {
        return nil;
    }
    // The minimum value is returned directly rather than by negating the
    // cast magnitude.
    if magnitude == parser::I64_MIN_MAGNITUDE {
        return parser::I64_MIN;
    }
    return -(magnitude as i64);
}
852	+
/// Convert a character literal token to a single byte via
/// `fmt::parseChar`; returns `nil` when that conversion fails.
fn parseCharLiteral(tok: scanner::Token) -> ?u8 {
    let byte = try fmt::parseChar(tok.source) catch {
        return nil;
    };
    return byte;
}

lib/std/arch/rv64/asm/scanner.rad added +315 -0

1	+	//! Assembly-specific lexical scanner.
2	+	@test mod tests;
3	+
4	+	use std::char;
5	+	use std::lang::strings;
6	+
/// Kinds of token produced by the assembler scanner.
export union TokenKind {
    /// End of input, produced once the source buffer is exhausted.
    Eof,
    /// Scan error; [`Token::source`] carries the error message.
    Invalid,

    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `::`
    ColonColon,
    /// `;`
    Semicolon,
    /// `-`
    Minus,
    /// `+`
    Plus,
    /// `/`
    Slash,
    /// `*`
    Star,

    /// Bare identifier: mnemonics, constants, CSR names, and symbol segments.
    Ident,
    /// Identifier-shaped label; the token text includes the leading `@`.
    Label,
    /// Quoted label; the token text includes the leading `@` and both quotes.
    QuotedLabel,
    /// Directive; the token text includes the leading `.`.
    Directive,
    /// Register; the token text includes the leading `%`.
    Register,

    /// String literal, delimiters included.
    String,
    /// Character literal, delimiters included.
    Char,
    /// Integer literal.
    Number,
}
43	+
/// Origin of a piece of assembler source.
export union SourceKind {
    /// The source was loaded from the file at `path`.
    File { path: *[u8] },
    /// The source was supplied as an inline string.
    String,
}
51	+
/// Scanner state for one assembler source buffer.
export record Scanner {
    /// Where the source came from.
    sourceKind: SourceKind,
    /// The bytes being scanned.
    source: *[u8],
    /// Offset in `source` where the token being scanned starts.
    token: u32,
    /// Offset in `source` of the scan cursor.
    cursor: u32,
    /// Current token as observed by the parser.
    current: Token,
    /// Previously consumed token as observed by the parser.
    previous: Token,
    /// Pool used to intern identifier-shaped token text.
    pool: *mut strings::Pool,
}
69	+
/// A single scanned token.
export record Token {
    /// What kind of token this is.
    kind: TokenKind,
    /// Source text of the token; for `Invalid` tokens, the error message.
    source: *[u8],
    /// Byte offset of the token in the input buffer.
    offset: u32,
}
79	+
/// Create a new assembler scanner over `source`.
///
/// The scanner starts at offset 0. Both the `current` and `previous`
/// token slots start as an `Invalid` token with an empty message, since
/// nothing has been scanned yet. `pool` is stored and later used to
/// intern identifier-shaped token text.
export fn scanner(sourceKind: SourceKind, source: *[u8], pool: *mut strings::Pool) -> Scanner {
    let invalidToken = invalid(0, "");
    return Scanner {
        sourceKind,
        source,
        token: 0,
        cursor: 0,
        current: invalidToken,
        previous: invalidToken,
        pool,
    };
}
93	+
/// Build an `Invalid` token at `offset`; `message` is stored in the
/// token's `source` field in place of source text.
export fn invalid(offset: u32, message: *[u8]) -> Token {
    let token = Token { kind: TokenKind::Invalid, source: message, offset: offset };
    return token;
}
98	+
/// Return `true` once the cursor has reached or passed the end of the source.
export fn isEof(s: *Scanner) -> bool {
    return s.source.len <= s.cursor;
}
103	+
/// Return the character under the cursor without advancing, or `nil` at
/// end of input.
fn current(s: *Scanner) -> ?u8 {
    if s.cursor < s.source.len {
        return s.source[s.cursor];
    }
    return nil;
}
111	+
/// Return the character one past the cursor without advancing, or `nil`
/// when there is no such character.
fn peek(s: *Scanner) -> ?u8 {
    let nextIdx = s.cursor + 1;
    if nextIdx < s.source.len {
        return s.source[nextIdx];
    }
    return nil;
}
119	+
/// Move the cursor forward by one and return the character it stepped over.
/// Performs no end-of-input check of its own.
fn advance(s: *mut Scanner) -> u8 {
    let idx = s.cursor;
    set s.cursor = idx + 1;
    return s.source[idx];
}
125	+
/// Advance past `expected` when it is the character under the cursor.
/// Returns whether a character was consumed; at end of input nothing is.
fn consume(s: *mut Scanner, expected: u8) -> bool {
    let ch = current(s) else {
        return false;
    };
    if ch <> expected {
        return false;
    }
    advance(s);
    return true;
}
134	+
/// Skip spaces, newlines, carriage returns, tabs, and `//` line comments.
/// Stops at the first character that starts a token, or at end of input.
fn skipWhitespace(s: *mut Scanner) {
    while let ch = current(s) {
        if ch == ' ' or ch == '\n' or ch == '\r' or ch == '\t' {
            advance(s);
        } else {
            // Only a `//` pair continues the skip; a lone `/` is a token.
            if ch <> '/' {
                return;
            }
            let following = peek(s) else {
                return;
            };
            if following <> '/' {
                return;
            }
            // Consume up to, but not including, the newline; the outer
            // loop then skips the newline as ordinary whitespace.
            while let lineCh = current(s); lineCh <> '\n' {
                advance(s);
            }
        }
    }
}
153	+
/// Scan and return the next assembler token.
///
/// Leading whitespace and comments are skipped first. At end of input
/// an `Eof` token is returned; unrecognized input yields an `Invalid`
/// token.
export fn next(s: *mut Scanner) -> Token {
    skipWhitespace(s);
    // The token starts where the cursor rests after skipping trivia.
    set s.token = s.cursor;

    if isEof(s) {
        return tok(s, TokenKind::Eof);
    }
    let ch = advance(s);

    if ch == '_' or char::isAlpha(ch) {
        return scanIdentToken(s, TokenKind::Ident);
    }
    if char::isDigit(ch) {
        return scanNumber(s);
    }

    match ch {
        case '(' => return tok(s, TokenKind::LParen),
        case ')' => return tok(s, TokenKind::RParen),
        case ',' => return tok(s, TokenKind::Comma),
        case ';' => return tok(s, TokenKind::Semicolon),
        case ':' => {
            // Only `::` is accepted here; a single `:` is reported as invalid.
            if not consume(s, ':') {
                return invalid(s.token, "unexpected `:`");
            }
            return tok(s, TokenKind::ColonColon);
        }
        case '"' => return scanString(s),
        case '\'' => return scanChar(s),
        case '.' => return scanPrefixedToken(s, TokenKind::Directive, "expected directive name after `.`"),
        case '@' => return scanLabelToken(s),
        case '%' => return scanPrefixedToken(s, TokenKind::Register, "expected register after `%`"),
        case '-' => return scanSignedNumberOrToken(s, TokenKind::Minus),
        case '+' => return scanSignedNumberOrToken(s, TokenKind::Plus),
        case '/' => return tok(s, TokenKind::Slash),
        case '*' => return tok(s, TokenKind::Star),
        else => return invalid(s.token, "unexpected character"),
    }
}
194	+
/// Build a token of `kind` whose text is the source range from the
/// token start to the cursor.
fn tok(s: *Scanner, kind: TokenKind) -> Token {
    let text = &s.source[s.token..s.cursor];
    return Token { kind, source: text, offset: s.token };
}
199	+
/// Advance over identifier continuation characters: ASCII letters,
/// digits, and `_`.
fn scanIdentifierBody(s: *mut Scanner) {
    while let ch = current(s); ch == '_' or char::isDigit(ch) or char::isAlpha(ch) {
        advance(s);
    }
}
206	+
/// After a consumed `+` or `-`: scan a signed number when a digit follows
/// immediately, otherwise return the sign as a punctuation token of `kind`.
fn scanSignedNumberOrToken(s: *mut Scanner, kind: TokenKind) -> Token {
    let nextCh = current(s) else {
        return tok(s, kind);
    };
    if not char::isDigit(nextCh) {
        return tok(s, kind);
    }
    return scanNumber(s);
}
214	+
/// Scan a numeric literal: decimal digits, or `0x` / `0X` followed by at
/// least one hexadecimal digit, optionally preceded by a sign.
///
/// On entry the cursor sits just past the first character of the token,
/// which is either a digit or a sign. Returns an `Invalid` token
/// ("invalid hex literal") when the hex prefix is not followed by a hex
/// digit, including when the input ends right after the prefix.
fn scanNumber(s: *mut Scanner) -> Token {
    let first = s.source[s.cursor - 1];
    if first == '-' or first == '+' {
        // Step over the first digit; the visible caller
        // (`scanSignedNumberOrToken`) only gets here when a digit follows.
        advance(s);
    }
    if s.source[s.cursor - 1] == '0' {
        if let ch = current(s); ch == 'x' or ch == 'X' {
            advance(s);
            // A bare prefix at end of input must be rejected too; an
            // `if let` check alone would be skipped there.
            let firstDigit = current(s) else {
                return invalid(s.token, "invalid hex literal");
            };
            if not char::isHexDigit(firstDigit) {
                return invalid(s.token, "invalid hex literal");
            }
            while let hexCh = current(s); char::isHexDigit(hexCh) {
                advance(s);
            }
            return tok(s, TokenKind::Number);
        }
    }
    while let digit = current(s); char::isDigit(digit) {
        advance(s);
    }
    return tok(s, TokenKind::Number);
}
238	+
/// Scan a delimited token body up to and including the closing `delim`.
///
/// Returns `nil` when the input ends before the closing delimiter, and an
/// `Invalid` token when a non-printable character is found. A backslash
/// makes the following character part of the body, so an escaped
/// delimiter does not end the token.
fn scanCharsUntil(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
    while let ch = current(s); ch <> delim {
        if not char::isPrint(ch) {
            return invalid(s.token, "invalid character");
        }
        if ch == '\\' {
            // Step over the backslash; the escaped character is consumed below.
            advance(s);
            if isEof(s) {
                return nil;
            }
        }
        advance(s);
    }
    if consume(s, delim) {
        return tok(s, kind);
    }
    return nil;
}
257	+
/// Scan a string literal after its opening `"`; an unterminated literal
/// yields an `Invalid` token.
fn scanString(s: *mut Scanner) -> Token {
    let token = scanCharsUntil(s, '"', TokenKind::String) else {
        return invalid(s.token, "unterminated string");
    };
    return token;
}
265	+
/// Scan a character literal after its opening `'`; an unterminated
/// literal yields an `Invalid` token.
fn scanChar(s: *mut Scanner) -> Token {
    let token = scanCharsUntil(s, '\'', TokenKind::Char) else {
        return invalid(s.token, "unterminated character");
    };
    return token;
}
273	+
/// Scan the rest of an identifier-shaped token and return it as `kind`,
/// with its text interned in the scanner's string pool.
fn scanIdentToken(s: *mut Scanner, kind: TokenKind) -> Token {
    scanIdentifierBody(s);

    let text = strings::intern(s.pool, &s.source[s.token..s.cursor]);
    return Token { kind, source: text, offset: s.token };
}
284	+
/// Scan a sigil-prefixed identifier-shaped token of the given `kind`.
///
/// Called after the sigil has been consumed. The name must start
/// immediately after it with a letter or `_`; otherwise an `Invalid`
/// token carrying `message` is returned. The token text includes the
/// sigil and is interned in the scanner's string pool.
fn scanPrefixedToken(s: *mut Scanner, kind: TokenKind, message: *[u8]) -> Token {
    let ch = current(s) else {
        return invalid(s.token, message);
    };
    if not char::isAlpha(ch) and ch <> '_' {
        return invalid(s.token, message);
    }
    scanIdentifierBody(s);

    return Token {
        kind,
        source: strings::intern(s.pool, &s.source[s.token..s.cursor]),
        offset: s.token,
    };
}
301	+
/// Scan a label after its leading `@`, in either `@name` or `@"quoted"` form.
fn scanLabelToken(s: *mut Scanner) -> Token {
    let ch = current(s) else {
        return invalid(s.token, "expected label after `@`");
    };
    if ch <> '"' {
        return scanPrefixedToken(s, TokenKind::Label, "expected label after `@`");
    }
    // Quoted form: step over the opening quote, then scan to the closing one.
    advance(s);
    let token = scanCharsUntil(s, '"', TokenKind::QuotedLabel) else {
        return invalid(s.token, "unterminated quoted label");
    };
    return token;
}

lib/std/arch/rv64/asm/scanner/tests.rad added +140 -0

1	+	use std::mem;
2	+	use std::testing;
3	+	use std::lang::strings;
4	+
5	+	/// String pool used by assembler scanner tests.
6	+	static TEST_STRING_POOL: strings::Pool = strings::Pool { table: undefined, count: 0 };
7	+
/// Build a scanner over inline test input, backed by the shared test string pool.
fn testScanner(source: *[u8]) -> super::Scanner {
    let kind = super::SourceKind::String;
    return super::scanner(kind, source, &mut TEST_STRING_POOL);
}
12	+
/// Sigil-prefixed tokens keep their sigil in the token text, and a scoped
/// name scans as separate identifier and `::` tokens.
@test fn testScanRegisterDirectiveAndLabelTokens() throws (testing::TestError) {
    let mut s = testScanner(".text %sp @entry name::tail 42");

    let directive = super::next(&mut s);
    try testing::expect(directive.kind == super::TokenKind::Directive);
    try testing::expect(mem::eq(directive.source, ".text"));

    let reg = super::next(&mut s);
    try testing::expect(reg.kind == super::TokenKind::Register);
    try testing::expect(mem::eq(reg.source, "%sp"));

    let label = super::next(&mut s);
    try testing::expect(label.kind == super::TokenKind::Label);
    try testing::expect(mem::eq(label.source, "@entry"));

    // `name::tail` is three tokens.
    let head = super::next(&mut s);
    try testing::expect(head.kind == super::TokenKind::Ident);
    let sep = super::next(&mut s);
    try testing::expect(sep.kind == super::TokenKind::ColonColon);
    let tail = super::next(&mut s);
    try testing::expect(tail.kind == super::TokenKind::Ident);

    let number = super::next(&mut s);
    try testing::expect(number.kind == super::TokenKind::Number);
    let end = super::next(&mut s);
    try testing::expect(end.kind == super::TokenKind::Eof);
}
36	+
/// Words such as `and`, `or`, and `not` scan as plain identifiers.
@test fn testScanKeywordShapedAsmNamesRemainAsmTokens() throws (testing::TestError) {
    let mut s = testScanner("and or not align addi .text @label");

    // The first five words are all plain identifiers.
    let mut i: u32 = 0;
    while i < 5 {
        let word = super::next(&mut s);
        try testing::expect(word.kind == super::TokenKind::Ident);
        set i += 1;
    }
    let directive = super::next(&mut s);
    try testing::expect(directive.kind == super::TokenKind::Directive);
    let label = super::next(&mut s);
    try testing::expect(label.kind == super::TokenKind::Label);
    let end = super::next(&mut s);
    try testing::expect(end.kind == super::TokenKind::Eof);
}
51	+
/// A quoted label can spell a name that is not identifier-shaped; the
/// token text keeps the `@` and both quotes.
@test fn testScanQuotedLabelToken() throws (testing::TestError) {
    let mut s = testScanner("@\"foo.bar.baz\"");

    let label = super::next(&mut s);
    try testing::expect(label.kind == super::TokenKind::QuotedLabel);
    try testing::expect(mem::eq(label.source, "@\"foo.bar.baz\""));

    let end = super::next(&mut s);
    try testing::expect(end.kind == super::TokenKind::Eof);
}
62	+
/// A sigil separated from its name by a space scans as an invalid token.
@test fn testScanSigilsRequireAdjacency() throws (testing::TestError) {
    let mut regScan = testScanner("% a0");
    let regTok = super::next(&mut regScan);
    try testing::expect(regTok.kind == super::TokenKind::Invalid);

    let mut labelScan = testScanner("@ entry");
    let labelTok = super::next(&mut labelScan);
    try testing::expect(labelTok.kind == super::TokenKind::Invalid);

    let mut directiveScan = testScanner(". text");
    let directiveTok = super::next(&mut directiveScan);
    try testing::expect(directiveTok.kind == super::TokenKind::Invalid);
}
74	+
/// Scanning a whole program that ends in a newline reaches an `Eof` token
/// with empty text.
@test fn testScanProgramEndingWithNewline() throws (testing::TestError) {
    let mut s = testScanner(
        ".text;\n@start\naddi %a0 %zero 42;\nsd %a0 8(%sp);\n// comment\nbeq %a0 %zero @done;\n@done\nret;\n"
    );
    // Drain tokens until the scanner reports end of input.
    let mut tok = super::next(&mut s);
    while tok.kind <> super::TokenKind::Eof {
        set tok = super::next(&mut s);
    }
    try testing::expect(tok.source.len == 0);
}
88	+
/// Signed hex scans as one number; binary and fractional forms are not
/// number syntax and split into several tokens.
@test fn testScanSignedHexAndUnsupportedNumericForms() throws (testing::TestError) {
    let mut s = testScanner("+0x2a -0b10 45.5");

    let hex = super::next(&mut s);
    try testing::expect(hex.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(hex.source, "+0x2a"));

    // `-0b10` splits into the number `-0` and the identifier `b10`.
    let zero = super::next(&mut s);
    try testing::expect(zero.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(zero.source, "-0"));

    let ident = super::next(&mut s);
    try testing::expect(ident.kind == super::TokenKind::Ident);
    try testing::expect(mem::eq(ident.source, "b10"));

    // `45.5` splits into `45`, an invalid `.`, and `5`.
    let whole = super::next(&mut s);
    try testing::expect(whole.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(whole.source, "45"));

    let dot = super::next(&mut s);
    try testing::expect(dot.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(dot.source, "expected directive name after `.`"));

    let frac = super::next(&mut s);
    try testing::expect(frac.kind == super::TokenKind::Number);
    try testing::expect(mem::eq(frac.source, "5"));
}
118	+
/// Unterminated string and character literals, with or without a trailing
/// backslash, scan as invalid tokens with a specific message.
@test fn testScanUnterminatedDelimitedLiterals() throws (testing::TestError) {
    // Strings.
    let mut stringScan = testScanner("\"unterminated");
    let stringTok = super::next(&mut stringScan);
    try testing::expect(stringTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(stringTok.source, "unterminated string"));

    let mut escapedStringScan = testScanner("\"unterminated\\");
    let escapedStringTok = super::next(&mut escapedStringScan);
    try testing::expect(escapedStringTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(escapedStringTok.source, "unterminated string"));

    // Characters.
    let mut charScan = testScanner("'x");
    let charTok = super::next(&mut charScan);
    try testing::expect(charTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(charTok.source, "unterminated character"));

    let mut escapedCharScan = testScanner("'\\");
    let escapedCharTok = super::next(&mut escapedCharScan);
    try testing::expect(escapedCharTok.kind == super::TokenKind::Invalid);
    try testing::expect(mem::eq(escapedCharTok.source, "unterminated character"));
}

lib/std/arch/rv64/asm/tests.rad added +211 -0

1	+	//! RV64 assembler tests.
2	+
3	+	use std::testing;
4	+	use std::mem;
5	+	use std::lang::alloc;
6	+	use std::lang::sexpr;
7	+	use std::lang::strings;
8	+	use std::arch::rv64;
9	+	use std::arch::rv64::encode;
10	+	use std::arch::rv64::printer;
11	+
12	+	use super::scanner;
13	+
14	+	static ASM_ARENA_STORAGE: [u8; 65536] = undefined;
15	+	static ASM_TEXT_STORAGE: [u32; 256] = undefined;
16	+	static ASM_DATA_STORAGE: [u8; 1024] = undefined;
17	+	static ASM_STRING_POOL: strings::Pool = strings::Pool { table: undefined, count: 0 };
18	+	static PRINT_ARENA_STORAGE: [u8; 1024] = undefined;
19	+	static PRINT_BUFFER: [u8; 128] = undefined;
20	+
/// Assemble inline `source` using the shared static test buffers.
/// Any assembler error is reported as `TestError::Failed`.
fn assembleSource(source: *[u8]) -> super::Program throws (testing::TestError) {
    let mut arena = alloc::new(&mut ASM_ARENA_STORAGE[..]);
    let program = try super::assemble(
        scanner::SourceKind::String,
        source,
        &mut ASM_TEXT_STORAGE[..],
        &mut ASM_DATA_STORAGE[..],
        &mut arena,
        &mut ASM_STRING_POOL,
        rv64::RO_DATA_BASE
    ) catch {
        throw testing::TestError::Failed;
    };
    return program;
}
35	+
/// Assert that assembling `source` fails: an assembler error is the
/// passing outcome, and successful assembly throws `TestError::Failed`.
fn expectAssembleFail(source: *[u8]) throws (testing::TestError) {
    let mut scratch = alloc::new(&mut ASM_ARENA_STORAGE[..]);
    try super::assemble(
        scanner::SourceKind::String,
        source,
        &mut ASM_TEXT_STORAGE[..],
        &mut ASM_DATA_STORAGE[..],
        &mut scratch,
        &mut ASM_STRING_POOL,
        rv64::RO_DATA_BASE
    ) catch {
        // The assembler rejected the input, as expected.
        return;
    };
    throw testing::TestError::Failed;
}
51	+
/// Print one encoded instruction into the shared print buffer and return
/// the written prefix of that buffer.
fn printInstrText(instr: u32) -> *[u8] {
    let mut arena = alloc::new(&mut PRINT_ARENA_STORAGE[..]);
    let mut written: u32 = 0;
    let mut sink = sexpr::Output::Buffer { buf: &mut PRINT_BUFFER[..], pos: &mut written };
    printer::printInstr(&mut sink, &mut arena, instr);
    return &PRINT_BUFFER[..written];
}
59	+
/// `%`-prefixed register operands assemble to the expected encodings.
@test fn testAssemblePercentPrefixedRegisters() throws (testing::TestError) {
    let program = try assembleSource(".text;\naddi %a0 %zero 42;\nsd %a0 8(%sp);\n");
    try testing::expect(program.text.len == 2);

    let wantAddi = encode::addi(rv64::A0, rv64::ZERO, 42);
    try testing::expect(program.text[0] == wantAddi);
    let wantSd = encode::sd(rv64::A0, rv64::SP, 8);
    try testing::expect(program.text[1] == wantSd);
}
68	+
/// `la` of a data label expands to `lui` + `addi`; the label sits one
/// byte into the data section.
@test fn testAssembleDataAddressUsesRoDataBase() throws (testing::TestError) {
    let program = try assembleSource(".text;\nla %t0 @value;\n.data;\n.byte 0;\n@value\n.byte 1;\n");
    try testing::expect(program.text.len == 2);

    let wantLui = encode::lui(rv64::T0, 0x10);
    try testing::expect(program.text[0] == wantLui);
    let wantAddi = encode::addi(rv64::T0, rv64::T0, 1);
    try testing::expect(program.text[1] == wantAddi);
}
77	+
/// `la` of a text label expands to `auipc` + `addi` with a PC-relative offset.
@test fn testAssembleTextAddressUsesPcRelative() throws (testing::TestError) {
    let program = try assembleSource(".text;\nla %t0 @target;\n@target\nret;\n");
    try testing::expect(program.text.len == 3);

    let wantAuipc = encode::auipc(rv64::T0, 0);
    try testing::expect(program.text[0] == wantAuipc);
    let wantAddi = encode::addi(rv64::T0, rv64::T0, 8);
    try testing::expect(program.text[1] == wantAddi);
}
86	+
/// Quoted labels can be defined and referenced like ordinary labels.
@test fn testAssembleQuotedLabelNames() throws (testing::TestError) {
    let program = try assembleSource(".text;\nj @\"foo.bar.baz\";\n@\"foo.bar.baz\"\nret;\n");
    try testing::expect(program.text.len == 2);

    let wantJump = encode::jal(rv64::ZERO, 4);
    try testing::expect(program.text[0] == wantJump);
    let wantRet = encode::jalr(rv64::ZERO, rv64::RA, 0);
    try testing::expect(program.text[1] == wantRet);
}
95	+
/// `.export` marks only the named symbols, whether the directive comes
/// before or after the label definition.
@test fn testAssembleGlobalMarksOnlyDeclaredSymbols() throws (testing::TestError) {
    let program = try assembleSource(
        ".text;\n.export @exported;\n@local\nret;\n@exported\nret;\n@late\n.export @late;\nret;\n"
    );
    try testing::expect(program.symbols.len == 3);

    // Symbols are checked in the order the labels appear in the source.
    let localExported = program.symbols[0].isExported;
    try testing::expect(not localExported);
    let exportedExported = program.symbols[1].isExported;
    try testing::expect(exportedExported);
    let lateExported = program.symbols[2].isExported;
    try testing::expect(lateExported);
}
105	+
/// Each of these programs has an operand the assembler must reject.
@test fn testAssembleInvalidOperandsFail() throws (testing::TestError) {
    try expectAssembleFail(".text;\nbeq %a0 %a1 @missing;\n");
    try expectAssembleFail(".text;\naddi a0 zero 1;\n");
    try expectAssembleFail(".text;\naddi % a0 %zero 1;\n");
    try expectAssembleFail(".text;\nli %a0 UNKNOWN;\n");
    try expectAssembleFail(".text;\n@start\nj start;\n");
}
123	+
/// Each of these programs is syntactically invalid and must be rejected.
@test fn testAssembleInvalidSyntaxFails() throws (testing::TestError) {
    try expectAssembleFail(".text;\n@dup\n@dup\nret;\n");
    try expectAssembleFail(".text;\naddi %a0, %zero, 1\n");
    try expectAssembleFail(".constant PAGE, 4096;\n");
    try expectAssembleFail(".text;\naddi %a0, %zero, 1;\n");
    try expectAssembleFail(".export @kernel::main, @data::sym;\n");
}
141	+
/// Each of these programs uses a statement or label in a section where
/// the assembler must reject it.
@test fn testAssembleInvalidSectionsFail() throws (testing::TestError) {
    try expectAssembleFail(".data;\n.dword @target;\n.text;\n@target\nret;\n");
    try expectAssembleFail(".data;\naddi %a0 %zero 1;\n");
    try expectAssembleFail(".text;\n.byte 1;\n");
    try expectAssembleFail(".text;\n.word 1;\n");
    try expectAssembleFail(".text;\n.dword 1;\n");
    try expectAssembleFail(".text;\n.ascii \"x\";\n");
    try expectAssembleFail(".data;\n@value\n.byte 1;\n.text;\nj @value;\n");
}
165	+
/// Each of these directive uses must be rejected by the assembler.
@test fn testAssembleInvalidDirectivesFail() throws (testing::TestError) {
    try expectAssembleFail(".data;\n.ascii 'x';\n");
    try expectAssembleFail(".data;\n.byte 1 + 2;\n");
    try expectAssembleFail(".data;\n.byte 256;\n");
    try expectAssembleFail(".data;\n.word 2147483648;\n");
    try expectAssembleFail(".data;\n.space 4294967296;\n");
    try expectAssembleFail(".data;\n.align 3;\n");
    try expectAssembleFail(".text;\n.align 12;\n");
    try expectAssembleFail(".data;\n.align 4294967296;\n");
}
192	+
/// Each of these instructions carries an immediate the assembler must reject.
@test fn testAssembleInvalidImmediateRangesFail() throws (testing::TestError) {
    try expectAssembleFail(".text;\nslli %a0 %a1 64;\n");
    try expectAssembleFail(".text;\nslli %a0 %a1 4294967296;\n");
    try expectAssembleFail(".text;\nslliw %a0 %a1 2147483648;\n");
    try expectAssembleFail(".text;\ncsrsi mstatus 32;\n");
}
207	+
/// The instruction printer writes register operands with the `%` prefix.
@test fn testPrintInstrUsesPercentPrefixedRegisters() throws (testing::TestError) {
    let instr = encode::addi(rv64::A0, rv64::SP, 42);
    let text = printInstrText(instr);
    try testing::expect(mem::eq(text, "addi %a0, %sp, 42"));
}

lib/std/arch/rv64/emit.rad +13 -3

    labels::recordBlock(&mut e.labels, blockIdx, e.codeLen as i32 * super::INSTR_SIZE);
}

/// Record a function's code offset for call resolution.
///
/// Uses the emitter's current code length as the function's position and
/// delegates to `recordFuncOffsetAt`, which performs the bounds assertion
/// and the label-table insert.
export fn recordFuncOffset(e: *mut Emitter, name: *[u8]) {
    recordFuncOffsetAt(e, name, e.codeLen);
}

/// Record a function's code offset at instruction `index` for call
/// resolution. The stored value is `index` scaled by the instruction size.
export fn recordFuncOffsetAt(e: *mut Emitter, name: *[u8], index: u32) {
    assert index <= MAX_CODE_LEN;
    let byteOffset = index as i32 * super::INSTR_SIZE;
    dict::insert(&mut e.labels.funcs, name, byteOffset);
}

/// Record a function's start position for printing, using the emitter's
/// current code length as the position.
export fn recordFunc(e: *mut Emitter, name: *[u8]) {
    let here = e.codeLen;
    recordFuncAt(e, name, here);
}

/// Record a function's start position at `index` for printing.
///
/// Appends a `FuncAddr { name, index }` entry to the emitter's function
/// table; asserts that the table still has room.
export fn recordFuncAt(e: *mut Emitter, name: *[u8], index: u32) {
    assert e.funcsLen < e.funcs.len, "recordFunc: funcs buffer full";
    // Store the caller-supplied index, not the current code length.
    set e.funcs[e.funcsLen] = types::FuncAddr { name, index };
    set e.funcsLen += 1;
}

/// Record a local branch needing later patching.
/// Unconditional jumps use a single slot (J-type, +-1MB range).

lib/std/arch/rv64/encode.rad +63 -0

export constant F3_BLT:  u32 = 0x4;
export constant F3_BGE:  u32 = 0x5;
export constant F3_BLTU: u32 = 0x6;
export constant F3_BGEU: u32 = 0x7;

// CSR/system operations

export constant F3_CSRRW: u32 = 0x1;
export constant F3_CSRRS: u32 = 0x2;
export constant F3_CSRRC: u32 = 0x3;
export constant F3_CSRRWI: u32 = 0x5;
export constant F3_CSRRSI: u32 = 0x6;
export constant F3_CSRRCI: u32 = 0x7;

//////////////////////
// Funct7 Constants //
//////////////////////

export constant F7_NORMAL: u32 = 0b0000000;

/// Environment break (debugger breakpoint): an I-type `OP_SYSTEM`
/// instruction with zero registers, funct3 0, and immediate 1.
export fn ebreak() -> u32 {
    let word = encodeI(OP_SYSTEM, super::ZERO, super::ZERO, 0, 1);
    return word;
}

/// Encode a CSR instruction with a register source.
///
/// Field layout: opcode in bits 0-6, `rd` in 7-11, `funct3` in 12-14,
/// `rs1` in 15-19, and the CSR number in 20-31. Each field is masked to
/// its width before being shifted into place.
fn encodeCsr(op: u32, rd: gen::Reg, csr: u32, funct3: u32, rs1: gen::Reg) -> u32 {
    let opField = op & 0x7F;
    let rdField = (*rd as u32 & 0x1F) << 7;
    let f3Field = (funct3 & 0x07) << 12;
    let rs1Field = (*rs1 as u32 & 0x1F) << 15;
    let csrField = (csr & 0xFFF) << 20;
    return opField | rdField | f3Field | rs1Field | csrField;
}

/// Encode a CSR instruction with an immediate source.
///
/// Same layout as the register form, with the 5-bit immediate in bits
/// 15-19 where `rs1` would be. Asserts that `imm` is below 32.
fn encodeCsrImm(op: u32, rd: gen::Reg, csr: u32, funct3: u32, imm: u32) -> u32 {
    assert imm < 32;
    let opField = op & 0x7F;
    let rdField = (*rd as u32 & 0x1F) << 7;
    let f3Field = (funct3 & 0x07) << 12;
    let immField = (imm & 0x1F) << 15;
    let csrField = (csr & 0xFFF) << 20;
    return opField | rdField | f3Field | immField | csrField;
}

/// Read CSR into `rd`: encoded as CSRRS with the zero register as source.
export fn csrr(rd: gen::Reg, csr: u32) -> u32 {
    let word = encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRS, super::ZERO);
    return word;
}

/// Read/write CSR: the old CSR value goes to `rd`, and `rs1` is written
/// to the CSR.
export fn csrrw(rd: gen::Reg, csr: u32, rs1: gen::Reg) -> u32 {
    let word = encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRW, rs1);
    return word;
}

/// Write `rs1` into CSR and discard the old value: CSRRW with the zero
/// register as destination.
export fn csrw(csr: u32, rs1: gen::Reg) -> u32 {
    let word = encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRW, rs1);
    return word;
}

/// Clear CSR bits from `rs1` and discard the old value: CSRRC with the
/// zero register as destination.
export fn csrc(csr: u32, rs1: gen::Reg) -> u32 {
    let word = encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRC, rs1);
    return word;
}

/// Set CSR bits from a 5-bit immediate and discard the old value: CSRRSI
/// with the zero register as destination.
export fn csrsi(csr: u32, imm: u32) -> u32 {
    let word = encodeCsrImm(OP_SYSTEM, super::ZERO, csr, F3_CSRRSI, imm);
    return word;
}

/// Wait for interrupt; returns the fixed instruction word.
export fn wfi() -> u32 {
    let word: u32 = 0x10500073;
    return word;
}

/// Return from machine mode trap; returns the fixed instruction word.
export fn mret() -> u32 {
    let word: u32 = 0x30200073;
    return word;
}

/////////////////////////
// Pseudo-instructions //
/////////////////////////

/// No operation: `addi zero, zero, 0`.

lib/std/arch/rv64/printer.rad +4 -4

16	16		// Register Names //
17	17		/////////////////////
18	18
/// ABI register names, indexed by register number. Each name carries the
/// `%` sigil so that printed instructions use `%`-prefixed registers.
constant REG_NAMES: [*[u8]; 32] = [
    "%zero", "%ra", "%sp", "%gp", "%tp", "%t0", "%t1", "%t2",
    "%fp", "%s1", "%a0", "%a1", "%a2", "%a3", "%a4", "%a5",
    "%a6", "%a7", "%s2", "%s3", "%s4", "%s5", "%s6", "%s7",
    "%s8", "%s9", "%s10", "%s11", "%t3", "%t4", "%t5", "%t6"
];
26	26
27	27		/// Get register name from number.
28	28		fn regName(n: u8) -> *[u8] {
29	29		return "?" if n >= 32 else REG_NAMES[n as u32];

lib/std/arch/rv64/tests.rad +42 -0

//!
//! These tests verify that instruction encodings match the RISC-V specification
//! by comparing against known-good values.

use std::testing;
use std::lang::alloc;
use std::collections::dict;

use super::encode;
use super::asm;

static ASSEMBLY_ARENA_STORAGE: [u8; 16777216] = undefined;
static ASSEMBLY_TEXT_STORAGE: [u32; 2] = undefined;

/// Fail the test unless the `actual` encoding equals `expected`.
fn expectEncoding(actual: u32, expected: u32) throws (testing::TestError) {
    let matches = actual == expected;
    try testing::expect(matches);
}

/// `addAssembly` registers only exported symbols as function labels: the
/// non-exported symbol is absent afterwards, and the exported one maps to
/// its offset.
@test fn testAddAssemblyExportsOnlyGlobalTextSymbols() throws (testing::TestError) {
    let mut arena = alloc::new(&mut ASSEMBLY_ARENA_STORAGE[..]);
    let symbols = try alloc::allocSlice(&mut arena, @sizeOf(asm::Symbol), @alignOf(asm::Symbol), 2) catch {
        throw testing::TestError::Failed;
    };
    let mut symbolSlice = @sliceOf((symbols as *mut [asm::Symbol]).ptr, 2, 2);

    // One local and one exported text symbol, one instruction apart.
    let localSym = asm::Symbol {
        name: "local",
        section: asm::Section::Text,
        offset: 0,
        isExported: false,
    };
    let exportedSym = asm::Symbol {
        name: "exported",
        section: asm::Section::Text,
        offset: super::INSTR_SIZE,
        isExported: true,
    };
    set symbolSlice[0] = localSym;
    set symbolSlice[1] = exportedSym;

    let options = super::ProgramOptions { entryPatch: super::EntryPatch::None, debug: false };
    let mut generator = super::beginProgram(options, &mut arena);
    let program = asm::Program { text: &ASSEMBLY_TEXT_STORAGE[..], data: &[], symbols: symbolSlice };
    super::addAssembly(&mut generator, program);

    try testing::expect(dict::get(&generator.e.labels.funcs, "local") == nil);
    let exportedOffset = dict::get(&generator.e.labels.funcs, "exported") else {
        throw testing::TestError::Failed;
    };
    try testing::expect(exportedOffset == super::INSTR_SIZE);
}

///////////////////////
// R-type ALU tests  //
///////////////////////

@test fn testEncodeAdd() throws (testing::TestError) {

lib/std/char.rad added +31 -0

1	+	//! ASCII character classification helpers shared across the standard library.
2	+
3	+	@test mod tests;
4	+
5	+	/// Return `true` when `ch` is an ASCII digit.
6	+	export fn isDigit(ch: u8) -> bool {
7	+	return ch >= '0' and ch <= '9';
8	+	}
9	+
10	+	/// Return `true` when `ch` is an ASCII hexadecimal digit.
11	+	export fn isHexDigit(ch: u8) -> bool {
12	+	return (ch >= '0' and ch <= '9')
13	+	or (ch >= 'a' and ch <= 'f')
14	+	or (ch >= 'A' and ch <= 'F');
15	+	}
16	+
17	+	/// Return `true` when `ch` is a binary digit.
18	+	export fn isBinDigit(ch: u8) -> bool {
19	+	return ch == '0' or ch == '1';
20	+	}
21	+
22	+	/// Return `true` when `ch` is an ASCII alphabetic character.
23	+	export fn isAlpha(ch: u8) -> bool {
24	+	return (ch >= 'a' and ch <= 'z')
25	+	or (ch >= 'A' and ch <= 'Z');
26	+	}
27	+
28	+	/// Return `true` when `ch` is printable ASCII.
29	+	export fn isPrint(ch: u8) -> bool {
30	+	return ch >= ' ' and ch <= '~';
31	+	}

lib/std/char/tests.rad added +42 -0

1	+	use std::testing;
2	+
3	+	@test fn testIsDigit() throws (testing::TestError) {
4	+	try testing::expect(super::isDigit('0'));
5	+	try testing::expect(super::isDigit('9'));
6	+	try testing::expectNot(super::isDigit('/'));
7	+	try testing::expectNot(super::isDigit(':'));
8	+	}
9	+
10	+	@test fn testIsHexDigit() throws (testing::TestError) {
11	+	try testing::expect(super::isHexDigit('0'));
12	+	try testing::expect(super::isHexDigit('9'));
13	+	try testing::expect(super::isHexDigit('a'));
14	+	try testing::expect(super::isHexDigit('f'));
15	+	try testing::expect(super::isHexDigit('A'));
16	+	try testing::expect(super::isHexDigit('F'));
17	+	try testing::expectNot(super::isHexDigit('g'));
18	+	try testing::expectNot(super::isHexDigit('G'));
19	+	}
20	+
21	+	@test fn testIsBinDigit() throws (testing::TestError) {
22	+	try testing::expect(super::isBinDigit('0'));
23	+	try testing::expect(super::isBinDigit('1'));
24	+	try testing::expectNot(super::isBinDigit('2'));
25	+	try testing::expectNot(super::isBinDigit('a'));
26	+	}
27	+
28	+	@test fn testIsAlpha() throws (testing::TestError) {
29	+	try testing::expect(super::isAlpha('a'));
30	+	try testing::expect(super::isAlpha('z'));
31	+	try testing::expect(super::isAlpha('A'));
32	+	try testing::expect(super::isAlpha('Z'));
33	+	try testing::expectNot(super::isAlpha('0'));
34	+	try testing::expectNot(super::isAlpha('_'));
35	+	}
36	+
37	+	@test fn testIsPrint() throws (testing::TestError) {
38	+	try testing::expect(super::isPrint(' '));
39	+	try testing::expect(super::isPrint('~'));
40	+	try testing::expectNot(super::isPrint(31));
41	+	try testing::expectNot(super::isPrint(127));
42	+	}

lib/std/fmt.rad +168 -0

//! Formatting utilities for converting values to strings.
use super::mem;

/// Maximum `u64` value.
export constant U64_MAX: u64 = 0xFFFFFFFFFFFFFFFF;
/// Maximum string length for a formatted u32 (eg. "4294967295").
export constant U32_STR_LEN: u32 = 10;
/// Maximum string length for a formatted i32 (eg. "-2147483648").
export constant I32_STR_LEN: u32 = U32_STR_LEN + 1;
/// Maximum string length for a formatted u64 (eg. "18446744073709551615").

/// Maximum string length for a formatted i64 (eg. "-9223372036854775808").
export constant I64_STR_LEN: u32 = 20;
/// Maximum string length for a formatted bool (eg. "false").
export constant BOOL_STR_LEN: u32 = 5;

/// Radix/base of a parsed integer literal.
///
/// The radix is determined by the literal's prefix; text without a
/// recognized prefix is decimal.
export union Radix {
    /// Binary literal (`0b...` or `0B...`).
    Binary,
    /// Decimal literal (no prefix).
    Decimal,
    /// Hexadecimal literal (`0x...` or `0X...`).
    Hex,
}

/// Errors reported while parsing literal text.
export union ParseError {
    /// Literal text was empty, ended right after a sign or radix prefix,
    /// or was otherwise malformed (eg. a character literal of the wrong length).
    Invalid,
    /// Literal contained an invalid digit for its radix.
    InvalidDigit,
    /// Literal value exceeded the supported range (its magnitude does not fit in a `u64`).
    Overflow,
}

/// Parsed integer literal metadata.
export record IntLiteral {
    /// Raw characters that comprised the literal, including any sign and
    /// radix prefix.
    text: *[u8],
    /// Absolute magnitude parsed from the literal; the sign is recorded
    /// separately in `negative`.
    magnitude: u64,
    /// Radix used by the literal.
    radix: Radix,
    /// Whether the literal spelled an explicit sign (`+` or `-`).
    signed: bool,
    /// Whether the literal used a negative sign (`-`).
    negative: bool,
}

/// Format a u32 by writing it to the provided buffer.
export fn formatU32(val: u32, buffer: *mut [u8]) -> *[u8] {
    assert buffer.len >= U32_STR_LEN;

    let mut x: u32 = val;

    } else {
        try! mem::copy(buffer, "false");
        return &buffer[..5];
    }
}

/// Map one ASCII character to its digit value in the given radix.
///
/// Decimal digits are always recognized; letters (either case) count from
/// ten upwards and are only considered for radices above ten. Returns `nil`
/// when the character is not a digit of `radix`. The radix must lie in `2..=36`.
export fn digitFromAscii(ch: u8, radix: u32) -> ?u32 {
    assert radix >= 2 and radix <= 36;

    // 36 is never below a legal radix, so it doubles as the "not a digit" marker.
    let mut digit: u32 = 36;

    if ch >= '0' and ch <= '9' {
        set digit = (ch - '0') as u32;
    } else if radix > 10 {
        // Clearing bit 5 folds lowercase ASCII letters onto uppercase.
        let folded = ch & 0xDF;
        if folded >= 'A' and folded <= 'Z' {
            set digit = (folded - 'A') as u32 + 10;
        }
    }
    if digit >= radix {
        return nil;
    }
    return digit;
}

/// Translate the character following a backslash into the byte it denotes.
///
/// Recognizes `n`, `t`, `r`, `\`, `"`, `'` and `0`; any other character is
/// returned unchanged.
export fn decodeAsciiEscape(ch: u8) -> u8 {
    if ch == 'n' {
        return '\n';
    }
    if ch == 't' {
        return '\t';
    }
    if ch == 'r' {
        return '\r';
    }
    if ch == '0' {
        return 0;
    }
    // `\\`, `\"` and `\'` stand for themselves, as does any unknown escape.
    return ch;
}

/// Parse the text of an integer literal into its `IntLiteral` description.
///
/// Accepts an optional leading `+` or `-`, followed by decimal digits or by
/// a `0x`/`0X` (hexadecimal) or `0b`/`0B` (binary) prefix and its digits.
///
/// Throws `ParseError::Invalid` when the text is empty or stops right after
/// the sign or prefix, `ParseError::InvalidDigit` on a character that is not
/// a digit of the detected radix, and `ParseError::Overflow` when the
/// magnitude does not fit in a `u64`.
export fn parseInt(text: *[u8]) -> IntLiteral throws (ParseError) {
    if text.len == 0 {
        throw ParseError::Invalid;
    }
    let lead = text[0];
    let negative = lead == '-';
    let signed: bool = negative or (lead == '+');

    // Position of the first character that has not been consumed yet.
    let mut pos: u32 = 0;
    if signed {
        set pos = 1;
    }
    // Only a lone sign can trip this: empty input was rejected above.
    if pos >= text.len {
        throw ParseError::Invalid;
    }

    let mut base: u32 = 10;
    let mut kind = Radix::Decimal;

    // A radix prefix needs a `0` followed by at least one more character.
    if pos + 1 < text.len and text[pos] == '0' {
        let marker = text[pos + 1];
        if marker == 'x' or marker == 'X' {
            set base = 16;
            set kind = Radix::Hex;
            set pos += 2;
        } else if marker == 'b' or marker == 'B' {
            set base = 2;
            set kind = Radix::Binary;
            set pos += 2;
        }
        // A prefix with nothing after it is not a number.
        if pos >= text.len {
            throw ParseError::Invalid;
        }
    }

    let base64: u64 = base as u64;
    // Largest accumulator value that can still be multiplied by the base.
    let mulLimit: u64 = U64_MAX / base64;
    let mut magnitude: u64 = 0;

    for idx in pos..text.len {
        let digit = digitFromAscii(text[idx], base) else {
            throw ParseError::InvalidDigit;
        };
        if magnitude > mulLimit {
            throw ParseError::Overflow;
        }
        set magnitude *= base64;

        let addend: u64 = digit as u64;
        if magnitude > U64_MAX - addend {
            throw ParseError::Overflow;
        }
        set magnitude += addend;
    }
    return IntLiteral {
        text, magnitude, radix: kind, signed, negative,
    };
}

/// Copy `raw` into `dst`, collapsing backslash escapes along the way.
///
/// A backslash followed by another byte is replaced by the result of
/// `decodeAsciiEscape` on that byte; a lone trailing backslash is copied
/// through unchanged. Returns the number of bytes written, which never
/// exceeds `raw.len`. No capacity check is made on `dst` here.
export fn unescapeString(raw: *[u8], dst: *mut [u8]) -> u32 {
    let mut readPos: u32 = 0;
    let mut written: u32 = 0;

    while readPos < raw.len {
        // Assume a plain byte, then upgrade to an escape if one is present.
        let mut byte: u8 = raw[readPos];
        let mut consumed: u32 = 1;

        if byte == '\\' and readPos + 1 < raw.len {
            set byte = decodeAsciiEscape(raw[readPos + 1]);
            set consumed = 2;
        }
        set dst[written] = byte;
        set written += 1;
        set readPos += consumed;
    }
    return written;
}

/// Parse the text of a character literal, delimiters included, into its byte.
///
/// The first and last bytes of `text` are treated as the delimiters and
/// dropped. What remains must be either exactly one byte, or a backslash
/// followed by exactly one byte (decoded with `decodeAsciiEscape`); anything
/// else throws `ParseError::Invalid`.
export fn parseChar(text: *[u8]) -> u8 throws (ParseError) {
    // Two delimiters plus at least one payload byte are required.
    if text.len <= 2 {
        throw ParseError::Invalid;
    }
    let body = &text[1..text.len - 1];

    if body[0] <> '\\' {
        // Plain character: nothing may follow it.
        if body.len <> 1 {
            throw ParseError::Invalid;
        }
        return body[0];
    }
    // Escape sequence: the backslash plus exactly one selector byte.
    if body.len <> 2 {
        throw ParseError::Invalid;
    }
    return decodeAsciiEscape(body[1]);
}

lib/std/lang/ast.rad +2 -25

//! Radiance AST modules.
export mod printer;

use std::io;
use std::fmt;
use std::lang::alloc;

/// Maximum number of trait methods.
export constant MAX_TRAIT_METHODS: u32 = 8;


    Signed,
    /// Unsigned, eg. `u32`.
    Unsigned,
}

/// Radix/base of a number.
export union Radix {
    /// Binary literal (0b...).
    Binary,
    /// Decimal literal.
    Decimal,
    /// Hexadecimal literal (0x...).
    Hex,
}

/// Parsed integer literal metadata.
export record IntLiteral {
    /// Raw characters that comprised the literal.
    text: *[u8],
    /// Absolute magnitude parsed from the literal.
    magnitude: u64,
    /// Radix used by the literal.
    radix: Radix,
    /// Whether the literal spelled an explicit sign.
    signed: bool,
    /// Whether the literal used a negative sign.
    negative: bool,
}

/// Binary operator kinds used in numeric expressions.
export union BinaryOp {
    /// Addition (`+`).
    Add,
    /// Subtraction (`-`).

    /// String literal like `"Hello World!"`.
    String(*[u8]),
    /// Identifier expression.
    Ident(*[u8]),
    /// Numeric literal such as `42` or `0xFF`.
    Number(IntLiteral),
    Number(fmt::IntLiteral),
    /// Range expression such as `0..10` or `..`.
    Range(Range),
    /// Array literal expression.
    ArrayLit(*mut [*Node]),
    /// Array repeat literal expression.

lib/std/lang/gen/data.rad +32 -6

    syms: *mut [DataSym],
    count: *mut u32,
    base: u32,
    readOnly: bool
) -> u32 {
    let mut offset: u32 = 0;
    return layoutSectionAtOffset(items, syms, count, base, 0, readOnly);
}

/// Lay out data symbols for a single section starting at [`startOffset`].
export fn layoutSectionAtOffset(
    items: *[il::Data],
    syms: *mut [DataSym],
    count: *mut u32,
    base: u32,
    startOffset: u32,
    readOnly: bool
) -> u32 {
    let mut offset: u32 = startOffset;

    // Initialized data first.
    for i in 0..items.len {
        let data = &items[i];
        if data.readOnly == readOnly and not data.isUndefined {

    fnLabels: *labels::Labels,
    codeBase: u32,
    buf: *mut [u8],
    readOnly: bool
) -> u32 {
    let mut offset: u32 = 0;
    return emitSectionAtOffset(items, dataSymMap, fnLabels, codeBase, buf, readOnly, 0);
}

/// Emit data bytes for a single section starting at `startOffset`.
export fn emitSectionAtOffset(
    items: *[il::Data],
    dataSymMap: *DataSymMap,
    fnLabels: *labels::Labels,
    codeBase: u32,
    buf: *mut [u8],
    readOnly: bool,
    startOffset: u32
) -> u32 {
    let mut offset: u32 = startOffset;

    for i in 0..items.len {
        let data = &items[i];
        if data.readOnly == readOnly and not data.isUndefined {
            set offset = mem::alignUp(offset, data.alignment);
            assert offset + data.size <= buf.len, "emitSection: buffer overflow";
            assert offset + data.size <= buf.len, "emitSectionAtOffset: buffer overflow";
            for j in 0..data.values.len {
                let v = &data.values[j];
                for _ in 0..v.count {
                    match v.item {
                        case il::DataItem::Val { typ, val } => {
                            let size = il::typeSize(typ);
                            let valPtr = &val as *u8;
                            try! mem::copy(&mut buf[offset..], @sliceOf(valPtr, size));

                            set offset += size;
                        },
                        case il::DataItem::Sym(name) => {
                            let addr = lookupAddr(dataSymMap, name) else {
                                panic "emitSection: data symbol not found";
                                panic "emitSectionAtOffset: data symbol not found";
                            };
                            let addr64: u64 = addr as u64;
                            let addrPtr = &addr64 as *u8;

                            try! mem::copy(&mut buf[offset..], @sliceOf(addrPtr, 8));

                            set offset += 8;
                            set offset += @sizeOf(u64);
                        },
                        case il::DataItem::Fn(name) => {
                            let addr = codeBase + labels::funcOffset(fnLabels, name) as u32;
                            let addr64: u64 = addr as u64;
                            let addrPtr = &addr64 as *u8;

                            try! mem::copy(&mut buf[offset..], @sliceOf(addrPtr, 8));

                            set offset += 8;
                            set offset += @sizeOf(*u8);
                        },
                        case il::DataItem::Str(s) => {
                            try! mem::copy(&mut buf[offset..], s);
                            set offset += s.len;
                        },

lib/std/lang/parser.rad +23 -118

//! Recursive descent parser for the Radiance programming language.
@test export mod tests;

use std::mem;
use std::io;
use std::fmt;
use std::lang::alloc;
use std::lang::ast;
use std::lang::strings;
use std::lang::scanner;

/// Maximum `u32` value.
export constant U32_MAX: u32 = 0xFFFFFFFF;
/// Minimum `i64` value.
export constant I64_MIN: i64 = -0x8000000000000000;
/// Largest magnitude representable by a negative `i64`.
export constant I64_MIN_MAGNITUDE: u64 = 0x8000000000000000;
/// Maximum representable `i64` magnitude.
export constant I64_MAX_MAGNITUDE: u64 = 0x7FFFFFFFFFFFFFFF;
/// Maximum representable `u64` value.
export constant U64_MAX: u64 = 0xFFFFFFFFFFFFFFFF;
/// Maximum number of fields in a record.
export constant MAX_RECORD_FIELDS: u32 = 32;


/// Build a boolean literal node carrying `value`.
fn nodeBool(p: *mut Parser, value: bool) -> *ast::Node {
    let payload = ast::NodeValue::Bool(value);
    return node(p, payload);
}

/// Map one ASCII character to its digit value in the given radix.
///
/// Decimal digits are always recognized; letters (either case) count from
/// ten upwards and are only considered for radices above ten. Returns `nil`
/// when the character is not a digit of `radix`. The radix must lie in `2..=36`.
export fn digitFromAscii(ch: u8, radix: u32) -> ?u32 {
    assert radix >= 2 and radix <= 36;

    // 36 is never below a legal radix, so it doubles as the "not a digit" marker.
    let mut digit: u32 = 36;

    if ch >= '0' and ch <= '9' {
        set digit = (ch - '0') as u32;
    } else if radix > 10 {
        // Clearing bit 5 folds lowercase ASCII letters onto uppercase.
        let folded = ch & 0xDF;
        if folded >= 'A' and folded <= 'Z' {
            set digit = (folded - 'A') as u32 + 10;
        }
    }
    if digit >= radix {
        return nil;
    }
    return digit;
}

/// Parse an integer literal (binary, decimal, or hexadecimal) including an optional sign.
fn parseIntLiteral(p: *mut Parser, text: *[u8]) -> ast::IntLiteral
/// Parse an integer literal while mapping shared errors into parser diagnostics.
fn parseIntLiteral(p: *mut Parser, text: *[u8]) -> fmt::IntLiteral
    throws (ParseError)
{
    if text.len == 0 {
        throw failParsing(p, "integer literal is empty");
    }
    let first = text[0];
    let negative = first == '-';
    let signed: bool = negative or (first == '+');

    let mut start: u32 = 0;
    let mut radix: u32 = 10;
    let mut radixType = ast::Radix::Decimal;

    if signed {
        set start = 1;
        if start >= text.len {
            throw failParsing(p, "integer literal requires digits after sign");
        }
    }
    if start + 1 < text.len and text[start] == '0' {
        let prefix = text[start + 1];
        if prefix == 'x' or prefix == 'X' {
            set radix = 16;
            set radixType = ast::Radix::Hex;
            set start += 2;
        } else if prefix == 'b' or prefix == 'B' {
            set radix = 2;
            set radixType = ast::Radix::Binary;
            set start += 2;
        }
        if start >= text.len {
            throw failParsing(p, "integer literal prefix must be followed by digits");
    let literal = try fmt::parseInt(text) catch err {
        match err {
            case fmt::ParseError::Invalid =>
                throw failParsing(p, "invalid integer literal"),
            case fmt::ParseError::InvalidDigit =>
                throw failParsing(p, "invalid digit in integer literal"),
            case fmt::ParseError::Overflow =>
                throw failParsing(p, "integer literal overflow"),
        }
    }
    let mut value: u64 = 0;
    let radix64: u64 = radix as u64;
    for i in start..text.len {
        let ch = text[i];
        let digit = digitFromAscii(ch, radix) else {
            throw failParsing(p, "invalid digit in integer literal");
        };
        if value > (U64_MAX / radix64) {
            throw failParsing(p, "integer literal overflow");
        }
        set value *= radix64;

        if value > U64_MAX - (digit as u64) {
            throw failParsing(p, "integer literal overflow");
        }
        set value += (digit as u64);
    }
    return ast::IntLiteral {
        text, magnitude: value, radix: radixType, signed, negative,
    };
    return literal;
}

/// Build a type-signature node for an integer of the given width and signedness.
fn nodeTypeInt(p: *mut Parser, width: u8, sign: ast::Signedness) -> *ast::Node {
    let sig = ast::TypeSig::Integer { width, sign };
    return node(p, ast::NodeValue::TypeSig(sig));
}

/// Emit a number literal node with the provided literal metadata.
fn nodeNumber(p: *mut Parser, literal: ast::IntLiteral) -> *ast::Node {
fn nodeNumber(p: *mut Parser, literal: fmt::IntLiteral) -> *ast::Node {
    return node(p, ast::NodeValue::Number(literal));
}

/// Build a node representing the `super` keyword.
fn nodeSuper(p: *mut Parser) -> *ast::Node {
    let payload = ast::NodeValue::Super;
    return node(p, payload);
}

/// Copy `raw` into `dst`, collapsing backslash escapes along the way.
///
/// Recognized escapes are `\n`, `\t`, `\r`, `\\`, `\"` and `\0`; any other
/// escaped byte stands for itself, and a lone trailing backslash is copied
/// through unchanged. Returns the number of bytes written, which never
/// exceeds `raw.len`. No capacity check is made on `dst` here.
fn unescapeString(raw: *[u8], dst: *mut [u8]) -> u32 {
    let mut readPos: u32 = 0;
    let mut written: u32 = 0;

    while readPos < raw.len {
        // Assume a plain byte, then upgrade to an escape if one is present.
        let mut byte: u8 = raw[readPos];
        let mut consumed: u32 = 1;

        if byte == '\\' and readPos + 1 < raw.len {
            let selector = raw[readPos + 1];
            match selector {
                case 'n'  => set byte = '\n',
                case 't'  => set byte = '\t',
                case 'r'  => set byte = '\r',
                case '\\' => set byte = '\\',
                case '"'  => set byte = '"',
                case '0'  => set byte = 0,
                else      => set byte = selector,
            }
            set consumed = 2;
        }
        set dst[written] = byte;
        set written += 1;
        set readPos += consumed;
    }
    return written;
}

/// Build a node wrapping a single attribute.
fn nodeAttribute(p: *mut Parser, attr: ast::Attribute) -> *ast::Node {
    let payload = ast::NodeValue::Attribute(attr);
    return node(p, payload);
}


            advance(p);
            return node(p, ast::NodeValue::Undef);
        }
        case scanner::TokenKind::Char => {
            advance(p);
            let src = p.previous.source;
            let mut ch: u8 = 0;

            if src[1] == '\\' { // Handle escape sequences.
                match src[2] {
                    case 'n'  => { set ch = '\n'; }
                    case 't'  => { set ch = '\t'; }
                    case 'r'  => { set ch = '\r'; }
                    case '\'' => { set ch = '\''; }
                    case '\\' => { set ch = '\\'; }
                    else      => { set ch = src[2]; }
                }
            } else {
                set ch = src[1];
            }
            let ch = try fmt::parseChar(p.previous.source) catch {
                throw failParsing(p, "invalid char literal");
            };
            return node(p, ast::NodeValue::Char(ch));
        }
        case scanner::TokenKind::String => {
            advance(p);
            let src = p.previous.source;
            let raw = &src[1..src.len - 1]; // Strip quotes.

            // Process escape sequences into arena buffer.
            let buf = alloc::remainingBuf(&mut p.arena.arena);
            let len = unescapeString(raw, buf);
            let len = fmt::unescapeString(raw, buf);
            alloc::commit(&mut p.arena.arena, len);

            return node(p, ast::NodeValue::String(&buf[..len]));
        }
        case scanner::TokenKind::Underscore => {

lib/std/lang/parser/tests.rad +8 -31

//! Parser tests.

use std::mem;
use std::fmt;
use std::testing;
use std::lang::ast;
use std::lang::scanner;
use std::lang::strings;



    return root;
}

/// Parse an expression expected to be a number literal and return its payload.
fn parseNumberLiteral(text: *[u8]) -> ast::IntLiteral
fn parseNumberLiteral(text: *[u8]) -> fmt::IntLiteral
    throws (testing::TestError)
{
    let mut arena = ast::nodeArena(&mut ARENA_STORAGE[..]);
    let mut parser = super::mkParser(scanner::SourceLoc::String, text, &mut arena, &mut STRING_POOL);
    super::advance(&mut parser);


/// Verify that decimal literals record magnitude and base metadata.
@test fn testParseDecimalLiteralMetadata() throws (testing::TestError) {
    let lit = try parseNumberLiteral("1234");
    try testing::expect(lit.magnitude == 1234);
    try testing::expect(lit.radix == ast::Radix::Decimal);
    try testing::expect(lit.radix == fmt::Radix::Decimal);
}

/// Verify that hexadecimal literals record metadata without marking them signed.
@test fn testParseNumberMetadata() throws (testing::TestError) {
    let lit = try parseNumberLiteral("0xFF");
    try testing::expect(lit.magnitude == 0xFF);
    try testing::expect(lit.radix == ast::Radix::Hex);
    try testing::expect(lit.radix == fmt::Radix::Hex);
    try testing::expect(not lit.signed);
    try testing::expect(not lit.negative);
}

/// Verify that binary literals capture their radix.
@test fn testParseBinaryLiteralMetadata() throws (testing::TestError) {
    let lit = try parseNumberLiteral("0b1010");
    try testing::expect(lit.magnitude == 0b1010);
    try testing::expect(lit.radix == ast::Radix::Binary);
    try testing::expect(lit.radix == fmt::Radix::Binary);
}

/// Signed literals produced by the scanner keep sign details in metadata.
@test fn testParseSignedLiteralMetadata() throws (testing::TestError) {
    let literal = try parseNumberLiteral("42");

/// Literals with prefixes still parse correctly when explicitly signed.
@test fn testParseSignedPrefixedLiteral() throws (testing::TestError) {
    let hex = try parseNumberLiteral("+0x2A");
    try testing::expect(hex.signed);
    try testing::expect(not hex.negative);
    try testing::expect(hex.radix == ast::Radix::Hex);
    try testing::expect(hex.radix == fmt::Radix::Hex);
    try testing::expect(hex.magnitude == 0x2A);

    let neg = try parseNumberLiteral("-0x2A");
    try testing::expect(neg.signed);
    try testing::expect(neg.negative);
    try testing::expect(neg.radix == ast::Radix::Hex);
    try testing::expect(neg.radix == fmt::Radix::Hex);
    try testing::expect(neg.magnitude == 0x2A);

    let bin = try parseNumberLiteral("-0b11");
    try testing::expect(bin.signed);
    try testing::expect(bin.negative);
    try testing::expect(bin.radix == ast::Radix::Binary);
    try testing::expect(bin.radix == fmt::Radix::Binary);
    try testing::expect(bin.magnitude == 0b11);
}

/// Range expressions parse with explicit start and end bounds.
@test fn testParseRangeExpr() throws (testing::TestError) {

    try expectNumberLiteralFail("0x1G");
    try expectNumberLiteralFail("0b102");
    try expectNumberLiteralFail("+0x1G");
}

/// Ensure digit-to-value conversion covers decimal and hex ranges.
///
/// Checks both ends of the decimal range, both ends of the hexadecimal
/// letter range in each case, and two characters that must be rejected.
@test fn testDigitFromAscii() throws (testing::TestError) {
    // Decimal digits: lowest and highest.
    let zero = super::digitFromAscii('0', 10) else throw testing::TestError::Failed;
    try testing::expect(zero == 0);

    let nine = super::digitFromAscii('9', 10) else throw testing::TestError::Failed;
    try testing::expect(nine == 9);

    // Lowercase hex letters: lowest and highest.
    let lower = super::digitFromAscii('a', 16) else throw testing::TestError::Failed;
    try testing::expect(lower == 10);

    let lowerF = super::digitFromAscii('f', 16) else throw testing::TestError::Failed;
    try testing::expect(lowerF == 15);

    // Uppercase hex letters map to the same values as lowercase.
    let upper = super::digitFromAscii('A', 16) else throw testing::TestError::Failed;
    try testing::expect(upper == 10);

    let upperF = super::digitFromAscii('F', 16) else throw testing::TestError::Failed;
    try testing::expect(upperF == 15);

    // A letter past the radix and a non-alphanumeric character are rejected.
    try testing::expect(super::digitFromAscii('g', 16) == nil);
    try testing::expect(super::digitFromAscii('_', 10) == nil);
}

/// Test parsing nil literal.
@test fn testParseNil() throws (testing::TestError) {
    let r1 = try! parseExprStr("nil");
    let case ast::NodeValue::Nil = r1.value
        else throw testing::TestError::Failed;

lib/std/lang/scanner.rad +22 -49

//!
//! This module implements a hand-written scanner that tokenizes Radiance
//! source code into a stream of tokens for consumption by the parser.
@test mod tests;

use std::char;
use std::mem;
use std::lang::strings;

/// Token kinds representing all lexical elements in Radiance.
///

            else => return,
        }
    }
}

/// Report whether `c` lies in the ASCII range `'0'..='9'`.
fn isDigit(c: u8) -> bool {
    if c < '0' {
        return false;
    }
    return c <= '9';
}

/// Report whether `c` is a hexadecimal digit: `0-9`, `a-f` or `A-F`.
fn isHexDigit(c: u8) -> bool {
    if c >= '0' and c <= '9' {
        return true;
    }
    if c >= 'a' and c <= 'f' {
        return true;
    }
    return c >= 'A' and c <= 'F';
}

/// Report whether `c` is one of the two binary digits, `'0'` or `'1'`.
fn isBinDigit(c: u8) -> bool {
    match c {
        case '0' => return true,
        case '1' => return true,
        else     => return false,
    }
}

/// Report whether `c` is an ASCII letter, lowercase or uppercase.
fn isAlpha(c: u8) -> bool {
    if c >= 'a' and c <= 'z' {
        return true;
    }
    return c >= 'A' and c <= 'Z';
}

/// Report whether `c` lies in the printable ASCII range, space through `~`.
fn isPrint(c: u8) -> bool {
    if c < ' ' {
        return false;
    }
    return c <= '~';
}

/// Scan numeric literal (decimal, hex, or binary).
fn scanNumber(s: *mut Scanner) -> Token {
    let first = s.source[s.cursor - 1];
    if first == '-' or first == '+' {
        advance(s);

    // Check for hex literal (`0x` or `0X` prefix).
    if s.source[s.cursor - 1] == '0' {
        if let ch = current(s); ch == 'x' or ch == 'X' {
            advance(s);
            // Must have at least one hex digit after `0x`.
            if let ch = current(s); not isHexDigit(ch) {
            if let ch = current(s); not char::isHexDigit(ch) {
                return invalid(s.token, "invalid hex literal");
            }
            while let ch = current(s); isHexDigit(ch) {
            while let ch = current(s); char::isHexDigit(ch) {
                advance(s);
            }
            return tok(s, TokenKind::Number);
        }
        // Check for binary literal (`0b` or `0B` prefix).
        if let ch = current(s); ch == 'b' or ch == 'B' {
            advance(s);
            // Must have at least one binary digit after `0b`.
            if let ch = current(s); not isBinDigit(ch) {
            if let ch = current(s); not char::isBinDigit(ch) {
                return invalid(s.token, "invalid binary literal");
            }
            while let ch = current(s); isBinDigit(ch) {
            while let ch = current(s); char::isBinDigit(ch) {
                advance(s);
            }
            return tok(s, TokenKind::Number);
        }
    }

    // Regular decimal number.
    while let ch = current(s); isDigit(ch) {
    while let ch = current(s); char::isDigit(ch) {
        advance(s);
    }

    // Look for decimal part.
    if let ch = current(s); ch == '.' {
        if let p = peek(s); isDigit(p) {
        if let p = peek(s); char::isDigit(p) {
            advance(s); // Consume the "."
            while let ch = current(s); isDigit(ch) {
            while let ch = current(s); char::isDigit(ch) {
                advance(s);
            }
        }
    }
    return tok(s, TokenKind::Number);
}

fn scanDelimited(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
    while let ch = current(s); ch <> delim {
        if not isPrint(ch) {
        if not char::isPrint(ch) {
            return invalid(s.token, "invalid character");
        }
        if consume(s, '\\') { // Consume escapes
            if isEof(s) {
                return nil;

    return TokenKind::Ident;
}

/// Scan an identifier, keyword, or label.
fn scanIdentifier(s: *mut Scanner) -> Token {
    while let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' or ch == '#' {
    while let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) {
        advance(s);
    }
    let ident = &s.source[s.token..s.cursor];
    let kind = keywordOrIdent(ident);


    if isEof(s) {
        return tok(s, TokenKind::Eof);
    }
    let c: u8 = advance(s);

    if isDigit(c) {
    if char::isDigit(c) {
        return scanNumber(s);
    }
    if isAlpha(c) {
    if char::isAlpha(c) {
        return scanIdentifier(s);
    }
    match c {
        case '\'' => return scanChar(s),
        case '"'  => return scanString(s),

            }
            if consume(s, '=') {
                return tok(s, TokenKind::MinusEqual);
            }
            // If followed by a digit, scan as negative number
            if let ch = current(s); isDigit(ch) {
            if let ch = current(s); char::isDigit(ch) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Minus);
        }
        case '+' => {
            if consume(s, '=') {
                return tok(s, TokenKind::PlusEqual);
            }
            if let ch = current(s); isDigit(ch) {
            if let ch = current(s); char::isDigit(ch) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Plus);
        }
        case '/' => {

            }
            return tok(s, TokenKind::Gt);
        }
        case '@' => {
            // Scan `@identifier` as a single token.
            while let ch = current(s); isAlpha(ch) {
                advance(s);
            }
            // Must have at least one character after `@`.
            if s.cursor - s.token <= 1 {
            let ch = current(s) else {
                return invalid(s.token, "expected identifier after `@`");
            };
            if not char::isAlpha(ch) and ch <> '_' {
                return invalid(s.token, "expected identifier after `@`");
            }
            while let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) {
                advance(s);
            }
            let name = &s.source[s.token..s.cursor];
            return Token {
                kind: TokenKind::AtIdent,
                source: strings::intern(s.pool, name),
                offset: s.token,
            };
        }
        case '_' => {
            if let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' {
            if let ch = current(s); char::isAlpha(ch) or ch == '_' or char::isDigit(ch) {
                // This is part of an identifier like `_foo` or `__start`
                return scanIdentifier(s);
            }
            return tok(s, TokenKind::Underscore);
        }

lib/std/lang/strings.rad +1 -1


use std::mem;
use std::collections::dict;

/// Table size.
constant TABLE_SIZE: u32 = 8192;
constant TABLE_SIZE: u32 = 32768;

/// String interning pool using open-addressed hash table.
///
/// Each unique string content is stored only once, allowing pointer equality
/// to be used instead of content comparison for symbol lookups and module names.

lib/std/tests.rad +99 -0

    let result: *[u8] = fmt::formatI64(-9223372036854775808, &mut buffer[..]);
    try testing::expect(result.len == 20);
    try testing::expectBytesEq(result, "-9223372036854775808");
}

/// Checks that `fmt::parseInt` accepts decimal, hexadecimal and binary
/// literal text, and that the returned value reports the expected
/// `magnitude`, `radix`, `signed` and `negative` fields for each form.
@test fn testParseIntLiteralText() throws (testing::TestError) {
    // Plain decimal literal without a sign prefix.
    let dec = try fmt::parseInt("123") catch {
        throw testing::TestError::Failed;
    };
    try testing::expect(dec.magnitude == 123);
    try testing::expect(dec.radix == fmt::Radix::Decimal);
    try testing::expect(not dec.signed);
    try testing::expect(not dec.negative);

    // Hexadecimal literal with a leading `-`: expected to be both signed
    // and negative, with the magnitude held separately from the sign.
    let hex = try fmt::parseInt("-0x2a") catch {
        throw testing::TestError::Failed;
    };
    try testing::expect(hex.magnitude == 42);
    try testing::expect(hex.radix == fmt::Radix::Hex);
    try testing::expect(hex.signed);
    try testing::expect(hex.negative);

    // Binary literal with a leading `+`: expected to be signed but not negative.
    let bin = try fmt::parseInt("+0b101") catch {
        throw testing::TestError::Failed;
    };
    try testing::expect(bin.magnitude == 5);
    try testing::expect(bin.radix == fmt::Radix::Binary);
    try testing::expect(bin.signed);
    try testing::expect(not bin.negative);
}

/// Checks `fmt::digitFromAscii` on the boundary digits of base 10 and
/// base 16 (both letter cases), and on characters it must reject.
/// NOTE(review): the second argument is presumably the radix -- confirm
/// against `fmt::digitFromAscii`.
@test fn testDigitFromAscii() throws (testing::TestError) {
    // Lowest and highest decimal digits.
    let zero = fmt::digitFromAscii('0', 10) else throw testing::TestError::Failed;
    try testing::expect(zero == 0);

    let nine = fmt::digitFromAscii('9', 10) else throw testing::TestError::Failed;
    try testing::expect(nine == 9);

    // Lowercase hexadecimal letters map to 10..15.
    let lower = fmt::digitFromAscii('a', 16) else throw testing::TestError::Failed;
    try testing::expect(lower == 10);

    let lowerF = fmt::digitFromAscii('f', 16) else throw testing::TestError::Failed;
    try testing::expect(lowerF == 15);

    // Uppercase hexadecimal letters map to the same values.
    let upper = fmt::digitFromAscii('A', 16) else throw testing::TestError::Failed;
    try testing::expect(upper == 10);

    let upperF = fmt::digitFromAscii('F', 16) else throw testing::TestError::Failed;
    try testing::expect(upperF == 15);

    // 'g' with 16 and '_' with 10 must both produce `nil`.
    try testing::expect(fmt::digitFromAscii('g', 16) == nil);
    try testing::expect(fmt::digitFromAscii('_', 10) == nil);
}

/// Checks that `fmt::parseInt` fails on empty input.
@test fn testParseIntLiteralTextErrors() throws (testing::TestError) {
    // The test passes only if the call throws; reaching the `catch` is success.
    try fmt::parseInt("") catch {
        return;
    };
    // No error was thrown, so the empty string was wrongly accepted.
    throw testing::TestError::Failed;
}

/// Checks that `fmt::parseInt` fails when a digit is not valid for the
/// literal's prefix: here `2` following the binary prefix `0b`.
@test fn testParseIntLiteralTextInvalidDigitErrors() throws (testing::TestError) {
    // The test passes only if the call throws; reaching the `catch` is success.
    try fmt::parseInt("0b2") catch {
        return;
    };
    // No error was thrown, so the invalid digit was wrongly accepted.
    throw testing::TestError::Failed;
}

/// Checks that `fmt::parseInt` fails on a literal that is too large.
/// The input is 2^64, i.e. one more than the largest 64-bit unsigned value.
/// NOTE(review): this presumes the parsed magnitude is 64 bits wide -- confirm
/// against `fmt::parseInt`.
@test fn testParseIntLiteralTextOverflowErrors() throws (testing::TestError) {
    // The test passes only if the call throws; reaching the `catch` is success.
    try fmt::parseInt("18446744073709551616") catch {
        return;
    };
    // No error was thrown, so the out-of-range literal was wrongly accepted.
    throw testing::TestError::Failed;
}

/// Checks that `fmt::parseChar` parses quoted character literal text,
/// both a plain character and a backslash escape.
@test fn testParseCharLiteralText() throws (testing::TestError) {
    // The input includes the surrounding single quotes.
    let x = try fmt::parseChar("'x'") catch {
        throw testing::TestError::Failed;
    };
    try testing::expect(x == 'x');

    // The input text is a quote, a backslash, `n`, and a quote; the parsed
    // result is expected to be the newline character.
    let newline = try fmt::parseChar("'\\n'") catch {
        throw testing::TestError::Failed;
    };
    try testing::expect(newline == '\n');
}

/// Checks that `fmt::parseChar` fails on an empty character literal (`''`).
@test fn testParseCharLiteralTextErrors() throws (testing::TestError) {
    // The test passes only if the call throws; reaching the `catch` is success.
    try fmt::parseChar("''") catch {
        return;
    };
    // No error was thrown, so the empty literal was wrongly accepted.
    throw testing::TestError::Failed;
}

/// Checks that `fmt::unescapeString` decodes backslash escapes into the
/// destination buffer and returns the number of bytes produced.
@test fn testUnescapeString() throws (testing::TestError) {
    let mut buffer: [u8; 8] = [0; 8];
    // The input text is `a`, backslash-`n`, backslash-`0` (five characters),
    // which is expected to decode to three bytes.
    let len = fmt::unescapeString("a\\n\\0", &mut buffer[..]);

    try testing::expect(len == 3);
    try testing::expect(buffer[0] == 'a');
    try testing::expect(buffer[1] == '\n');
    try testing::expect(buffer[2] == 0);
}

// mem /////////////////////////////////////////////////////////////////////////

@test fn testCopyFullSlice() throws (testing::TestError) {
    let mut xs: [u8; 3] = [1, 2, 3];
    let mut ys: [u8; 3] = [4, 5, 6];

scripts/count-lines-no-comments.sh +35 -11

#!/bin/sh
# Count non-blank lines in all .rad files, excluding comment lines and tests.
# Count non-blank, non-comment lines in .rad files, skipping tests.

dir="${1:-.}"

if [ ! -d "$dir" ]; then
  echo "Error: Directory '$dir' does not exist"
  exit 1
if [ "$#" -eq 0 ]; then
  set -- .
fi

echo "Counting non-blank, non-comment lines in .rad files in: $dir"
echo "--------------------------------------------------------------"
tmpList=$(mktemp)
tmpFiles=$(mktemp)
trap 'rm -f "$tmpList" "$tmpFiles"' EXIT HUP INT TERM

for input in "$@"; do
  if [ -d "$input" ]; then
    find "$input" -type f -name "*.rad" -not -path "*/tests/*" -not -name "tests.rad" >> "$tmpList"
  elif [ -f "$input" ]; then
    case "$input" in
      */tests/*|*/tests.rad|tests.rad)
        ;;
      *.rad)
        printf '%s\n' "$input" >> "$tmpList"
        ;;
      *)
        echo "Error: File '$input' is not a .rad file" >&2
        exit 1
        ;;
    esac
  else
    echo "Error: Path '$input' does not exist" >&2
    exit 1
  fi
done

echo "Counting non-blank, non-comment lines in .rad files for inputs: $*"
echo "---------------------------------------------------------------------"

total=0
for file in $(find "$dir" -name "*.rad" -type f -not -path "*/tests/*" -not -name "tests.rad" | sort); do
sort -u "$tmpList" > "$tmpFiles"

while IFS= read -r file; do
  if [ -f "$file" ]; then
    count=$(grep -v '^[[:space:]]*$' "$file" | grep -v '^[[:space:]]*//' | wc -l)
    total=$((total + count))
    printf "%6d  %s\n" "$count" "$file"
  fi
done
done < "$tmpFiles"

echo "--------------------------------------------------------------"
echo "---------------------------------------------------------------------"
printf "%6d  TOTAL\n" "$total"

scripts/count-lines.sh +35 -11

#!/bin/sh
# Count non-blank lines in all .rad files, skipping tests.
# Count non-blank lines in .rad files, skipping tests.

dir="${1:-.}"

if [ ! -d "$dir" ]; then
  echo "Error: Directory '$dir' does not exist"
  exit 1
if [ "$#" -eq 0 ]; then
  set -- .
fi

echo "Counting non-blank lines in .rad files in: $dir"
echo "-------------------------------------------"
tmpList=$(mktemp)
tmpFiles=$(mktemp)
trap 'rm -f "$tmpList" "$tmpFiles"' EXIT HUP INT TERM

for input in "$@"; do
  if [ -d "$input" ]; then
    find "$input" -type f -name "*.rad" -not -path "*/tests/*" -not -name "tests.rad" >> "$tmpList"
  elif [ -f "$input" ]; then
    case "$input" in
      */tests/*|*/tests.rad|tests.rad)
        ;;
      *.rad)
        printf '%s\n' "$input" >> "$tmpList"
        ;;
      *)
        echo "Error: File '$input' is not a .rad file" >&2
        exit 1
        ;;
    esac
  else
    echo "Error: Path '$input' does not exist" >&2
    exit 1
  fi
done

echo "Counting non-blank lines in .rad files for inputs: $*"
echo "------------------------------------------------------"

total=0
for file in $(find "$dir" -name "*.rad" -type f -not -path "*/tests/*" -not -name "tests.rad" | sort); do
sort -u "$tmpList" > "$tmpFiles"

while IFS= read -r file; do
  if [ -f "$file" ]; then
    count=$(grep -v '^[[:space:]]*$' "$file" | wc -l)
    total=$((total + count))
    printf "%6d  %s\n" "$count" "$file"
  fi
done
done < "$tmpFiles"

echo "-------------------------------------------"
echo "------------------------------------------------------"
printf "%6d  TOTAL\n" "$total"

std.lib +5 -0

lib/std.rad
lib/std/char.rad
lib/std/fmt.rad
lib/std/mem.rad
lib/std/vec.rad
lib/std/io.rad
lib/std/intrinsics.rad

lib/std/arch/rv64/encode.rad
lib/std/arch/rv64/decode.rad
lib/std/arch/rv64/emit.rad
lib/std/arch/rv64/isel.rad
lib/std/arch/rv64/printer.rad
lib/std/arch/rv64/asm.rad
lib/std/arch/rv64/asm/scanner.rad
lib/std/arch/rv64/asm/parser.rad
lib/std/arch/rv64/asm/emit.rad
lib/std/lang.rad
lib/std/lang/alloc.rad
lib/std/lang/strings.rad
lib/std/lang/sexpr.rad
lib/std/lang/ast.rad

std.lib.test +3 -0

lib/std/testing.rad
lib/std/tests.rad
lib/std/char/tests.rad
lib/std/arch/rv64/tests.rad
lib/std/arch/rv64/asm/tests.rad
lib/std/arch/rv64/asm/scanner/tests.rad
lib/std/lang/alloc/tests.rad
lib/std/lang/parser/tests.rad
lib/std/lang/module/tests.rad
lib/std/lang/scanner/tests.rad
lib/std/lang/resolver/tests.rad

test/run +13 -6

#!/bin/sh
# Run binary tests.
# Usage: test/run [<test.rad>...]
# Usage: test/run [<test.rad|test.ras>...]
#
# If no arguments are provided, runs all tests in `test/tests/`.
# If no arguments are provided, runs all `.rad` and `.ras` tests in
# `test/tests/`.
#
# For each test:
#   - If a `.ril` file exists alongside it, the IL output is checked
#     against it via the runner binary.
#   - If `//! returns: N` appears in the file, the test is compiled to
#     a binary and executed; the exit code must match N.

RUNNER="test/runner.rv64"
TEST_DIR="test/tests"
EMU="${RAD_EMULATOR:-emulator} -stack-size=1024 -run"
EMU_RUN="${RAD_EMULATOR:-emulator} -run"
EMU_RUN="${RAD_EMULATOR:-emulator} -no-jit -run"

if [ ! -f "$RUNNER" ]; then
  echo "error: runner binary not found: $RUNNER" >&2
  echo "hint: run 'make test' first" >&2
  exit 1

# Disable core dumps for tests.
ulimit -c 0

# Collect tests.
if [ $# -eq 0 ]; then
  tests=$(find "$TEST_DIR" -name '*.rad' | sort)
  tests=$(find "$TEST_DIR" \( -name '*.rad' -o -name '*.ras' \) | sort)
else
  tests="$*"
fi

if [ -z "$tests" ]; then


passed=0
failed=0

for test in $tests; do
  ril="${test%.rad}.ril"
  bin="${test%.rad}.rv64"
  case "$test" in
    *.rad) base="${test%.rad}" ;;
    *.ras) base="${test%.ras}" ;;
    *) base="$test" ;;
  esac

  ril="${base}.ril"
  bin="${base}.rv64"

  # IL check: run the runner if a .ril file exists.
  if [ -f "$ril" ]; then
    if $EMU "$RUNNER" -- "$test"; then
      passed=$((passed + 1))

test/runner.rad +80 -3

//! IL snapshot test runner.
//! IL snapshot test runner and `.ras` asm helper.
//!
//! Given a `.rad` source file, lowers it to IL and compares the output
//! against the corresponding `.ril` snapshot file. Called by `test/run`
//! for every test that has a `.ril` file.
//! against the corresponding `.ril` snapshot file. It also supports an
//! `assemble <input.ras> <output.rv64>` subcommand used by `bin-test`.

use std::io;
use std::mem;
use std::sys;
use std::sys::unix;

use std::lang::parser;
use std::lang::scanner;
use std::lang::resolver;
use std::lang::strings;
use std::lang::lower;
use std::arch::rv64;
use std::arch::rv64::asm;

/// Buffer size for reading source files (8 KB).
constant SOURCE_BUF_SIZE: u32 = 8192;
/// Buffer size for reading expected IL files (32 KB).
constant EXPECTED_BUF_SIZE: u32 = 32768;


/// Maximum number of AST nodes per test file.
constant MAX_NODE_DATA: u32 = 4096;
/// Maximum number of resolver errors per test file.
constant MAX_ERRORS: u32 = 16;
/// Maximum number of text words in a `.ras` test binary.
constant ASM_TEXT_CAPACITY: u32 = 256;
/// Maximum number of data bytes in a `.ras` test binary.
constant ASM_DATA_CAPACITY: u32 = 1024;
constant RO_DATA_EXT: *[u8] = ".ro.data";

// Static storage for large buffers to avoid stack overflow.
// Tests run serially so sharing these is safe.
static SOURCE_BUF: [u8; SOURCE_BUF_SIZE] = undefined;
static EXPECTED_BUF: [u8; EXPECTED_BUF_SIZE] = undefined;

static IL_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined;
static PRINT_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined;
static RESOLVER_ARENA_STORAGE: [u8; ARENA_SIZE] = undefined;
static NODE_DATA_STORAGE: [resolver::NodeData; MAX_NODE_DATA] = undefined;
static ERROR_STORAGE: [resolver::Error; MAX_ERRORS] = undefined;
static ASM_TEXT_STORAGE: [u32; ASM_TEXT_CAPACITY] = undefined;
static ASM_DATA_STORAGE: [u8; ASM_DATA_CAPACITY] = undefined;

/// Strip a `//` comment from a line, preserving `//` inside quoted strings.
/// Returns the content before the comment, trimmed of trailing whitespace.
fn stripLine(line: *[u8]) -> *[u8] {
    let mut end = line.len;

    set buf[len] = 0;

    return &buf[..len];
}

/// Build `basePath` followed by `ext` inside `buf`, followed by a zero byte.
///
/// Returns the slice of `buf` holding the concatenated path (the trailing
/// zero byte is written but not included in the slice), or `nil` if `buf`
/// is too small to hold both parts plus the terminator.
fn appendPathExt(basePath: *[u8], ext: *[u8], buf: *mut [u8]) -> ?*[u8] {
    // Reserve one extra byte for the terminator written below.
    if basePath.len + ext.len + 1 > buf.len {
        return nil;
    }
    let mut pos: u32 = 0;

    // NOTE(review): this relies on `mem::copy` returning the number of bytes
    // copied, so that `pos` ends up just past the extension -- confirm.
    set pos += try! mem::copy(&mut buf[pos..], basePath);
    set pos += try! mem::copy(&mut buf[pos..], ext);
    set buf[pos] = 0;

    return &buf[..pos];
}

/// Write the 32-bit words in `code` to the file at `path`.
///
/// The words are viewed as a byte slice of `code.len * 4` bytes, without
/// copying, and handed to `unix::writeFile`, whose result is returned.
fn writeCode(code: *[u32], path: *[u8]) -> bool {
    // Each `u32` word contributes four bytes to the output.
    let bytes = @sliceOf(code.ptr as *u8, code.len * 4);
    return unix::writeFile(path, bytes);
}

/// Assemble the file at `sourcePath` and write the result to disk.
///
/// The assembled text is written to `outputPath`, and the assembled data is
/// written to `outputPath` with `RO_DATA_EXT` appended. Returns `true` on
/// success. On any failure an error message is printed via `io::printError`
/// and `false` is returned.
fn assembleBinary(sourcePath: *[u8], outputPath: *[u8]) -> bool {
    let mut roDataPathBuf: [u8; MAX_PATH_LEN] = undefined;
    // The source is read into the shared static source buffer.
    let source = unix::readFile(sourcePath, &mut SOURCE_BUF[..]) else {
        io::printError("error: could not read source: ");
        io::printError(sourcePath);
        io::printError("\n");
        return false;
    };
    // Derive the data file path up front, so an over-long output path is
    // reported before any assembly work or file writes happen.
    let roDataPath = appendPathExt(outputPath, RO_DATA_EXT, &mut roDataPathBuf[..]) else {
        io::printError("error: output path too long\n");
        return false;
    };

    let mut arena = alloc::new(&mut AST_ARENA_STORAGE[..]);
    // Text and data are assembled into fixed-size static storage.
    // NOTE(review): `rv64::RO_DATA_BASE` is presumably the address at which
    // the data section is assumed to be loaded -- confirm in `asm::assemble`.
    let program = try asm::assemble(
        asm::scanner::SourceKind::File { path: sourcePath },
        source,
        &mut ASM_TEXT_STORAGE[..],
        &mut ASM_DATA_STORAGE[..],
        &mut arena,
        &mut STRING_POOL,
        rv64::RO_DATA_BASE
    ) catch {
        io::printError("error: assembly failed: ");
        io::printError(sourcePath);
        io::printError("\n");
        return false;
    };
    // Text words go to the main output file.
    if not writeCode(program.text, outputPath) {
        io::printError("error: could not write output: ");
        io::printError(outputPath);
        io::printError("\n");
        return false;
    }
    // Data bytes go to the sibling file next to it.
    if not unix::writeFile(roDataPath, program.data) {
        io::printError("error: could not write data: ");
        io::printError(roDataPath);
        io::printError("\n");
        return false;
    }
    return true;
}

/// Run a single IL snapshot test case. Returns `true` on success.
fn runTest(sourcePath: *[u8]) -> bool {
    // Path buffer.
    let mut rilPathBuf: [u8; MAX_PATH_LEN] = undefined;
    let mut pkgScope: resolver::Scope = undefined;


/// Run a single test specified as an argument.
@default fn main(env: *sys::Env) -> i32 {
    let args = env.args;

    if args.len == 4 and mem::eq(args[1], "assemble") {
        if assembleBinary(args[2], args[3]) {
            return 0;
        } else {
            return 1;
        }
    }
    if args.len <> 2 {
        io::printError("error: expected test file path as argument");
        return 1;
    }
    let sourcePath = args[1];

test/tests/asm.basic.text.program.ras added +13 -0

1	+	//! returns: 42
2	+
3	+	.text;
4	+	@entry
5	+	addi %a0 %zero 42;
6	+	sd %a0 8(%sp);
7	+	beq %a0 %zero @fail;
8	+	li %a7 93;
9	+	ecall;
10	+	@fail
11	+	li %a0 1;
12	+	li %a7 93;
13	+	ecall;

test/tests/asm.branch.comparisons.ras added +40 -0

1	+	//! returns: 17
2	+
3	+	.text;
4	+	@entry
5	+	li %a0 0;
6	+	li %t0 5;
7	+	li %t1 5;
8	+	beq %t0 %t1 @beqOk;
9	+	j @fail;
10	+	@beqOk
11	+	bne %t0 %t1 @fail;
12	+	li %t2 -1;
13	+	li %t3 1;
14	+	blt %t2 %t3 @bltOk;
15	+	j @fail;
16	+	@bltOk
17	+	bgt %t3 %t2 @bgtOk;
18	+	j @fail;
19	+	@bgtOk
20	+	bge %t3 %t2 @bgeOk;
21	+	j @fail;
22	+	@bgeOk
23	+	ble %t2 %t3 @bleOk;
24	+	j @fail;
25	+	@bleOk
26	+	li %t4 1;
27	+	li %t5 2;
28	+	bltu %t4 %t5 @bltuOk;
29	+	j @fail;
30	+	@bltuOk
31	+	bgeu %t5 %t4 @done;
32	+	j @fail;
33	+	@done
34	+	li %a0 17;
35	+	li %a7 93;
36	+	ecall;
37	+	@fail
38	+	li %a0 1;
39	+	li %a7 93;
40	+	ecall;

test/tests/asm.call.return.flow.ras added +17 -0

1	+	//! returns: 29
2	+
3	+	.text;
4	+	@entry
5	+	call @helper;
6	+	j @exit;
7	+	@helper
8	+	mv %t0 %ra;
9	+	jal %ra @leaf;
10	+	mv %ra %t0;
11	+	ret;
12	+	@leaf
13	+	li %a0 29;
14	+	ret;
15	+	@exit
16	+	li %a7 93;
17	+	ecall;

test/tests/asm.compare.set.logic.ras added +29 -0

1	+	//! returns: 36
2	+
3	+	.text;
4	+	@entry
5	+	nop;
6	+	li %t0 -1;
7	+	li %t1 1;
8	+	slt %a0 %t0 %t1;
9	+	sltu %t2 %t1 %t0;
10	+	add %a0 %a0 %t2;
11	+	slti %t3 %t0 0;
12	+	add %a0 %a0 %t3;
13	+	sltiu %t4 %zero 1;
14	+	add %a0 %a0 %t4;
15	+	seqz %t5 %zero;
16	+	add %a0 %a0 %t5;
17	+	snez %t6 %t1;
18	+	add %a0 %a0 %t6;
19	+	neg %a1 %t1;
20	+	slti %a1 %a1 0;
21	+	add %a0 %a0 %a1;
22	+	xori %a2 %zero 7;
23	+	ori %a2 %a2 8;
24	+	andi %a2 %a2 15;
25	+	add %a0 %a0 %a2;
26	+	xor %a3 %a2 %t6;
27	+	add %a0 %a0 %a3;
28	+	li %a7 93;
29	+	ecall;

test/tests/asm.csr.system.instructions.ras added +16 -0

1	+	//! returns: 88
2	+
3	+	.text;
4	+	@entry
5	+	j @exit;
6	+	csrr %a0 mhartid;
7	+	csrw mtvec %a0;
8	+	csrrw %t0 mscratch %t1;
9	+	csrsi mstatus 8;
10	+	csrc mip %t0;
11	+	wfi;
12	+	mret;
13	+	@exit
14	+	li %a0 88;
15	+	li %a7 93;
16	+	ecall;

test/tests/asm.data.directives.ras added +24 -0

1	+	//! returns: 139
2	+
3	+	.constant dataBase 0x10000;
4	+	.data;
5	+	.byte 1, 'A';
6	+	.word 0x11223344;
7	+	.dword 0x1122334455667788;
8	+	.ascii "hi";
9	+	.ascii "x";
10	+	.align 4;
11	+	.text;
12	+	@entry
13	+	li %t0 dataBase;
14	+	lbu %a0 0(%t0);
15	+	lbu %t1 1(%t0);
16	+	add %a0 %a0 %t1;
17	+	lbu %t1 14(%t0);
18	+	add %a0 %a0 %t1;
19	+	lbu %t1 15(%t0);
20	+	add %a0 %a0 %t1;
21	+	lbu %t1 16(%t0);
22	+	add %a0 %a0 %t1;
23	+	li %a7 93;
24	+	ecall;

test/tests/asm.data.symbol.fixup.ras added +16 -0

1	+	//! returns: 2
2	+
3	+	.constant dataBase 0x10000;
4	+	.data;
5	+	.byte 0;
6	+	@here
7	+	.byte 1;
8	+	.word @here;
9	+	.text;
10	+	@entry
11	+	li %t0 dataBase;
12	+	lbu %a0 1(%t0);
13	+	lbu %t1 2(%t0);
14	+	add %a0 %a0 %t1;
15	+	li %a7 93;
16	+	ecall;

test/tests/asm.directive.boundary.values.ras added +40 -0

1	+	//! returns: 97
2	+
3	+	.constant dataBase 0x10000;
4	+	.data;
5	+	.word -2147483648;
6	+	.word 2147483647;
7	+	.dword 0x1122334455667788;
8	+	.byte 255;
9	+	.text;
10	+	@entry
11	+	j @exit;
12	+	.align 8;
13	+	ret;
14	+	@exit
15	+	li %t0 dataBase;
16	+	lbu %a0 3(%t0);
17	+	lbu %t1 4(%t0);
18	+	add %a0 %a0 %t1;
19	+	lbu %t1 7(%t0);
20	+	add %a0 %a0 %t1;
21	+	lbu %t1 8(%t0);
22	+	add %a0 %a0 %t1;
23	+	lbu %t1 9(%t0);
24	+	add %a0 %a0 %t1;
25	+	lbu %t1 10(%t0);
26	+	add %a0 %a0 %t1;
27	+	lbu %t1 11(%t0);
28	+	add %a0 %a0 %t1;
29	+	lbu %t1 12(%t0);
30	+	add %a0 %a0 %t1;
31	+	lbu %t1 13(%t0);
32	+	add %a0 %a0 %t1;
33	+	lbu %t1 14(%t0);
34	+	add %a0 %a0 %t1;
35	+	lbu %t1 15(%t0);
36	+	add %a0 %a0 %t1;
37	+	lbu %t1 16(%t0);
38	+	add %a0 %a0 %t1;
39	+	li %a7 93;
40	+	ecall;

test/tests/asm.global.scoped.symbols.ras added +16 -0

1	+	//! returns: 7
2	+
3	+	.export @kernel::main;
4	+	.text;
5	+	@entry
6	+	j @kernel::main;
7	+	li %a0 1;
8	+	li %a7 93;
9	+	ecall;
10	+	@kernel::main
11	+	li %a0 7;
12	+	li %a7 93;
13	+	ecall;
14	+	.data;
15	+	@data::sym
16	+	.byte 1;

test/tests/asm.instruction.matrix.alu.ras added +29 -0

1	+	//! returns: 11
2	+
3	+	.text;
4	+	@entry
5	+	li %a0 0;
6	+	li %a1 6;
7	+	li %a2 3;
8	+	and %t0 %a1 %a2;
9	+	add %a0 %a0 %t0;
10	+	li %a3 4;
11	+	li %a4 1;
12	+	or %t1 %a3 %a4;
13	+	add %a0 %a0 %t1;
14	+	li %t2 0;
15	+	not %t3 %t2;
16	+	andi %t3 %t3 1;
17	+	add %a0 %a0 %t3;
18	+	li %s1 3;
19	+	mv %s2 %s1;
20	+	andi %s2 %s2 1;
21	+	add %a0 %a0 %s2;
22	+	li %t4 0;
23	+	seqz %t5 %t4;
24	+	add %a0 %a0 %t5;
25	+	li %t6 9;
26	+	snez %a6 %t6;
27	+	add %a0 %a0 %a6;
28	+	li %a7 93;
29	+	ecall;

test/tests/asm.instruction.matrix.mem.control.ras added +21 -0

1	+	//! returns: 12
2	+
3	+	.text;
4	+	@entry
5	+	addi %sp %sp -32;
6	+	li %t0 7;
7	+	sb %t0 1(%sp);
8	+	lb %a0 1(%sp);
9	+	sd %a0 8(%sp);
10	+	lui %t2 0;
11	+	auipc %t1 0;
12	+	jalr %ra %t1 16;
13	+	li %a0 1;
14	+	j @exit;
15	+	@helper
16	+	ld %a0 8(%sp);
17	+	addi %a0 %a0 5;
18	+	addi %sp %sp 32;
19	+	@exit
20	+	li %a7 93;
21	+	ecall;

test/tests/asm.instruction.matrix.system.ras added +11 -0

1	+	//! returns: 77
2	+
3	+	.text;
4	+	@entry
5	+	j @exit;
6	+	ecall;
7	+	ebreak;
8	+	@exit
9	+	li %a0 77;
10	+	li %a7 93;
11	+	ecall;

test/tests/asm.label.fixups.ras added +16 -0

1	+	//! returns: 3
2	+
3	+	.text;
4	+	@entry
5	+	li %a0 3;
6	+	li %a1 0;
7	+	@loop
8	+	beqz %a0 @done;
9	+	addi %a1 %a1 1;
10	+	addi %a0 %a0 -1;
11	+	bnez %a0 @loop;
12	+	j @done;
13	+	@done
14	+	mv %a0 %a1;
15	+	li %a7 93;
16	+	ecall;

test/tests/asm.li.expressions.ras added +12 -0

1	+	//! returns: 162
2	+
3	+	.constant PAGE 4096;
4	+	.constant VALUE 0x12340000 + 0x5678;
5	+	.text;
6	+	@entry
7	+	li %a0 42;
8	+	li %a1 VALUE;
9	+	andi %a1 %a1 255;
10	+	add %a0 %a0 %a1;
11	+	li %a7 93;
12	+	ecall;

test/tests/asm.link.rad added +7 -0

1	+	//! returns: 42
2	+
3	+	fn asmAddOne(x: i32) -> i32;
4	+
5	+	@default fn main() -> i32 {
6	+	return asmAddOne(41);
7	+	}

test/tests/asm.link.ras added +5 -0

1	+	.text;
2	+	.export @"asm.link::asmAddOne";
3	+	@"asm.link::asmAddOne"
4	+	addi %a0 %a0 1;
5	+	ret;

test/tests/asm.load.store.widths.ras added +33 -0

1	+	//! returns: 72
2	+
3	+	.text;
4	+	@entry
5	+	addi %sp %sp -32;
6	+	li %t0 0x80;
7	+	sb %t0 0(%sp);
8	+	lb %t1 0(%sp);
9	+	slti %a0 %t1 0;
10	+	lbu %t2 0(%sp);
11	+	srli %t2 %t2 7;
12	+	add %a0 %a0 %t2;
13	+	li %t0 0x8000;
14	+	sh %t0 2(%sp);
15	+	lh %t3 2(%sp);
16	+	slti %t3 %t3 0;
17	+	add %a0 %a0 %t3;
18	+	lhu %t4 2(%sp);
19	+	srli %t4 %t4 15;
20	+	add %a0 %a0 %t4;
21	+	li %t0 17;
22	+	sw %t0 8(%sp);
23	+	lw %t5 8(%sp);
24	+	add %a0 %a0 %t5;
25	+	lwu %t6 8(%sp);
26	+	add %a0 %a0 %t6;
27	+	li %t0 34;
28	+	sd %t0 16(%sp);
29	+	ld %a1 16(%sp);
30	+	add %a0 %a0 %a1;
31	+	addi %sp %sp 32;
32	+	li %a7 93;
33	+	ecall;

test/tests/asm.mul.div.rem.ras added +33 -0

1	+	//! returns: 4
2	+
3	+	.text;
4	+	@entry
5	+	li %t0 20;
6	+	li %t1 6;
7	+	mul %a0 %t0 %t1;
8	+	rem %t2 %t0 %t1;
9	+	add %a0 %a0 %t2;
10	+	div %t3 %t0 %t1;
11	+	add %a0 %a0 %t3;
12	+	remu %t4 %t0 %t1;
13	+	add %a0 %a0 %t4;
14	+	divu %t5 %t0 %t1;
15	+	add %a0 %a0 %t5;
16	+	mulw %t6 %t0 %t1;
17	+	add %a0 %a0 %t6;
18	+	divw %a1 %t0 %t1;
19	+	add %a0 %a0 %a1;
20	+	divuw %a2 %t0 %t1;
21	+	add %a0 %a0 %a2;
22	+	remw %a3 %t0 %t1;
23	+	add %a0 %a0 %a3;
24	+	remuw %a4 %t0 %t1;
25	+	add %a0 %a0 %a4;
26	+	mulh %a5 %t0 %t1;
27	+	add %a0 %a0 %a5;
28	+	mulhu %a6 %t0 %t1;
29	+	add %a0 %a0 %a6;
30	+	mulhsu %a7 %t0 %t1;
31	+	add %a0 %a0 %a7;
32	+	li %a7 93;
33	+	ecall;

test/tests/asm.rodata.prefix.rad added +14 -0

1	+	//! returns: 0
2	+
3	+	fn asmDataValue() -> i32;
4	+
5	+	@default fn main() -> i32 {
6	+	let s = "hello";
7	+
8	+	assert s.len == 5;
9	+	assert s[0] == 'h' as u8;
10	+	assert s[4] == 'o' as u8;
11	+	assert asmDataValue() == 41;
12	+
13	+	return 0;
14	+	}

test/tests/asm.rodata.prefix.ras added +10 -0

1	+	.text;
2	+	.export @"asm.rodata.prefix::asmDataValue";
3	+	@"asm.rodata.prefix::asmDataValue"
4	+	la %t0 @value;
5	+	lbu %a0 0(%t0);
6	+	ret;
7	+
8	+	.data;
9	+	@value
10	+	.byte 41;

test/tests/asm.scoped.symbols.la.ras added +12 -0

1	+	//! returns: 41
2	+
3	+	.text;
4	+	@entry
5	+	la %t0 @data::sym;
6	+	addi %a0 %t0 40;
7	+	li %a7 93;
8	+	ecall;
9	+	.data;
10	+	.byte 0;
11	+	@data::sym
12	+	.byte 1;

test/tests/asm.scoped.symbols.tail.ras added +12 -0

1	+	//! returns: 9
2	+
3	+	.text;
4	+	@entry
5	+	tail @kernel::main;
6	+	li %a0 1;
7	+	li %a7 93;
8	+	ecall;
9	+	@kernel::main
10	+	li %a0 9;
11	+	li %a7 93;
12	+	ecall;

test/tests/asm.section.switching.ras added +15 -0

1	+	//! returns: 3
2	+
3	+	.data;
4	+	@msg
5	+	.ascii "ok";
6	+	.text;
7	+	@entry
8	+	jal %ra @done;
9	+	li %a0 1;
10	+	li %a7 93;
11	+	ecall;
12	+	@done
13	+	li %a0 3;
14	+	li %a7 93;
15	+	ecall;

test/tests/asm.space.constant.expressions.ras added +20 -0

1	+	//! returns: 12
2	+
3	+	.constant PAGE 4096;
4	+	.constant COUNT 8;
5	+	.constant SPACE PAGE / 1024;
6	+	.constant WORD (COUNT - 4) * 2;
7	+	.constant BYTE PAGE / 1024;
8	+	.constant dataBase 0x10000;
9	+	.data;
10	+	.space SPACE;
11	+	.word WORD;
12	+	.byte BYTE;
13	+	.text;
14	+	@entry
15	+	li %t0 dataBase;
16	+	lbu %a0 4(%t0);
17	+	lbu %t1 8(%t0);
18	+	add %a0 %a0 %t1;
19	+	li %a7 93;
20	+	ecall;

test/tests/asm.string.directive.lists.ras added +18 -0

1	+	//! returns: 180
2	+
3	+	.constant dataBase 0x10000;
4	+	.data;
5	+	.ascii "a", "b";
6	+	.ascii "x", "y";
7	+	.text;
8	+	@entry
9	+	li %t0 dataBase;
10	+	lbu %a0 0(%t0);
11	+	lbu %t1 1(%t0);
12	+	add %a0 %a0 %t1;
13	+	lbu %t1 2(%t0);
14	+	add %a0 %a0 %t1;
15	+	lbu %t1 3(%t0);
16	+	add %a0 %a0 %t1;
17	+	li %a7 93;
18	+	ecall;

test/tests/asm.word.dword.constants.ras added +20 -0

1	+	//! returns: 50
2	+
3	+	.constant WORD 0x11223344;
4	+	.constant DWORD 0x55667788;
5	+	.constant dataBase 0x10000;
6	+	.data;
7	+	.word WORD;
8	+	.dword DWORD;
9	+	.text;
10	+	@entry
11	+	li %t0 dataBase;
12	+	lbu %a0 0(%t0);
13	+	lbu %t1 3(%t0);
14	+	add %a0 %a0 %t1;
15	+	lbu %t1 4(%t0);
16	+	add %a0 %a0 %t1;
17	+	lbu %t1 7(%t0);
18	+	add %a0 %a0 %t1;
19	+	li %a7 93;
20	+	ecall;

test/tests/asm.word.shift.ops.ras added +38 -0

1	+	//! returns: 101
2	+
3	+	.text;
4	+	@entry
5	+	li %t0 1;
6	+	slli %t1 %t0 5;
7	+	slliw %t2 %t0 4;
8	+	sllw %t3 %t2 %t0;
9	+	sll %t4 %t0 %t0;
10	+	li %t5 -8;
11	+	srai %t6 %t5 2;
12	+	sraiw %a1 %t5 1;
13	+	sraw %a2 %t5 %t0;
14	+	li %a3 64;
15	+	srli %a3 %a3 5;
16	+	li %a4 64;
17	+	srliw %a4 %a4 4;
18	+	srlw %a5 %a4 %t0;
19	+	srl %a6 %a3 %t0;
20	+	addiw %a0 %zero 7;
21	+	addw %a0 %a0 %t0;
22	+	subw %a0 %a0 %t0;
23	+	add %a0 %a0 %t4;
24	+	add %a0 %a0 %a3;
25	+	add %a0 %a0 %a4;
26	+	add %a0 %a0 %a5;
27	+	add %a0 %a0 %a6;
28	+	slti %t6 %t6 0;
29	+	add %a0 %a0 %t6;
30	+	slti %a1 %a1 0;
31	+	add %a0 %a0 %a1;
32	+	slti %a2 %a2 0;
33	+	add %a0 %a0 %a2;
34	+	add %a0 %a0 %t1;
35	+	add %a0 %a0 %t2;
36	+	add %a0 %a0 %t3;
37	+	li %a7 93;
38	+	ecall;

vim/ras.vim added +63 -0

1	+	" ras.vim
2	+	" Syntax for Radiance Assembly (.ras) files
3	+	"
4	+	if exists("b:current_syntax")
5	+	finish
6	+	endif
7	+
8	+	syntax clear
9	+
10	+	" Comments
11	+	syntax match rasComment "//.*$"
12	+	syntax keyword rasTodo TODO FIXME contained containedin=rasComment
13	+
14	+	" Directives
15	+	syntax match rasDirective "\.\%(align\|ascii\|byte\|constant\|data\|dword\|global\|space\|text\|word\)\>"
16	+
17	+	" Labels
18	+	syntax match rasLabel "@[A-Za-z_][A-Za-z0-9_]*\%(::[A-Za-z_][A-Za-z0-9_]*\)*"
19	+
20	+	" Mnemonics
21	+	syntax keyword rasMnemonic add addi addiw addw and andi auipc
22	+	syntax keyword rasMnemonic beq beqz bge bgeu bgt ble blt bltu bne bnez
23	+	syntax keyword rasMnemonic call csrc csrr csrrw csrsi csrw
24	+	syntax keyword rasMnemonic div divu divuw divw
25	+	syntax keyword rasMnemonic ebreak ecall
26	+	syntax keyword rasMnemonic j jal jalr la lb lbu ld lh lhu li lui lw lwu
27	+	syntax keyword rasMnemonic mret mul mulh mulhsu mulhu mulw mv
28	+	syntax keyword rasMnemonic neg nop not or ori
29	+	syntax keyword rasMnemonic rem remu remuw remw ret
30	+	syntax keyword rasMnemonic sb sd seqz sh sll slli slliw sllw slt slti sltiu sltu snez
31	+	syntax keyword rasMnemonic sra srai sraiw sraw srl srli srliw srlw sub subw sw
32	+	syntax keyword rasMnemonic tail wfi xor xori
33	+
34	+	" Registers
35	+	syntax match rasRegister "%\%(a[0-7]\|fp\|gp\|ra\|s[0-9]\|s10\|s11\|sp\|t[0-6]\|tp\|zero\)\>"
36	+
37	+	" CSR names
38	+	syntax keyword rasCsr mcause mepc mhartid mie mip mscratch mstatus mtval mtvec
39	+
40	+	" Numbers and literals
41	+	syntax match rasNumber "\%([+-]\)\=\<0[xX][0-9A-Fa-f]\+\>"
42	+	syntax match rasNumber "\%([+-]\)\=\<\d\+\>"
43	+	syntax region rasString start=+"+ skip=+\\"+ end=+"+
44	+	syntax region rasChar start=+'+ skip=+\\'+ end=+'+
45	+
46	+	" Namespaced symbols and punctuation
47	+	syntax match rasNamespaceSep "::"
48	+	syntax match rasPunct "[(),;:+\-*/]"
49	+
50	+	highlight default link rasComment Comment
51	+	highlight default link rasTodo Todo
52	+	highlight default link rasDirective Special
53	+	highlight default link rasLabel Label
54	+	highlight default link rasMnemonic Keyword
55	+	highlight default link rasRegister Identifier
56	+	highlight default link rasCsr Type
57	+	highlight default link rasNumber Number
58	+	highlight default link rasString String
59	+	highlight default link rasChar Character
60	+	highlight default link rasNamespaceSep Delimiter
61	+	highlight default link rasPunct Delimiter
62	+
63	+	let b:current_syntax = "ras"

867	974		set low.output = lower::FnOutput::Stream(lower::FnSink {
868	975		ctx: &mut codegenCtx as *mut opaque,
869	976		emitFn: generateLoweredFn,
870	977		});
871	978		try lowerAllPackagesInto(ctx, res, &mut low);
	979	+	let asmData = try assembleAsmInputs(ctx, &mut generator, &mut res.arena);
872	980
873	981		match generator.entryPatch {
874	982		case rv64::EntryPatch::Reserved(targetName) => {
875	983		if targetName == nil {
876		-	io::printError("radiance: fatal: no default function found\n");
877		-	throw Error::Other;
	984	+	throw error(&["fatal:", "no default function found"]);
878	985		}
879	986		}
880	987		else => {}
881	988		}
882	989		if let path = codegenOptions.logPath {
883	990		pkgLog(entryPkg, &["generating code", "(", path, ")", ".."]);
884	991		}
885		-	return rv64::finishProgram(&mut generator, &low.data[..], storage, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]);
	992	+	return rv64::finishProgram(&mut generator, &low.data[..], storage, asmData, &mut RO_DATA_BUF[..], &mut RW_DATA_BUF[..]);
886	993		}
887	994
888	995		/// Lower, optionally dump, and optionally generate binary output.
889	996		fn compile(
890	997		ctx: *mut CompileContext,

922	1029		debug: ctx.debug,
923	1030		entryMode: CodegenEntryMode::DefaultEntry,
924	1031		});
925	1032
926	1033		if not writeCode(result.code, outPath) {
927		-	io::printError("radiance: fatal: failed to write output file\n");
928		-	throw Error::Other;
	1034	+	throw error(&["fatal:", "failed to write output file"]);
929	1035		}
930	1036		// Write data files.
931	1037		try writeDataWithExt(&RO_DATA_BUF[..result.roDataSize], outPath, RO_DATA_EXT);
932	1038		try writeDataWithExt(&RW_DATA_BUF[..result.rwDataSize], outPath, RW_DATA_EXT);
933	1039

67	67		# Binary Tests
68	68
69	69		BIN_TEST_DIR := test/tests
70	70		# Only tests with `//! returns:` are compiled to binaries and executed.
71	71		BIN_TEST_EXE_SRC := $(shell grep -rl '^//! returns:' $(BIN_TEST_DIR))
72		-	BIN_TEST_EXE_BIN := $(BIN_TEST_EXE_SRC:.rad=.rv64)
	72	+	BIN_TEST_RAD_EXE_SRC := $(filter %.rad,$(BIN_TEST_EXE_SRC))
	73	+	BIN_TEST_RAS_EXE_SRC := $(filter %.ras,$(BIN_TEST_EXE_SRC))
	74	+	BIN_TEST_RAD_ASM_SRC := $(wildcard $(BIN_TEST_RAD_EXE_SRC:.rad=.ras))
	75	+	BIN_TEST_EXE_BIN := $(patsubst %.rad,%.rv64,$(BIN_TEST_RAD_EXE_SRC)) \
	76	+	$(patsubst %.ras,%.rv64,$(BIN_TEST_RAS_EXE_SRC))
73	77		BIN_RUNNER := test/runner.rv64
74	78		BIN_TEST_RUN := test/run
75	79
76	80		bin-test: $(BIN_RUNNER) $(BIN_TEST_EXE_BIN)
77	81		@echo

80	84		# Runner binary: the lowering IL checker.
81	85		$(BIN_RUNNER): test/runner.rad $(STD_LIB) $(RAD_BIN)
82	86		@echo "radiance test/runner.rad => $@"
83	87		@$(RADIANCE) $(STD) -pkg runner -mod test/runner.rad -entry runner -o $@
84	88
	89	+	# A `.rad` executable test can have a same-basename `.ras` module.
	90	+	$(patsubst %.ras,%.rv64,$(BIN_TEST_RAD_ASM_SRC)): %.rv64: %.ras
	91	+
85	92		# Compile each executable test to a binary.
86	93		$(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.rad $(RAD_BIN)
87	94		@echo "radiance $< => $@"
88		-	@$(RADIANCE) -pkg test -mod $< -o $@
	95	+	@$(RADIANCE) -pkg test -mod $< $(patsubst %,-mod %,$(wildcard $(@:.rv64=.ras))) -o $@
	96	+
	97	+	$(BIN_TEST_DIR)/%.rv64: $(BIN_TEST_DIR)/%.ras $(BIN_RUNNER)
	98	+	@echo "asm $< => $@"
	99	+	@$(EMU) $(EMU_FLAGS) -run $(BIN_RUNNER) -- assemble $< $@
89	100
90	101		clean-bin-test:
91	102		@rm -f $(BIN_RUNNER) \
92	103		$(BIN_RUNNER:.rv64=.rv64.debug) \
93	104		$(BIN_RUNNER:.rv64=.rv64.s) \

11	11		use std::lang::strings;
12	12		use std::lang::package;
13	13		use std::lang::il;
14	14		use std::lang::lower;
15	15		use std::arch::rv64;
	16	+	use std::arch::rv64::asm;
16	17		use std::arch::rv64::printer;
17	18		use std::lang::sexpr;
18	19		use std::lang::gen::data;
19	20		use std::lang::gen::types;
20	21		use std::sys;

29	30		constant MAX_TOTAL_MODULES: u32 = 192;
30	31		/// Source code buffer arena (2 MB).
31	32		constant MAX_SOURCES_SIZE: u32 = 2097152;
32	33		/// Maximum number of test functions we can discover.
33	34		constant MAX_TESTS: u32 = 1024;
	35	+	/// Maximum number of assembly source paths we can load per package.
	36	+	constant MAX_ASM_MODULES: u32 = 64;
34	37
35	38		/// Temporary arena size (32 MB) - retains all parsed AST until resolution.
36	39		/// Used for: AST during parsing, then codegen scratch space.
37	40		constant TEMP_ARENA_SIZE: u32 = 33554432;
38	41		/// Main arena size (64 MB) - lives throughout compilation.

66	69
67	70		/// Read-only data file extension.
68	71		constant RO_DATA_EXT: *[u8] = ".ro.data";
69	72		/// Read-write data file extension.
70	73		constant RW_DATA_EXT: *[u8] = ".rw.data";
71		-	/// Maximum rodata size (1MB).
72		-	constant MAX_RO_DATA_SIZE: u32 = 1048576;
73		-	/// Maximum rwdata size (1MB).
74		-	constant MAX_RW_DATA_SIZE: u32 = 1048576;
	74	+	/// Maximum rodata size (4MB).
	75	+	constant MAX_RO_DATA_SIZE: u32 = 4194304;
	76	+	/// Maximum rwdata size (4MB).
	77	+	constant MAX_RW_DATA_SIZE: u32 = 4194304;
75	78		/// Maximum path length.
76	79		constant MAX_PATH_LEN: u32 = 256;
77	80		/// Read-only data buffer.
78	81		static RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
79	82		/// Read-write data buffer.
80	83		static RW_DATA_BUF: [u8; MAX_RW_DATA_SIZE] = undefined;
	84	+	/// Assembly module source buffer.
	85	+	static ASM_SOURCE_BUF: [u8; MAX_SOURCES_SIZE] = undefined;
	86	+	/// Temporary assembly text buffer.
	87	+	static ASM_TEXT_BUF: [u32; 262144] = undefined;
	88	+	/// Temporary assembly data buffer.
	89	+	static ASM_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
	90	+	/// Accumulated assembly read-only data.
	91	+	static ASM_RO_DATA_BUF: [u8; MAX_RO_DATA_SIZE] = undefined;
	92	+
	93	+	/// Assembly source file extension.
	94	+	constant ASM_SOURCE_EXT: *[u8] = ".ras";
81	95
82	96		/// Usage string.
83	97		constant USAGE: *[u8] =
84	98		"usage: radiance -pkg <name> -mod <input>.. [-pkg <name> -mod <input>..] -entry <pkg> -o <output>\n";
85	99

108	122		modPath: [[u8]],
109	123		/// Test function name (eg. "testFoo").
110	124		fnName: *[u8],
111	125		}
112	126
	127	+	/// Source inputs belonging to one command-line package.
	128	+	record PackageInput {
	129	+	/// Package name from the `-pkg` argument.
	130	+	name: *[u8],
	131	+	/// Radiance source paths for this package.
	132	+	radPaths: [*[u8]; MAX_LOADED_MODULES],
	133	+	/// Number of Radiance source paths.
	134	+	radPathCount: u32,
	135	+	/// Assembly source paths for this package.
	136	+	asmPaths: [*[u8]; MAX_ASM_MODULES],
	137	+	/// Number of assembly source paths.
	138	+	asmPathCount: u32,
	139	+	}
	140	+
113	141		/// Compilation context.
114	142		record CompileContext {
115	143		/// Array of packages to compile.
116	144		packages: [package::Package; MAX_PACKAGES],
	145	+	/// Driver inputs for each package slot.
	146	+	inputs: [PackageInput; MAX_PACKAGES],
117	147		/// Number of packages.
118	148		packageCount: u32,
119	149		/// Index of entry package.
120	150		entryPkgIdx: ?u32,
121	151		/// Global module graph shared by all packages.

162	192		debug: bool,
163	193		/// How the generated program should handle entry.
164	194		entryMode: CodegenEntryMode,
165	195		}
166	196
	197	+	/// Print a driver error line.
	198	+	fn error(msg: *[*[u8]]) -> Error {
	199	+	io::printError("radiance: ");
	200	+
	201	+	for part, i in msg {
	202	+	io::printError(part);
	203	+	if i < msg.len - 1 {
	204	+	io::printError(" ");
	205	+	}
	206	+	}
	207	+	io::printError("\n");
	208	+	return Error::Other;
	209	+	}
	210	+
167	211		/// Print a log line for the given package.
168	212		fn pkgLog(pkg: *package::Package, msg: *[*[u8]]) {
169	213		io::printError("radiance: ");
170	214		io::printError(pkg.name);
171	215		io::printError(": ");

177	221		}
178	222		}
179	223		io::printError("\n");
180	224		}
181	225
	226	+	/// Return `true` when `path` ends with `ext`.
	227	+	fn hasExtension(path: *[u8], ext: *[u8]) -> bool {
	228	+	if path.len < ext.len {
	229	+	return false;
	230	+	}
	231	+	let start = path.len - ext.len;
	232	+	return mem::eq(&path[start..], ext);
	233	+	}
	234	+
	235	+	/// Create an empty source input set for one package.
	236	+	fn packageInput(name: *[u8]) -> PackageInput {
	237	+	return PackageInput {
	238	+	name,
	239	+	radPaths: undefined,
	240	+	radPathCount: 0,
	241	+	asmPaths: undefined,
	242	+	asmPathCount: 0,
	243	+	};
	244	+	}
	245	+
182	246		/// Register, load, and parse `path` within `pkg`.
183	247		fn processModule(
184	248		pkg: *mut package::Package,
185	249		graph: *mut module::ModuleGraph,
186	250		path: *[u8],

188	252		sourceArena: *mut alloc::Arena
189	253		) throws (Error) {
190	254		pkgLog(pkg, &["parsing", "(", path, ")", ".."]);
191	255
192	256		let moduleId = try package::registerModule(pkg, graph, path) catch {
193		-	io::printError("radiance: error registering module\n");
194		-	throw Error::Other;
	257	+	throw error(&["error registering module"]);
195	258		};
196	259		// Read file into remaining arena space.
197	260		let buffer = alloc::remainingBuf(sourceArena);
198	261		if buffer.len == 0 {
199		-	io::printError("radiance: fatal: source arena exhausted\n");
200		-	throw Error::Other;
	262	+	throw error(&["fatal:", "source arena exhausted"]);
201	263		}
202	264		let source = unix::readFile(path, buffer) else {
203		-	io::printError("radiance: error reading file\n");
204		-	throw Error::Other;
	265	+	throw error(&["error reading file"]);
205	266		};
206	267		if source.len == buffer.len {
207		-	io::printError("radiance: fatal: source arena too small, file truncated: ");
208		-	io::printError(path);
209		-	io::printError("\n");
210		-	throw Error::Other;
	268	+	throw error(&["fatal:", "source arena too small, file truncated:", path]);
211	269		}
212	270		// Commit only what was read.
213	271		alloc::commit(sourceArena, source.len);
214	272
215	273		let ast = try parser::parse(scanner::SourceLoc::File(path), source, nodeArena, &mut STRING_POOL) catch {
216	274		throw Error::Other;
217	275		};
218	276		try module::setAst(graph, moduleId, ast) catch {
219		-	io::printError("radiance: error setting AST\n");
220		-	throw Error::Other;
	277	+	throw error(&["error setting AST"]);
221	278		};
222	279		try module::setSource(graph, moduleId, source) catch {
223		-	io::printError("radiance: error setting source\n");
224		-	throw Error::Other;
	280	+	throw error(&["error setting source"]);
225	281		};
226	282		}
227	283
228	284		/// Consume the next argument, or print an error and throw.
229		-	fn nextArg(args: *[*[u8]], idx: *mut u32, msg: *[u8]) -> *[u8] throws (Error) {
	285	+	fn nextArg(args: *[*[u8]], idx: *mut u32, msg: *[*[u8]]) -> *[u8] throws (Error) {
230	286		set *idx += 1;
231	287		if *idx >= args.len {
232		-	io::printError(msg);
233		-	throw Error::Other;
	288	+	throw error(msg);
234	289		}
235	290		return args[*idx];
236	291		}
237	292
238	293		/// Parse CLI arguments and return compilation context.

244	299		let mut debugEnabled = false;
245	300		let mut outputPath: ?*[u8] = nil;
246	301		let mut dump = Dump::None;
247	302		let mut entryPkgName: ?*[u8] = nil;
248	303
249		-	// Per-package module path tracking.
250		-	let mut moduleCounts: [u32; MAX_PACKAGES] = undefined;
251		-	let mut modulePaths: [[*[u8]; MAX_LOADED_MODULES]; MAX_PACKAGES] = undefined;
252		-	let mut pkgNames: [*[u8]; MAX_PACKAGES] = undefined;
	304	+	// Per-package source path tracking.
	305	+	let mut inputs: [PackageInput; MAX_PACKAGES] = undefined;
253	306		let mut pkgCount: u32 = 0;
254	307		let mut currentPkgIdx: ?u32 = nil;
255	308
256		-	for i in 0..MAX_PACKAGES {
257		-	set moduleCounts[i] = 0;
258		-	}
259	309		if args.len == 0 {
260	310		io::printError(USAGE);
261	311		throw Error::Other;
262	312		}
263	313		let mut idx: u32 = 0;
264	314
265	315		while idx < args.len {
266	316		let arg = args[idx];
267	317		if mem::eq(arg, "-pkg") {
268		-	try nextArg(args, &mut idx, "radiance: `-pkg` requires a package name\n");
	318	+	try nextArg(args, &mut idx, &["`-pkg` requires a package name"]);
269	319		if pkgCount >= MAX_PACKAGES {
270		-	io::printError("radiance: too many packages specified\n");
271		-	throw Error::Other;
	320	+	throw error(&["too many packages specified"]);
272	321		}
273		-	set pkgNames[pkgCount] = args[idx];
	322	+	set inputs[pkgCount] = packageInput(args[idx]);
274	323		set currentPkgIdx = pkgCount;
275	324		set pkgCount += 1;
276	325		} else if mem::eq(arg, "-mod") {
277		-	try nextArg(args, &mut idx, "radiance: `-mod` requires a module path\n");
	326	+	try nextArg(args, &mut idx, &["`-mod` requires a module path"]);
278	327		let pkgIdx = currentPkgIdx else {
279		-	io::printError("radiance: `-mod` must follow a `-pkg` argument\n");
280		-	throw Error::Other;
	328	+	throw error(&["`-mod` must follow a `-pkg` argument"]);
281	329		};
282		-	if moduleCounts[pkgIdx] >= MAX_LOADED_MODULES {
283		-	io::printError("radiance: too many modules specified for package\n");
284		-	throw Error::Other;
	330	+	let input = &mut inputs[pkgIdx];
	331	+	if hasExtension(args[idx], ASM_SOURCE_EXT) {
	332	+	if input.asmPathCount >= MAX_ASM_MODULES {
	333	+	throw error(&["too many assembly modules specified"]);
	334	+	}
	335	+	set input.asmPaths[input.asmPathCount] = args[idx];
	336	+	set input.asmPathCount += 1;
	337	+	} else {
	338	+	if input.radPathCount >= MAX_LOADED_MODULES {
	339	+	throw error(&["too many modules specified for package"]);
	340	+	}
	341	+	set input.radPaths[input.radPathCount] = args[idx];
	342	+	set input.radPathCount += 1;
285	343		}
286		-	set modulePaths[pkgIdx][moduleCounts[pkgIdx]] = args[idx];
287		-	set moduleCounts[pkgIdx] += 1;
288	344		} else if mem::eq(arg, "-entry") {
289		-	try nextArg(args, &mut idx, "radiance: `-entry` requires a package name\n");
	345	+	try nextArg(args, &mut idx, &["`-entry` requires a package name"]);
290	346		set entryPkgName = args[idx];
291	347		} else if mem::eq(arg, "-test") {
292	348		set buildTest = true;
293	349		} else if mem::eq(arg, "-debug") {
294	350		set debugEnabled = true;
295	351		} else if mem::eq(arg, "-o") {
296		-	try nextArg(args, &mut idx, "radiance: `-o` requires an output path\n");
	352	+	try nextArg(args, &mut idx, &["`-o` requires an output path"]);
297	353		set outputPath = args[idx];
298	354		} else if mem::eq(arg, "-dump") {
299		-	try nextArg(args, &mut idx, "radiance: `-dump` requires a mode (eg. ast)\n");
	355	+	try nextArg(args, &mut idx, &["`-dump` requires a mode (eg. ast)"]);
300	356		let mode = args[idx];
301	357		if mem::eq(mode, "ast") {
302	358		set dump = Dump::Ast;
303	359		} else if mem::eq(mode, "graph") {
304	360		set dump = Dump::Graph;
305	361		} else if mem::eq(mode, "il") {
306	362		set dump = Dump::Il;
307	363		} else if mem::eq(mode, "asm") {
308	364		set dump = Dump::Asm;
309	365		} else {
310		-	io::printError("radiance: unknown dump mode `");
311		-	io::printError(mode);
312		-	io::printError("` (expected: ast, graph, il, asm)\n");
313		-	throw Error::Other;
	366	+	throw error(&["unknown dump mode", mode, "(expected: ast, graph, il, asm)"]);
314	367		}
315	368		} else {
316		-	io::printError("radiance: unknown argument `");
317		-	io::printError(arg);
318		-	io::printError("`\n");
319		-	throw Error::Other;
	369	+	throw error(&["unknown argument", arg]);
320	370		}
321	371		set idx += 1;
322	372		}
323	373		if pkgCount == 0 {
324		-	io::printError("radiance: no package specified\n");
325		-	throw Error::Other;
	374	+	throw error(&["no package specified"]);
	375	+	}
	376	+	for i in 0..pkgCount {
	377	+	if inputs[i].radPathCount == 0 {
	378	+	throw error(&["package", inputs[i].name, "has no Radiance modules specified"]);
	379	+	}
326	380		}
327	381
328	382		// Determine entry package index.
329	383		let mut entryPkgIdx: ?u32 = nil;
330	384		if pkgCount == 1 {
331	385		// Single package: it is the entry.
332	386		set entryPkgIdx = 0;
333	387		} else {
334	388		// Multiple packages: need -entry.
335	389		let entryName = entryPkgName else {
336		-	io::printError("radiance: `-entry` required when multiple packages specified\n");
337		-	throw Error::Other;
	390	+	throw error(&["`-entry` required when multiple packages specified"]);
338	391		};
339	392		for i in 0..pkgCount {
340		-	if mem::eq(pkgNames[i], entryName) {
	393	+	if mem::eq(inputs[i].name, entryName) {
341	394		set entryPkgIdx = i;
342	395		break;
343	396		}
344	397		}
345	398		if entryPkgIdx == nil {
346		-	io::printError("radiance: fatal: entry package `");
347		-	io::printError(entryName);
348		-	io::printError("` not found\n");
349		-
350		-	throw Error::Other;
	399	+	throw error(&["fatal:", "entry package", entryName, "not found"]);
351	400		}
352	401		}
353	402		let graph = module::moduleGraph(&mut MODULE_ENTRIES[..], &mut STRING_POOL, arena);
354	403		let mut ctx = CompileContext {
355	404		packages: undefined,
	405	+	inputs,
356	406		packageCount: pkgCount,
357	407		entryPkgIdx,
358	408		graph,
359	409		config: resolver::Config { buildTest },
360	410		dump,

362	412		debug: debugEnabled,
363	413		};
364	414		// Initialize and parse all packages.
365	415		let mut sourceArena = alloc::new(&mut MODULE_SOURCES[..]);
366	416		for i in 0..pkgCount {
367		-	package::init(&mut ctx.packages[i], i as u16, pkgNames[i], &mut STRING_POOL);
	417	+	package::init(&mut ctx.packages[i], i as u16, ctx.inputs[i].name, &mut STRING_POOL);
368	418
369		-	for j in 0..moduleCounts[i] {
370		-	let path = modulePaths[i][j];
	419	+	for j in 0..ctx.inputs[i].radPathCount {
	420	+	let path = ctx.inputs[i].radPaths[j];
371	421		try processModule(&mut ctx.packages[i], &mut ctx.graph, path, arena, &mut sourceArena);
372	422		}
373	423		}
374	424		return ctx;
375	425		}
376	426
377	427		/// Get the entry package from the context.
378	428		fn getEntryPackage(ctx: *CompileContext) -> *package::Package throws (Error) {
379	429		let entryIdx = ctx.entryPkgIdx else {
380		-	io::printError("radiance: no entry package specified\n");
381		-	throw Error::Other;
	430	+	throw error(&["no entry package specified"]);
382	431		};
383	432		return &ctx.packages[entryIdx];
384	433		}
385	434
386	435		/// Get root module info from a package.
387	436		fn getRootModule(pkg: *package::Package, graph: *module::ModuleGraph) -> RootModule throws (Error) {
388	437		let rootId = pkg.rootModuleId else {
389		-	io::printError("radiance: no root module found\n");
390		-	throw Error::Other;
	438	+	throw error(&["no root module found"]);
391	439		};
392	440		let rootEntry = module::get(graph, rootId) else {
393		-	io::printError("radiance: root module entry not found\n");
394		-	throw Error::Other;
	441	+	throw error(&["root module entry not found"]);
395	442		};
396	443		let rootAst = rootEntry.ast else {
397		-	io::printError("radiance: root module has no AST\n");
398		-	throw Error::Other;
	444	+	throw error(&["root module has no AST"]);
399	445		};
400	446		return RootModule { entry: rootEntry, ast: rootAst };
401	447		}
402	448
403	449		/// Dump the module graph.

463	509		low: *mut lower::Lowerer,
464	510		pkg: *mut package::Package,
465	511		isEntry: bool
466	512		) throws (Error) {
467	513		let rootId = pkg.rootModuleId else {
468		-	io::printError("radiance: no root module found\n");
469		-	throw Error::Other;
	514	+	throw error(&["no root module found"]);
470	515		};
471	516		// Set lowerer's package context for qualified name generation.
472	517		// TODO: We shouldn't have to call this manually.
473	518		lower::setPackage(low, &ctx.graph, pkg.name);
474	519

483	528		modId: u16,
484	529		isRoot: bool,
485	530		pkg: *package::Package
486	531		) throws (Error) {
487	532		let entry = module::get(graph, modId) else {
488		-	io::printError("radiance: module entry not found\n");
489		-	throw Error::Other;
	533	+	throw error(&["module entry not found"]);
490	534		};
491	535		let modAst = entry.ast else {
492		-	io::printError("radiance: module has no AST\n");
493		-	throw Error::Other;
	536	+	throw error(&["module has no AST"]);
494	537		};
495	538		pkgLog(pkg, &["lowering", "(", entry.filePath, ")", ".."]);
496	539
497	540		try lower::lowerModule(low, modId, modAst, isRoot) catch err {
498		-	io::printError("radiance: internal error during lowering: ");
	541	+	io::printError("radiance: ");
	542	+	io::printError("internal error during lowering: ");
499	543		lower::printError(err);
500	544		io::printError("\n");
	545	+
501	546		throw Error::Other;
502	547		};
503	548		// Recurse into children.
504	549		for i in 0..entry.childrenLen {
505	550		let childId = module::childAt(entry, i);

623	668		if let entry = module::get(&ctx.graph, modIdx as u16) {
624	669		collectModuleTests(entry, &mut tests[..], &mut testCount);
625	670		}
626	671		}
627	672		if testCount == 0 {
628		-	io::printError("radiance: fatal: no test functions found\n");
629		-	throw Error::Other;
	673	+	throw error(&["fatal:", "no test functions found"]);
630	674		}
631	675		let mut countBuf: [u8; 10] = undefined;
632	676		let countStr = fmt::formatU32(testCount, &mut countBuf[..]);
633	677		pkgLog(entryPkg, &["found", countStr, "test(s)"]);
634	678

725	769		set pos += try! mem::copy(&mut path[pos..], basePath);
726	770		set pos += try! mem::copy(&mut path[pos..], ext);
727	771		set path[pos] = 0; // Null-terminate for syscall.
728	772
729	773		if not unix::writeFile(&path[..pos], data) {
730		-	io::printError("radiance: fatal: failed to write data file\n");
731		-	throw Error::Other;
	774	+	throw error(&["fatal:", "failed to write data file"]);
732	775		}
733	776		}
734	777
735	778		/// Serialize debug entries and write the `.debug` file.
736	779		/// Resolves module IDs to file paths via the module graph.

779	822		pkgScope: &mut RESOLVER_PKG_SCOPE,
780	823		errors: &mut RESOLVER_ERRORS[..],
781	824		};
782	825		let mut res = resolver::resolver(storage, ctx.config);
783	826
784		-	// Build package inputs.
785		-	let mut packages: [resolver::Pkg; MAX_PACKAGES] = undefined;
	827	+	// Build the semantic package list consumed by the resolver.
	828	+	let mut resolverPkgs: [resolver::Pkg; MAX_PACKAGES] = undefined;
	829	+	let mut resolverPackageCount: u32 = 0;
786	830		for i in 0..ctx.packageCount {
787	831		let pkg = &ctx.packages[i];
788	832		let root = try getRootModule(pkg, &ctx.graph);
789	833
790		-	set packages[i] = resolver::Pkg {
	834	+	set resolverPkgs[resolverPackageCount] = resolver::Pkg {
791	835		rootEntry: root.entry,
792	836		rootAst: root.ast,
793	837		};
	838	+	set resolverPackageCount += 1;
794	839		}
795	840
796	841		// Resolve all packages.
797	842		// TODO: Fix this error printing dance.
798		-	let diags = try resolver::resolve(&mut res, &ctx.graph, &packages[..ctx.packageCount]) catch {
	843	+	let diags = try resolver::resolve(&mut res, &ctx.graph, &resolverPkgs[..resolverPackageCount]) catch {
799	844		let diags = resolver::Diagnostics { errors: res.errors };
800	845		resolver::printer::printDiagnostics(&diags, &res);
801	846		throw Error::Other;
802	847		};
803	848		if not resolver::success(&diags) {
804	849		resolver::printer::printDiagnostics(&diags, &res);
805		-	io::print("radiance: failed: ");
806		-	io::printU32(diags.errors.len);
807		-	io::printLn(" errors");
808		-	throw Error::Other;
	850	+	let mut countBuf: [u8; 10] = undefined;
	851	+	let countStr = fmt::formatU32(diags.errors.len, &mut countBuf[..]);
	852	+	throw error(&["failed:", countStr, "errors"]);
809	853		}
810	854		return res;
811	855		}
812	856
813		-	/// Emit one lowered function to RV64 codegen and reclaim its IL arena.
	857	+	/// Emit one lowered function to machine code and reclaim its IL arena.
814	858		fn generateLoweredFn(ctxPtr: *mut opaque, func: *il::Fn, role: lower::FnRole) {
815	859		let ctx = ctxPtr as *mut CodegenSinkContext;
816	860
817	861		match role {
818	862		case lower::FnRole::Default => {

827	871		}
828	872		rv64::generateFunction(ctx.generator, func, ctx.codegenArena);
829	873		alloc::reset(ctx.fnArena);
830	874		}
831	875
	876	+	/// Assemble one `.ras` input and merge it into the active code generator.
	877	+	///
	878	+	/// Text symbols are appended to `generator`. Data emitted by the assembler is
	879	+	/// copied into `ASM_RO_DATA_BUF` at `asmDataLen`, and `asmDataLen` is advanced
	880	+	/// so the next assembly module receives the correct rodata base address.
	881	+	fn assembleAsmModule(
	882	+	generator: *mut rv64::Generator,
	883	+	pkg: *package::Package,
	884	+	path: *[u8],
	885	+	asmDataLen: *mut u32,
	886	+	arena: *mut alloc::Arena
	887	+	) throws (Error) {
	888	+	pkgLog(pkg, &["asm:", "parsing", "(", path, ")", ".."]);
	889	+
	890	+	let source = unix::readFile(path, &mut ASM_SOURCE_BUF[..]) else {
	891	+	throw error(&["error reading assembly file"]);
	892	+	};
	893	+	if source.len == ASM_SOURCE_BUF.len {
	894	+	throw error(&["fatal:", "assembly source too large:", path]);
	895	+	}
	896	+	let program = try asm::assemble(
	897	+	asm::scanner::SourceKind::File { path },
	898	+	source,
	899	+	&mut ASM_TEXT_BUF[..],
	900	+	&mut ASM_DATA_BUF[..],
	901	+	arena,
	902	+	&mut STRING_POOL,
	903	+	rv64::RO_DATA_BASE + *asmDataLen
	904	+	) catch {
	905	+	throw error(&["assembly failed:", path]);
	906	+	};
	907	+	if *asmDataLen + program.data.len > ASM_RO_DATA_BUF.len {
	908	+	throw error(&["fatal:", "assembly rodata too large"]);
	909	+	}
	910	+	try! mem::copy(&mut ASM_RO_DATA_BUF[*asmDataLen..], program.data);
	911	+	set *asmDataLen += program.data.len;
	912	+
	913	+	rv64::addAssembly(generator, program);
	914	+	}
	915	+
	916	+	/// Assemble all inputs collected in the package inputs.
	917	+	fn assembleAsmInputs(
	918	+	ctx: *CompileContext,
	919	+	generator: *mut rv64::Generator,
	920	+	arena: *mut alloc::Arena
	921	+	) -> *[u8] throws (Error) {
	922	+	let mut asmDataLen: u32 = 0;
	923	+
	924	+	for i in 0..ctx.packageCount {
	925	+	let input = &ctx.inputs[i];
	926	+	for j in 0..input.asmPathCount {
	927	+	try assembleAsmModule(
	928	+	generator,
	929	+	&ctx.packages[i],
	930	+	input.asmPaths[j],
	931	+	&mut asmDataLen,
	932	+	arena
	933	+	);
	934	+	}
	935	+	}
	936	+	return &ASM_RO_DATA_BUF[..asmDataLen];
	937	+	}
	938	+
832	939		/// Lower all packages while streaming each lowered function into RV64 codegen.
833	940		fn lowerAndGenerateAllPackages(
834	941		ctx: *mut CompileContext,
835	942		res: *mut resolver::Resolver,
836	943		fnArena: *mut alloc::Arena,

1	1		//! The Radiance Standard Library.
2	2
3	3		export mod io;
4	4		export mod collections;
	5	+	export mod char;
5	6		export mod lang;
6	7		export mod sys;
7	8		export mod arch;
8	9		export mod fmt;
9	10		export mod mem;

13	13		export mod encode;
14	14		export mod decode;
15	15		export mod emit;
16	16		export mod isel;
17	17		export mod printer;
	18	+	export mod asm;
18	19
19	20		@test mod tests;
20	21
21	22		use std::mem;
22	23		use std::collections::dict;

238	239
239	240		// Reclaim unused memory after instruction selection.
240	241		alloc::restore(arena, checkpoint);
241	242		}
242	243
	244	+	/// Add the text section of an assembled program to the generator.
	245	+	///
	246	+	/// This function snapshots the generator's current code length as the base
	247	+	/// index, converts each text symbol's byte offset to an instruction index, adds
	248	+	/// that base, and records the final address for printing. Only `.export` text
	249	+	/// symbols are exported to the emitter's function-offset table for extern call
	250	+	/// resolution. Local labels must not escape their assembly fragment because
	251	+	/// separate assembly inputs may reuse the same local names.
	252	+	///
	253	+	/// Non-text symbols are ignored here because assembled data is not appended to
	254	+	/// the generator's text stream. The driver merges assembled data into the RO data
	255	+	/// prefix separately and passes that data to [`finishProgram`].
	256	+	export fn addAssembly(generator: *mut Generator, program: asm::Program) {
	257	+	let baseIndex = generator.e.codeLen;
	258	+
	259	+	for symbol in program.symbols {
	260	+	if symbol.section == asm::Section::Text {
	261	+	let index = baseIndex + ((symbol.offset as u32) / INSTR_SIZE as u32);
	262	+	emit::recordFuncAt(&mut generator.e, symbol.name, index);
	263	+	if symbol.isExported {
	264	+	emit::recordFuncOffsetAt(&mut generator.e, symbol.name, index);
	265	+	}
	266	+	}
	267	+	}
	268	+	for word in program.text {
	269	+	emit::emit(&mut generator.e, word);
	270	+	}
	271	+	}
	272	+
243	273		/// Finish RV64 code generation and return the emitted program.
244	274		export fn finishProgram(
245	275		generator: *mut Generator,
246	276		globalData: *[il::Data],
247	277		storage: Storage,
	278	+	roDataPrefix: *[u8],
248	279		roDataBuf: *mut [u8],
249	280		rwDataBuf: *mut [u8]
250	281		) -> Program {
251	282		// Build data map after function lowering. Function-local literals can add
252	283		// global data while functions are lowered, so final layout belongs here.
253	284		let mut dataSymCount: u32 = 0;
254		-	let roLayoutSize = data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, true);
	285	+	let roLayoutSize = data::layoutSectionAtOffset(
	286	+	globalData, storage.dataSyms, &mut dataSymCount, RO_DATA_BASE, roDataPrefix.len, true
	287	+	);
255	288		data::layoutSection(globalData, storage.dataSyms, &mut dataSymCount, RW_DATA_BASE, false);
256	289
257	290		let dataSyms = &storage.dataSyms[..dataSymCount];
258	291		let dataSymMap = data::buildMap(dataSyms, storage.dataSymEntries);
259	292		let codeBase = mem::alignUp(RO_DATA_BASE + roLayoutSize, DWORD_SIZE as u32);

274	307		// Patch function calls and address loads now that all functions are emitted.
275	308		emit::patchCalls(&mut generator.e);
276	309		emit::patchAddrLoads(&mut generator.e, &dataSymMap);
277	310
278	311		// Emit data sections.
279		-	let roDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true);
280		-	let rwDataSize = data::emitSection(globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false);
281		-
	312	+	assert roDataPrefix.len <= roDataBuf.len, "finishProgram: rodata prefix buffer overflow";
	313	+	try! mem::copy(roDataBuf, roDataPrefix);
	314	+
	315	+	let roDataSize = data::emitSectionAtOffset(
	316	+	globalData, &dataSymMap, &generator.e.labels, codeBase, roDataBuf, true, roDataPrefix.len
	317	+	);
	318	+	let rwDataSize = data::emitSection(
	319	+	globalData, &dataSymMap, &generator.e.labels, codeBase, rwDataBuf, false
	320	+	);
282	321		return Program {
283	322		code: emit::getCode(&generator.e),
284	323		funcs: emit::getFuncs(&generator.e),
285	324		roDataSize,
286	325		rwDataSize,

235	235		labels::recordBlock(&mut e.labels, blockIdx, e.codeLen as i32 * super::INSTR_SIZE);
236	236		}
237	237
238	238		/// Record a function's code offset for call resolution.
239	239		export fn recordFuncOffset(e: *mut Emitter, name: *[u8]) {
240		-	assert e.codeLen <= MAX_CODE_LEN;
241		-	dict::insert(&mut e.labels.funcs, name, e.codeLen as i32 * super::INSTR_SIZE);
	240	+	recordFuncOffsetAt(e, name, e.codeLen);
	241	+	}
	242	+
	243	+	/// Record a function's code offset at `index` for call resolution.
	244	+	export fn recordFuncOffsetAt(e: *mut Emitter, name: *[u8], index: u32) {
	245	+	assert index <= MAX_CODE_LEN;
	246	+	dict::insert(&mut e.labels.funcs, name, index as i32 * super::INSTR_SIZE);
242	247		}
243	248
244	249		/// Record a function's start position for printing.
245	250		export fn recordFunc(e: *mut Emitter, name: *[u8]) {
	251	+	recordFuncAt(e, name, e.codeLen);
	252	+	}
	253	+
	254	+	/// Record a function's start position at `index` for printing.
	255	+	export fn recordFuncAt(e: *mut Emitter, name: *[u8], index: u32) {
246	256		assert e.funcsLen < e.funcs.len, "recordFunc: funcs buffer full";
247		-	set e.funcs[e.funcsLen] = types::FuncAddr { name, index: e.codeLen };
	257	+	set e.funcs[e.funcsLen] = types::FuncAddr { name, index };
248	258		set e.funcsLen += 1;
249	259		}
250	260
251	261		/// Record a local branch needing later patching.
252	262		/// Unconditional jumps use a single slot (J-type, +-1MB range).

53	53		export constant F3_BLT: u32 = 0x4;
54	54		export constant F3_BGE: u32 = 0x5;
55	55		export constant F3_BLTU: u32 = 0x6;
56	56		export constant F3_BGEU: u32 = 0x7;
57	57
	58	+	// CSR/system operations
	59	+
	60	+	export constant F3_CSRRW: u32 = 0x1;
	61	+	export constant F3_CSRRS: u32 = 0x2;
	62	+	export constant F3_CSRRC: u32 = 0x3;
	63	+	export constant F3_CSRRWI: u32 = 0x5;
	64	+	export constant F3_CSRRSI: u32 = 0x6;
	65	+	export constant F3_CSRRCI: u32 = 0x7;
	66	+
58	67		//////////////////////
59	68		// Funct7 Constants //
60	69		//////////////////////
61	70
62	71		export constant F7_NORMAL: u32 = 0b0000000;

535	544		/// Environment break (debugger breakpoint).
536	545		export fn ebreak() -> u32 {
537	546		return encodeI(OP_SYSTEM, super::ZERO, super::ZERO, 0, 1);
538	547		}
539	548
	549	+	/// Encode a CSR instruction with a register source.
	550	+	fn encodeCsr(op: u32, rd: gen::Reg, csr: u32, funct3: u32, rs1: gen::Reg) -> u32 {
	551	+	return (op & 0x7F)
	552	+	| ((*rd as u32 & 0x1F) << 7)
	553	+	| ((funct3 & 0x07) << 12)
	554	+	| ((*rs1 as u32 & 0x1F) << 15)
	555	+	| ((csr & 0xFFF) << 20);
	556	+	}
	557	+
	558	+	/// Encode a CSR instruction with an immediate source.
	559	+	fn encodeCsrImm(op: u32, rd: gen::Reg, csr: u32, funct3: u32, imm: u32) -> u32 {
	560	+	assert imm < 32;
	561	+	return (op & 0x7F)
	562	+	| ((*rd as u32 & 0x1F) << 7)
	563	+	| ((funct3 & 0x07) << 12)
	564	+	| ((imm & 0x1F) << 15)
	565	+	| ((csr & 0xFFF) << 20);
	566	+	}
	567	+
	568	+	/// Read CSR into `rd`.
	569	+	export fn csrr(rd: gen::Reg, csr: u32) -> u32 {
	570	+	return encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRS, super::ZERO);
	571	+	}
	572	+
	573	+	/// Read/write CSR: old CSR to `rd`, write `rs1`.
	574	+	export fn csrrw(rd: gen::Reg, csr: u32, rs1: gen::Reg) -> u32 {
	575	+	return encodeCsr(OP_SYSTEM, rd, csr, F3_CSRRW, rs1);
	576	+	}
	577	+
	578	+	/// Write `rs1` into CSR and discard old value.
	579	+	export fn csrw(csr: u32, rs1: gen::Reg) -> u32 {
	580	+	return encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRW, rs1);
	581	+	}
	582	+
	583	+	/// Clear CSR bits from `rs1` and discard old value.
	584	+	export fn csrc(csr: u32, rs1: gen::Reg) -> u32 {
	585	+	return encodeCsr(OP_SYSTEM, super::ZERO, csr, F3_CSRRC, rs1);
	586	+	}
	587	+
	588	+	/// Set CSR bits from a 5-bit immediate and discard old value.
	589	+	export fn csrsi(csr: u32, imm: u32) -> u32 {
	590	+	return encodeCsrImm(OP_SYSTEM, super::ZERO, csr, F3_CSRRSI, imm);
	591	+	}
	592	+
	593	+	/// Wait for interrupt.
	594	+	export fn wfi() -> u32 {
	595	+	return 0x10500073;
	596	+	}
	597	+
	598	+	/// Return from machine mode trap.
	599	+	export fn mret() -> u32 {
	600	+	return 0x30200073;
	601	+	}
	602	+
540	603		/////////////////////////
541	604		// Pseudo-instructions //
542	605		/////////////////////////
543	606
544	607		/// No operation: `addi zero, zero, 0`.

2	2		//!
3	3		//! These tests verify that instruction encodings match the RISC-V specification
4	4		//! by comparing against known-good values.
5	5
6	6		use std::testing;
	7	+	use std::lang::alloc;
	8	+	use std::collections::dict;
	9	+
7	10		use super::encode;
	11	+	use super::asm;
	12	+
	13	+	static ASSEMBLY_ARENA_STORAGE: [u8; 16777216] = undefined;
	14	+	static ASSEMBLY_TEXT_STORAGE: [u32; 2] = undefined;
8	15
9	16		/// Helper to check encoding equals expected value.
10	17		fn expectEncoding(actual: u32, expected: u32) throws (testing::TestError) {
11	18		try testing::expect(actual == expected);
12	19		}
13	20
	21	+	@test fn testAddAssemblyExportsOnlyGlobalTextSymbols() throws (testing::TestError) {
	22	+	let mut arena = alloc::new(&mut ASSEMBLY_ARENA_STORAGE[..]);
	23	+	let symbols = try alloc::allocSlice(&mut arena, @sizeOf(asm::Symbol), @alignOf(asm::Symbol), 2) catch {
	24	+	throw testing::TestError::Failed;
	25	+	};
	26	+	let mut symbolSlice = @sliceOf((symbols as *mut [asm::Symbol]).ptr, 2, 2);
	27	+	set symbolSlice[0] = asm::Symbol {
	28	+	name: "local",
	29	+	section: asm::Section::Text,
	30	+	offset: 0,
	31	+	isExported: false,
	32	+	};
	33	+	set symbolSlice[1] = asm::Symbol {
	34	+	name: "exported",
	35	+	section: asm::Section::Text,
	36	+	offset: super::INSTR_SIZE,
	37	+	isExported: true,
	38	+	};
	39	+
	40	+	let mut generator = super::beginProgram(
	41	+	super::ProgramOptions { entryPatch: super::EntryPatch::None, debug: false },
	42	+	&mut arena
	43	+	);
	44	+	super::addAssembly(
	45	+	&mut generator,
	46	+	asm::Program { text: &ASSEMBLY_TEXT_STORAGE[..], data: &[], symbols: symbolSlice }
	47	+	);
	48	+
	49	+	try testing::expect(dict::get(&generator.e.labels.funcs, "local") == nil);
	50	+	let exportedOffset = dict::get(&generator.e.labels.funcs, "exported") else {
	51	+	throw testing::TestError::Failed;
	52	+	};
	53	+	try testing::expect(exportedOffset == super::INSTR_SIZE);
	54	+	}
	55	+
14	56		///////////////////////
15	57		// R-type ALU tests //
16	58		///////////////////////
17	59
18	60		@test fn testEncodeAdd() throws (testing::TestError) {

1	1		//! Formatting utilities for converting values to strings.
2	2		use super::mem;
3	3
	4	+	/// Maximum `u64` value: all 64 bits set, i.e. 2^64 - 1 (18446744073709551615).
	5	+	export constant U64_MAX: u64 = 0xFFFFFFFFFFFFFFFF;
4	6		/// Maximum string length, in bytes, for a formatted u32 (e.g. "4294967295", ten digits).
5	7		export constant U32_STR_LEN: u32 = 10;
6	8		/// Maximum string length, in bytes, for a formatted i32 (e.g. "-2147483648"): a sign plus up to `U32_STR_LEN` digits.
7	9		export constant I32_STR_LEN: u32 = U32_STR_LEN + 1;
8	10		/// Maximum string length for a formatted u64 (eg. "18446744073709551615").

10	12		/// Maximum string length, in bytes, for a formatted i64 (e.g. "-9223372036854775808": a sign plus 19 digits).
11	13		export constant I64_STR_LEN: u32 = 20;
12	14		/// Maximum string length, in bytes, for a formatted bool (the five bytes of "false").
13	15		export constant BOOL_STR_LEN: u32 = 5;
14	16
	17	+	/// Radix (numeric base) of a parsed integer literal, as stored in `IntLiteral.radix`.
	18	+	export union Radix {
	19	+	/// Binary literal, base 2 (`0b...` or `0B...`).
	20	+	Binary,
	21	+	/// Decimal literal, base 10 (no radix prefix).
	22	+	Decimal,
	23	+	/// Hexadecimal literal, base 16 (`0x...` or `0X...`).
	24	+	Hex,
	25	+	}
	26	+
	27	+	/// Errors thrown by `parseInt` and `parseChar` while parsing literal text.
	28	+	export union ParseError {
	29	+	/// Literal text was empty, had no digits after a sign or radix prefix, or was a malformed character literal.
	30	+	Invalid,
	31	+	/// Literal contained a character that is not a digit of its radix.
	32	+	InvalidDigit,
	33	+	/// Literal magnitude does not fit in a `u64` (exceeds `U64_MAX`).
	34	+	Overflow,
	35	+	}
	36	+
	37	+	/// Parsed integer literal metadata, as returned by `parseInt`.
	38	+	export record IntLiteral {
	39	+	/// Raw characters that comprised the literal, including any sign and radix prefix.
	40	+	text: *[u8],
	41	+	/// Absolute magnitude parsed from the digits; the sign is not applied to it.
	42	+	magnitude: u64,
	43	+	/// Radix used by the literal.
	44	+	radix: Radix,
	45	+	/// Whether the literal spelled an explicit sign (`+` or `-`).
	46	+	signed: bool,
	47	+	/// Whether the literal used a negative sign (`-`).
	48	+	negative: bool,
	49	+	}
	50	+
15	51		/// Format a u32 by writing it to the provided buffer.
16	52		export fn formatU32(val: u32, buffer: mut [u8]) -> [u8] {
17	53		assert buffer.len >= U32_STR_LEN;
18	54
19	55		let mut x: u32 = val;

135	171		} else {
136	172		try! mem::copy(buffer, "false");
137	173		return &buffer[..5];
138	174		}
139	175		}
	176	+
	177	+	/// Convert one ASCII digit (`0`-`9`, or a case-insensitive letter when `radix > 10`) to its numeric value; returns `nil` unless that value is below `radix`. Asserts `2 <= radix <= 36`.
	178	+	export fn digitFromAscii(ch: u8, radix: u32) -> ?u32 {
	179	+	assert radix >= 2 and radix <= 36;
	180	+	
	181	+	// 36 is never below `radix` (asserted <= 36 above), so non-digits fall through to `nil`.
	182	+	let mut value: u32 = 36;
	183	+	
	184	+	if ch >= '0' and ch <= '9' {
	185	+	set value = (ch - '0') as u32;
	186	+	} else if radix > 10 {
	187	+	// Clearing bit 0x20 maps ASCII `a`-`z` onto `A`-`Z`; uppercase letters are unchanged.
	188	+	let upper = ch & 0xDF;
	189	+	if upper >= 'A' and upper <= 'Z' {
	190	+	set value = (upper - 'A') as u32 + 10;
	191	+	}
	192	+	}
	193	+	if value < radix {
	194	+	return value;
	195	+	}
	196	+	return nil;
	197	+	}
	198	+
	199	+	/// Decode the byte that follows a backslash in an escape sequence: `n`, `t`, `r` map to newline, tab, carriage return; `0` maps to byte 0; backslash and both quote characters map to themselves, as does any other byte.
	200	+	export fn decodeAsciiEscape(ch: u8) -> u8 {
	201	+	match ch {
	202	+	case 'n' => return '\n',
	203	+	case 't' => return '\t',
	204	+	case 'r' => return '\r',
	205	+	case '\\' => return '\\',
	206	+	case '"' => return '"',
	207	+	case '\'' => return '\'',
	208	+	case '0' => return 0,
	209	+	else => return ch,
	210	+	}
	211	+	}
	212	+
	213	+	/// Parse an integer literal (binary, decimal, or hexadecimal) including an optional sign.
		+	///
		+	/// Accepts an optional leading `+` or `-`, then an optional `0x`/`0X` or `0b`/`0B` prefix,
		+	/// then one or more digits of that radix. The returned `magnitude` is unsigned; the sign is
		+	/// only recorded in `signed` and `negative`, and `text` is returned unchanged.
		+	///
		+	/// Throws `ParseError::Invalid` when no digits are present, `ParseError::InvalidDigit` on a
		+	/// character that is not a digit of the radix, and `ParseError::Overflow` when the magnitude
		+	/// would exceed `U64_MAX`.
	214	+	export fn parseInt(text: *[u8]) -> IntLiteral throws (ParseError) {
	215	+	if text.len == 0 {
	216	+	throw ParseError::Invalid;
	217	+	}
	218	+	let first = text[0];
	219	+	let negative = first == '-';
	220	+	let signed: bool = negative or (first == '+');
	221	+	
	222	+	let mut start: u32 = 0;
	223	+	let mut radix: u32 = 10;
	224	+	let mut radixType = Radix::Decimal;
	225	+	
	226	+	if signed {
	227	+	set start = 1;
		+	// A sign with nothing after it is not a literal.
	228	+	if start >= text.len {
	229	+	throw ParseError::Invalid;
	230	+	}
	231	+	}
		+	// A radix prefix needs a leading `0` and at least one more character after it.
	232	+	if start + 1 < text.len and text[start] == '0' {
	233	+	let prefix = text[start + 1];
	234	+	if prefix == 'x' or prefix == 'X' {
	235	+	set radix = 16;
	236	+	set radixType = Radix::Hex;
	237	+	set start += 2;
	238	+	} else if prefix == 'b' or prefix == 'B' {
	239	+	set radix = 2;
	240	+	set radixType = Radix::Binary;
	241	+	set start += 2;
	242	+	}
		+	// A prefix with no digits after it (e.g. `0x`) is rejected.
	243	+	if start >= text.len {
	244	+	throw ParseError::Invalid;
	245	+	}
	246	+	}
	247	+	let mut value: u64 = 0;
	248	+	let radix64: u64 = radix as u64;
	249	+	for i in start..text.len {
	250	+	let ch = text[i];
	251	+	let digit = digitFromAscii(ch, radix) else {
	252	+	throw ParseError::InvalidDigit;
	253	+	};
		+	// Check before multiplying so `value * radix64` cannot exceed `U64_MAX`.
	254	+	if value > (U64_MAX / radix64) {
	255	+	throw ParseError::Overflow;
	256	+	}
	257	+	set value *= radix64;
	258	+	
		+	// Likewise check before adding the digit.
	259	+	if value > U64_MAX - (digit as u64) {
	260	+	throw ParseError::Overflow;
	261	+	}
	262	+	set value += (digit as u64);
	263	+	}
	264	+	return IntLiteral {
	265	+	text, magnitude: value, radix: radixType, signed, negative,
	266	+	};
	267	+	}
	268	+
	269	+	/// Process escape sequences in a raw string, writing the result into `dst`: each backslash followed by a byte becomes one byte via `decodeAsciiEscape`; a lone trailing backslash is copied as-is.
	270	+	/// Returns the number of bytes written, which is at most `raw.len`; this function does not itself check that `dst` is large enough.
	271	+	export fn unescapeString(raw: [u8], dst: mut [u8]) -> u32 {
		+	// `i` is the read position in `raw`, `j` the write position in `dst`.
	272	+	let mut i: u32 = 0;
	273	+	let mut j: u32 = 0;
	274	+	
	275	+	while i < raw.len {
	276	+	if raw[i] == '\\' and i + 1 < raw.len {
	277	+	set dst[j] = decodeAsciiEscape(raw[i + 1]);
	278	+	set i += 2;
	279	+	} else {
	280	+	set dst[j] = raw[i];
	281	+	set i += 1;
	282	+	}
	283	+	set j += 1;
	284	+	}
	285	+	return j;
	286	+	}
	287	+
	288	+	/// Parse a single-byte character literal, including the single quotes.
		+	///
		+	/// The first and last bytes of `text` are dropped without being checked for `'`.
		+	/// What remains must be exactly one byte, or a backslash followed by one byte (decoded
		+	/// with `decodeAsciiEscape`); anything else throws `ParseError::Invalid`.
	289	+	export fn parseChar(text: *[u8]) -> u8 throws (ParseError) {
	290	+	if text.len < 2 {
	291	+	throw ParseError::Invalid;
	292	+	}
	293	+	let raw = &text[1..text.len - 1];
	294	+	if raw.len == 0 {
	295	+	throw ParseError::Invalid;
	296	+	}
	297	+	if raw[0] == '\\' {
		+	// An escape is exactly two bytes: the backslash and the escaped character.
	298	+	if raw.len <> 2 {
	299	+	throw ParseError::Invalid;
	300	+	}
	301	+	return decodeAsciiEscape(raw[1]);
	302	+	}
	303	+	if raw.len <> 1 {
	304	+	throw ParseError::Invalid;
	305	+	}
	306	+	return raw[0];
	307	+	}

1	1		//! Radiance AST modules.
2	2		export mod printer;
3	3
4	4		use std::io;
	5	+	use std::fmt;
5	6		use std::lang::alloc;
6	7
7	8		/// Maximum number of trait methods.
8	9		export constant MAX_TRAIT_METHODS: u32 = 8;
9	10

75	76		Signed,
76	77		/// Unsigned, eg. `u32`.
77	78		Unsigned,
78	79		}
79	80
80		-	/// Radix/base of a number.
81		-	export union Radix {
82		-	/// Binary literal (0b...).
83		-	Binary,
84		-	/// Decimal literal.
85		-	Decimal,
86		-	/// Hexadecimal literal (0x...).
87		-	Hex,
88		-	}
89		-
90		-	/// Parsed integer literal metadata.
91		-	export record IntLiteral {
92		-	/// Raw characters that comprised the literal.
93		-	text: *[u8],
94		-	/// Absolute magnitude parsed from the literal.
95		-	magnitude: u64,
96		-	/// Radix used by the literal.
97		-	radix: Radix,
98		-	/// Whether the literal spelled an explicit sign.
99		-	signed: bool,
100		-	/// Whether the literal used a negative sign.
101		-	negative: bool,
102		-	}
103		-
104	81		/// Binary operator kinds used in numeric expressions.
105	82		export union BinaryOp {
106	83		/// Addition (`+`).
107	84		Add,
108	85		/// Subtraction (`-`).