radiance · Radiance self-hosting compiler

compiler/ lib/ examples/ std/ arch/ collections/ lang/ alloc/ ast/ gen/ il/ module/ parser/ resolver/ scanner/ alloc.rad 4.2 KiB ast.rad 22.4 KiB gen.rad 489 B il.rad 15.1 KiB lower.rad 259.5 KiB module.rad 13.4 KiB package.rad 1.2 KiB parser.rad 78.5 KiB resolver.rad 244.3 KiB scanner.rad 18.1 KiB sexpr.rad 6.3 KiB strings.rad 2.2 KiB sys/ arch.rad 65 B collections.rad 36 B fmt.rad 3.8 KiB intrinsics.rad 399 B io.rad 1.2 KiB lang.rad 222 B mem.rad 2.1 KiB sys.rad 167 B testing.rad 2.3 KiB tests.rad 11.6 KiB vec.rad 3.1 KiB std.rad 231 B scripts/ seed/ test/ vim/ .gitignore 353 B .gitsigners 112 B LICENSE 1.1 KiB Makefile 3.0 KiB README 2.5 KiB std.lib 1.0 KiB std.lib.test 252 B

lib/std/lang/scanner.rad 18.1 KiB raw

//! Lexical scanner for the Radiance programming language.
//!
//! This module implements a hand-written scanner that tokenizes Radiance
//! source code into a stream of tokens for consumption by the parser.
@test mod tests;

use std::mem;
use std::lang::strings;

/// Token kinds representing all lexical elements in Radiance.
///
/// This union covers punctuation, operators, keywords, literals and
/// identifier-like tokens. Every token returned by the scanner carries
/// exactly one of these kinds.
pub union TokenKind {
    /// Special end of file token generated when the input is exhausted.
    Eof,
    /// Special invalid token. Its `source` carries an error message
    /// instead of source text.
    Invalid,

    // Punctuation and simple operators.
    LParen,     // (
    RParen,     // )
    LBrace,     // {
    RBrace,     // }
    LBracket,   // [
    RBracket,   // ]
    Comma,      // ,
    Dot,        // .
    DotDot,     // ..
    Minus,      // -
    Plus,       // +
    Colon,      // :
    ColonColon, // ::
    Semicolon,  // ;
    Slash,      // /
    Star,       // *
    Percent,    // %
    Amp,        // &
    Pipe,       // |
    Caret,      // ^
    Tilde,      // ~
    Underscore, // _
    Question,   // ?
    Bang,       // !
    BangEqual,  // !=
    Equal,      // =
    EqualEqual, // ==
    Gt,         // >
    GtEqual,    // >=
    Lt,         // <
    LtEqual,    // <=
    LtLt,       // <<
    GtGt,       // >>
    Arrow,      // ->
    FatArrow,   // =>

    // Compound assignment operators.
    PlusEqual,    // +=
    MinusEqual,   // -=
    StarEqual,    // *=
    SlashEqual,   // /=
    PercentEqual, // %=
    AmpEqual,     // &=
    PipeEqual,    // |=
    CaretEqual,   // ^=
    LtLtEqual,    // <<=
    GtGtEqual,    // >>=

    // Boolean operators, spelled as the keywords `not`, `and` and `or`.
    Not, And, Or,

    /// Eg. `fnord`
    Ident,
    /// Eg. `@default`
    AtIdent,
    /// The `log` keyword.
    Log,

    // Literals.
    String,     // "fnord"
    Char,       // 'f'
    Number,     // 42
    True,       // true
    False,      // false
    Nil,        // nil
    Undefined,  // undefined

    // Control flow tokens.
    If, Else, Return, Break,
    Continue, While, For, In,
    Loop, Match, Case, Try, Catch,
    Throw, Throws, Panic, Assert,

    // Variable binding tokens.
    Let, Mut, Const, Align,

    // Module-related tokens.
    Mod, Use, Super,

    // Type or function attributes.
    Pub, Extern, Static,

    // Trait-related tokens.
    Trait, Instance,

    // Type-related tokens.
    I8, I16, I32, I64, U8, U16, U32, U64,
    Opaque, Fn, Bool, Union, Record, As
}

/// A reserved keyword: one row of the `KEYWORDS` lookup table.
record Keyword {
    /// Keyword spelling, exactly as it appears in source code.
    name: *[u8],
    /// Token kind produced when an identifier-shaped word equals `name`.
    tok: TokenKind,
}

/// Sorted keyword table for binary search.
///
/// `keywordOrIdent` bisects this table using `mem::cmp`, so the entries
/// must stay sorted by `name`. When adding a keyword, insert it at its
/// sorted position and update the array length to match.
const KEYWORDS: [Keyword; 51] = [
    { name: "align", tok: TokenKind::Align },
    { name: "and", tok: TokenKind::And },
    { name: "as", tok: TokenKind::As },
    { name: "assert", tok: TokenKind::Assert },
    { name: "bool", tok: TokenKind::Bool },
    { name: "break", tok: TokenKind::Break },
    { name: "case", tok: TokenKind::Case },
    { name: "catch", tok: TokenKind::Catch },
    { name: "const", tok: TokenKind::Const },
    { name: "continue", tok: TokenKind::Continue },
    { name: "else", tok: TokenKind::Else },
    { name: "extern", tok: TokenKind::Extern },
    { name: "false", tok: TokenKind::False },
    { name: "fn", tok: TokenKind::Fn },
    { name: "for", tok: TokenKind::For },
    { name: "i16", tok: TokenKind::I16 },
    { name: "i32", tok: TokenKind::I32 },
    { name: "i64", tok: TokenKind::I64 },
    { name: "i8", tok: TokenKind::I8 },
    { name: "if", tok: TokenKind::If },
    { name: "in", tok: TokenKind::In },
    { name: "instance", tok: TokenKind::Instance },
    { name: "let", tok: TokenKind::Let },
    { name: "log", tok: TokenKind::Log },
    { name: "loop", tok: TokenKind::Loop },
    { name: "match", tok: TokenKind::Match },
    { name: "mod", tok: TokenKind::Mod },
    { name: "mut", tok: TokenKind::Mut },
    { name: "nil", tok: TokenKind::Nil },
    { name: "not", tok: TokenKind::Not },
    { name: "opaque", tok: TokenKind::Opaque },
    { name: "or", tok: TokenKind::Or },
    { name: "panic", tok: TokenKind::Panic },
    { name: "pub", tok: TokenKind::Pub },
    { name: "record", tok: TokenKind::Record },
    { name: "return", tok: TokenKind::Return },
    { name: "static", tok: TokenKind::Static },
    { name: "super", tok: TokenKind::Super },
    { name: "throw", tok: TokenKind::Throw },
    { name: "throws", tok: TokenKind::Throws },
    { name: "trait", tok: TokenKind::Trait },
    { name: "true", tok: TokenKind::True },
    { name: "try", tok: TokenKind::Try },
    { name: "u16", tok: TokenKind::U16 },
    { name: "u32", tok: TokenKind::U32 },
    { name: "u64", tok: TokenKind::U64 },
    { name: "u8", tok: TokenKind::U8 },
    { name: "undefined", tok: TokenKind::Undefined },
    { name: "union", tok: TokenKind::Union },
    { name: "use", tok: TokenKind::Use },
    { name: "while", tok: TokenKind::While },
];

/// Describes where source code originated from.
///
/// Stored in the `Scanner` and copied into every `Location`.
pub union SourceLoc {
    /// Source loaded from a file at the given path.
    File(*[u8]),
    /// Source provided as an inline string (no file path).
    String,
}

/// Lexical scanner state for tokenizing Radiance source code.
///
/// Holds the source buffer together with two byte offsets into it: where
/// the token being scanned starts, and which character is read next.
pub record Scanner {
    /// Origin of the source being scanned.
    sourceLoc: SourceLoc,
    /// Source buffer.
    source: *[u8],
    /// Byte offset in `source` at which the current token starts.
    token: u32,
    /// Byte offset in `source` of the next character to be scanned.
    cursor: u32,
    /// String pool in which identifier and `@identifier` text is interned.
    pool: *mut strings::Pool,
}

/// Individual token with kind, source text, and position.
///
/// Represents a single lexical element extracted from source,
/// including its original text and byte offset for error reporting.
pub record Token {
    /// Token kind.
    kind: TokenKind,
    /// Token source string. For `Ident` and `AtIdent` tokens this is the
    /// string returned by `strings::intern`; for `Invalid` tokens it is an
    /// error message rather than source text.
    source: *[u8],
    /// Byte offset in the input buffer at which the token starts.
    offset: u32,
}

/// Source code location with line/column information.
///
/// Used for error reporting and debugging. Produced by `getLocation`.
pub record Location {
    /// Origin of the source.
    source: SourceLoc,
    /// Line number; `getLocation` starts counting at 1.
    line: u16,
    /// Column number; `getLocation` starts counting at 1 and counts bytes.
    col: u16,
}

/// Create a new scanner object.
///
/// Interns a fixed set of built-in names into `pool`, then returns a scanner
/// over `source` with both the token offset and the cursor at the start of
/// the buffer. `sourceLoc` and `pool` are stored in the scanner as given.
pub fn scanner(sourceLoc: SourceLoc, source: *[u8], pool: *mut strings::Pool) -> Scanner {
    // Intern built-in functions and attributes.
    strings::intern(pool, "@sizeOf");
    strings::intern(pool, "@alignOf");
    strings::intern(pool, "@sliceOf");
    strings::intern(pool, "@default");
    strings::intern(pool, "@intrinsic");
    strings::intern(pool, "@test");
    // Intern built-in slice methods.
    strings::intern(pool, "append");
    strings::intern(pool, "delete");

    return Scanner { sourceLoc, source, token: 0, cursor: 0, pool };
}

/// Report whether the cursor has moved past the last byte of the source.
pub fn isEof(s: *Scanner) -> bool {
    return s.source.len <= s.cursor;
}

/// Return the character under the cursor, or `nil` once the input is
/// exhausted. The cursor is not moved.
pub fn current(s: *Scanner) -> ?u8 {
    if s.cursor < s.source.len {
        return s.source[s.cursor];
    }
    return nil;
}

/// Look at the character one position after the cursor without consuming
/// anything. Returns `nil` if that position lies beyond the source.
fn peek(s: *Scanner) -> ?u8 {
    let ahead = s.cursor + 1;
    if ahead < s.source.len {
        return s.source[ahead];
    }
    return nil;
}

/// Move the cursor forward by one character and return the character that
/// was stepped over. No end-of-input check is made here; callers are
/// expected to have checked beforehand.
fn advance(s: *mut Scanner) -> u8 {
    let at = s.cursor;
    s.cursor = at + 1;
    return s.source[at];
}

/// Step over the character under the cursor if, and only if, it equals
/// `expected`. Returns whether a character was consumed; at end of input
/// nothing is consumed and `false` is returned.
fn consume(s: *mut Scanner, expected: u8) -> bool {
    if let c = current(s) {
        if c == expected {
            advance(s);
            return true;
        }
    }
    return false;
}

/// Build a token of the given kind whose text is the slice of source
/// between the token start offset and the cursor.
fn tok(s: *Scanner, kind: TokenKind) -> Token {
    let text = &s.source[s.token..s.cursor];
    return Token { kind, source: text, offset: s.token };
}

/// Build an `Invalid` token located at `offset`. The token's `source`
/// field carries `message` in place of source text.
pub fn invalid(offset: u32, message: *[u8]) -> Token {
    let kind = TokenKind::Invalid;
    return Token { kind, source: message, offset };
}

/// Move the cursor past any run of whitespace and `//` line comments,
/// stopping at the first character that starts a token or at end of input.
fn skipWhitespace(s: *mut Scanner) {
    while let ch = current(s) {
        if ch == '/' {
            // Only `//` opens a comment; a single `/` is a token.
            if let following = peek(s); following == '/' {
                // Skip up to, but not including, the newline; the outer
                // loop then treats the newline as ordinary whitespace.
                while let c = current(s); c != '\n' {
                    advance(s);
                }
            } else {
                return;
            }
        } else {
            match ch {
                case ' ', '\n', '\r', '\t' => advance(s),
                else => return,
            }
        }
    }
}

/// Report whether `c` is one of the ASCII decimal digits `0` through `9`.
fn isDigit(c: u8) -> bool {
    return '0' <= c and c <= '9';
}

/// Report whether `c` is a hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`.
fn isHexDigit(c: u8) -> bool {
    if c >= '0' and c <= '9' {
        return true;
    }
    if c >= 'a' and c <= 'f' {
        return true;
    }
    return c >= 'A' and c <= 'F';
}

/// Report whether `c` is a binary digit, i.e. `0` or `1`.
fn isBinDigit(c: u8) -> bool {
    match c {
        case '0', '1' => return true,
        else => return false,
    }
}

/// Report whether `c` is an ASCII letter, lower or upper case.
/// Digits and `_` are not letters.
fn isAlpha(c: u8) -> bool {
    if c >= 'a' and c <= 'z' {
        return true;
    }
    return c >= 'A' and c <= 'Z';
}

/// Report whether `c` is printable ASCII, from space up to and
/// including `~`.
fn isPrint(c: u8) -> bool {
    return ' ' <= c and c <= '~';
}

/// Scan a numeric literal: decimal (with optional fractional part),
/// hexadecimal (`0x`/`0X` prefix) or binary (`0b`/`0B` prefix).
///
/// On entry the first character of the literal has already been consumed:
/// either its first digit, or a leading `-`/`+` sign that the caller has
/// checked is followed by a digit.
///
/// Returns a `Number` token spanning the literal. Returns an `Invalid` token
/// when a `0x` or `0b` prefix is not followed by at least one digit of that
/// base, including when the input ends right after the prefix.
fn scanNumber(s: *mut Scanner) -> Token {
    let first = s.source[s.cursor - 1];
    if first == '-' or first == '+' {
        // Consume the first digit, so that the prefix check below looks at
        // that digit rather than at the sign.
        advance(s);
    }
    if s.source[s.cursor - 1] == '0' {
        // Check for hex literal (`0x` or `0X` prefix).
        if let ch = current(s); ch == 'x' or ch == 'X' {
            advance(s);
            // Must have at least one hex digit after `0x`. Comparing cursor
            // positions also catches a prefix cut off by the end of input,
            // which a check on the current character alone would miss.
            let digits = s.cursor;
            while let ch = current(s); isHexDigit(ch) {
                advance(s);
            }
            if s.cursor == digits {
                return invalid(s.token, "invalid hex literal");
            }
            return tok(s, TokenKind::Number);
        }
        // Check for binary literal (`0b` or `0B` prefix).
        if let ch = current(s); ch == 'b' or ch == 'B' {
            advance(s);
            // Must have at least one binary digit after `0b`; see above.
            let digits = s.cursor;
            while let ch = current(s); isBinDigit(ch) {
                advance(s);
            }
            if s.cursor == digits {
                return invalid(s.token, "invalid binary literal");
            }
            return tok(s, TokenKind::Number);
        }
    }

    // Regular decimal number.
    while let ch = current(s); isDigit(ch) {
        advance(s);
    }

    // Look for decimal part. The `.` is only consumed when a digit follows
    // it, so that `1..2` and `1.foo` leave the dot for the next token.
    if let ch = current(s); ch == '.' {
        if let p = peek(s); isDigit(p) {
            advance(s); // Consume the "."
            while let ch = current(s); isDigit(ch) {
                advance(s);
            }
        }
    }
    return tok(s, TokenKind::Number);
}

/// Scan the remainder of a literal that ends with `delim`. The opening
/// delimiter has already been consumed by the caller.
///
/// A backslash escapes the character that follows it, so an escaped
/// delimiter does not end the literal.
///
/// Returns a token of the given `kind` spanning both delimiters, an
/// `Invalid` token if a non-printable character is found, or `nil` if the
/// input ends before the closing delimiter.
fn scanDelimited(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
    while let ch = current(s); ch != delim {
        if not isPrint(ch) {
            return invalid(s.token, "invalid character");
        }
        if consume(s, '\\') {
            // A backslash at the very end of the input has nothing to
            // escape. Report the literal as unterminated instead of
            // advancing past the end of the buffer.
            if isEof(s) {
                return nil;
            }
        }
        advance(s);
    }
    if not consume(s, delim) {
        return nil;
    }
    return tok(s, kind);
}

/// Scan a double-quoted string literal whose opening quote has already been
/// consumed. Yields an `Invalid` token if the closing quote is missing.
fn scanString(s: *mut Scanner) -> Token {
    let scanned = scanDelimited(s, '"', TokenKind::String);
    if let t = scanned {
        return t;
    }
    return invalid(s.token, "unterminated string");
}

/// Scan a single-quoted character literal whose opening quote has already
/// been consumed. Yields an `Invalid` token if the closing quote is missing.
fn scanChar(s: *mut Scanner) -> Token {
    let scanned = scanDelimited(s, '\'', TokenKind::Char);
    if let t = scanned {
        return t;
    }
    return invalid(s.token, "unterminated character");
}

/// Classify an identifier-shaped word: return the keyword's token kind if
/// `src` is listed in `KEYWORDS`, otherwise `TokenKind::Ident`.
///
/// The lookup is a binary search over the half-open range `lo..hi`.
fn keywordOrIdent(src: *[u8]) -> TokenKind {
    let mut lo: u32 = 0;
    let mut hi: u32 = KEYWORDS.len;

    while lo < hi {
        let mid = lo + ((hi - lo) / 2);
        let entry = KEYWORDS[mid];

        match mem::cmp(src, entry.name) {
            case 1 => lo = mid + 1,
            case -1 => hi = mid,
            else => return entry.tok,
        }
    }
    return TokenKind::Ident;
}

/// Scan the rest of a word whose first character has already been consumed,
/// and classify it as a keyword or an identifier.
///
/// A word continues over letters, digits, `_` and `#`. Identifier text is
/// interned in the scanner's pool; keyword tokens keep a slice of the source.
fn scanIdentifier(s: *mut Scanner) -> Token {
    while let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' or ch == '#' {
        advance(s);
    }
    let word = &s.source[s.token..s.cursor];
    let kind = keywordOrIdent(word);

    // Keywords are not interned.
    if kind != TokenKind::Ident {
        return tok(s, kind);
    }
    let interned = strings::intern(s.pool, word);
    return Token { kind, source: interned, offset: s.token };
}

/// Scan the next token.
///
/// Skips leading whitespace and line comments, records the token start, and
/// then dispatches on the first character. Returns an `Eof` token once the
/// input is exhausted, and an `Invalid` token for a character that starts no
/// known token.
pub fn next(s: *mut Scanner) -> Token {
    skipWhitespace(s);  // Skip any whitespace between tokens.
    s.token = s.cursor; // Token starts at current position.

    if isEof(s) {
        return tok(s, TokenKind::Eof);
    }
    // From here on, the first character of the token is already consumed.
    let c: u8 = advance(s);

    if isDigit(c) {
        return scanNumber(s);
    }
    if isAlpha(c) {
        return scanIdentifier(s);
    }
    // In each multi-character case below, the longer operator is tried
    // first, via `consume`, before falling back to the shorter one.
    match c {
        case '\'' => return scanChar(s),
        case '"'  => return scanString(s),
        case '('  => return tok(s, TokenKind::LParen),
        case ')'  => return tok(s, TokenKind::RParen),
        case '{'  => return tok(s, TokenKind::LBrace),
        case '}'  => return tok(s, TokenKind::RBrace),
        case '['  => return tok(s, TokenKind::LBracket),
        case ']'  => return tok(s, TokenKind::RBracket),
        case ';'  => return tok(s, TokenKind::Semicolon),
        case ','  => return tok(s, TokenKind::Comma),
        case '.'  => {
            if consume(s, '.') {
                return tok(s, TokenKind::DotDot);
            }
            return tok(s, TokenKind::Dot);
        }
        case ':'  => {
            if consume(s, ':') {
                return tok(s, TokenKind::ColonColon);
            }
            return tok(s, TokenKind::Colon);
        }
        case '-'  => {
            if consume(s, '>') {
                return tok(s, TokenKind::Arrow);
            }
            if consume(s, '=') {
                return tok(s, TokenKind::MinusEqual);
            }
            // If followed by a digit, scan as negative number
            if let ch = current(s); isDigit(ch) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Minus);
        }
        case '+' => {
            if consume(s, '=') {
                return tok(s, TokenKind::PlusEqual);
            }
            // If followed by a digit, the sign becomes part of the number.
            if let ch = current(s); isDigit(ch) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Plus);
        }
        case '/' => {
            // `//` comments never reach this point; `skipWhitespace` has
            // already skipped them.
            if consume(s, '=') {
                return tok(s, TokenKind::SlashEqual);
            }
            return tok(s, TokenKind::Slash);
        }
        case '*' => {
            if consume(s, '=') {
                return tok(s, TokenKind::StarEqual);
            }
            return tok(s, TokenKind::Star);
        }
        case '%' => {
            if consume(s, '=') {
                return tok(s, TokenKind::PercentEqual);
            }
            return tok(s, TokenKind::Percent);
        }
        case '&' => {
            if consume(s, '=') {
                return tok(s, TokenKind::AmpEqual);
            }
            return tok(s, TokenKind::Amp);
        }
        case '?' => return tok(s, TokenKind::Question),
        case '|' => {
            if consume(s, '=') {
                return tok(s, TokenKind::PipeEqual);
            }
            return tok(s, TokenKind::Pipe);
        }
        case '^' => {
            if consume(s, '=') {
                return tok(s, TokenKind::CaretEqual);
            }
            return tok(s, TokenKind::Caret);
        }
        case '~' => return tok(s, TokenKind::Tilde),
        case '!' => {
            if consume(s, '=') {
                return tok(s, TokenKind::BangEqual);
            }
            return tok(s, TokenKind::Bang);
        }
        case '=' => {
            if consume(s, '>') {
                return tok(s, TokenKind::FatArrow);
            }
            if consume(s, '=') {
                return tok(s, TokenKind::EqualEqual);
            }
            return tok(s, TokenKind::Equal);
        }
        case '<' => {
            if consume(s, '<') {
                if consume(s, '=') {
                    return tok(s, TokenKind::LtLtEqual);
                }
                return tok(s, TokenKind::LtLt);
            }
            if consume(s, '=') {
                return tok(s, TokenKind::LtEqual);
            }
            return tok(s, TokenKind::Lt);
        }
        case '>' => {
            if consume(s, '>') {
                if consume(s, '=') {
                    return tok(s, TokenKind::GtGtEqual);
                }
                return tok(s, TokenKind::GtGt);
            }
            if consume(s, '=') {
                return tok(s, TokenKind::GtEqual);
            }
            return tok(s, TokenKind::Gt);
        }
        case '@' => {
            // Scan `@identifier` as a single token. Only letters are
            // accepted after the `@`.
            while let ch = current(s); isAlpha(ch) {
                advance(s);
            }
            // Must have at least one character after `@`.
            if s.cursor - s.token <= 1 {
                return invalid(s.token, "expected identifier after `@`");
            }
            // The interned name includes the leading `@`.
            let name = &s.source[s.token..s.cursor];
            return Token {
                kind: TokenKind::AtIdent,
                source: strings::intern(s.pool, name),
                offset: s.token,
            };
        }
        case '_' => {
            if let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' {
                // This is part of an identifier like `_foo` or `__start`
                return scanIdentifier(s);
            }
            // A lone `_` is its own token.
            return tok(s, TokenKind::Underscore);
        }
        else => return invalid(s.token, "unexpected character"),
    }
}

/// Translate a byte offset in `source` into a line and column, both
/// starting at 1.
///
/// Returns `nil` when `offset` is not a valid index into `source`. This
/// includes an offset equal to the source length.
pub fn getLocation(sourceLoc: SourceLoc, source: *[u8], offset: u32) -> ?Location {
    if offset >= source.len {
        return nil;
    }
    let mut line: u16 = 1;
    let mut col: u16 = 1;

    // Walk every byte that precedes `offset`; each newline starts a new line.
    for ch in &source[..offset] {
        if ch != '\n' {
            col += 1;
        } else {
            line += 1;
            col = 1;
        }
    }
    return Location { source: sourceLoc, line, col };
}

1	//! Lexical scanner for the Radiance programming language.
2	//!
3	//! This module implements a hand-written scanner that tokenizes Radiance
4	//! source code into a stream of tokens for consumption by the parser.
5	@test mod tests;
6
7	use std::mem;
8	use std::lang::strings;
9
10	/// Token kinds representing all lexical elements in Radiance.
11	///
12	/// This enum covers operators, keywords, literals, and structural
13	/// elements used by the parser to build the AST.
14	pub union TokenKind {
15	/// Special end of file token generated when the input is exhausted.
16	Eof,
17	/// Special invalid token.
18	Invalid,
19
20	LParen, // (
21	RParen, // )
22	LBrace, // {
23	RBrace, // }
24	LBracket, // [
25	RBracket, // ]
26	Comma, // ,
27	Dot, // .
28	DotDot, // ..
29	Minus, // -
30	Plus, // +
31	Colon, // :
32	ColonColon, // ::
33	Semicolon, // ;
34	Slash, // /
35	Star, // *
36	Percent, // %
37	Amp, // &
38	Pipe, // \|
39	Caret, // ^
40	Tilde, // ~
41	Underscore, // _
42	Question, // ?
43	Bang, // !
44	BangEqual, // !=
45	Equal, // =
46	EqualEqual, // ==
47	Gt, // >
48	GtEqual, // >=
49	Lt, // <
50	LtEqual, // <=
51	LtLt, // <<
52	GtGt, // >>
53	Arrow, // ->
54	FatArrow, // =>
55
56	// Compound assignment operators.
57	PlusEqual, // +=
58	MinusEqual, // -=
59	StarEqual, // *=
60	SlashEqual, // /=
61	PercentEqual, // %=
62	AmpEqual, // &=
63	PipeEqual, // \|=
64	CaretEqual, // ^=
65	LtLtEqual, // <<=
66	GtGtEqual, // >>=
67
68	// Boolean operators.
69	Not, And, Or,
70
71	/// Eg. `fnord`
72	Ident,
73	/// Eg. `@default`
74	AtIdent,
75	/// The `log` keyword.
76	Log,
77
78	// Literals.
79	String, // "fnord"
80	Char, // 'f'
81	Number, // 42
82	True, // true
83	False, // false
84	Nil, // nil
85	Undefined, // undefined
86
87	// Control flow tokens.
88	If, Else, Return, Break,
89	Continue, While, For, In,
90	Loop, Match, Case, Try, Catch,
91	Throw, Throws, Panic, Assert,
92
93	// Variable binding tokens.
94	Let, Mut, Const, Align,
95
96	// Module-related tokens.
97	Mod, Use, Super,
98
99	// Type or function attributes.
100	Pub, Extern, Static,
101
102	// Trait-related tokens.
103	Trait, Instance,
104
105	// Type-related tokens.
106	I8, I16, I32, I64, U8, U16, U32, U64,
107	Opaque, Fn, Bool, Union, Record, As
108	}
109
110	/// A reserved keyword.
111	record Keyword {
112	/// Keyword string.
113	name: *[u8],
114	/// Corresponding token.
115	tok: TokenKind,
116	}
117
118	/// Sorted keyword table for binary search.
119	const KEYWORDS: [Keyword; 51] = [
120	{ name: "align", tok: TokenKind::Align },
121	{ name: "and", tok: TokenKind::And },
122	{ name: "as", tok: TokenKind::As },
123	{ name: "assert", tok: TokenKind::Assert },
124	{ name: "bool", tok: TokenKind::Bool },
125	{ name: "break", tok: TokenKind::Break },
126	{ name: "case", tok: TokenKind::Case },
127	{ name: "catch", tok: TokenKind::Catch },
128	{ name: "const", tok: TokenKind::Const },
129	{ name: "continue", tok: TokenKind::Continue },
130	{ name: "else", tok: TokenKind::Else },
131	{ name: "extern", tok: TokenKind::Extern },
132	{ name: "false", tok: TokenKind::False },
133	{ name: "fn", tok: TokenKind::Fn },
134	{ name: "for", tok: TokenKind::For },
135	{ name: "i16", tok: TokenKind::I16 },
136	{ name: "i32", tok: TokenKind::I32 },
137	{ name: "i64", tok: TokenKind::I64 },
138	{ name: "i8", tok: TokenKind::I8 },
139	{ name: "if", tok: TokenKind::If },
140	{ name: "in", tok: TokenKind::In },
141	{ name: "instance", tok: TokenKind::Instance },
142	{ name: "let", tok: TokenKind::Let },
143	{ name: "log", tok: TokenKind::Log },
144	{ name: "loop", tok: TokenKind::Loop },
145	{ name: "match", tok: TokenKind::Match },
146	{ name: "mod", tok: TokenKind::Mod },
147	{ name: "mut", tok: TokenKind::Mut },
148	{ name: "nil", tok: TokenKind::Nil },
149	{ name: "not", tok: TokenKind::Not },
150	{ name: "opaque", tok: TokenKind::Opaque },
151	{ name: "or", tok: TokenKind::Or },
152	{ name: "panic", tok: TokenKind::Panic },
153	{ name: "pub", tok: TokenKind::Pub },
154	{ name: "record", tok: TokenKind::Record },
155	{ name: "return", tok: TokenKind::Return },
156	{ name: "static", tok: TokenKind::Static },
157	{ name: "super", tok: TokenKind::Super },
158	{ name: "throw", tok: TokenKind::Throw },
159	{ name: "throws", tok: TokenKind::Throws },
160	{ name: "trait", tok: TokenKind::Trait },
161	{ name: "true", tok: TokenKind::True },
162	{ name: "try", tok: TokenKind::Try },
163	{ name: "u16", tok: TokenKind::U16 },
164	{ name: "u32", tok: TokenKind::U32 },
165	{ name: "u64", tok: TokenKind::U64 },
166	{ name: "u8", tok: TokenKind::U8 },
167	{ name: "undefined", tok: TokenKind::Undefined },
168	{ name: "union", tok: TokenKind::Union },
169	{ name: "use", tok: TokenKind::Use },
170	{ name: "while", tok: TokenKind::While },
171	];
172
173	/// Describes where source code originated from.
174	pub union SourceLoc {
175	/// Source loaded from a file at the given path.
176	File(*[u8]),
177	/// Source provided as an inline string (no file path).
178	String,
179	}
180
181	/// Lexical scanner state for tokenizing Radiance source code.
182	///
183	/// Maintains position information and source buffer reference.
184	pub record Scanner {
185	/// Origin of the source being scanned.
186	sourceLoc: SourceLoc,
187	/// Source buffer.
188	source: *[u8],
189	/// Offset of current token into buffer.
190	token: u32,
191	/// Offset of current character being scanned.
192	cursor: u32,
193	/// Interned string pool.
194	pool: *mut strings::Pool,
195	}
196
197	/// Individual token with kind, source text, and position.
198	///
199	/// Represents a single lexical element extracted from source,
200	/// including its original text and byte offset for error reporting.
201	pub record Token {
202	/// Token kind.
203	kind: TokenKind,
204	/// Token source string.
205	source: *[u8],
206	/// Byte offset of `source` in input buffer.
207	offset: u32,
208	}
209
210	/// Source code location with line/column information.
211	///
212	/// Used for error reporting and debugging.
213	pub record Location {
214	/// Origin of the source.
215	source: SourceLoc,
216	/// Line number.
217	line: u16,
218	/// Column number.
219	col: u16,
220	}
221
222	/// Create a new scanner object.
223	pub fn scanner(sourceLoc: SourceLoc, source: [u8], pool: mut strings::Pool) -> Scanner {
224	// Intern built-in functions and attributes.
225	strings::intern(pool, "@sizeOf");
226	strings::intern(pool, "@alignOf");
227	strings::intern(pool, "@sliceOf");
228	strings::intern(pool, "@default");
229	strings::intern(pool, "@intrinsic");
230	strings::intern(pool, "@test");
231	// Intern built-in slice methods.
232	strings::intern(pool, "append");
233	strings::intern(pool, "delete");
234
235	return Scanner { sourceLoc, source, token: 0, cursor: 0, pool };
236	}
237
238	/// Check if we've reached the end of input.
239	pub fn isEof(s: *Scanner) -> bool {
240	return s.cursor >= s.source.len;
241	}
242
243	/// Get the current character, if any.
244	pub fn current(s: *Scanner) -> ?u8 {
245	if isEof(s) {
246	return nil;
247	}
248	return s.source[s.cursor];
249	}
250
251	/// Peek at the next character without advancing the scanner.
252	fn peek(s: *Scanner) -> ?u8 {
253	if s.cursor + 1 >= s.source.len {
254	return nil;
255	}
256	return s.source[s.cursor + 1];
257	}
258
259	/// Advance scanner and return the character that was consumed.
260	fn advance(s: *mut Scanner) -> u8 {
261	s.cursor += 1;
262	return s.source[s.cursor - 1];
263	}
264
265	/// Consume the expected character if it matches the current position.
266	fn consume(s: *mut Scanner, expected: u8) -> bool {
267	if let c = current(s); c == expected {
268	advance(s);
269	return true;
270	}
271	return false;
272	}
273
274	/// Create a token from the current scanner state.
275	fn tok(s: *Scanner, kind: TokenKind) -> Token {
276	return Token { kind, source: &s.source[s.token..s.cursor], offset: s.token };
277	}
278
279	/// Create an invalid token with the given message.
280	pub fn invalid(offset: u32, message: *[u8]) -> Token {
281	return Token { kind: TokenKind::Invalid, source: message, offset };
282	}
283
284	/// Skip whitespace characters and line comments.
285	fn skipWhitespace(s: *mut Scanner) {
286	while let ch = current(s) {
287	match ch {
288	case ' ', '\n', '\r', '\t' => advance(s),
289	case '/' => {
290	if let c = peek(s); c == '/' {
291	while let ch = current(s); ch != '\n' {
292	advance(s);
293	}
294	} else {
295	return;
296	}
297	}
298	else => return,
299	}
300	}
301	}
302
303	/// Check if character is an ASCII digit (0-9).
304	fn isDigit(c: u8) -> bool {
305	return c >= '0' and c <= '9';
306	}
307
308	/// Check if character is a hexadecimal digit (0-9, a-f, A-F).
309	fn isHexDigit(c: u8) -> bool {
310	return (c >= '0' and c <= '9')
311	or (c >= 'a' and c <= 'f')
312	or (c >= 'A' and c <= 'F');
313	}
314
315	/// Check if character is a binary digit (0 or 1).
316	fn isBinDigit(c: u8) -> bool {
317	return c == '0' or c == '1';
318	}
319
320	/// Check if character is alphabetic.
321	fn isAlpha(c: u8) -> bool {
322	return (c >= 'a' and c <= 'z')
323	or (c >= 'A' and c <= 'Z');
324	}
325
326	/// Check if character is printable ASCII.
327	fn isPrint(c: u8) -> bool {
328	return c >= ' ' and c <= '~';
329	}
330
331	/// Scan numeric literal (decimal, hex, or binary).
332	fn scanNumber(s: *mut Scanner) -> Token {
333	let first = s.source[s.cursor - 1];
334	if first == '-' or first == '+' {
335	advance(s);
336	}
337	// Check for hex literal (`0x` or `0X` prefix).
338	if s.source[s.cursor - 1] == '0' {
339	if let ch = current(s); ch == 'x' or ch == 'X' {
340	advance(s);
341	// Must have at least one hex digit after `0x`.
342	if let ch = current(s); not isHexDigit(ch) {
343	return invalid(s.token, "invalid hex literal");
344	}
345	while let ch = current(s); isHexDigit(ch) {
346	advance(s);
347	}
348	return tok(s, TokenKind::Number);
349	}
350	// Check for binary literal (`0b` or `0B` prefix).
351	if let ch = current(s); ch == 'b' or ch == 'B' {
352	advance(s);
353	// Must have at least one binary digit after `0b`.
354	if let ch = current(s); not isBinDigit(ch) {
355	return invalid(s.token, "invalid binary literal");
356	}
357	while let ch = current(s); isBinDigit(ch) {
358	advance(s);
359	}
360	return tok(s, TokenKind::Number);
361	}
362	}
363
364	// Regular decimal number.
365	while let ch = current(s); isDigit(ch) {
366	advance(s);
367	}
368
369	// Look for decimal part.
370	if let ch = current(s); ch == '.' {
371	if let p = peek(s); isDigit(p) {
372	advance(s); // Consume the "."
373	while let ch = current(s); isDigit(ch) {
374	advance(s);
375	}
376	}
377	}
378	return tok(s, TokenKind::Number);
379	}
380
381	fn scanDelimited(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
382	while let ch = current(s); ch != delim {
383	if not isPrint(ch) {
384	return invalid(s.token, "invalid character");
385	}
386	consume(s, '\\'); // Consume escapes
387	advance(s);
388	}
389	if not consume(s, delim) {
390	return nil;
391	}
392	return tok(s, kind);
393	}
394
395	/// Scan string literal enclosed in double quotes.
396	fn scanString(s: *mut Scanner) -> Token {
397	if let tok = scanDelimited(s, '"', TokenKind::String) {
398	return tok;
399	}
400	return invalid(s.token, "unterminated string");
401	}
402
403	/// Scan character literal enclosed in single quotes.
404	fn scanChar(s: *mut Scanner) -> Token {
405	if let tok = scanDelimited(s, '\'', TokenKind::Char) {
406	return tok;
407	}
408	return invalid(s.token, "unterminated character");
409	}
410
411	/// Scan a keyword or an identifier.
412	fn keywordOrIdent(src: *[u8]) -> TokenKind {
413	let mut left: u32 = 0;
414	let mut right: u32 = KEYWORDS.len;
415
416	while left < right {
417	let mid = left + ((right - left) / 2);
418	let kw = KEYWORDS[mid];
419	let cmp = mem::cmp(src, kw.name);
420
421	match cmp {
422	case -1 => right = mid,
423	case 1 => left = mid + 1,
424	else => return kw.tok,
425	}
426	}
427	return TokenKind::Ident;
428	}
429
430	/// Scan an identifier, keyword, or label.
431	fn scanIdentifier(s: *mut Scanner) -> Token {
432	while let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' or ch == '#' {
433	advance(s);
434	}
435	let ident = &s.source[s.token..s.cursor];
436	let kind = keywordOrIdent(ident);
437
438	// Only intern actual identifiers, not keywords.
439	if kind == TokenKind::Ident {
440	return Token { kind, source: strings::intern(s.pool, ident), offset: s.token };
441	}
442	return tok(s, kind);
443	}
444
/// Emit `withEq` if the next byte is `=` (consuming it), otherwise `plain`.
///
/// Shared by every operator whose only longer form is `<op>=`.
fn tokOrEq(s: *mut Scanner, plain: TokenKind, withEq: TokenKind) -> Token {
    if consume(s, '=') {
        return tok(s, withEq);
    }
    return tok(s, plain);
}

/// Scan the next token.
///
/// Skips leading whitespace, records the token start in `s.token`, and
/// then dispatches on the first byte:
///
/// * end of input yields `TokenKind::Eof`;
/// * a digit is handed to `scanNumber`, a letter to `scanIdentifier`;
/// * punctuation is matched longest-first, so `<<=` wins over `<<`,
///   which wins over `<`;
/// * `-` or `+` immediately followed by a digit is scanned as a signed
///   number rather than as an operator;
/// * `@` followed by one or more letters yields an interned `AtIdent`;
/// * `_` followed by a letter, digit or `_` is scanned as an identifier,
///   otherwise it yields `Underscore`.
///
/// Any other byte produces an invalid token with the message
/// "unexpected character".
pub fn next(s: *mut Scanner) -> Token {
    skipWhitespace(s); // Skip any whitespace between tokens.
    s.token = s.cursor; // Token starts at current position.

    if isEof(s) {
        return tok(s, TokenKind::Eof);
    }
    // From here on the first byte of the token is already consumed.
    let c: u8 = advance(s);

    if isDigit(c) {
        return scanNumber(s);
    }
    if isAlpha(c) {
        return scanIdentifier(s);
    }
    match c {
        case '\'' => return scanChar(s),
        case '"' => return scanString(s),
        case '(' => return tok(s, TokenKind::LParen),
        case ')' => return tok(s, TokenKind::RParen),
        case '{' => return tok(s, TokenKind::LBrace),
        case '}' => return tok(s, TokenKind::RBrace),
        case '[' => return tok(s, TokenKind::LBracket),
        case ']' => return tok(s, TokenKind::RBracket),
        case ';' => return tok(s, TokenKind::Semicolon),
        case ',' => return tok(s, TokenKind::Comma),
        case '?' => return tok(s, TokenKind::Question),
        case '~' => return tok(s, TokenKind::Tilde),
        case '.' => {
            if consume(s, '.') {
                return tok(s, TokenKind::DotDot);
            }
            return tok(s, TokenKind::Dot);
        }
        case ':' => {
            if consume(s, ':') {
                return tok(s, TokenKind::ColonColon);
            }
            return tok(s, TokenKind::Colon);
        }
        case '-' => {
            if consume(s, '>') {
                return tok(s, TokenKind::Arrow);
            }
            if consume(s, '=') {
                return tok(s, TokenKind::MinusEqual);
            }
            // If followed by a digit, scan as a negative number.
            if let ch = current(s); isDigit(ch) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Minus);
        }
        case '+' => {
            if consume(s, '=') {
                return tok(s, TokenKind::PlusEqual);
            }
            // If followed by a digit, scan as an explicitly signed number.
            if let ch = current(s); isDigit(ch) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Plus);
        }
        case '/' => return tokOrEq(s, TokenKind::Slash, TokenKind::SlashEqual),
        case '*' => return tokOrEq(s, TokenKind::Star, TokenKind::StarEqual),
        case '%' => return tokOrEq(s, TokenKind::Percent, TokenKind::PercentEqual),
        case '&' => return tokOrEq(s, TokenKind::Amp, TokenKind::AmpEqual),
        case '|' => return tokOrEq(s, TokenKind::Pipe, TokenKind::PipeEqual),
        case '^' => return tokOrEq(s, TokenKind::Caret, TokenKind::CaretEqual),
        case '!' => return tokOrEq(s, TokenKind::Bang, TokenKind::BangEqual),
        case '=' => {
            if consume(s, '>') {
                return tok(s, TokenKind::FatArrow);
            }
            return tokOrEq(s, TokenKind::Equal, TokenKind::EqualEqual);
        }
        case '<' => {
            // `<<` and `<<=` must be tried before `<=` and `<`.
            if consume(s, '<') {
                return tokOrEq(s, TokenKind::LtLt, TokenKind::LtLtEqual);
            }
            return tokOrEq(s, TokenKind::Lt, TokenKind::LtEqual);
        }
        case '>' => {
            // `>>` and `>>=` must be tried before `>=` and `>`.
            if consume(s, '>') {
                return tokOrEq(s, TokenKind::GtGt, TokenKind::GtGtEqual);
            }
            return tokOrEq(s, TokenKind::Gt, TokenKind::GtEqual);
        }
        case '@' => {
            // Scan `@identifier` as a single token.
            while let ch = current(s); isAlpha(ch) {
                advance(s);
            }
            // Must have at least one character after `@`.
            if s.cursor - s.token <= 1 {
                return invalid(s.token, "expected identifier after `@`");
            }
            let name = &s.source[s.token..s.cursor];
            return Token {
                kind: TokenKind::AtIdent,
                source: strings::intern(s.pool, name),
                offset: s.token,
            };
        }
        case '_' => {
            if let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' {
                // This is part of an identifier like `_foo` or `__start`.
                return scanIdentifier(s);
            }
            return tok(s, TokenKind::Underscore);
        }
        else => return invalid(s.token, "unexpected character"),
    }
}
609
/// Translate a byte offset within `source` into a line/column location.
///
/// Line and column numbers both start at 1. Every `'\n'` before `offset`
/// begins a new line and resets the column; every other byte advances
/// the column by one. The resulting `Location` carries `sourceLoc`
/// unchanged.
///
/// Returns `nil` when `offset >= source.len`.
pub fn getLocation(sourceLoc: SourceLoc, source: *[u8], offset: u32) -> ?Location {
    if offset >= source.len {
        return nil;
    }
    let mut line: u16 = 1;
    let mut col: u16 = 1;

    // Walk only the bytes that precede `offset`.
    for b in &source[..offset] {
        if b == '\n' {
            line += 1;
            col = 1;
        } else {
            col += 1;
        }
    }
    return Location { source: sourceLoc, line, col };
}