lib/std/lang/scanner.rad 23.4 KiB raw
1
//! Lexical scanner for the Radiance programming language.
2
//!
3
//! This module implements a hand-written scanner that tokenizes Radiance
4
//! source code into a stream of tokens for consumption by the parser.
5
@test mod tests;
6
7
use std::mem;
8
use std::lang::strings;
9
10
/// Every kind of token the scanner can hand to the parser.
///
/// The variants cover punctuation, operators, keywords and literals, plus
/// two special markers for end of input and for invalid input.
pub union TokenKind {
    /// Emitted once the input has been exhausted.
    Eof,
    /// Emitted for input that does not form a valid token.
    Invalid,

    // Punctuation and operators.
    LParen,     // (
    RParen,     // )
    LBrace,     // {
    RBrace,     // }
    LBracket,   // [
    RBracket,   // ]
    Comma,      // ,
    Dot,        // .
    DotDot,     // ..
    Minus,      // -
    Plus,       // +
    Colon,      // :
    ColonColon, // ::
    Semicolon,  // ;
    Slash,      // /
    Star,       // *
    Percent,    // %
    Amp,        // &
    Pipe,       // |
    Caret,      // ^
    Tilde,      // ~
    Underscore, // _
    Question,   // ?
    Bang,       // !
    BangEqual,  // !=
    Equal,      // =
    EqualEqual, // ==
    Gt,         // >
    GtEqual,    // >=
    Lt,         // <
    LtEqual,    // <=
    LtLt,       // <<
    GtGt,       // >>
    Arrow,      // ->
    FatArrow,   // =>

    // Operators that combine an operation with assignment.
    PlusEqual,    // +=
    MinusEqual,   // -=
    StarEqual,    // *=
    SlashEqual,   // /=
    PercentEqual, // %=
    AmpEqual,     // &=
    PipeEqual,    // |=
    CaretEqual,   // ^=
    LtLtEqual,    // <<=
    GtGtEqual,    // >>=

    // Boolean operator keywords.
    Not, // not
    And, // and
    Or,  // or

    /// A label, eg. `input:`
    Label,
    /// A plain identifier, eg. `fnord`
    Ident,
    /// An identifier prefixed with `@`, eg. `@default`
    AtIdent,
    /// The `log` keyword.
    Log,

    // Literal values.
    String,     // "fnord"
    Char,       // 'f'
    Number,     // 42
    True,       // true
    False,      // false
    Nil,        // nil
    Undefined,  // undefined

    // Control flow keywords.
    If,       // if
    Else,     // else
    Return,   // return
    Break,    // break
    Continue, // continue
    While,    // while
    For,      // for
    In,       // in
    Loop,     // loop
    Match,    // match
    Case,     // case
    Try,      // try
    Catch,    // catch
    Throw,    // throw
    Throws,   // throws
    Panic,    // panic
    Assert,   // assert

    // Variable binding keywords.
    Let,   // let
    Mut,   // mut
    Const, // const
    Align, // align

    // Module keywords.
    Mod,   // mod
    Use,   // use
    Super, // super

    // Attributes of types and functions.
    Pub,    // pub
    Extern, // extern
    Static, // static

    // Trait keywords.
    Trait,    // trait
    Instance, // instance

    // Type keywords.
    I8,     // i8
    I16,    // i16
    I32,    // i32
    I64,    // i64
    U8,     // u8
    U16,    // u16
    U32,    // u32
    U64,    // u64
    Void,   // void
    Opaque, // opaque
    Fn,     // fn
    Bool,   // bool
    Union,  // union
    Record, // record
    As      // as
}
111
112
/// Return the name of a token kind as a string.
///
/// The string is the variant's identifier, eg. `TokenKind::LParen` yields
/// `"LParen"`. The arms follow the declaration order of `TokenKind`.
pub fn tokenKindToString(kind: TokenKind) -> *[u8] {
    match kind {
        // Special tokens.
        case TokenKind::Eof => return "Eof",
        case TokenKind::Invalid => return "Invalid",

        // Punctuation and operators.
        case TokenKind::LParen => return "LParen",
        case TokenKind::RParen => return "RParen",
        case TokenKind::LBrace => return "LBrace",
        case TokenKind::RBrace => return "RBrace",
        case TokenKind::LBracket => return "LBracket",
        case TokenKind::RBracket => return "RBracket",
        case TokenKind::Comma => return "Comma",
        case TokenKind::Dot => return "Dot",
        case TokenKind::DotDot => return "DotDot",
        case TokenKind::Minus => return "Minus",
        case TokenKind::Plus => return "Plus",
        case TokenKind::Colon => return "Colon",
        case TokenKind::ColonColon => return "ColonColon",
        case TokenKind::Semicolon => return "Semicolon",
        case TokenKind::Slash => return "Slash",
        case TokenKind::Star => return "Star",
        case TokenKind::Percent => return "Percent",
        case TokenKind::Amp => return "Amp",
        case TokenKind::Pipe => return "Pipe",
        case TokenKind::Caret => return "Caret",
        case TokenKind::Tilde => return "Tilde",
        case TokenKind::Underscore => return "Underscore",
        case TokenKind::Question => return "Question",
        case TokenKind::Bang => return "Bang",
        case TokenKind::BangEqual => return "BangEqual",
        case TokenKind::Equal => return "Equal",
        case TokenKind::EqualEqual => return "EqualEqual",
        case TokenKind::Gt => return "Gt",
        case TokenKind::GtEqual => return "GtEqual",
        case TokenKind::Lt => return "Lt",
        case TokenKind::LtEqual => return "LtEqual",
        case TokenKind::LtLt => return "LtLt",
        case TokenKind::GtGt => return "GtGt",
        case TokenKind::Arrow => return "Arrow",
        case TokenKind::FatArrow => return "FatArrow",

        // Compound assignment operators.
        case TokenKind::PlusEqual => return "PlusEqual",
        case TokenKind::MinusEqual => return "MinusEqual",
        case TokenKind::StarEqual => return "StarEqual",
        case TokenKind::SlashEqual => return "SlashEqual",
        case TokenKind::PercentEqual => return "PercentEqual",
        case TokenKind::AmpEqual => return "AmpEqual",
        case TokenKind::PipeEqual => return "PipeEqual",
        case TokenKind::CaretEqual => return "CaretEqual",
        case TokenKind::LtLtEqual => return "LtLtEqual",
        case TokenKind::GtGtEqual => return "GtGtEqual",

        // Boolean operators.
        case TokenKind::Not => return "Not",
        case TokenKind::And => return "And",
        case TokenKind::Or => return "Or",

        // Labels, identifiers and `log`.
        case TokenKind::Label => return "Label",
        case TokenKind::Ident => return "Ident",
        case TokenKind::AtIdent => return "AtIdent",
        case TokenKind::Log => return "Log",

        // Literals.
        case TokenKind::String => return "String",
        case TokenKind::Char => return "Char",
        case TokenKind::Number => return "Number",
        case TokenKind::True => return "True",
        case TokenKind::False => return "False",
        case TokenKind::Nil => return "Nil",
        case TokenKind::Undefined => return "Undefined",

        // Control flow.
        case TokenKind::If => return "If",
        case TokenKind::Else => return "Else",
        case TokenKind::Return => return "Return",
        case TokenKind::Break => return "Break",
        case TokenKind::Continue => return "Continue",
        case TokenKind::While => return "While",
        case TokenKind::For => return "For",
        case TokenKind::In => return "In",
        case TokenKind::Loop => return "Loop",
        case TokenKind::Match => return "Match",
        case TokenKind::Case => return "Case",
        case TokenKind::Try => return "Try",
        case TokenKind::Catch => return "Catch",
        case TokenKind::Throw => return "Throw",
        case TokenKind::Throws => return "Throws",
        case TokenKind::Panic => return "Panic",
        case TokenKind::Assert => return "Assert",

        // Variable bindings.
        case TokenKind::Let => return "Let",
        case TokenKind::Mut => return "Mut",
        case TokenKind::Const => return "Const",
        case TokenKind::Align => return "Align",

        // Modules.
        case TokenKind::Mod => return "Mod",
        case TokenKind::Use => return "Use",
        case TokenKind::Super => return "Super",

        // Type or function attributes.
        case TokenKind::Pub => return "Pub",
        case TokenKind::Extern => return "Extern",
        case TokenKind::Static => return "Static",

        // Traits.
        case TokenKind::Trait => return "Trait",
        case TokenKind::Instance => return "Instance",

        // Types.
        case TokenKind::I8 => return "I8",
        case TokenKind::I16 => return "I16",
        case TokenKind::I32 => return "I32",
        case TokenKind::I64 => return "I64",
        case TokenKind::U8 => return "U8",
        case TokenKind::U16 => return "U16",
        case TokenKind::U32 => return "U32",
        case TokenKind::U64 => return "U64",
        case TokenKind::Void => return "Void",
        case TokenKind::Opaque => return "Opaque",
        case TokenKind::Fn => return "Fn",
        case TokenKind::Bool => return "Bool",
        case TokenKind::Union => return "Union",
        case TokenKind::Record => return "Record",
        case TokenKind::As => return "As",
    }
}
222
223
/// Pairs a reserved word with the token kind it scans to.
record Keyword {
    /// Spelling of the keyword as it appears in source.
    name: *[u8],
    /// Token kind produced for this keyword.
    tok: TokenKind,
}
230
231
/// All reserved words, sorted by `name`.
///
/// The table is looked up by binary search, so any new entry must be
/// inserted at its sorted position and the array length updated.
const KEYWORDS: [Keyword; 52] = [
    { name: "align",     tok: TokenKind::Align },
    { name: "and",       tok: TokenKind::And },
    { name: "as",        tok: TokenKind::As },
    { name: "assert",    tok: TokenKind::Assert },
    { name: "bool",      tok: TokenKind::Bool },
    { name: "break",     tok: TokenKind::Break },
    { name: "case",      tok: TokenKind::Case },
    { name: "catch",     tok: TokenKind::Catch },
    { name: "const",     tok: TokenKind::Const },
    { name: "continue",  tok: TokenKind::Continue },
    { name: "else",      tok: TokenKind::Else },
    { name: "extern",    tok: TokenKind::Extern },
    { name: "false",     tok: TokenKind::False },
    { name: "fn",        tok: TokenKind::Fn },
    { name: "for",       tok: TokenKind::For },
    { name: "i16",       tok: TokenKind::I16 },
    { name: "i32",       tok: TokenKind::I32 },
    { name: "i64",       tok: TokenKind::I64 },
    { name: "i8",        tok: TokenKind::I8 },
    { name: "if",        tok: TokenKind::If },
    { name: "in",        tok: TokenKind::In },
    { name: "instance",  tok: TokenKind::Instance },
    { name: "let",       tok: TokenKind::Let },
    { name: "log",       tok: TokenKind::Log },
    { name: "loop",      tok: TokenKind::Loop },
    { name: "match",     tok: TokenKind::Match },
    { name: "mod",       tok: TokenKind::Mod },
    { name: "mut",       tok: TokenKind::Mut },
    { name: "nil",       tok: TokenKind::Nil },
    { name: "not",       tok: TokenKind::Not },
    { name: "opaque",    tok: TokenKind::Opaque },
    { name: "or",        tok: TokenKind::Or },
    { name: "panic",     tok: TokenKind::Panic },
    { name: "pub",       tok: TokenKind::Pub },
    { name: "record",    tok: TokenKind::Record },
    { name: "return",    tok: TokenKind::Return },
    { name: "static",    tok: TokenKind::Static },
    { name: "super",     tok: TokenKind::Super },
    { name: "throw",     tok: TokenKind::Throw },
    { name: "throws",    tok: TokenKind::Throws },
    { name: "trait",     tok: TokenKind::Trait },
    { name: "true",      tok: TokenKind::True },
    { name: "try",       tok: TokenKind::Try },
    { name: "u16",       tok: TokenKind::U16 },
    { name: "u32",       tok: TokenKind::U32 },
    { name: "u64",       tok: TokenKind::U64 },
    { name: "u8",        tok: TokenKind::U8 },
    { name: "undefined", tok: TokenKind::Undefined },
    { name: "union",     tok: TokenKind::Union },
    { name: "use",       tok: TokenKind::Use },
    { name: "void",      tok: TokenKind::Void },
    { name: "while",     tok: TokenKind::While },
];
286
287
/// Origin of a piece of source code.
pub union SourceLoc {
    /// The source was loaded from a file; the payload is its path.
    File(*[u8]),
    /// The source was given as an inline string and has no file path.
    String,
}
294
295
/// State of the scanner while tokenizing Radiance source code.
///
/// Holds a reference to the source buffer along with the positions of the
/// token being scanned and of the character being examined.
pub record Scanner {
    /// Where the source being scanned came from.
    sourceLoc: SourceLoc,
    /// The source buffer being scanned.
    source: *[u8],
    /// Offset into `source` at which the current token starts.
    token: u32,
    /// Offset into `source` of the character currently being scanned.
    cursor: u32,
    /// Pool in which strings are interned.
    pool: *mut strings::Pool,
}
310
311
/// A single lexical element extracted from the source.
///
/// Carries the kind of the token, its text, and the byte offset used to
/// locate it when reporting errors.
pub record Token {
    /// Kind of the token.
    kind: TokenKind,
    /// Text of the token.
    source: *[u8],
    /// Byte offset of `source` within the input buffer.
    offset: u32,
}
323
324
/// A position in source code expressed as a line and a column.
///
/// Intended for error reporting and debugging.
pub record Location {
    /// Where the source came from.
    source: SourceLoc,
    /// Line number of the position.
    line: u16,
    /// Column number of the position.
    col: u16,
}
335
336
/// Build a scanner positioned at the start of `source`.
///
/// Before the scanner is returned, the names of the built-in functions,
/// attributes and slice methods are interned into `pool`.
pub fn scanner(sourceLoc: SourceLoc, source: *[u8], pool: *mut strings::Pool) -> Scanner {
    // Built-in functions and attributes.
    strings::intern(pool, "@sizeOf");
    strings::intern(pool, "@alignOf");
    strings::intern(pool, "@sliceOf");
    strings::intern(pool, "@default");
    strings::intern(pool, "@intrinsic");
    strings::intern(pool, "@test");

    // Built-in slice methods.
    strings::intern(pool, "append");
    strings::intern(pool, "delete");

    // Both the token start and the cursor begin at offset zero.
    return Scanner {
        sourceLoc: sourceLoc,
        source: source,
        token: 0,
        cursor: 0,
        pool: pool,
    };
}
351
352
/// Tell whether the cursor has reached or passed the end of the source.
pub fn isEof(s: *Scanner) -> bool {
    return s.source.len <= s.cursor;
}
356
357
/// Return the character under the cursor, or `nil` at end of input.
pub fn current(s: *Scanner) -> ?u8 {
    if s.cursor < s.source.len {
        return s.source[s.cursor];
    }
    return nil;
}
364
365
/// Return the character one past the cursor without moving the scanner,
/// or `nil` if there is no such character.
fn peek(s: *Scanner) -> ?u8 {
    let ahead = s.cursor + 1;
    if ahead < s.source.len {
        return s.source[ahead];
    }
    return nil;
}
372
373
/// Move the cursor forward by one and return the character it moved over.
///
/// No end-of-input check is made here; callers are expected to have
/// established that a character is available.
fn advance(s: *mut Scanner) -> u8 {
    let at = s.cursor;
    s.cursor = at + 1;
    return s.source[at];
}
378
379
/// Step over the current character if it equals `expected`.
///
/// Returns `true` when a character was consumed, and `false` when the
/// current character differs or the input is exhausted.
fn consume(s: *mut Scanner, expected: u8) -> bool {
    if let c = current(s) {
        if c == expected {
            // Same effect as `advance`, whose result is not needed here.
            s.cursor += 1;
            return true;
        }
    }
    return false;
}
387
388
/// Build a token of the given kind spanning from the token start up to,
/// but not including, the cursor.
fn tok(s: *Scanner, kind: TokenKind) -> Token {
    let text = &s.source[s.token..s.cursor];
    return Token { kind: kind, source: text, offset: s.token };
}
392
393
/// Build an `Invalid` token at `offset`.
///
/// The token's `source` field holds `message` rather than text taken
/// from the input.
pub fn invalid(offset: u32, message: *[u8]) -> Token {
    return Token {
        kind: TokenKind::Invalid,
        source: message,
        offset: offset,
    };
}
397
398
/// Advance up to, but not past, the next newline or the end of input.
fn skipLineComment(s: *mut Scanner) {
    while let c = current(s); c != '\n' {
        advance(s);
    }
}

/// Move the cursor past spaces, tabs, carriage returns, newlines and
/// `//` line comments, stopping at the first character that is none of
/// these or at the end of input.
fn skipWhitespace(s: *mut Scanner) {
    while let ch = current(s) {
        match ch {
            case ' ', '\n', '\r', '\t' => advance(s),
            case '/' => {
                // Only a second `/` makes this a comment; a lone `/` is
                // left in place for the caller to scan as a token.
                if let following = peek(s); following == '/' {
                    skipLineComment(s);
                } else {
                    return;
                }
            }
            else => return,
        }
    }
}
416
417
/// Tell whether `c` is one of the ASCII digits `0` through `9`.
fn isDigit(c: u8) -> bool {
    if c < '0' {
        return false;
    }
    return c <= '9';
}
421
422
/// Tell whether `c` is a hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`.
fn isHexDigit(c: u8) -> bool {
    if c >= '0' and c <= '9' {
        return true;
    }
    if c >= 'a' and c <= 'f' {
        return true;
    }
    return c >= 'A' and c <= 'F';
}
428
429
/// Tell whether `c` is a binary digit, ie. `0` or `1`.
fn isBinDigit(c: u8) -> bool {
    if c == '0' {
        return true;
    }
    return c == '1';
}
433
434
/// Tell whether `c` is an ASCII letter, lowercase or uppercase.
fn isAlpha(c: u8) -> bool {
    if c >= 'a' and c <= 'z' {
        return true;
    }
    return c >= 'A' and c <= 'Z';
}
439
440
/// Tell whether `c` is printable ASCII, ie. in the range from space to `~`.
fn isPrint(c: u8) -> bool {
    if c < ' ' {
        return false;
    }
    return c <= '~';
}
444
445
/// Scan a numeric literal: decimal (optionally with a fractional part),
/// hexadecimal (`0x` or `0X` prefix) or binary (`0b` or `0B` prefix).
///
/// The first character of the literal, either a digit or a sign, must
/// already have been consumed. If it is a sign, the character after it is
/// consumed here as the first digit; the caller is responsible for having
/// checked that a digit follows the sign.
///
/// Returns an `Invalid` token when a radix prefix is not followed by at
/// least one digit of that radix. This includes the case where the input
/// ends right after the prefix.
fn scanNumber(s: *mut Scanner) -> Token {
    let first = s.source[s.cursor - 1];
    if first == '-' or first == '+' {
        advance(s);
    }
    // The last consumed character is now the first digit of the literal.
    if s.source[s.cursor - 1] == '0' {
        // Check for hex literal (`0x` or `0X` prefix).
        if let ch = current(s); ch == 'x' or ch == 'X' {
            advance(s);
            let digits = s.cursor;
            while let ch = current(s); isHexDigit(ch) {
                advance(s);
            }
            // Must have at least one hex digit after `0x`. Comparing the
            // cursor also catches end of input, where there is no
            // character to inspect.
            if s.cursor == digits {
                return invalid(s.token, "invalid hex literal");
            }
            return tok(s, TokenKind::Number);
        }
        // Check for binary literal (`0b` or `0B` prefix).
        if let ch = current(s); ch == 'b' or ch == 'B' {
            advance(s);
            let digits = s.cursor;
            while let ch = current(s); isBinDigit(ch) {
                advance(s);
            }
            // Must have at least one binary digit after `0b`, end of
            // input included.
            if s.cursor == digits {
                return invalid(s.token, "invalid binary literal");
            }
            return tok(s, TokenKind::Number);
        }
    }

    // Regular decimal number.
    while let ch = current(s); isDigit(ch) {
        advance(s);
    }

    // Look for decimal part. The `.` is only consumed when a digit follows
    // it, so that `1..2` and `1.foo` leave the dot for the next token.
    if let ch = current(s); ch == '.' {
        if let p = peek(s); isDigit(p) {
            advance(s); // Consume the "."
            while let ch = current(s); isDigit(ch) {
                advance(s);
            }
        }
    }
    return tok(s, TokenKind::Number);
}
494
495
/// Scan the body of a literal that ends at `delim`, such as a string or a
/// character literal. The opening delimiter must already be consumed.
///
/// A backslash causes the character after it to be consumed without being
/// compared to `delim`, so escaped delimiters do not end the literal.
///
/// Returns a token of the given `kind` on success, an `Invalid` token if a
/// non-printable character is found, and `nil` if the input ends before
/// the closing delimiter.
fn scanDelimited(s: *mut Scanner, delim: u8, kind: TokenKind) -> ?Token {
    while let ch = current(s); ch != delim {
        if not isPrint(ch) {
            return invalid(s.token, "invalid character");
        }
        if consume(s, '\\') { // Consume escapes
            // A backslash that ends the input has nothing to escape.
            // Advancing again would read past the end of the buffer, so
            // report the literal as unterminated instead.
            if isEof(s) {
                return nil;
            }
        }
        advance(s);
    }
    if not consume(s, delim) {
        return nil;
    }
    return tok(s, kind);
}
508
509
/// Scan a string literal whose opening double quote has been consumed.
///
/// Yields an `Invalid` token if the closing quote is never found.
fn scanString(s: *mut Scanner) -> Token {
    if let scanned = scanDelimited(s, '"', TokenKind::String) {
        return scanned;
    }
    return invalid(s.token, "unterminated string");
}
516
517
/// Scan a character literal whose opening single quote has been consumed.
///
/// Yields an `Invalid` token if the closing quote is never found.
fn scanChar(s: *mut Scanner) -> Token {
    if let scanned = scanDelimited(s, '\'', TokenKind::Char) {
        return scanned;
    }
    return invalid(s.token, "unterminated character");
}
524
525
/// Classify identifier text as a keyword or a plain identifier.
///
/// Binary-searches `KEYWORDS` for `src` and returns the matching keyword's
/// token kind, or `TokenKind::Ident` if `src` is not a reserved word.
fn keywordOrIdent(src: *[u8]) -> TokenKind {
    // Search the half-open range `lo..hi`.
    let mut lo: u32 = 0;
    let mut hi: u32 = KEYWORDS.len;

    while lo < hi {
        let mid = lo + ((hi - lo) / 2);
        let entry = KEYWORDS[mid];

        match mem::cmp(src, entry.name) {
            // `src` sorts before the entry: continue in the lower half.
            case -1 => hi = mid,
            // `src` sorts after the entry: continue in the upper half.
            case 1 => lo = mid + 1,
            else => return entry.tok,
        }
    }
    return TokenKind::Ident;
}
543
544
/// Scan the rest of an identifier or keyword whose first character has
/// already been consumed.
///
/// Letters, digits, `_` and `#` are accepted as continuation characters.
/// Plain identifiers have their text interned in the scanner's pool;
/// keywords refer to the source buffer directly.
fn scanIdentifier(s: *mut Scanner) -> Token {
    while let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' or ch == '#' {
        advance(s);
    }
    let text = &s.source[s.token..s.cursor];

    match keywordOrIdent(text) {
        case TokenKind::Ident => {
            // Only actual identifiers are interned, not keywords.
            return Token {
                kind: TokenKind::Ident,
                source: strings::intern(s.pool, text),
                offset: s.token,
            };
        }
        else => return tok(s, keywordOrIdent(text)),
    }
}
558
559
/// Produce a `taken` token if the current character is `expected`,
/// consuming it, and a `fallback` token otherwise.
fn tokEither(s: *mut Scanner, expected: u8, taken: TokenKind, fallback: TokenKind) -> Token {
    if consume(s, expected) {
        return tok(s, taken);
    }
    return tok(s, fallback);
}

/// Scan and return the next token from the input.
///
/// Leading whitespace and line comments are skipped first. An `Eof` token
/// is returned once the input is exhausted, and an `Invalid` token for a
/// character that cannot start any token.
pub fn next(s: *mut Scanner) -> Token {
    // The token starts where the skipped whitespace ends.
    skipWhitespace(s);
    s.token = s.cursor;

    if isEof(s) {
        return tok(s, TokenKind::Eof);
    }
    let c: u8 = advance(s);

    if isDigit(c) {
        return scanNumber(s);
    }
    if isAlpha(c) {
        return scanIdentifier(s);
    }
    match c {
        // Quoted literals.
        case '"'  => return scanString(s),
        case '\'' => return scanChar(s),

        // Tokens that are always a single character.
        case '('  => return tok(s, TokenKind::LParen),
        case ')'  => return tok(s, TokenKind::RParen),
        case '{'  => return tok(s, TokenKind::LBrace),
        case '}'  => return tok(s, TokenKind::RBrace),
        case '['  => return tok(s, TokenKind::LBracket),
        case ']'  => return tok(s, TokenKind::RBracket),
        case ';'  => return tok(s, TokenKind::Semicolon),
        case ','  => return tok(s, TokenKind::Comma),
        case '?'  => return tok(s, TokenKind::Question),
        case '~'  => return tok(s, TokenKind::Tilde),

        // Tokens with exactly one two-character form.
        case '.'  => return tokEither(s, '.', TokenKind::DotDot, TokenKind::Dot),
        case ':'  => return tokEither(s, ':', TokenKind::ColonColon, TokenKind::Colon),
        case '/'  => return tokEither(s, '=', TokenKind::SlashEqual, TokenKind::Slash),
        case '*'  => return tokEither(s, '=', TokenKind::StarEqual, TokenKind::Star),
        case '%'  => return tokEither(s, '=', TokenKind::PercentEqual, TokenKind::Percent),
        case '&'  => return tokEither(s, '=', TokenKind::AmpEqual, TokenKind::Amp),
        case '|'  => return tokEither(s, '=', TokenKind::PipeEqual, TokenKind::Pipe),
        case '^'  => return tokEither(s, '=', TokenKind::CaretEqual, TokenKind::Caret),
        case '!'  => return tokEither(s, '=', TokenKind::BangEqual, TokenKind::Bang),

        // Signs: an operator, or the start of a signed number.
        case '-'  => {
            if consume(s, '>') {
                return tok(s, TokenKind::Arrow);
            }
            if consume(s, '=') {
                return tok(s, TokenKind::MinusEqual);
            }
            // A digit right after the sign makes this a negative number.
            if let digit = current(s); isDigit(digit) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Minus);
        }
        case '+'  => {
            if consume(s, '=') {
                return tok(s, TokenKind::PlusEqual);
            }
            if let digit = current(s); isDigit(digit) {
                return scanNumber(s);
            }
            return tok(s, TokenKind::Plus);
        }

        // `=`, `=>` and `==`.
        case '='  => {
            if consume(s, '>') {
                return tok(s, TokenKind::FatArrow);
            }
            return tokEither(s, '=', TokenKind::EqualEqual, TokenKind::Equal);
        }

        // Comparisons and shifts, with their assignment forms.
        case '<'  => {
            if consume(s, '<') {
                return tokEither(s, '=', TokenKind::LtLtEqual, TokenKind::LtLt);
            }
            return tokEither(s, '=', TokenKind::LtEqual, TokenKind::Lt);
        }
        case '>'  => {
            if consume(s, '>') {
                return tokEither(s, '=', TokenKind::GtGtEqual, TokenKind::GtGt);
            }
            return tokEither(s, '=', TokenKind::GtEqual, TokenKind::Gt);
        }

        // `@identifier` is scanned as a single token.
        case '@'  => {
            while let ch = current(s); isAlpha(ch) {
                advance(s);
            }
            // A length of one means nothing followed the `@`.
            let length = s.cursor - s.token;
            if length <= 1 {
                return invalid(s.token, "expected identifier after `@`");
            }
            let name = &s.source[s.token..s.cursor];
            return Token {
                kind: TokenKind::AtIdent,
                source: strings::intern(s.pool, name),
                offset: s.token,
            };
        }

        // A lone `_`, or the start of an identifier like `_foo` or `__start`.
        case '_'  => {
            if let ch = current(s); isAlpha(ch) or isDigit(ch) or ch == '_' {
                return scanIdentifier(s);
            }
            return tok(s, TokenKind::Underscore);
        }

        else => return invalid(s.token, "unexpected character"),
    }
}
723
724
/// Translate a byte offset within `source` into a line and column.
///
/// Lines and columns both start at 1. Each `'\n'` before `offset` starts a
/// new line and resets the column. Returns `nil` when `offset` is at or
/// beyond the end of `source`.
pub fn getLocation(sourceLoc: SourceLoc, source: *[u8], offset: u32) -> ?Location {
    if offset >= source.len {
        return nil;
    }
    let mut line: u16 = 1;
    let mut col: u16 = 1;

    // Walk every character that precedes the offset.
    for ch in &source[..offset] {
        if ch == '\n' {
            line += 1;
            col = 1;
        } else {
            col += 1;
        }
    }
    return Location { source: sourceLoc, line, col };
}