lib/std/lang/strings.rad 2.2 KiB raw
1
//! String interning for efficient identifier comparison.
2
//!
3
//! This module provides a hash-based string pool that enables fast pointer-based
4
//! equality checks for identifiers and module names.
5
//!
6
//! Instead of comparing string contents repeatedly during symbol lookup,
7
//! we intern strings once and compare pointers.
8
9
use std::mem;
10
use std::collections::dict;
11
12
/// Table size.
13
const TABLE_SIZE: u32 = 8192;
14
15
/// String interning pool using open-addressed hash table.
16
///
17
/// Each unique string content is stored only once, allowing pointer equality
18
/// to be used instead of content comparison for symbol lookups and module names.
19
pub record Pool {
20
    /// Hash table slots.
21
    table: [*[u8]; TABLE_SIZE],
22
    /// Number of strings currently in the pool.
23
    count: u32,
24
}
25
26
/// Lookup result.
27
union Lookup {
28
    /// Found.
29
    Found(*[u8]),
30
    /// Not found, contains empty slot index.
31
    Empty(u32),
32
}
33
34
/// Look up a string, returning either the found entry or the empty slot index.
35
fn lookup(sp: *Pool, str: *[u8]) -> Lookup {
36
    let mut idx = dict::hash(str) & (TABLE_SIZE - 1);
37
38
    loop {
39
        let entry = sp.table[idx];
40
        if entry.len > 0 {
41
            if mem::eq(entry, str) {
42
                return Lookup::Found(entry);
43
            }
44
            idx = (idx + 1) & (TABLE_SIZE - 1);
45
        } else {
46
            return Lookup::Empty(idx);
47
        }
48
    }
49
}
50
51
/// Look up a string in the pool.
52
/// Returns the canonical interned pointer if the string exists.
53
pub fn find(sp: *Pool, str: *[u8]) -> ?*[u8] {
54
    match lookup(sp, str) {
55
        case Lookup::Found(entry) => return entry,
56
        case Lookup::Empty(_) => return nil,
57
    }
58
}
59
60
/// Intern a string, returning a canonical pointer for equality comparison.
61
///
62
/// If the string content already exists in the pool, returns the existing pointer.
63
/// Otherwise, adds the string pointer to the pool and returns it.
64
pub fn intern(sp: *mut Pool, str: *[u8]) -> *[u8] {
65
    match lookup(sp, str) {
66
        case Lookup::Found(entry) => return entry,
67
        case Lookup::Empty(idx) => {
68
            assert sp.count < TABLE_SIZE / 2, "intern: string pool is full";
69
            sp.table[idx] = str;
70
            sp.count += 1;
71
72
            return str;
73
        }
74
    }
75
}