compiler/
lib/
examples/
std/
arch/
collections/
lang/
alloc/
ast/
gen/
il/
module/
parser/
resolver/
scanner/
alloc.rad
4.2 KiB
ast.rad
22.4 KiB
gen.rad
489 B
il.rad
15.1 KiB
lower.rad
259.5 KiB
module.rad
13.4 KiB
package.rad
1.2 KiB
parser.rad
78.5 KiB
resolver.rad
244.3 KiB
scanner.rad
18.1 KiB
sexpr.rad
6.3 KiB
strings.rad
2.2 KiB
sys/
arch.rad
65 B
collections.rad
36 B
fmt.rad
3.8 KiB
intrinsics.rad
399 B
io.rad
1.2 KiB
lang.rad
222 B
mem.rad
2.1 KiB
sys.rad
167 B
testing.rad
2.3 KiB
tests.rad
11.6 KiB
vec.rad
3.1 KiB
std.rad
231 B
scripts/
seed/
test/
vim/
.gitignore
353 B
.gitsigners
112 B
LICENSE
1.1 KiB
Makefile
3.0 KiB
README
2.5 KiB
std.lib
1.0 KiB
std.lib.test
252 B
lib/std/lang/strings.rad
raw
| 1 | //! String interning for efficient identifier comparison. |
| 2 | //! |
| 3 | //! This module provides a hash-based string pool that enables fast pointer-based |
| 4 | //! equality checks for identifiers and module names. |
| 5 | //! |
| 6 | //! Instead of comparing string contents repeatedly during symbol lookup, |
| 7 | //! we intern strings once and compare pointers. |
| 8 | |
| 9 | use std::mem; |
| 10 | use std::collections::dict; |
| 11 | |
| 12 | /// Table size. |
| 13 | const TABLE_SIZE: u32 = 8192; |
| 14 | |
| 15 | /// String interning pool using open-addressed hash table. |
| 16 | /// |
| 17 | /// Each unique string content is stored only once, allowing pointer equality |
| 18 | /// to be used instead of content comparison for symbol lookups and module names. |
| 19 | pub record Pool { |
| 20 | /// Hash table slots. |
| 21 | table: [*[u8]; TABLE_SIZE], |
| 22 | /// Number of strings currently in the pool. |
| 23 | count: u32, |
| 24 | } |
| 25 | |
| 26 | /// Lookup result. |
| 27 | union Lookup { |
| 28 | /// Found. |
| 29 | Found(*[u8]), |
| 30 | /// Not found, contains empty slot index. |
| 31 | Empty(u32), |
| 32 | } |
| 33 | |
| 34 | /// Look up a string, returning either the found entry or the empty slot index. |
| 35 | fn lookup(sp: *Pool, str: *[u8]) -> Lookup { |
| 36 | let mut idx = dict::hash(str) & (TABLE_SIZE - 1); |
| 37 | |
| 38 | loop { |
| 39 | let entry = sp.table[idx]; |
| 40 | if entry.len > 0 { |
| 41 | if mem::eq(entry, str) { |
| 42 | return Lookup::Found(entry); |
| 43 | } |
| 44 | idx = (idx + 1) & (TABLE_SIZE - 1); |
| 45 | } else { |
| 46 | return Lookup::Empty(idx); |
| 47 | } |
| 48 | } |
| 49 | } |
| 50 | |
| 51 | /// Look up a string in the pool. |
| 52 | /// Returns the canonical interned pointer if the string exists. |
| 53 | pub fn find(sp: *Pool, str: *[u8]) -> ?*[u8] { |
| 54 | match lookup(sp, str) { |
| 55 | case Lookup::Found(entry) => return entry, |
| 56 | case Lookup::Empty(_) => return nil, |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | /// Intern a string, returning a canonical pointer for equality comparison. |
| 61 | /// |
| 62 | /// If the string content already exists in the pool, returns the existing pointer. |
| 63 | /// Otherwise, adds the string pointer to the pool and returns it. |
| 64 | pub fn intern(sp: *mut Pool, str: *[u8]) -> *[u8] { |
| 65 | match lookup(sp, str) { |
| 66 | case Lookup::Found(entry) => return entry, |
| 67 | case Lookup::Empty(idx) => { |
| 68 | assert sp.count < TABLE_SIZE / 2, "intern: string pool is full"; |
| 69 | sp.table[idx] = str; |
| 70 | sp.count += 1; |
| 71 | |
| 72 | return str; |
| 73 | } |
| 74 | } |
| 75 | } |