Optimize bitset iterator with de Bruijn
0e8a1cce5328a6b86a7f39d47732328d76d6a97007b6c948408831ce32d1b642
1 parent
28a18c72
lib/std/lang/gen/bitset.rad
+39 -21
| 170 | 170 | } |
|
| 171 | 171 | } |
|
| 172 | 172 | ||
| 173 | 173 | /// Iterator state for iterating set bits. |
|
| 174 | 174 | pub record BitIter { |
|
| 175 | + | /// Bitset being iterated. |
|
| 175 | 176 | bs: *Bitset, |
|
| 177 | + | /// Current word index. |
|
| 176 | 178 | wordIdx: u32, |
|
| 177 | - | bitIdx: u32, |
|
| 179 | + | /// Remaining bits in the current word (visited bits cleared). |
|
| 180 | + | remaining: u32, |
|
| 178 | 181 | } |
|
| 179 | 182 | ||
| 180 | 183 | /// Create an iterator over set bits. |
|
| 181 | 184 | pub fn iter(bs: *Bitset) -> BitIter { |
|
| 182 | - | return BitIter { bs, wordIdx: 0, bitIdx: 0 }; |
|
| 185 | + | let remaining = bs.bits[0] if bs.len > 0 else 0; |
|
| 186 | + | return BitIter { bs, wordIdx: 0, remaining }; |
|
| 183 | 187 | } |
|
| 184 | 188 | ||
| 185 | 189 | /// Get the next set bit, or nil if none remain. |
|
| 186 | 190 | pub fn iterNext(it: *mut BitIter) -> ?u32 { |
|
| 187 | 191 | let numWords = wordsFor(it.bs.len); |
|
| 188 | - | while it.wordIdx < numWords { |
|
| 189 | - | let word = it.bs.bits[it.wordIdx]; |
|
| 190 | - | // Skip empty words entirely. |
|
| 191 | - | if word == 0 and it.bitIdx == 0 { |
|
| 192 | - | it.wordIdx += 1; |
|
| 193 | - | } else { |
|
| 194 | - | while it.bitIdx < 32 { |
|
| 195 | - | let n = it.wordIdx * 32 + it.bitIdx; |
|
| 196 | - | if n >= it.bs.len { |
|
| 197 | - | return nil; |
|
| 198 | - | } |
|
| 199 | - | it.bitIdx += 1; |
|
| 200 | - | if (word & (1 << (it.bitIdx - 1))) != 0 { |
|
| 201 | - | return n; |
|
| 202 | - | } |
|
| 203 | - | } |
|
| 204 | - | it.wordIdx += 1; |
|
| 205 | - | it.bitIdx = 0; |
|
| 192 | + | ||
| 193 | + | // Skip to next non-zero word. |
|
| 194 | + | while it.remaining == 0 { |
|
| 195 | + | it.wordIdx += 1; |
|
| 196 | + | if it.wordIdx >= numWords { |
|
| 197 | + | return nil; |
|
| 206 | 198 | } |
|
| 199 | + | it.remaining = it.bs.bits[it.wordIdx]; |
|
| 200 | + | } |
|
| 201 | + | // Find lowest set bit position using de Bruijn sequence. |
|
| 202 | + | let b = ctz(it.remaining); |
|
| 203 | + | let n = it.wordIdx * 32 + b; |
|
| 204 | + | if n >= it.bs.len { |
|
| 205 | + | return nil; |
|
| 207 | 206 | } |
|
| 208 | - | return nil; |
|
| 207 | + | // Clear the lowest set bit. |
|
| 208 | + | it.remaining &= it.remaining - 1; |
|
| 209 | + | ||
| 210 | + | return n; |
|
| 211 | + | } |
|
| 212 | + | ||
| 213 | + | /// Count trailing zeros in a 32-bit value. |
|
| 214 | + | /// Returns the bit position of the lowest set bit (0-31). |
|
| 215 | + | /// Behavior is undefined if `x` is 0. |
|
| 216 | + | fn ctz(x: u32) -> u32 { |
|
| 217 | + | // De Bruijn sequence for 32-bit CTZ. |
|
| 218 | + | const DEBRUIJN: u32 = 0x077CB531; |
|
| 219 | + | const TABLE: [u8; 32] = [ |
|
| 220 | + | 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, |
|
| 221 | + | 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, |
|
| 222 | + | ]; |
|
| 223 | + | // Isolate lowest set bit, multiply by de Bruijn constant, look up. |
|
| 224 | + | let isolated = x & (~x + 1); |
|
| 225 | + | ||
| 226 | + | return TABLE[(isolated * DEBRUIJN) >> 27] as u32; |
|
| 209 | 227 | } |