Optimize IL generation of loops
d46af8343f1e6a22d415695d12ff6e3a2d1e65939624598c3befb7561bab7861
Improves IL when no `continue` exists.
1 parent
8cacf46c
lib/std/lang/lower.rad
+22 -16
| 5380 | 5380 | let val = emitRead(self, elemReg, 0, *elemType); |
|
| 5381 | 5381 | ||
| 5382 | 5382 | defVar(self, v, val); |
|
| 5383 | 5383 | } |
|
| 5384 | 5384 | } |
|
| 5385 | - | // Lower the loop body and jump to the @step block. |
|
| 5385 | + | // Lower the loop body. |
|
| 5386 | 5386 | try lowerBlock(self, body); |
|
| 5387 | - | let stepBlock = try getOrCreateContinueBlock(self); |
|
| 5388 | - | try emitJmpAndSeal(self, stepBlock); |
|
| 5389 | 5387 | ||
| 5390 | - | // Switch to step block and emit step/increment. |
|
| 5391 | - | switchToBlock(self, stepBlock); |
|
| 5392 | - | ||
| 5393 | - | match *iter { |
|
| 5394 | - | case ForIter::Range { valVar, valType, indexVar, .. } => { |
|
| 5395 | - | try emitIncrement(self, valVar, valType); |
|
| 5396 | - | ||
| 5397 | - | if let idxVar = indexVar { |
|
| 5388 | + | // Check if a `continue` statement created a step block. |
|
| 5389 | + | let ctx = currentLoop(self) else panic; |
|
| 5390 | + | if let stepBlock = ctx.continueTarget { |
|
| 5391 | + | // Body has `continue` statements: jump to step block, emit increment there. |
|
| 5392 | + | try emitJmpAndSeal(self, stepBlock); |
|
| 5393 | + | switchToBlock(self, stepBlock); |
|
| 5394 | + | } |
|
| 5395 | + | // Otherwise, emit increment directly in the current block, |
|
| 5396 | + | // saving the jump to a separate step block. |
|
| 5397 | + | if not blockHasTerminator(self) { |
|
| 5398 | + | match *iter { |
|
| 5399 | + | case ForIter::Range { valVar, valType, indexVar, .. } => { |
|
| 5400 | + | try emitIncrement(self, valVar, valType); |
|
| 5401 | + | if let idxVar = indexVar { |
|
| 5402 | + | try emitIncrement(self, idxVar, il::Type::W32); |
|
| 5403 | + | } |
|
| 5404 | + | } |
|
| 5405 | + | case ForIter::Collection { idxVar, .. } => { |
|
| 5398 | 5406 | try emitIncrement(self, idxVar, il::Type::W32); |
|
| 5399 | 5407 | } |
|
| 5400 | 5408 | } |
|
| 5401 | - | case ForIter::Collection { idxVar, .. } => { |
|
| 5402 | - | try emitIncrement(self, idxVar, il::Type::W32); |
|
| 5403 | - | } |
|
| 5409 | + | try emitJmp(self, loopBlock); |
|
| 5404 | 5410 | } |
|
| 5405 | - | try emitJmp(self, loopBlock); |
|
| 5406 | - | ||
| 5407 | 5411 | exitLoop(self); |
|
| 5412 | + | ||
| 5408 | 5413 | try sealBlock(self, loopBlock); |
|
| 5409 | 5414 | try sealBlock(self, endBlock); |
|
| 5415 | + | ||
| 5410 | 5416 | switchToBlock(self, endBlock); |
|
| 5411 | 5417 | } |
|
| 5412 | 5418 | ||
| 5413 | 5419 | /// Lower a `for` loop over a range, array, or slice. |
|
| 5414 | 5420 | fn lowerFor(self: *mut FnLowerer, node: *ast::Node, f: ast::For) throws (LowerError) { |
lib/std/lang/lower/tests/average.ril
+2 -4
| 8 | 8 | @body2 |
|
| 9 | 9 | mul w64 %4 %3 4; |
|
| 10 | 10 | add w64 %5 %2 %4; |
|
| 11 | 11 | load w32 %6 %5 0; |
|
| 12 | 12 | add w32 %8 %7 %6; |
|
| 13 | - | jmp @step4; |
|
| 13 | + | add w32 %9 %3 1; |
|
| 14 | + | jmp @loop1(%9, %8); |
|
| 14 | 15 | @merge3 |
|
| 15 | 16 | load w32 %11 %0 8; |
|
| 16 | 17 | udiv w32 %12 %7 %11; |
|
| 17 | 18 | ret %12; |
|
| 18 | - | @step4 |
|
| 19 | - | add w32 %9 %3 1; |
|
| 20 | - | jmp @loop1(%9, %8); |
|
| 21 | 19 | } |
lib/std/lang/lower/tests/loop.for.array.ril
+2 -4
| 7 | 7 | mul w64 %2 %1 4; |
|
| 8 | 8 | add w64 %3 %0 %2; |
|
| 9 | 9 | sload w32 %4 %3 0; |
|
| 10 | 10 | add w32 %6 %4 %1; |
|
| 11 | 11 | add w32 %7 %5 %6; |
|
| 12 | - | jmp @step4; |
|
| 13 | - | @merge3 |
|
| 14 | - | ret %5; |
|
| 15 | - | @step4 |
|
| 16 | 12 | add w32 %8 %1 1; |
|
| 17 | 13 | jmp @loop1(%8, %7); |
|
| 14 | + | @merge3 |
|
| 15 | + | ret %5; |
|
| 18 | 16 | } |
lib/std/lang/lower/tests/loop.for.break.bound.ril
+0 -2
| 8 | 8 | @merge3 |
|
| 9 | 9 | ret %0; |
|
| 10 | 10 | @then4 |
|
| 11 | 11 | jmp @merge3; |
|
| 12 | 12 | @merge5 |
|
| 13 | - | jmp @step6; |
|
| 14 | - | @step6 |
|
| 15 | 13 | add w32 %2 %1 1; |
|
| 16 | 14 | jmp @loop1(%2); |
|
| 17 | 15 | } |
lib/std/lang/lower/tests/loop.for.indexed.ril
+2 -4
| 4 | 4 | @loop1(w32 %1, w32 %2, w32 %3) |
|
| 5 | 5 | br.slt w32 %1 %0 @body2 @merge3; |
|
| 6 | 6 | @body2 |
|
| 7 | 7 | add w32 %4 %1 %3; |
|
| 8 | 8 | add w32 %5 %2 %4; |
|
| 9 | - | jmp @step4; |
|
| 10 | - | @merge3 |
|
| 11 | - | ret %2; |
|
| 12 | - | @step4 |
|
| 13 | 9 | add w32 %6 %1 1; |
|
| 14 | 10 | add w32 %7 %3 1; |
|
| 15 | 11 | jmp @loop1(%6, %5, %7); |
|
| 12 | + | @merge3 |
|
| 13 | + | ret %2; |
|
| 16 | 14 | } |
lib/std/lang/lower/tests/loop.for.placeholder.ril
+4 -8
| 3 | 3 | jmp @loop1(0, 0); |
|
| 4 | 4 | @loop1(w32 %1, w32 %2) |
|
| 5 | 5 | br.ult w32 %1 %0 @body2 @merge3; |
|
| 6 | 6 | @body2 |
|
| 7 | 7 | add w32 %3 %2 1; |
|
| 8 | - | jmp @step4; |
|
| 9 | - | @merge3 |
|
| 10 | - | ret %2; |
|
| 11 | - | @step4 |
|
| 12 | 8 | add w32 %4 %1 1; |
|
| 13 | 9 | jmp @loop1(%4, %3); |
|
| 10 | + | @merge3 |
|
| 11 | + | ret %2; |
|
| 14 | 12 | } |
|
| 15 | 13 | ||
| 16 | 14 | fn w32 $forPlaceholderArray(w64 %0) { |
|
| 17 | 15 | @entry0 |
|
| 18 | 16 | jmp @loop1(0, 0); |
|
| 19 | 17 | @loop1(w32 %1, w32 %2) |
|
| 20 | 18 | br.slt w32 %1 4 @body2 @merge3; |
|
| 21 | 19 | @body2 |
|
| 22 | 20 | add w32 %3 %2 1; |
|
| 23 | - | jmp @step4; |
|
| 24 | - | @merge3 |
|
| 25 | - | ret %2; |
|
| 26 | - | @step4 |
|
| 27 | 21 | add w32 %4 %1 1; |
|
| 28 | 22 | jmp @loop1(%4, %3); |
|
| 23 | + | @merge3 |
|
| 24 | + | ret %2; |
|
| 29 | 25 | } |
lib/std/lang/lower/tests/loop.for.ril
+2 -4
| 3 | 3 | jmp @loop1(0, 0); |
|
| 4 | 4 | @loop1(w32 %1, w32 %2) |
|
| 5 | 5 | br.slt w32 %1 %0 @body2 @merge3; |
|
| 6 | 6 | @body2 |
|
| 7 | 7 | add w32 %3 %2 %1; |
|
| 8 | - | jmp @step4; |
|
| 9 | - | @merge3 |
|
| 10 | - | ret %2; |
|
| 11 | - | @step4 |
|
| 12 | 8 | add w32 %4 %1 1; |
|
| 13 | 9 | jmp @loop1(%4, %3); |
|
| 10 | + | @merge3 |
|
| 11 | + | ret %2; |
|
| 14 | 12 | } |
lib/std/lang/lower/tests/loop.for.slice.ril
+2 -4
| 9 | 9 | mul w64 %4 %3 4; |
|
| 10 | 10 | add w64 %5 %2 %4; |
|
| 11 | 11 | sload w32 %6 %5 0; |
|
| 12 | 12 | add w32 %8 %6 %3; |
|
| 13 | 13 | add w32 %9 %7 %8; |
|
| 14 | - | jmp @step4; |
|
| 15 | - | @merge3 |
|
| 16 | - | ret %7; |
|
| 17 | - | @step4 |
|
| 18 | 14 | add w32 %10 %3 1; |
|
| 19 | 15 | jmp @loop1(%10, %9); |
|
| 16 | + | @merge3 |
|
| 17 | + | ret %7; |
|
| 20 | 18 | } |
lib/std/lang/lower/tests/loop.for.unsigned.range.ril
+4 -8
| 3 | 3 | jmp @loop1(%0, 0); |
|
| 4 | 4 | @loop1(w32 %2, w32 %3) |
|
| 5 | 5 | br.ult w32 %2 %1 @body2 @merge3; |
|
| 6 | 6 | @body2 |
|
| 7 | 7 | add w32 %4 %3 1; |
|
| 8 | - | jmp @step4; |
|
| 9 | - | @merge3 |
|
| 10 | - | ret %3; |
|
| 11 | - | @step4 |
|
| 12 | 8 | add w32 %5 %2 1; |
|
| 13 | 9 | jmp @loop1(%5, %4); |
|
| 10 | + | @merge3 |
|
| 11 | + | ret %3; |
|
| 14 | 12 | } |
|
| 15 | 13 | ||
| 16 | 14 | fn w32 $loopForSignedRange(w32 %0, w32 %1) { |
|
| 17 | 15 | @entry0 |
|
| 18 | 16 | jmp @loop1(%0, 0); |
|
| 19 | 17 | @loop1(w32 %2, w32 %3) |
|
| 20 | 18 | br.slt w32 %2 %1 @body2 @merge3; |
|
| 21 | 19 | @body2 |
|
| 22 | 20 | add w32 %4 %3 1; |
|
| 23 | - | jmp @step4; |
|
| 24 | - | @merge3 |
|
| 25 | - | ret %3; |
|
| 26 | - | @step4 |
|
| 27 | 21 | add w32 %5 %2 1; |
|
| 28 | 22 | jmp @loop1(%5, %4); |
|
| 23 | + | @merge3 |
|
| 24 | + | ret %3; |
|
| 29 | 25 | } |
lib/std/lang/lower/tests/loop.mutable.ril
+2 -4
| 22 | 22 | load w64 %6 %4 0; |
|
| 23 | 23 | mul w64 %7 %2 4; |
|
| 24 | 24 | add w64 %8 %6 %7; |
|
| 25 | 25 | load w32 %9 %8 0; |
|
| 26 | 26 | call w32 %10 $max(%3, %9); |
|
| 27 | - | jmp @step6; |
|
| 27 | + | add w32 %11 %2 1; |
|
| 28 | + | jmp @loop1(%11, %10, %4); |
|
| 28 | 29 | @guard#trap5 |
|
| 29 | 30 | ebreak; |
|
| 30 | 31 | unreachable; |
|
| 31 | - | @step6 |
|
| 32 | - | add w32 %11 %2 1; |
|
| 33 | - | jmp @loop1(%11, %10, %4); |
|
| 34 | 32 | } |