Pass small aggregates by value

a74c66b894a416edecb34727b883de4d043c47f21081c9c34097e1fc9174d53d
Previously, all aggregate types (records, unions) were passed and
returned via hidden pointer parameters, requiring the caller to allocate
a buffer and the callee to write through it. This is unnecessarily
expensive for small aggregates that fit in a single 64-bit register.
Alexis Sellier committed · 1 parent 5aad3898
lib/std/lang/lower.rad +39 -2
925 925
        };
926 926
    }
927 927
    // If the function is throwing or returns a non-small aggregate, prepend a hidden
928 928
    // return parameter. The caller allocates the buffer and passes it
929 929
    // as the first argument; the callee writes the return value into it.
930 -
    let needsRetReg = fnType.throwListLen > 0 or isAggregateType(*fnType.returnType);
930 +
    let needsRetReg = fnType.throwListLen > 0
931 +
        or (isAggregateType(*fnType.returnType) and not isSmallAggregate(*fnType.returnType));
931 932
    if needsRetReg and not isExtern {
932 933
        fnLow.returnReg = nextReg(&mut fnLow);
933 934
    }
934 935
    let lowParams = try lowerParams(&mut fnLow, *fnType, decl.sig.params);
935 936
    let func = try! alloc::alloc(self.arena, @sizeOf(il::Fn), @alignOf(il::Fn)) as *mut il::Fn;
3640 3641
            return false;
3641 3642
        }
3642 3643
    }
3643 3644
}
3644 3645
3646 +
/// Check if a resolver type is a small aggregate that can be passed or returned by
3647 +
/// value in a register: a nominal type, not a void union, of size <= `PTR_SIZE`.
3648 +
fn isSmallAggregate(typ: resolver::Type) -> bool {
3649 +
    match typ {
3650 +
        case resolver::Type::Nominal(_) => {
3651 +
            if resolver::isVoidUnion(typ) { // void unions never qualify; presumably they are already lowered as a scalar tag -- TODO confirm
3652 +
                return false;
3653 +
            }
3654 +
            let layout = resolver::getTypeLayout(typ);
3655 +
            return layout.size <= resolver::PTR_SIZE; // NOTE(review): the return path loads a full W64 even when size < PTR_SIZE -- confirm the over-read is safe
3656 +
        }
3657 +
        else => return false, // non-nominal types are never treated as small aggregates
3658 +
    }
3659 +
}
3660 +
3645 3661
/// Check if a node is a void union variant literal (e.g. `Color::Red`).
3646 3662
/// If so, returns the variant's tag index. This enables optimized comparisons
3647 3663
/// that only check the tag instead of doing full aggregate comparison.
3648 3664
fn voidVariantIndex(res: *resolver::Resolver, node: *ast::Node) -> ?i64 {
3649 3665
    let data = resolver::nodeData(res, node);
5139 5155
        let src = try emitValToReg(self, val);
5140 5156
        let size = retBufSize(self);
5141 5157
5142 5158
        emit(self, il::Instr::Blit { dst: retReg, src, size });
5143 5159
        emit(self, il::Instr::Ret { val: il::Val::Reg(retReg) });
5160 +
    } else if isSmallAggregate(*self.fnType.returnType) {
5161 +
        let src = try emitValToReg(self, val);
5162 +
        let dst = nextReg(self);
5163 +
5164 +
        emit(self, il::Instr::Load { typ: il::Type::W64, dst, src, offset: 0 });
5165 +
        emit(self, il::Instr::Ret { val: il::Val::Reg(dst) });
5144 5166
    } else {
5145 5167
        emit(self, il::Instr::Ret { val });
5146 5168
    }
5147 5169
}
5148 5170
5828 5850
    };
5829 5851
    let retTy = resolver::typeFor(self.low.resolver, node) else {
5830 5852
        throw LowerError::MissingType(node);
5831 5853
    };
5832 5854
    let isThrowing = fnInfo.throwListLen > 0;
5833 -
    let needsRetBuf = isThrowing or isAggregateType(retTy);
5855 +
    let needsRetBuf = isThrowing or (isAggregateType(retTy) and not isSmallAggregate(retTy));
5834 5856
5835 5857
    // Lower function value and arguments, reserving an extra slot for the
5836 5858
    // hidden return buffer when needed.
5837 5859
    let callee = try lowerCallee(self, call.callee);
5838 5860
    let mut offset: u32 = 0;
5878 5900
    });
5879 5901
5880 5902
    // Non-void functions produce a value in a register, while void functions
5881 5903
    // return an undefined value that shouldn't be used.
5882 5904
    if let d = dst {
5905 +
        if isSmallAggregate(retTy) {
5906 +
            let slot = try emitReserveLayout(
5907 +
                self,
5908 +
                resolver::Layout {
5909 +
                    size: resolver::PTR_SIZE,
5910 +
                    alignment: resolver::PTR_SIZE
5911 +
                });
5912 +
            emit(self, il::Instr::Store {
5913 +
                typ: il::Type::W64,
5914 +
                src: il::Val::Reg(d),
5915 +
                dst: slot,
5916 +
                offset: 0,
5917 +
            });
5918 +
            return il::Val::Reg(slot);
5919 +
        }
5883 5920
        return il::Val::Reg(d);
5884 5921
    }
5885 5922
    return il::Val::Undef;
5886 5923
}
5887 5924
lib/std/lang/lower/tests/const.record.mutcopy.ril +12 -12
4 4
5 5
data $SCRATCH2 align 1 {
6 6
    w8 31;
7 7
}
8 8
9 -
fn w64 $test(w64 %0, w64 %1) {
9 +
fn w64 $test(w64 %0) {
10 10
  @entry0
11 -
    copy %2 $SCRATCH1;
12 -
    reserve %3 1 1;
13 -
    blit %3 %2 1;
14 -
    load w8 %4 %1 0;
15 -
    copy %5 $SCRATCH1;
16 -
    load w8 %6 %5 0;
17 -
    br.eq w8 %4 %6 @then1 @merge2;
11 +
    copy %1 $SCRATCH1;
12 +
    reserve %2 1 1;
13 +
    blit %2 %1 1;
14 +
    load w8 %3 %0 0;
15 +
    copy %4 $SCRATCH1;
16 +
    load w8 %5 %4 0;
17 +
    br.eq w8 %3 %5 @then1 @merge2;
18 18
  @then1
19 -
    copy %7 $SCRATCH2;
20 -
    blit %3 %7 1;
19 +
    copy %6 $SCRATCH2;
20 +
    blit %2 %6 1;
21 21
    jmp @merge2;
22 22
  @merge2
23 -
    blit %0 %3 1;
24 -
    ret %0;
23 +
    load w64 %8 %2 0;
24 +
    ret %8;
25 25
}
lib/std/lang/lower/tests/field.aggregate.ril +12 -12
8 8
    store w32 30 %0 8;
9 9
    sload w32 %2 %0 4;
10 10
    ret %2;
11 11
}
12 12
13 -
fn w64 $accessUnionField(w64 %0) {
13 +
fn w64 $accessUnionField() {
14 14
  @entry0
15 -
    reserve %1 12 4;
16 -
    reserve %2 4 4;
17 -
    store w32 42 %2 0;
18 -
    reserve %3 8 4;
19 -
    store w8 1 %3 0;
20 -
    add w64 %4 %3 4;
21 -
    blit %4 %2 4;
22 -
    blit %1 %3 8;
23 -
    store w32 99 %1 8;
24 -
    blit %0 %1 8;
25 -
    ret %0;
15 +
    reserve %0 12 4;
16 +
    reserve %1 4 4;
17 +
    store w32 42 %1 0;
18 +
    reserve %2 8 4;
19 +
    store w8 1 %2 0;
20 +
    add w64 %3 %2 4;
21 +
    blit %3 %1 4;
22 +
    blit %0 %2 8;
23 +
    store w32 99 %0 8;
24 +
    load w64 %4 %0 0;
25 +
    ret %4;
26 26
}
27 27
28 28
fn w32 $accessSliceField() {
29 29
  @entry0
30 30
    reserve %0 12 4;
lib/std/lang/lower/tests/let.copy.semantics.ril +9 -8
1 -
fn w64 $makePoint(w64 %0) {
1 +
fn w64 $makePoint() {
2 2
  @entry0
3 -
    reserve %1 8 4;
4 -
    store w32 1 %1 0;
5 -
    store w32 2 %1 4;
6 -
    blit %0 %1 8;
7 -
    ret %0;
3 +
    reserve %0 8 4;
4 +
    store w32 1 %0 0;
5 +
    store w32 2 %0 4;
6 +
    load w64 %1 %0 0;
7 +
    ret %1;
8 8
}
9 9
10 10
fn w32 $letFromCall() {
11 11
  @entry0
12 -
    reserve %0 8 4;
13 -
    call w64 %1 $makePoint(%0);
12 +
    call w64 %0 $makePoint();
13 +
    reserve %1 8 8;
14 +
    store w64 %0 %1 0;
14 15
    sload w32 %2 %1 0;
15 16
    ret %2;
16 17
}
17 18
18 19
fn w32 $letFromField() {
lib/std/lang/lower/tests/record.ctor.tuple.ril +6 -6
1 -
fn w64 $make(w64 %0) {
1 +
fn w64 $make() {
2 2
  @entry0
3 -
    reserve %1 8 4;
4 -
    store w32 1 %1 0;
5 -
    store w32 2 %1 4;
6 -
    blit %0 %1 8;
7 -
    ret %0;
3 +
    reserve %0 8 4;
4 +
    store w32 1 %0 0;
5 +
    store w32 2 %0 4;
6 +
    load w64 %1 %0 0;
7 +
    ret %1;
8 8
}
lib/std/lang/lower/tests/union.ctor.ril +14 -14
6 6
fn w8 $makeBlue() {
7 7
  @entry0
8 8
    ret 2;
9 9
}
10 10
11 -
fn w64 $makeSome(w64 %0) {
11 +
fn w64 $makeSome() {
12 12
  @entry0
13 -
    reserve %1 4 4;
14 -
    store w32 7 %1 0;
15 -
    reserve %2 8 4;
16 -
    store w8 1 %2 0;
17 -
    add w64 %3 %2 4;
18 -
    blit %3 %1 4;
19 -
    blit %0 %2 8;
20 -
    ret %0;
13 +
    reserve %0 4 4;
14 +
    store w32 7 %0 0;
15 +
    reserve %1 8 4;
16 +
    store w8 1 %1 0;
17 +
    add w64 %2 %1 4;
18 +
    blit %2 %0 4;
19 +
    load w64 %3 %1 0;
20 +
    ret %3;
21 21
}
22 22
23 -
fn w64 $makeNone(w64 %0) {
23 +
fn w64 $makeNone() {
24 24
  @entry0
25 -
    reserve %1 8 4;
26 -
    store w8 0 %1 0;
27 -
    blit %0 %1 8;
28 -
    ret %0;
25 +
    reserve %0 8 4;
26 +
    store w8 0 %0 0;
27 +
    load w64 %1 %0 0;
28 +
    ret %1;
29 29
}
lib/std/lang/lower/tests/union.variant.access.ril +19 -19
6 6
fn w8 $assignGreen() {
7 7
  @entry0
8 8
    ret 1;
9 9
}
10 10
11 -
fn w64 $makeNone(w64 %0) {
11 +
fn w64 $makeNone() {
12 12
  @entry0
13 -
    reserve %1 8 4;
14 -
    store w8 0 %1 0;
15 -
    blit %0 %1 8;
16 -
    ret %0;
13 +
    reserve %0 8 4;
14 +
    store w8 0 %0 0;
15 +
    load w64 %1 %0 0;
16 +
    ret %1;
17 17
}
18 18
19 -
fn w64 $makeSome(w64 %0, w32 %1) {
19 +
fn w64 $makeSome(w32 %0) {
20 20
  @entry0
21 -
    reserve %2 4 4;
22 -
    store w32 %1 %2 0;
23 -
    reserve %3 8 4;
24 -
    store w8 1 %3 0;
25 -
    add w64 %4 %3 4;
26 -
    blit %4 %2 4;
27 -
    blit %0 %3 8;
28 -
    ret %0;
21 +
    reserve %1 4 4;
22 +
    store w32 %0 %1 0;
23 +
    reserve %2 8 4;
24 +
    store w8 1 %2 0;
25 +
    add w64 %3 %2 4;
26 +
    blit %3 %1 4;
27 +
    load w64 %4 %2 0;
28 +
    ret %4;
29 29
}
30 30
31 -
fn w64 $assignNone(w64 %0) {
31 +
fn w64 $assignNone() {
32 32
  @entry0
33 -
    reserve %1 8 4;
34 -
    store w8 0 %1 0;
35 -
    blit %0 %1 8;
36 -
    ret %0;
33 +
    reserve %0 8 4;
34 +
    store w8 0 %0 0;
35 +
    load w64 %1 %0 0;
36 +
    ret %1;
37 37
}