Skip extension for zero immediates

ab23f64bda23d79675c8e7fc09b90a676fa89e1b8982b5ced2d9417ebd667227
Alexis Sellier committed ago 1 parent 845f7ebb
lib/std/arch/rv64/isel.rad +71 -10
50 50
/// Mask for extracting byte value.
51 51
const MASK_W8: i32 = 0xFF;
52 52
/// Maximum number of block arguments supported.
53 53
const MAX_BLOCK_ARGS: u32 = 16;
54 54
55 +
/// Signed integer range limits.
56 +
const I8_MIN: i64 = -128;
57 +
const I8_MAX: i64 = 127;
58 +
const I16_MIN: i64 = -32768;
59 +
const I16_MAX: i64 = 32767;
60 +
const I32_MIN: i64 = -2147483648;
61 +
const I32_MAX: i64 = 2147483647;
62 +
63 +
/// Unsigned integer range limits.
64 +
const U8_MAX: i64 = 255;
65 +
const U16_MAX: i64 = 65535;
66 +
const U32_MAX: i64 = 4294967295;
67 +
55 68
/// Binary operation.
56 69
union BinOp { Add, And, Or, Xor }
57 70
/// Shift operation.
58 71
union ShiftOp { Sll, Srl, Sra }
59 72
/// Compare operation.
604 617
                // For SLT: sign-extension is semantically correct.
605 618
                // For ULT: bltu gives identical results on sign- vs zero-
606 619
                // extended W32 values (the relative ordering is preserved
607 620
                // because the sign bit maps to the same half of 64-bit space).
608 621
                // For EQ/NE: both extensions produce identical equality results.
609 -
                if not aIsZero { emitSext(s.e, rs1, rs1, typ); }
610 -
                if not bIsZero { emitSext(s.e, rs2, rs2, typ); }
622 +
                if not aIsZero and not isExtendedImm(a, typ, true) {
623 +
                    emitSext(s.e, rs1, rs1, typ);
624 +
                }
625 +
                if not bIsZero and not isExtendedImm(b, typ, true) {
626 +
                    emitSext(s.e, rs2, rs2, typ);
627 +
                }
611 628
            } else if let case il::CmpOp::Slt = op {
612 -
                if not aIsZero { emitSext(s.e, rs1, rs1, typ); }
613 -
                if not bIsZero { emitSext(s.e, rs2, rs2, typ); }
629 +
                if not aIsZero and not isExtendedImm(a, typ, true) {
630 +
                    emitSext(s.e, rs1, rs1, typ);
631 +
                }
632 +
                if not bIsZero and not isExtendedImm(b, typ, true) {
633 +
                    emitSext(s.e, rs2, rs2, typ);
634 +
                }
614 635
            } else {
615 -
                if not aIsZero { emitZext(s.e, rs1, rs1, typ); }
616 -
                if not bIsZero { emitZext(s.e, rs2, rs2, typ); }
636 +
                if not aIsZero and not isExtendedImm(a, typ, false) {
637 +
                    emitZext(s.e, rs1, rs1, typ);
638 +
                }
639 +
                if not bIsZero and not isExtendedImm(b, typ, false) {
640 +
                    emitZext(s.e, rs2, rs2, typ);
641 +
                }
617 642
            }
618 643
            // Block-argument moves must only execute on the taken path.
619 644
            // When `thenArgs` is non-empty, invert the branch so that the
620 645
            // then-moves land on the fall-through (taken) side.
621 646
            //
722 747
            emit::emit(s.e, encode::ebreak());
723 748
        },
724 749
    }
725 750
}
726 751
752 +
/// Check if a value is an immediate that's already correctly extended.
753 +
/// `loadImm` produces the exact 64-bit value; this checks whether that value
754 +
/// already matches what sign/zero-extension to the given type would produce.
/// `signed` selects the sign-extension check (`true`) or the zero-extension
/// check (`false`). Any value that is not an `il::Val::Imm` yields `false`.
755 +
fn isExtendedImm(val: il::Val, typ: il::Type, signed: bool) -> bool {
756 +
    if let case il::Val::Imm(imm) = val {
757 +
        if signed {
758 +
            // Sign-extension keeps the low bits of the type width and
759 +
            // replicates the sign bit upward. The 64-bit value is already
760 +
            // in that form exactly when it fits the signed range of `typ`.
761 +
            match typ {
762 +
                case il::Type::W8 => return imm >= I8_MIN and imm <= I8_MAX,
763 +
                case il::Type::W16 => return imm >= I16_MIN and imm <= I16_MAX,
764 +
                case il::Type::W32 => return imm >= I32_MIN and imm <= I32_MAX,
765 +
                // Full width: every immediate is accepted.
                case il::Type::W64 => return true,
766 +
            }
767 +
        } else {
768 +
            // Zero-extension: the value must be non-negative and no larger
            // than the unsigned maximum of `typ`, so the upper bits are zero.
769 +
            match typ {
770 +
                case il::Type::W8 => return imm >= 0 and imm <= U8_MAX,
771 +
                case il::Type::W16 => return imm >= 0 and imm <= U16_MAX,
772 +
                case il::Type::W32 => return imm >= 0 and imm <= U32_MAX,
773 +
                // Full width: every immediate is accepted.
                case il::Type::W64 => return true,
774 +
            }
775 +
        }
776 +
    }
777 +
    // Not an immediate: report that the extension cannot be skipped.
    return false;
778 +
}
779 +
727 780
/// Check if a value is an immediate zero.
728 781
fn isZeroImm(val: il::Val) -> bool {
729 782
    if let case il::Val::Imm(imm) = val {
730 783
        return imm == 0;
731 784
    }
835 888
            // Canonicalize both operands to the declared width.
836 889
            // For W32 EQ, sign-extension suffices instead of
837 890
            // zero-extension since both sides get the same canonical form.
838 891
            if typ == il::Type::W32 {
839 892
                emitSext(s.e, rs1, rs1, typ);
840 -
                emitSext(s.e, rs2, rs2, typ);
893 +
                if not isExtendedImm(b, typ, true) {
894 +
                    emitSext(s.e, rs2, rs2, typ);
895 +
                }
841 896
            } else {
842 897
                emitZext(s.e, rs1, rs1, typ);
843 -
                emitZext(s.e, rs2, rs2, typ);
898 +
                if not isExtendedImm(b, typ, false) {
899 +
                    emitZext(s.e, rs2, rs2, typ);
900 +
                }
844 901
            }
845 902
            emit::emit(s.e, encode::xor(rd, rs1, rs2));
846 903
            emit::emit(s.e, encode::sltiu(rd, rd, 1));
847 904
        }
848 905
        case il::BinOp::Ne => {
849 906
            let rs2 = resolveVal(s, super::SCRATCH2, b);
850 907
            if typ == il::Type::W32 {
851 908
                emitSext(s.e, rs1, rs1, typ);
852 -
                emitSext(s.e, rs2, rs2, typ);
909 +
                if not isExtendedImm(b, typ, true) {
910 +
                    emitSext(s.e, rs2, rs2, typ);
911 +
                }
853 912
            } else {
854 913
                emitZext(s.e, rs1, rs1, typ);
855 -
                emitZext(s.e, rs2, rs2, typ);
914 +
                if not isExtendedImm(b, typ, false) {
915 +
                    emitZext(s.e, rs2, rs2, typ);
916 +
                }
856 917
            }
857 918
            emit::emit(s.e, encode::xor(rd, rs1, rs2));
858 919
            emit::emit(s.e, encode::sltu(rd, super::ZERO, rd));
859 920
        }
860 921
        case il::BinOp::Slt =>