Skip to content

Commit

Permalink
pulley: Implement the wide-arithmetic proposal
Browse files Browse the repository at this point in the history
Add a few minor instructions/lowerings for the new operations added as
part of the wide-arithmetic proposal. These are all part of the
"extended" opcode set since they shouldn't be common, and if they're
performance-critical you probably want a native backend instead.
  • Loading branch information
alexcrichton committed Jan 7, 2025
1 parent 5030709 commit 753fce8
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 17 deletions.
34 changes: 24 additions & 10 deletions cranelift/codegen/meta/src/pulley.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ impl Inst<'_> {
let src2 = parts.next().unwrap_or(dst);
Operand::Binop { dst, src1, src2 }
}
("dst", ty) => Operand::Writable { name, ty },
(name, ty) if name.starts_with("dst") => Operand::Writable { name, ty },
(name, "RegSet < XReg >") => Operand::Normal {
name,
ty: "XRegSet",
Expand Down Expand Up @@ -137,7 +137,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
format_string.push_str(name);
format_string.push_str("}");
if ty.contains("Reg") {
if name == "dst" {
if matches!(op, Operand::Writable { .. }) {
locals.push_str(&format!("let {name} = reg_name(*{name}.to_reg());\n"));
} else {
locals.push_str(&format!("let {name} = reg_name(**{name});\n"));
Expand Down Expand Up @@ -342,7 +342,7 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
let mut rule = String::new();
isle.push_str(&format!("(decl pulley_{snake_name} ("));
rule.push_str(&format!("(rule (pulley_{snake_name} "));
let mut result = None;
let mut results = Vec::new();
let mut ops = Vec::new();
for op in inst.operands() {
match op {
Expand All @@ -352,16 +352,14 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
ops.push(name);
}
Operand::Writable { name: _, ty } => {
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
result = Some(ty);
results.push(ty);
}
Operand::Binop { dst, src1, src2 } => {
isle.push_str(&format!("{src1} {src2}"));
rule.push_str("src1 src2");
ops.push("src1");
ops.push("src2");
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
result = Some(dst);
results.push(dst);
}
}
isle.push_str(" ");
Expand All @@ -370,8 +368,8 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
isle.push_str(") ");
rule.push_str(")");
let ops = ops.join(" ");
match result {
Some(result) => {
match &results[..] {
[result] => {
isle.push_str(result);
rule.push_str(&format!(
"
Expand All @@ -384,12 +382,28 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
result.to_lowercase()
));
}
None => {
[a, b] => {
isle.push_str("ValueRegs");
rule.push_str(&format!(
"
(let (
(dst1 Writable{a} (temp_writable_{}))
(dst2 Writable{b} (temp_writable_{}))
(_ Unit (emit (RawInst.{name} dst1 dst2 {ops})))
)
(value_regs dst1 dst2)))\
\n",
a.to_lowercase(),
b.to_lowercase(),
));
}
[] => {
isle.push_str("SideEffectNoResult");
rule.push_str(&format!(
" (SideEffectNoResult.Inst (RawInst.{name} {ops})))\n",
));
}
other => panic!("cannot codegen results {other:?}"),
}
isle.push_str(")\n");

Expand Down
29 changes: 29 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,17 @@
(if-let neg_u32 (u32_try_from_u64 neg_u64))
neg_u32)

;; 128-bit addition
;;
;; Matches an `iadd` whose result type is `$I128`. Each operand is bound as a
;; `ValueRegs` so that its two component registers can be extracted
;; individually, and the four registers are handed to `pulley_xadd128` in the
;; order: `a` register 0, `a` register 1, `b` register 0, `b` register 1.
(rule 1 (lower (has_type $I128 (iadd a b)))
(let ((a ValueRegs a)
(b ValueRegs b))
(pulley_xadd128
(value_regs_get a 0)
(value_regs_get a 1)
(value_regs_get b 0)
(value_regs_get b 1))))

;; vector addition
(rule 1 (lower (has_type $I8X16 (iadd a b))) (pulley_vaddi8x16 a b))
(rule 1 (lower (has_type $I16X8 (iadd a b))) (pulley_vaddi16x8 a b))
(rule 1 (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b))
Expand Down Expand Up @@ -260,6 +271,17 @@
(if-let c (u8_from_negated_iconst b))
(pulley_xadd64_u8 a c))

;; 128-bit subtraction
;;
;; Matches an `isub` whose result type is `$I128`. Each operand is bound as a
;; `ValueRegs` so that its two component registers can be extracted
;; individually, and the four registers are handed to `pulley_xsub128` in the
;; order: `a` register 0, `a` register 1, `b` register 0, `b` register 1.
(rule 1 (lower (has_type $I128 (isub a b)))
(let ((a ValueRegs a)
(b ValueRegs b))
(pulley_xsub128
(value_regs_get a 0)
(value_regs_get a 1)
(value_regs_get b 0)
(value_regs_get b 1))))

;; vector subtraction
(rule 1 (lower (has_type $I8X16 (isub a b))) (pulley_vsubi8x16 a b))
(rule 1 (lower (has_type $I16X8 (isub a b))) (pulley_vsubi16x8 a b))
(rule 1 (lower (has_type $I32X4 (isub a b))) (pulley_vsubi32x4 a b))
Expand All @@ -286,6 +308,13 @@
(rule 4 (lower (has_type $I64 (imul a (i8_from_iconst b))))
(pulley_xmul64_s8 a b))

;; 128-bit (or wide) multiplication
;;
;; These two rules only match an `$I128` `imul` when BOTH operands are the
;; same kind of extension: both `uextend` (lowered to `pulley_xwidemul64_u`)
;; or both `sextend` (lowered to `pulley_xwidemul64_s`). The pre-extension
;; values are passed through `zext64` / `sext64` respectively before being
;; multiplied.
;; NOTE(review): `zext64` / `sext64` presumably widen narrower inputs to
;; 64 bits -- confirm against their definitions elsewhere in this file.
(rule (lower (has_type $I128 (imul (uextend a) (uextend b))))
(pulley_xwidemul64_u (zext64 a) (zext64 b)))
(rule (lower (has_type $I128 (imul (sextend a) (sextend b))))
(pulley_xwidemul64_s (sext64 a) (sext64 b)))

;; vector multiplication
(rule (lower (has_type $I8X16 (imul a b))) (pulley_vmuli8x16 a b))
(rule (lower (has_type $I16X8 (imul a b))) (pulley_vmuli16x8 a b))
(rule (lower (has_type $I32X4 (imul a b))) (pulley_vmuli32x4 a b))
Expand Down
7 changes: 0 additions & 7 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,13 +304,6 @@ impl Compiler {
if config.threads() {
return true;
}
// Unsupported proposals. Note that other proposals have partial
// support at this time (pulley is a work-in-progress) and so
// individual tests are listed below as "should fail" even if
// they're not covered in this list.
if config.wide_arithmetic() {
return true;
}
}
}

Expand Down
71 changes: 71 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,17 @@ impl Interpreter<'_> {
}
ControlFlow::Continue(())
}

/// Assembles a signed 128-bit value from two `x` registers.
///
/// The `lo` register is read as an unsigned 64-bit value and forms bits
/// 0..64 of the result; the `hi` register is read as a signed 64-bit value
/// and forms bits 64..128, so its sign becomes the sign of the result.
fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
    let low_half = i128::from(self.state[lo].get_u64());
    let high_half = i128::from(self.state[hi].get_i64());
    (high_half << 64) | low_half
}

/// Splits a signed 128-bit value across two `x` registers.
///
/// Bits 0..64 of `val` are stored into `lo` and bits 64..128 into `hi`,
/// each as a raw 64-bit pattern. `lo` is written first, then `hi`.
fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
    // Truncating casts are intentional: each half keeps exactly 64 bits.
    let low_half = val as u64;
    let high_half = (val >> 64) as u64;
    self.state[lo].set_u64(low_half);
    self.state[hi].set_u64(high_half);
}
}

#[test]
Expand Down Expand Up @@ -4791,4 +4802,64 @@ impl ExtendedOpVisitor for Interpreter<'_> {
self.state[dst].set_f64x2(a);
ControlFlow::Continue(())
}

/// `dst_lo:dst_hi = lhs_lo:lhs_hi + rhs_lo:rhs_hi`
///
/// Both operands are read as 128-bit integers from their register pairs,
/// added with wrapping semantics, and the sum is written back to the
/// destination register pair. Always continues execution.
fn xadd128(
    &mut self,
    dst_lo: XReg,
    dst_hi: XReg,
    lhs_lo: XReg,
    lhs_hi: XReg,
    rhs_lo: XReg,
    rhs_hi: XReg,
) -> ControlFlow<Done> {
    let augend = self.get_i128(lhs_lo, lhs_hi);
    let addend = self.get_i128(rhs_lo, rhs_hi);
    self.set_i128(dst_lo, dst_hi, augend.wrapping_add(addend));
    ControlFlow::Continue(())
}

/// `dst_lo:dst_hi = lhs_lo:lhs_hi - rhs_lo:rhs_hi`
///
/// Both operands are read as 128-bit integers from their register pairs,
/// subtracted with wrapping semantics, and the difference is written back
/// to the destination register pair. Always continues execution.
fn xsub128(
    &mut self,
    dst_lo: XReg,
    dst_hi: XReg,
    lhs_lo: XReg,
    lhs_hi: XReg,
    rhs_lo: XReg,
    rhs_hi: XReg,
) -> ControlFlow<Done> {
    let minuend = self.get_i128(lhs_lo, lhs_hi);
    let subtrahend = self.get_i128(rhs_lo, rhs_hi);
    self.set_i128(dst_lo, dst_hi, minuend.wrapping_sub(subtrahend));
    ControlFlow::Continue(())
}

/// `dst_lo:dst_hi = sext(lhs) * sext(rhs)`
///
/// Reads both source registers as signed 64-bit values, sign-extends them
/// to 128 bits, multiplies, and stores the 128-bit product in the
/// destination register pair. Always continues execution.
fn xwidemul64_s(
    &mut self,
    dst_lo: XReg,
    dst_hi: XReg,
    lhs: XReg,
    rhs: XReg,
) -> ControlFlow<Done> {
    let multiplicand = i128::from(self.state[lhs].get_i64());
    let multiplier = i128::from(self.state[rhs].get_i64());
    let product = multiplicand.wrapping_mul(multiplier);
    self.set_i128(dst_lo, dst_hi, product);
    ControlFlow::Continue(())
}

/// `dst_lo:dst_hi = zext(lhs) * zext(rhs)`
///
/// Reads both source registers as unsigned 64-bit values, zero-extends them
/// to 128 bits, multiplies, and stores the 128-bit product in the
/// destination register pair. Always continues execution.
fn xwidemul64_u(
    &mut self,
    dst_lo: XReg,
    dst_hi: XReg,
    lhs: XReg,
    rhs: XReg,
) -> ControlFlow<Done> {
    let multiplicand = u128::from(self.state[lhs].get_u64());
    let multiplier = u128::from(self.state[rhs].get_u64());
    let product = multiplicand.wrapping_mul(multiplier);
    // Reinterpret the unsigned product's bits as `i128` for the shared setter.
    self.set_i128(dst_lo, dst_hi, product as i128);
    ControlFlow::Continue(())
}
}
33 changes: 33 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,39 @@ macro_rules! for_each_extended_op {
vfma32x4 = Vfma32x4 { dst: VReg, a: VReg, b: VReg, c: VReg };
/// `dst = ieee_fma(a, b, c)`
vfma64x2 = Vfma64x2 { dst: VReg, a: VReg, b: VReg, c: VReg };

/// `dst_lo:dst_hi = lhs_lo:lhs_hi + rhs_lo:rhs_hi`
xadd128 = Xadd128 {
dst_lo: XReg,
dst_hi: XReg,
lhs_lo: XReg,
lhs_hi: XReg,
rhs_lo: XReg,
rhs_hi: XReg
};
/// `dst_lo:dst_hi = lhs_lo:lhs_hi - rhs_lo:rhs_hi`
xsub128 = Xsub128 {
dst_lo: XReg,
dst_hi: XReg,
lhs_lo: XReg,
lhs_hi: XReg,
rhs_lo: XReg,
rhs_hi: XReg
};
/// `dst_lo:dst_hi = sext(lhs) * sext(rhs)`
xwidemul64_s = Xwidemul64S {
dst_lo: XReg,
dst_hi: XReg,
lhs: XReg,
rhs: XReg
};
/// `dst_lo:dst_hi = zext(lhs) * zext(rhs)`
xwidemul64_u = Xwidemul64U {
dst_lo: XReg,
dst_hi: XReg,
lhs: XReg,
rhs: XReg
};
}
};
}
Expand Down

0 comments on commit 753fce8

Please sign in to comment.