Skip to content

Commit

Permalink
boots: Emit Int128 constants at end of instruction stream
Browse files Browse the repository at this point in the history
  • Loading branch information
dinfuehr committed Jan 2, 2025
1 parent 18890a3 commit d6fa00c
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 21 deletions.
79 changes: 79 additions & 0 deletions pkgs/boots/assembler/x64.dora
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,16 @@ impl AssemblerX64 {
self.emitAddress(dest.lowBits(), src);
}

// Emits the non-VEX `andps` form whose source operand is addressed through
// a label: an optional REX prefix, the opcode bytes 0F 54, then the
// label-relative operand. Only allowed when AVX2 is not in use; the
// VEX-encoded counterpart is `vandps_rl`.
// NOTE(review): legacy SSE `andps` faults on a 128-bit memory operand that is
// not 16-byte aligned — confirm the data bound to `src` is emitted aligned.
pub fn andps_rl(dest: FloatRegister, src: Label) {
    assert(!self.hasAvx2);

    // A REX prefix is emitted only when the destination register asks for it;
    // just the second flag is set.
    let needsRex = dest.needsRexBit();
    if needsRex {
        self.emitRex(false, true, false, false);
    }

    // Opcode bytes.
    self.emitByte(0x0fu8);
    self.emitByte(0x54u8);

    // Register field from the low bits of `dest`, operand resolved via `src`.
    let destBits = dest.lowBits();
    self.emitLabelAddress(destBits, src);
}

// 64-bit AND of `lhs` with an immediate; all encoding work is delegated to
// the shared `emitAlu64Imm` helper.
pub fn andq_ri(lhs: Register, imm: Immediate) {
    // presumably the ModRM opcode extension (/4) and the short accumulator
    // opcode for AND — verify against `emitAlu64Imm`'s parameter meaning.
    let opcodeExt = 0b100i32;
    let opcode = 0x25u8;
    self.emitAlu64Imm(lhs, imm, opcodeExt, opcode);
}
Expand Down Expand Up @@ -1305,6 +1315,22 @@ impl AssemblerX64 {
self.emitAddress(dest.lowBits(), rhs);
}

// Emits the VEX-encoded `vandps` form with a label-addressed second source:
// VEX prefix, opcode byte 0x54, then the label-relative operand.
// Requires `hasAvx2`; the non-VEX counterpart is `andps_rl`.
pub fn vandps_rl(dest: FloatRegister, lhs: FloatRegister, rhs: Label) {
    assert(self.hasAvx2);

    // Register-dependent VEX inputs are read first, in the same order the
    // prefix consumes them.
    let destRexBit = dest.needsRexBit();
    let lhsValue = lhs.value();
    self.emitVex(destRexBit, false, false, VEX_MMMMM_0F, VEX_W0, lhsValue, VEX_L_SCALAR_128, VEX_PP_NONE);

    // Opcode byte.
    self.emitByte(0x54u8);

    // Register field from the low bits of `dest`, operand resolved via `rhs`.
    let destBits = dest.lowBits();
    self.emitLabelAddress(destBits, rhs);
}

pub fn vcvtsd2ss_rr(dest: FloatRegister, lhs: FloatRegister, rhs: FloatRegister) {
assert(self.hasAvx2);
self.emitVex(
Expand Down Expand Up @@ -1931,6 +1957,22 @@ impl AssemblerX64 {
self.emitAddress(dest.lowBits(), rhs);
}

// Emits the VEX-encoded `vxorpd` form with a label-addressed second source:
// VEX prefix (using VEX_PP_66), opcode byte 0x57, then the label-relative
// operand. Requires `hasAvx2`; the non-VEX counterpart is `xorpd_rl`.
pub fn vxorpd_rl(dest: FloatRegister, lhs: FloatRegister, rhs: Label) {
    assert(self.hasAvx2);

    // Register-dependent VEX inputs are read first, in the same order the
    // prefix consumes them.
    let destRexBit = dest.needsRexBit();
    let lhsValue = lhs.value();
    self.emitVex(destRexBit, false, false, VEX_MMMMM_0F, VEX_W0, lhsValue, VEX_L_SCALAR_128, VEX_PP_66);

    // Opcode byte.
    self.emitByte(0x57u8);

    // Register field from the low bits of `dest`, operand resolved via `rhs`.
    let destBits = dest.lowBits();
    self.emitLabelAddress(destBits, rhs);
}

pub fn vxorps_ra(dest: FloatRegister, lhs: FloatRegister, rhs: Address) {
assert(self.hasAvx2);
self.emitVex(
Expand All @@ -1947,6 +1989,22 @@ impl AssemblerX64 {
self.emitAddress(dest.lowBits(), rhs);
}

// Emits the VEX-encoded `vxorps` form with a label-addressed second source:
// VEX prefix (using VEX_PP_NONE), opcode byte 0x57, then the label-relative
// operand. Requires `hasAvx2`; the non-VEX counterpart is `xorps_rl`.
pub fn vxorps_rl(dest: FloatRegister, lhs: FloatRegister, rhs: Label) {
    assert(self.hasAvx2);

    // Register-dependent VEX inputs are read first, in the same order the
    // prefix consumes them.
    let destRexBit = dest.needsRexBit();
    let lhsValue = lhs.value();
    self.emitVex(destRexBit, false, false, VEX_MMMMM_0F, VEX_W0, lhsValue, VEX_L_SCALAR_128, VEX_PP_NONE);

    // Opcode byte.
    self.emitByte(0x57u8);

    // Register field from the low bits of `dest`, operand resolved via `rhs`.
    let destBits = dest.lowBits();
    self.emitLabelAddress(destBits, rhs);
}

pub fn vxorps_rr(dest: FloatRegister, lhs: FloatRegister, rhs: FloatRegister) {
assert(self.hasAvx2);
self.emitVex(
Expand Down Expand Up @@ -2018,6 +2076,17 @@ impl AssemblerX64 {
self.emitAddress(dest.lowBits(), src);
}

// Emits the non-VEX `xorpd` form whose source operand is addressed through
// a label: the 0x66 prefix, an optional REX prefix, the opcode bytes 0F 57,
// then the label-relative operand. Only allowed when AVX2 is not in use; the
// VEX-encoded counterpart is `vxorpd_rl`.
// NOTE(review): legacy SSE `xorpd` faults on a 128-bit memory operand that is
// not 16-byte aligned — confirm the data bound to `src` is emitted aligned.
pub fn xorpd_rl(dest: FloatRegister, src: Label) {
    assert(!self.hasAvx2);

    // The 0x66 prefix is written ahead of any REX prefix.
    self.emitByte(0x66u8);

    // A REX prefix is emitted only when the destination register asks for it;
    // just the second flag is set.
    let needsRex = dest.needsRexBit();
    if needsRex {
        self.emitRex(false, true, false, false);
    }

    // Opcode bytes.
    self.emitByte(0x0Fu8);
    self.emitByte(0x57u8);

    // Register field from the low bits of `dest`, operand resolved via `src`.
    let destBits = dest.lowBits();
    self.emitLabelAddress(destBits, src);
}

pub fn xorps_ra(dest: FloatRegister, src: Address) {
assert(!self.hasAvx2);
self.emitRexSseAddressOptional(dest, src);
Expand All @@ -2026,6 +2095,16 @@ impl AssemblerX64 {
self.emitAddress(dest.lowBits(), src);
}

// Emits the non-VEX `xorps` form whose source operand is addressed through
// a label: an optional REX prefix, the opcode bytes 0F 57, then the
// label-relative operand. Only allowed when AVX2 is not in use; the
// VEX-encoded counterpart is `vxorps_rl`.
// NOTE(review): legacy SSE `xorps` faults on a 128-bit memory operand that is
// not 16-byte aligned — confirm the data bound to `src` is emitted aligned.
pub fn xorps_rl(dest: FloatRegister, src: Label) {
    assert(!self.hasAvx2);

    // A REX prefix is emitted only when the destination register asks for it;
    // just the second flag is set.
    let needsRex = dest.needsRexBit();
    if needsRex {
        self.emitRex(false, true, false, false);
    }

    // Opcode bytes.
    self.emitByte(0x0Fu8);
    self.emitByte(0x57u8);

    // Register field from the low bits of `dest`, operand resolved via `src`.
    let destBits = dest.lowBits();
    self.emitLabelAddress(destBits, src);
}

pub fn xorps_rr(dest: FloatRegister, src: FloatRegister) {
assert(!self.hasAvx2);
self.emitRexSseModRmOptional(dest, src);
Expand Down
36 changes: 15 additions & 21 deletions pkgs/boots/codegen/x64.dora
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ impl CodeGenX64 {
self.asm.emitInt64(value.asInt64());
}

ConstPoolValue::Int128(low, high) => {
self.asm.emitInt64(low);
self.asm.emitInt64(high);
}

_ => unreachable[()](),
}
}
Expand Down Expand Up @@ -170,23 +175,17 @@ impl CodeGenX64 {
}

// Emits an `andps`-style AND of `src` with the 128-bit constant given as
// (`lower`, `upper`), writing to `dest`.
// NOTE(review): this hunk is rendered without +/- markers, so statements from
// both sides of the commit appear interleaved (constant-pool/rip-patching lines
// next to label/epilog-constant lines) — confirm against the real file which
// statements are live before changing anything here.
fn andps_ri128(dest: FloatRegister, src: FloatRegister, lower: Int64, upper: Int64) {
// Constant-pool variant: push the constant and remember the entry's `disp`.
let entry = self.constPool.push(ConstPoolValue::Int128(lower, upper));
let const_offset = entry.disp;
// Label variant: create a label and queue (label, constant) in `epilogConstants`.
let label = self.asm.createLabel();
self.epilogConstants.push((label, ConstPoolValue::Int128(lower, upper)));

if self.hasAvx2 {
// VEX path takes `dest` and `src` as separate operands, so no copy is needed.
self.asm.vandps_ra(dest, src, AsmAddress::rip(0i32));
self.asm.vandps_rl(dest, src, label);
} else {
// Non-VEX path only takes `dest`, so `src` is copied into it first when
// the two registers differ.
if dest != src {
self.asm.movaps_rr(dest, src);
}
self.asm.andps_ra(dest, AsmAddress::rip(0i32));
self.asm.andps_rl(dest, label);
}

// Rip-patching sequence: compute `disp` from the pool offset and the current
// position, rewind 4 bytes, overwrite them with `disp`, then move the
// position back to the end.
let inst_end = self.asm.position();
let disp = -(const_offset + inst_end);
self.asm.setPosition(inst_end - 4i32);
self.asm.emitInt32(disp);
self.asm.setPositionEnd();
}

fn movq_ra_gp(dest: Register, address: AsmAddress, ty: Type) {
Expand Down Expand Up @@ -660,31 +659,26 @@ impl CodeGenX64 {
1 << 63i32
};

let const_offset = self.constPool.push(ConstPoolValue::Int128(value, 0)).disp;
let label = self.asm.createLabel();
self.epilogConstants.push((label, ConstPoolValue::Int128(value, 0)));

if self.hasAvx2 {
if ty == Type::Float32 {
self.asm.vxorps_ra(dest, src, AsmAddress::rip(0i32))
self.asm.vxorps_rl(dest, src, label);
} else {
assert(ty == Type::Float64);
self.asm.vxorpd_ra(dest, src, AsmAddress::rip(0i32))
self.asm.vxorpd_rl(dest, src, label);
}
} else {
if ty == Type::Float32 {
self.asm.movss_rr(dest, src);
self.asm.xorps_ra(dest, AsmAddress::rip(0i32))
self.asm.xorps_rl(dest, label);
} else {
assert(ty == Type::Float64);
self.asm.movsd_rr(dest, src);
self.asm.xorpd_ra(dest, AsmAddress::rip(0i32))
self.asm.xorpd_rl(dest, label);
}
}

let inst_end = self.asm.position();
let disp = -(const_offset + inst_end);
self.asm.setPosition(inst_end - 4i32);
self.asm.emitInt32(disp);
self.asm.setPositionEnd();
}
}

Expand Down

0 comments on commit d6fa00c

Please sign in to comment.