From e44054dc74f69d8ffa1e2358509a43f50441fb39 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 25 Jul 2024 12:57:26 +0800 Subject: [PATCH 01/18] [deps] Bump T1 dependencies --- build.sc | 2 +- nix/t1/_sources/generated.json | 46 ++++++++++++++++++++++++---------- nix/t1/_sources/generated.nix | 32 +++++++++++------------ nix/t1/omreader.nix | 2 +- nix/t1/t1.nix | 2 +- script/build.sc | 2 +- script/default.nix | 2 +- 7 files changed, 54 insertions(+), 34 deletions(-) diff --git a/build.sc b/build.sc index eb52a1c3bc..8e2cc5c3d6 100644 --- a/build.sc +++ b/build.sc @@ -20,7 +20,7 @@ object v { val scala = "2.13.14" val mainargs = ivy"com.lihaoyi::mainargs:0.5.0" val oslib = ivy"com.lihaoyi::os-lib:0.9.1" - val upickle = ivy"com.lihaoyi::upickle:3.1.3" + val upickle = ivy"com.lihaoyi::upickle:3.3.1" val spire = ivy"org.typelevel::spire:latest.integration" val evilplot = ivy"io.github.cibotech::evilplot:latest.integration" } diff --git a/nix/t1/_sources/generated.json b/nix/t1/_sources/generated.json index 6965570ad6..041aaf641b 100644 --- a/nix/t1/_sources/generated.json +++ b/nix/t1/_sources/generated.json @@ -19,9 +19,29 @@ }, "version": "4a81e23e1794844b36c53385d343475d4d7eca49" }, + "berkeley-hardfloat": { + "cargoLocks": null, + "date": "2024-06-05", + "extract": null, + "name": "berkeley-hardfloat", + "passthru": null, + "pinned": false, + "src": { + "deepClone": false, + "fetchSubmodules": false, + "leaveDotGit": false, + "name": null, + "owner": "ucb-bar", + "repo": "berkeley-hardfloat", + "rev": "26f00d00c3f3f57480065e02bfcfde3d3b41ec51", + "sha256": "sha256-gA1Ol7xnzC+10lGwK9+ftfJcMhKsC0KhjENQvUg3u88=", + "type": "github" + }, + "version": "26f00d00c3f3f57480065e02bfcfde3d3b41ec51" + }, "chisel": { "cargoLocks": null, - "date": "2024-07-12", + "date": "2024-07-25", "extract": null, "name": "chisel", "passthru": null, @@ -33,11 +53,11 @@ "name": null, "owner": "chipsalliance", "repo": "chisel", - "rev": "ae5434977a0c0ede55a46847bfcbc8dbf2286c35", - 
"sha256": "sha256-POPpNMnbe4IidbqSlrgBzWHRn6eeL6gh+MuT3v6bw2w=", + "rev": "73e96ea5db53ad176e6a9011fc42c888c901d7e2", + "sha256": "sha256-VvK0NfvXh4oHn2rj2OWTe1zpONt6ReyvhMGDxa3ikE4=", "type": "github" }, - "version": "ae5434977a0c0ede55a46847bfcbc8dbf2286c35" + "version": "73e96ea5db53ad176e6a9011fc42c888c901d7e2" }, "chisel-interface": { "cargoLocks": null, @@ -61,7 +81,7 @@ }, "riscv-opcodes": { "cargoLocks": null, - "date": "2024-04-10", + "date": "2024-07-24", "extract": null, "name": "riscv-opcodes", "passthru": null, @@ -73,15 +93,15 @@ "name": null, "owner": "riscv", "repo": "riscv-opcodes", - "rev": "9fa26954e79d4403eedcbe1b35395001bbbeb8b1", - "sha256": "sha256-Gt3v8/VVNhB4IFL7kud8Y7EnSM2/2H4urV1AmBviP9E=", + "rev": "07b21cc5143a15959eda12e30aa40cea0971efe0", + "sha256": "sha256-B9njfBxZfm7xkSKBD8JOUWIKEzL8ra/X9FKC3CJ2gK8=", "type": "github" }, - "version": "9fa26954e79d4403eedcbe1b35395001bbbeb8b1" + "version": "07b21cc5143a15959eda12e30aa40cea0971efe0" }, "rvdecoderdb": { "cargoLocks": null, - "date": "2024-01-28", + "date": "2024-07-25", "extract": null, "name": "rvdecoderdb", "passthru": null, @@ -93,10 +113,10 @@ "name": null, "owner": "sequencer", "repo": "rvdecoderdb", - "rev": "d65525e7e18004b0877d8fbe2c435296ab986f44", - "sha256": "sha256-MzEoFjyUgarR62ux4ngYNFOgvAoeasdr1EVhaCvuh+Q=", + "rev": "6f22826d2c8facb6bf0b41f4bea26a2225751220", + "sha256": "sha256-4Hwa2Z4mmALy4ZElWzxFgqC+7EsyBhahVYlVUzyYKF4=", "type": "github" }, - "version": "d65525e7e18004b0877d8fbe2c435296ab986f44" - }, + "version": "6f22826d2c8facb6bf0b41f4bea26a2225751220" + } } \ No newline at end of file diff --git a/nix/t1/_sources/generated.nix b/nix/t1/_sources/generated.nix index 6373034424..21acd89f9b 100644 --- a/nix/t1/_sources/generated.nix +++ b/nix/t1/_sources/generated.nix @@ -15,27 +15,27 @@ }; berkeley-hardfloat = { pname = "berkeley-hardfloat"; - version = "b3c8a38c286101973b3bc071f7918392343faba7"; + version = "26f00d00c3f3f57480065e02bfcfde3d3b41ec51"; src = 
fetchFromGitHub { owner = "ucb-bar"; repo = "berkeley-hardfloat"; - rev = "b3c8a38c286101973b3bc071f7918392343faba7"; + rev = "26f00d00c3f3f57480065e02bfcfde3d3b41ec51"; fetchSubmodules = false; - sha256 = "sha256-3j6K/qFuH8PqJT6zHVTIphq9HWxmSGoIqDo9GV1bxmU="; + sha256 = "sha256-gA1Ol7xnzC+10lGwK9+ftfJcMhKsC0KhjENQvUg3u88="; }; - date = "2023-10-25"; + date = "2024-06-05"; }; chisel = { pname = "chisel"; - version = "ae5434977a0c0ede55a46847bfcbc8dbf2286c35"; + version = "73e96ea5db53ad176e6a9011fc42c888c901d7e2"; src = fetchFromGitHub { owner = "chipsalliance"; repo = "chisel"; - rev = "ae5434977a0c0ede55a46847bfcbc8dbf2286c35"; + rev = "73e96ea5db53ad176e6a9011fc42c888c901d7e2"; fetchSubmodules = false; - sha256 = "sha256-POPpNMnbe4IidbqSlrgBzWHRn6eeL6gh+MuT3v6bw2w="; + sha256 = "sha256-VvK0NfvXh4oHn2rj2OWTe1zpONt6ReyvhMGDxa3ikE4="; }; - date = "2024-07-12"; + date = "2024-07-25"; }; chisel-interface = { pname = "chisel-interface"; @@ -51,26 +51,26 @@ }; riscv-opcodes = { pname = "riscv-opcodes"; - version = "9fa26954e79d4403eedcbe1b35395001bbbeb8b1"; + version = "07b21cc5143a15959eda12e30aa40cea0971efe0"; src = fetchFromGitHub { owner = "riscv"; repo = "riscv-opcodes"; - rev = "9fa26954e79d4403eedcbe1b35395001bbbeb8b1"; + rev = "07b21cc5143a15959eda12e30aa40cea0971efe0"; fetchSubmodules = false; - sha256 = "sha256-Gt3v8/VVNhB4IFL7kud8Y7EnSM2/2H4urV1AmBviP9E="; + sha256 = "sha256-B9njfBxZfm7xkSKBD8JOUWIKEzL8ra/X9FKC3CJ2gK8="; }; - date = "2024-04-10"; + date = "2024-07-24"; }; rvdecoderdb = { pname = "rvdecoderdb"; - version = "d65525e7e18004b0877d8fbe2c435296ab986f44"; + version = "6f22826d2c8facb6bf0b41f4bea26a2225751220"; src = fetchFromGitHub { owner = "sequencer"; repo = "rvdecoderdb"; - rev = "d65525e7e18004b0877d8fbe2c435296ab986f44"; + rev = "6f22826d2c8facb6bf0b41f4bea26a2225751220"; fetchSubmodules = false; - sha256 = "sha256-MzEoFjyUgarR62ux4ngYNFOgvAoeasdr1EVhaCvuh+Q="; + sha256 = "sha256-4Hwa2Z4mmALy4ZElWzxFgqC+7EsyBhahVYlVUzyYKF4="; }; - date = 
"2024-01-28"; + date = "2024-07-25"; }; } diff --git a/nix/t1/omreader.nix b/nix/t1/omreader.nix index a8d5da3992..ae2c1e4995 100644 --- a/nix/t1/omreader.nix +++ b/nix/t1/omreader.nix @@ -38,7 +38,7 @@ let ./../../common.sc ]; }; - millDepsHash = "sha256-ZwIl6YsaGde3ikbzxLzY2+/XTc5O2dQrOMKcwhKEq+k="; + millDepsHash = "sha256-vrxTqskAH7H598ZWRC/+KAXOQlQ6f+gL9c0hvD25xOM="; nativeBuildInputs = [ submodules.setupHook ]; }; diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index 56d46235a5..89dd7de5a9 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ -41,7 +41,7 @@ let ./../../common.sc ]; }; - millDepsHash = "sha256-ZwIl6YsaGde3ikbzxLzY2+/XTc5O2dQrOMKcwhKEq+k="; + millDepsHash = "sha256-vrxTqskAH7H598ZWRC/+KAXOQlQ6f+gL9c0hvD25xOM="; nativeBuildInputs = [ submodules.setupHook ]; }; diff --git a/script/build.sc b/script/build.sc index 8458f64f30..a6cc710410 100644 --- a/script/build.sc +++ b/script/build.sc @@ -10,7 +10,7 @@ trait ScriptModule extends ScalaModule { val scala3 = "3.3.3" val mainargs = ivy"com.lihaoyi::mainargs:0.5.0" val oslib = ivy"com.lihaoyi::os-lib:0.10.0" - val upickle = ivy"com.lihaoyi::upickle:3.1.3" + val upickle = ivy"com.lihaoyi::upickle:3.3.1" def scalaVersion = scala3 def scalacOptions = Seq("-new-syntax") diff --git a/script/default.nix b/script/default.nix index d1bc267190..65f992c921 100644 --- a/script/default.nix +++ b/script/default.nix @@ -30,7 +30,7 @@ let ./build.sc ]; }; - millDepsHash = "sha256-J8bBgM/F+8x8EQ1DR6Va/ZY2hnsjkkzk4a+ctDMKK3k="; + millDepsHash = "sha256-89K7QEq3k50gvs4sbXu7rHajC4tmnQCqB4m5ybBTn6k="; }; passthru.withLsp = self.overrideAttrs (old: { From b944ca037ff333079124f6752eb1e5eda1bc38c9 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 19 Jul 2024 14:26:46 +0800 Subject: [PATCH 02/18] [rtl] migrate T1 to FixedIOModule --- ipemu/src/TestBench.scala | 335 +++++++++++++++++++++----------------- t1/src/T1.scala | 159 ++++++++++-------- 2 files changed, 277 insertions(+), 217 deletions(-) diff --git 
a/ipemu/src/TestBench.scala b/ipemu/src/TestBench.scala index e29782144e..8826670d03 100644 --- a/ipemu/src/TestBench.scala +++ b/ipemu/src/TestBench.scala @@ -23,16 +23,21 @@ class TestBenchOM extends Class { t1 := t1In } -class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) extends RawModule { +class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) + extends RawModule + with ImplicitClock + with ImplicitReset { val omInstance: Instance[TestBenchOM] = Instantiate(new TestBenchOM) - val omType: ClassType = omInstance.toDefinition.getClassType + val omType: ClassType = omInstance.toDefinition.getClassType @public val om: Property[ClassType] = IO(Output(Property[omType.Type]())) om := omInstance.getPropertyReference - val clockGen = Module(new ExtModule with HasExtModuleInline { + lazy val clockGen = Module(new ExtModule with HasExtModuleInline { + override def desiredName = "ClockGen" - setInline(s"$desiredName.sv", + setInline( + s"$desiredName.sv", s"""module $desiredName(output reg clock, output reg reset); | export "DPI-C" function dump_wave; | function dump_wave(input string file); @@ -54,179 +59,214 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) extends val clock = IO(Output(Bool())) val reset = IO(Output(Bool())) }) + def clock = clockGen.clock.asClock + def reset = clockGen.reset + override def implicitClock = clockGen.clock.asClock + override def implicitReset = clockGen.reset + val dut: Instance[T1] = generator.instance() - val clock: Clock = clockGen.clock.asClock - val reset: Bool = clockGen.reset - val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) + val simulationTime: UInt = RegInit(0.U(64.W)) simulationTime := simulationTime + 1.U - val dut: T1 = withClockAndReset(clock, reset)(Module(generator.module())) - omInstance.t1In := Property(dut.om.asAnyClassType) + dut.io.clock := clockGen.clock.asClock + dut.io.reset := clockGen.reset + omInstance.t1In := 
Property(dut.io.om.asAnyClassType) // Instruction Drivers - withClockAndReset(clock, reset) { - // TODO: this initial way cannot happen before reset... - val initFlag = RegInit(false.B) - when(!initFlag) { - initFlag := true.B - printf(cf"""{"event":"SimulationStart","cycle":${simulationTime}}\n""") - } - val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) - when(watchdog =/= 0.U) { - stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") - } + // TODO: this initial way cannot happen before reset... + val initFlag = RegInit(false.B) + when(!initFlag) { + initFlag := true.B + printf(cf"""{"event":"SimulationStart","cycle":${simulationTime}}\n""") + } + val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) + when(watchdog =/= 0.U) { + stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") + } - // uint32_t -> svBitVecVal -> reference type with 7 length. - class Issue extends Bundle { - val instruction: UInt = UInt(32.W) - val src1Data: UInt = UInt(32.W) - val src2Data: UInt = UInt(32.W) - // mstatus, vstatus? - val vtype: UInt = UInt(32.W) - val vl: UInt = UInt(32.W) - // vlenb - val vstart: UInt = UInt(32.W) - // vxrm, vxsat are merged to vcsr - val vcsr: UInt = UInt(32.W) - // meta is used to control the simulation. 
- // 0 is reserved, aka not valid - // 1 is normal, it's a valid instruction - // 2 is fence, it will request - // others are exit, will end the simulation immediately - val meta: UInt = UInt(32.W) - } - class Retire extends Bundle { - val rd: UInt = UInt(32.W) - val data: UInt = UInt(32.W) - val writeRd: UInt = UInt(32.W) - val vxsat: UInt = UInt(32.W) - } - val issue = WireDefault(0.U.asTypeOf(new Issue)) - val fence = RegInit(false.B) - val outstanding = RegInit(0.U(4.W)) - val doIssue: Bool = dut.request.ready && !fence - outstanding := outstanding + (doIssue && (issue.meta === 1.U)) - dut.response.valid - fence := Mux(doIssue, issue.meta === 2.U, fence && !dut.response.valid && !(outstanding === 0.U)) - - issue := RawClockedNonVoidFunctionCall("issue_vector_instruction", new Issue)( - clock, - doIssue, - ) - dut.request.bits.instruction := issue.instruction - dut.request.bits.src1Data := issue.src1Data - dut.request.bits.src2Data := issue.src2Data - dut.csrInterface.vlmul := issue.vtype(2, 0) - dut.csrInterface.vSew := issue.vtype(5, 3) - dut.csrInterface.vta := issue.vtype(6) - dut.csrInterface.vma := issue.vtype(7) - dut.csrInterface.vl := issue.vl - dut.csrInterface.vStart := issue.vstart - dut.csrInterface.vxrm := issue.vcsr(2, 1) - - dut.csrInterface.ignoreException := 0.U - dut.storeBufferClear := true.B - dut.request.valid := issue.meta === 1.U - when(issue.meta =/= 0.U && issue.meta =/= 1.U && issue.meta =/= 2.U) { - stop(cf"""{"event":"SimulationStop","reason": ${issue.meta},"cycle":${simulationTime}}\n""") - } - val retire = Wire(new Retire) - retire.rd := dut.response.bits.rd.bits - retire.data := dut.response.bits.data - retire.writeRd := dut.response.bits.rd.valid - retire.vxsat := dut.response.bits.vxsat - RawClockedVoidFunctionCall("retire_vector_instruction")(clock, dut.response.valid, retire) - val dummy = Wire(Bool()) - dummy := false.B - RawClockedVoidFunctionCall("retire_vector_mem")(clock, dut.response.bits.mem && dut.response.valid, 
dummy) + // uint32_t -> svBitVecVal -> reference type with 7 length. + class Issue extends Bundle { + val instruction: UInt = UInt(32.W) + val src1Data: UInt = UInt(32.W) + val src2Data: UInt = UInt(32.W) + // mstatus, vstatus? + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + // vlenb + val vstart: UInt = UInt(32.W) + // vxrm, vxsat are merged to vcsr + val vcsr: UInt = UInt(32.W) + // meta is used to control the simulation. + // 0 is reserved, aka not valid + // 1 is normal, it's a valid instruction + // 2 is fence, it will request + // others are exit, will end the simulation immediately + val meta: UInt = UInt(32.W) } + class Retire extends Bundle { + val rd: UInt = UInt(32.W) + val data: UInt = UInt(32.W) + val writeRd: UInt = UInt(32.W) + val vxsat: UInt = UInt(32.W) + } + val issue = WireDefault(0.U.asTypeOf(new Issue)) + val fence = RegInit(false.B) + val outstanding = RegInit(0.U(4.W)) + val doIssue: Bool = dut.io.request.ready && !fence + outstanding := outstanding + (doIssue && (issue.meta === 1.U)) - dut.io.response.valid + fence := Mux(doIssue, issue.meta === 2.U, fence && !dut.io.response.valid && !(outstanding === 0.U)) + + issue := RawClockedNonVoidFunctionCall("issue_vector_instruction", new Issue)( + clock, + doIssue + ) + dut.io.request.bits.instruction := issue.instruction + dut.io.request.bits.src1Data := issue.src1Data + dut.io.request.bits.src2Data := issue.src2Data + dut.io.csrInterface.vlmul := issue.vtype(2, 0) + dut.io.csrInterface.vSew := issue.vtype(5, 3) + dut.io.csrInterface.vta := issue.vtype(6) + dut.io.csrInterface.vma := issue.vtype(7) + dut.io.csrInterface.vl := issue.vl + dut.io.csrInterface.vStart := issue.vstart + dut.io.csrInterface.vxrm := issue.vcsr(2, 1) + + dut.io.csrInterface.ignoreException := 0.U + dut.io.storeBufferClear := true.B + dut.io.request.valid := issue.meta === 1.U + when(issue.meta =/= 0.U && issue.meta =/= 1.U && issue.meta =/= 2.U) { + stop(cf"""{"event":"SimulationStop","reason": 
${issue.meta},"cycle":${simulationTime}}\n""") + } + val retire = Wire(new Retire) + retire.rd := dut.io.response.bits.rd.bits + retire.data := dut.io.response.bits.data + retire.writeRd := dut.io.response.bits.rd.valid + retire.vxsat := dut.io.response.bits.vxsat + RawClockedVoidFunctionCall("retire_vector_instruction")(clock, dut.io.response.valid, retire) + val dummy = Wire(Bool()) + dummy := false.B + RawClockedVoidFunctionCall("retire_vector_mem")(clock, dut.io.response.bits.mem && dut.io.response.valid, dummy) // Memory Drivers Seq( - dut.highBandwidthLoadStorePort, - dut.indexedLoadStorePort - ).map(_.viewAs[AXI4RWIrrevocableVerilog]).lazyZip( - Seq("highBandwidthPort", "indexedAccessPort") - ).zipWithIndex.foreach { - case ((bundle: AXI4RWIrrevocableVerilog, channelName: String), index: Int) => - val agent = Module(new AXI4SlaveAgent( - AXI4SlaveAgentParameter( - name= channelName, - axiParameter = bundle.parameter, - outstanding = 4, - readPayloadSize = 1, - writePayloadSize = 1, - ) - )).suggestName(s"axi4_channel${index}_${channelName}") - agent.io.channel match { - case io: AXI4RWIrrevocableVerilog => io <> bundle - } - agent.io.clock := clock - agent.io.reset := reset - agent.io.channelId := index.U - agent.io.gateRead := false.B - agent.io.gateWrite := false.B - } + dut.io.highBandwidthLoadStorePort, + dut.io.indexedLoadStorePort + ).map(_.viewAs[AXI4RWIrrevocableVerilog]) + .lazyZip( + Seq("highBandwidthPort", "indexedAccessPort") + ) + .zipWithIndex + .foreach { + case ((bundle: AXI4RWIrrevocableVerilog, channelName: String), index: Int) => + val agent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = channelName, + axiParameter = bundle.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) + ) + ).suggestName(s"axi4_channel${index}_${channelName}") + agent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> bundle + } + agent.io.clock := clock + agent.io.reset := reset + agent.io.channelId := 
index.U + agent.io.gateRead := false.B + agent.io.gateWrite := false.B + } // Events for difftest and performance modeling - val laneProbes = dut.laneProbes.zipWithIndex.map{case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") - wire := probe.read(p) - wire + val laneProbes = dut.io.laneProbes.zipWithIndex.map { + case (p, idx) => + val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") + wire := probe.read(p) + wire } - val lsuProbe = probe.read(dut.lsuProbe).suggestName("lsuProbe") + val lsuProbe = probe.read(dut.io.lsuProbe).suggestName("lsuProbe") val storeUnitProbe = lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") val otherUnitProbe = lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") - val laneVrfProbes = dut.laneVrfProbes.zipWithIndex.map{ case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") - wire := probe.read(p) - wire + val laneVrfProbes = dut.io.laneVrfProbes.zipWithIndex.map { + case (p, idx) => + val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") + wire := probe.read(p) + wire + } + + val t1Probe = probe.read(dut.io.t1Probe) + + // vrf write + laneVrfProbes.zipWithIndex.foreach { + case (lane, i) => + when(lane.valid)( + printf( + cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" + ) + ) } + // memory write from store unit + when(storeUnitProbe.valid)( + printf( + cf"""{"event":"MemoryWrite","lsu_idx":${storeUnitProbe.index},"mask":"${storeUnitProbe.mask}%x","data":"${storeUnitProbe.data}%x","address":"${storeUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + // memory write from other unit + when(otherUnitProbe.valid)( + printf( + 
cf"""{"event":"MemoryWrite","lsu_idx":${otherUnitProbe.index},"mask":"${otherUnitProbe.mask}%x","data":"${otherUnitProbe.data}%x","address":"${otherUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + // issue + when(dut.io.request.fire)( + printf(cf"""{"event":"Issue","idx":${t1Probe.instructionCounter},"cycle":${simulationTime}}\n""") + ) + // check rd + when(dut.io.response.bits.rd.valid)( + printf( + cf"""{"event":"CheckRd","data":"${dut.io.response.bits.data}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""" + ) + ) + // lsu enq + when(lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) - val t1Probe = probe.read(dut.t1Probe) - - withClockAndReset(clock, reset) { - // vrf write - laneVrfProbes.zipWithIndex.foreach { case (lane, i) => when(lane.valid)(printf(cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""")) } - // memory write from store unit - when(storeUnitProbe.valid)(printf(cf"""{"event":"MemoryWrite","lsu_idx":${storeUnitProbe.index},"mask":"${storeUnitProbe.mask}%x","data":"${storeUnitProbe.data}%x","address":"${storeUnitProbe.address}%x","cycle":${simulationTime}}\n""")) - // memory write from other unit - when(otherUnitProbe.valid)(printf(cf"""{"event":"MemoryWrite","lsu_idx":${otherUnitProbe.index},"mask":"${otherUnitProbe.mask}%x","data":"${otherUnitProbe.data}%x","address":"${otherUnitProbe.address}%x","cycle":${simulationTime}}\n""")) - // issue - when(dut.request.fire)(printf(cf"""{"event":"Issue","idx":${t1Probe.instructionCounter},"cycle":${simulationTime}}\n""")) - // check rd - when(dut.response.bits.rd.valid)(printf(cf"""{"event":"CheckRd","data":"${dut.response.bits.data}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""")) - // lsu enq - 
when(lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) - - // allocate 2 * chainingSize scoreboards - val vrfWriteScoreboard: Seq[Valid[UInt]] = Seq.tabulate(2 * dut.parameter.chainingSize) { _ => RegInit(0.U.asTypeOf(Valid(UInt(16.W))))} - vrfWriteScoreboard.foreach(scoreboard => dontTouch(scoreboard)) - val instructionValid = - (laneProbes.map(laneProbe => laneProbe.instructionValid ## laneProbe.instructionValid) :+ - lsuProbe.lsuInstructionValid :+ t1Probe.instructionValid).reduce(_ | _) - val scoreboardEnq = Mux(t1Probe.instructionIssue, UIntToOH(t1Probe.issueTag), 0.U((2 * dut.parameter.chainingSize).W)) - vrfWriteScoreboard.zipWithIndex.foreach { case (scoreboard, tag) => + // allocate 2 * chainingSize scoreboards + val vrfWriteScoreboard: Seq[Valid[UInt]] = Seq.tabulate(2 * generator.parameter.chainingSize) { _ => + RegInit(0.U.asTypeOf(Valid(UInt(16.W)))) + } + vrfWriteScoreboard.foreach(scoreboard => dontTouch(scoreboard)) + val instructionValid = + (laneProbes.map(laneProbe => laneProbe.instructionValid ## laneProbe.instructionValid) :+ + lsuProbe.lsuInstructionValid :+ t1Probe.instructionValid).reduce(_ | _) + val scoreboardEnq = + Mux(t1Probe.instructionIssue, UIntToOH(t1Probe.issueTag), 0.U((2 * generator.parameter.chainingSize).W)) + vrfWriteScoreboard.zipWithIndex.foreach { + case (scoreboard, tag) => val writeEnq: UInt = VecInit( // vrf write from lane - laneProbes.flatMap(laneProbe => laneProbe.slots.map(slot => - slot.writeTag === tag.U && slot.writeQueueEnq && slot.writeMask.orR - )) ++ laneProbes.flatMap(laneProbe => laneProbe.crossWriteProbe.map(cp => - cp.bits.writeTag === tag.U && cp.valid && cp.bits.writeMask.orR - )) ++ + laneProbes.flatMap(laneProbe => + laneProbe.slots.map(slot => slot.writeTag === tag.U && slot.writeQueueEnq && slot.writeMask.orR) + ) ++ laneProbes.flatMap(laneProbe => + laneProbe.crossWriteProbe.map(cp => cp.bits.writeTag === tag.U && cp.valid && 
cp.bits.writeMask.orR) + ) ++ // vrf write from lsu lsuProbe.slots.map(slot => slot.dataInstruction === tag.U && slot.writeValid && slot.dataMask.orR) ++ // vrf write from Sequencer Some(t1Probe.writeQueueEnq.bits === tag.U && t1Probe.writeQueueEnq.valid && t1Probe.writeQueueEnqMask.orR) - ).asUInt + ).asUInt // always equal to array index scoreboard.bits := scoreboard.bits + PopCount(writeEnq) - when(scoreboard.valid && !instructionValid(tag)){ - printf(cf"""{"event":"VrfScoreboardReport","count":${scoreboard.bits},"issue_idx":${tag},"cycle":${simulationTime}}\n""") + when(scoreboard.valid && !instructionValid(tag)) { + printf( + cf"""{"event":"VrfScoreboardReport","count":${scoreboard.bits},"issue_idx":${tag},"cycle":${simulationTime}}\n""" + ) scoreboard.valid := false.B } when(scoreboardEnq(tag)) { @@ -234,6 +274,5 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) extends assert(!scoreboard.valid) scoreboard.bits := 0.U } - } } } diff --git a/t1/src/T1.scala b/t1/src/T1.scala index aeb7d07d07..901a11f24e 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -4,11 +4,11 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable, public} +import chisel3.experimental.hierarchy.{Definition, Instance, Instantiate, instantiable, public} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.probe.{Probe, ProbeValue, define} import chisel3.properties.{AnyClassType, Class, ClassType, Property} -import chisel3.util._ +import chisel3.util.{Decoupled, _} import chisel3.util.experimental.BitSet import org.chipsalliance.rvdecoderdb.Instruction import org.chipsalliance.t1.rtl.decoder.{Decoder, DecoderParam, T1CustomInstruction} @@ -17,6 +17,8 @@ import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4RWIrrevocabl import org.chipsalliance.t1.rtl.lsu.{LSU, LSUParameter, LSUProbe} import org.chipsalliance.t1.rtl.vrf.{RamType, 
VRFParam, VRFProbe} +import scala.collection.immutable.{ListMap, SeqMap} + // TODO: this should be a object model. There should 3 object model here: // 1. T1SubsystemOM(T1(OM), MemoryRegion, Cache configuration) // 2. T1(Lane(OM), VLEN, DLEN, uarch parameters, customer IDs(for floorplan);) @@ -283,54 +285,73 @@ class T1Probe(param: T1Parameter) extends Bundle { val responseCounter: UInt = UInt(param.instructionIndexBits.W) } +class T1Interface(parameter: T1Parameter) extends Record { + def clock = elements("clock").asInstanceOf[Clock] + def reset = elements("reset").asInstanceOf[Bool] + /** request from CPU. + * because the interrupt and exception of previous instruction is unpredictable, + * and the `kill` logic in Vector processor is too high, + * thus the request should come from commit stage to avoid any interrupt or excepiton. + */ + def request = elements("request").asInstanceOf[DecoupledIO[VRequest]] + /** response to CPU. */ + def response: ValidIO[VResponse] = elements("response").asInstanceOf[ValidIO[VResponse]] + /** CSR interface from CPU. */ + def csrInterface: CSRInterface = elements("csrInterface").asInstanceOf[CSRInterface] + /** from CPU LSU, store buffer is cleared, memory can observe memory requests after this is asserted. 
*/ + def storeBufferClear: Bool = elements("storeBufferClear").asInstanceOf[Bool] + def highBandwidthLoadStorePort: AXI4RWIrrevocable = elements("highBandwidthLoadStorePort").asInstanceOf[AXI4RWIrrevocable] + def indexedLoadStorePort: AXI4RWIrrevocable = elements("indexedLoadStorePort").asInstanceOf[AXI4RWIrrevocable] + def om: Property[ClassType] = elements("om").asInstanceOf[Property[ClassType]] + def lsuProbe: LSUProbe = elements("lsuProbe").asInstanceOf[LSUProbe] + def t1Probe: T1Probe = elements("t1Probe").asInstanceOf[T1Probe] + def laneProbes: Seq[LaneProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}Probe").asInstanceOf[LaneProbe]) + def laneVrfProbes: Seq[VRFProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}VrfProbe").asInstanceOf[VRFProbe]) + + val elements: SeqMap[String, Data] = SeqMap.from( + Seq( + "clock" -> Input(Clock()), + "reset" -> Input(Bool()), + "request" -> Flipped(Decoupled(new VRequest(parameter.xLen))), + "response" -> Valid(new VResponse(parameter.xLen)), + "csrInterface" -> Input(new CSRInterface(parameter.laneParam.vlMaxBits)), + "storeBufferClear" -> Input(Bool()), + "highBandwidthLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter), + "indexedLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32)), + "om" -> Output(Property[AnyClassType]()), + "lsuProbe" -> Output(Probe(new LSUProbe(parameter.lsuParameters))), + "t1Probe" -> Output(Probe(new T1Probe(parameter))), + ) ++ + Seq.tabulate(parameter.laneNumber)( + i => s"lane${i}Probe" -> Output(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits))) + ) ++ + Seq.tabulate(parameter.laneNumber)( + i => s"lane${i}VrfProbe" -> Output(Probe(new VRFProbe(parameter.laneParam.vrfParam.regNumBits, parameter.laneParam.vrfOffsetBits, parameter.laneParam.instructionIndexBits, parameter.laneParam.datapathWidth))) + ) + ) +} + /** Top of Vector processor: * couple to Rocket Core; * instantiate 
LSU, Decoder, Lane, CSR, Instruction Queue. * The logic of [[T1]] contains the Vector Sequencer and Mask Unit. */ -class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Parameter] { +@instantiable +class T1(val parameter: T1Parameter) + extends FixedIORawModule(new T1Interface(parameter)) + with SerializableModule[T1Parameter] + with ImplicitClock + with ImplicitReset { + def implicitClock: Clock = io.clock + def implicitReset: Reset = io.reset + val omInstance: Instance[T1OM] = Instantiate(new T1OM) val omType: ClassType = omInstance.toDefinition.getClassType - @public - val om: Property[ClassType] = IO(Output(Property[omType.Type]())) - om := omInstance.getPropertyReference + io.om := omInstance.getPropertyReference.asAnyClassType omInstance.vlenIn := Property(parameter.vLen) omInstance.dlenIn := Property(parameter.dLen) - /** request from CPU. - * because the interrupt and exception of previous instruction is unpredictable, - * and the `kill` logic in Vector processor is too high, - * thus the request should come from commit stage to avoid any interrupt or excepiton. - */ - @public - val request: DecoupledIO[VRequest] = IO(Flipped(Decoupled(new VRequest(parameter.xLen)))) - /** response to CPU. */ - @public - val response: ValidIO[VResponse] = IO(Valid(new VResponse(parameter.xLen))) - /** CSR interface from CPU. */ - @public - val csrInterface: CSRInterface = IO(Input(new CSRInterface(parameter.laneParam.vlMaxBits))) - /** from CPU LSU, store buffer is cleared, memory can observe memory requests after this is asserted. 
*/ - @public - val storeBufferClear: Bool = IO(Input(Bool())) - @public - val highBandwidthLoadStorePort: AXI4RWIrrevocable = IO(new AXI4RWIrrevocable(parameter.axi4BundleParameter)) - @public - val indexedLoadStorePort: AXI4RWIrrevocable = IO(new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32))) - // TODO: this is an example of adding a new Probe - @public - val lsuProbe = IO(Probe(new LSUProbe(parameter.lsuParameters))) - @public - val laneProbes = Seq.tabulate(parameter.laneNumber)(laneIdx => IO(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits))).suggestName(s"lane${laneIdx}Probe")) - @public - val laneVrfProbes = Seq.tabulate(parameter.laneNumber)(laneIdx => IO(Probe(new VRFProbe( - parameter.laneParam.vrfParam.regNumBits, - parameter.laneParam.vrfOffsetBits, - parameter.laneParam.instructionIndexBits, - parameter.laneParam.datapathWidth - ))).suggestName(s"lane${laneIdx}VrfProbe")) - /** the LSU Module */ val lsu: Instance[LSU] = Instantiate(new LSU(parameter.lsuParameters)) @@ -341,12 +362,12 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // TODO: uarch doc about the order of instructions val instructionCounter: UInt = RegInit(0.U(parameter.instructionIndexBits.W)) val nextInstructionCounter: UInt = instructionCounter + 1.U - when(request.fire) { instructionCounter := nextInstructionCounter } + when(io.request.fire) { instructionCounter := nextInstructionCounter } // todo: handle waw val responseCounter: UInt = RegInit(0.U(parameter.instructionIndexBits.W)) val nextResponseCounter: UInt = responseCounter + 1.U - when(response.fire) { responseCounter := nextResponseCounter } + when(io.response.fire) { responseCounter := nextResponseCounter } // maintained a 1 depth queue for VRequest. // TODO: directly maintain a `ready` signal @@ -356,24 +377,24 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa /** maintain a [[DecoupleIO]] for [[requestReg]]. 
*/ val requestRegDequeue = Wire(Decoupled(new VRequest(parameter.xLen))) // latch instruction, csr, decode result and instruction index to requestReg. - when(request.fire) { + when(io.request.fire) { // The LSU only need to know the instruction, and don't need information from decoder. // Thus we latch the request here, and send it to LSU. - requestReg.bits.request := request.bits + requestReg.bits.request := io.request.bits requestReg.bits.decodeResult := decode.decodeResult - requestReg.bits.csr := csrInterface + requestReg.bits.csr := io.csrInterface requestReg.bits.instructionIndex := instructionCounter // vd === 0 && not store type - requestReg.bits.vdIsV0 := (request.bits.instruction(11, 7) === 0.U) && - (request.bits.instruction(6) || !request.bits.instruction(5)) + requestReg.bits.vdIsV0 := (io.request.bits.instruction(11, 7) === 0.U) && + (io.request.bits.instruction(6) || !io.request.bits.instruction(5)) requestReg.bits.writeByte := Mux( decode.decodeResult(Decoder.red), // Must be smaller than dataPath 1.U, Mux( decode.decodeResult(Decoder.maskDestination), - (csrInterface.vl >> 3).asUInt + csrInterface.vl(2, 0).orR, - csrInterface.vl << (csrInterface.vSew + decode.decodeResult(Decoder.crossWrite)) + (io.csrInterface.vl >> 3).asUInt + io.csrInterface.vl(2, 0).orR, + io.csrInterface.vl << (io.csrInterface.vSew + decode.decodeResult(Decoder.crossWrite)) ) ) } @@ -381,13 +402,13 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // 0 1 -> update to false // 1 0 -> update to true // 1 1 -> don't update - requestReg.valid := Mux(request.fire ^ requestRegDequeue.fire, request.fire, requestReg.valid) + requestReg.valid := Mux(io.request.fire ^ requestRegDequeue.fire, io.request.fire, requestReg.valid) // ready when requestReg is free or it will be free in this cycle. 
- request.ready := !requestReg.valid || requestRegDequeue.ready + io.request.ready := !requestReg.valid || requestRegDequeue.ready // manually maintain a queue for requestReg. requestRegDequeue.bits := requestReg.bits.request requestRegDequeue.valid := requestReg.valid - decode.decodeInput := request.bits.instruction + decode.decodeInput := io.request.bits.instruction /** alias to [[requestReg.bits.decodeResult]], it is commonly used. */ val decodeResult: DecodeBundle = requestReg.bits.decodeResult @@ -626,7 +647,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa control.state.wVRFWrite := true.B } - when(responseCounter === control.record.instructionIndex && response.fire) { + when(responseCounter === control.record.instructionIndex && io.response.fire) { control.state.sCommit := true.B } @@ -684,12 +705,12 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // first type instruction val firstLane = ffo(completedVec.asUInt) val firstLaneIndex: UInt = OHToUInt(firstLane)(log2Ceil(parameter.laneNumber) - 1, 0) - response.bits.rd.valid := lastSlotCommit && decodeResultReg(Decoder.targetRd) - response.bits.rd.bits := vd + io.response.bits.rd.valid := lastSlotCommit && decodeResultReg(Decoder.targetRd) + io.response.bits.rd.bits := vd if (parameter.fpuEnable) { - response.bits.float := decodeResultReg(Decoder.float) + io.response.bits.float := decodeResultReg(Decoder.float) } else { - response.bits.float := false.B + io.response.bits.float := false.B } when(requestRegDequeue.fire) { ffoIndexReg.valid := false.B @@ -1542,15 +1563,17 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa completedVec(index) := lane.laneResponse.bits.ffoSuccess flotReduceValid(index).foreach(d => d := lane.laneResponse.bits.fpReduceValid.get) } - // TODO: add other probes for lane at here. 
- define(laneProbes(index), lane.probe) - define(laneVrfProbes(index), lane.vrfProbe) lane } + laneVec.zipWithIndex.foreach { case (lane, index) => + define(io.laneProbes(index), lane.probe) + define(io.laneVrfProbes(index), lane.vrfProbe) + } + omInstance.lanesIn := Property(laneVec.map(_.om.asAnyClassType)) - define(lsuProbe, lsu._probe) + define(io.lsuProbe, lsu._probe) dataInWritePipeVec := VecInit(laneVec.map(_.writeQueueValid)) @@ -1606,8 +1629,8 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa } } - highBandwidthLoadStorePort <> lsu.axi4Port - indexedLoadStorePort <> lsu.simpleAccessPorts + io.highBandwidthLoadStorePort <> lsu.axi4Port + io.indexedLoadStorePort <> lsu.simpleAccessPorts // 暂时直接连lsu的写,后续需要处理scheduler的写 vrfWrite.zip(lsu.vrfWritePort).foreach { case (sink, source) => sink <> source } @@ -1658,10 +1681,10 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // Ensuring commit order inst.record.instructionIndex === responseCounter }) - response.valid := slotCommit.asUInt.orR - response.bits.data := Mux(ffoType, ffoIndexReg.bits, dataResult.bits) - response.bits.vxsat := DontCare - response.bits.mem := (slotCommit.asUInt & VecInit(slots.map(_.record.isLoadStore)).asUInt).orR + io.response.valid := slotCommit.asUInt.orR + io.response.bits.data := Mux(ffoType, ffoIndexReg.bits, dataResult.bits) + io.response.bits.vxsat := DontCare + io.response.bits.mem := (slotCommit.asUInt & VecInit(slots.map(_.record.isLoadStore)).asUInt).orR lastSlotCommit := slotCommit.last } @@ -1688,10 +1711,8 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa /** * Probes */ - @public - val t1Probe = IO(Output(Probe(new T1Probe(parameter)))) val probeWire = Wire(new T1Probe(parameter)) - define(t1Probe, ProbeValue(probeWire)) + define(io.t1Probe, ProbeValue(probeWire)) probeWire.instructionCounter := instructionCounter probeWire.instructionIssue := requestRegDequeue.fire 
probeWire.issueTag := requestReg.bits.instructionIndex From 51dfcb9be4755e187750f2bd690f730642d291a8 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 25 Jul 2024 17:37:58 +0800 Subject: [PATCH 03/18] [ipemu] fix naming prefix bug introduced by lazy val --- ipemu/src/TestBench.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipemu/src/TestBench.scala b/ipemu/src/TestBench.scala index 8826670d03..09900d253f 100644 --- a/ipemu/src/TestBench.scala +++ b/ipemu/src/TestBench.scala @@ -33,7 +33,7 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) val om: Property[ClassType] = IO(Output(Property[omType.Type]())) om := omInstance.getPropertyReference - lazy val clockGen = Module(new ExtModule with HasExtModuleInline { + val clockGen = Module(new ExtModule with HasExtModuleInline { override def desiredName = "ClockGen" setInline( From d4d5e8ece23c1c147a47435b401c7e28b883eda5 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 25 Jul 2024 15:22:52 +0800 Subject: [PATCH 04/18] [rtl] redesign T1Interface --- ipemu/src/TestBench.scala | 52 +++++++-------- t1/src/Bundles.scala | 72 ++++++++++++--------- t1/src/T1.scala | 131 +++++++++++++++++++------------------- 3 files changed, 131 insertions(+), 124 deletions(-) diff --git a/ipemu/src/TestBench.scala b/ipemu/src/TestBench.scala index 09900d253f..c38d5f9c91 100644 --- a/ipemu/src/TestBench.scala +++ b/ipemu/src/TestBench.scala @@ -112,40 +112,38 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) val issue = WireDefault(0.U.asTypeOf(new Issue)) val fence = RegInit(false.B) val outstanding = RegInit(0.U(4.W)) - val doIssue: Bool = dut.io.request.ready && !fence - outstanding := outstanding + (doIssue && (issue.meta === 1.U)) - dut.io.response.valid - fence := Mux(doIssue, issue.meta === 2.U, fence && !dut.io.response.valid && !(outstanding === 0.U)) + val doIssue: Bool = dut.io.issue.ready && !fence + outstanding := outstanding + (doIssue && 
(issue.meta === 1.U)) - dut.io.issue.valid + // TODO: refactor driver to spawn 3 scoreboards for record different retirement. + val t1Probe = probe.read(dut.io.t1Probe) + fence := Mux(doIssue, issue.meta === 2.U, fence && !t1Probe.retireValid && !(outstanding === 0.U)) issue := RawClockedNonVoidFunctionCall("issue_vector_instruction", new Issue)( clock, doIssue ) - dut.io.request.bits.instruction := issue.instruction - dut.io.request.bits.src1Data := issue.src1Data - dut.io.request.bits.src2Data := issue.src2Data - dut.io.csrInterface.vlmul := issue.vtype(2, 0) - dut.io.csrInterface.vSew := issue.vtype(5, 3) - dut.io.csrInterface.vta := issue.vtype(6) - dut.io.csrInterface.vma := issue.vtype(7) - dut.io.csrInterface.vl := issue.vl - dut.io.csrInterface.vStart := issue.vstart - dut.io.csrInterface.vxrm := issue.vcsr(2, 1) - - dut.io.csrInterface.ignoreException := 0.U - dut.io.storeBufferClear := true.B - dut.io.request.valid := issue.meta === 1.U + dut.io.issue.bits.instruction := issue.instruction + dut.io.issue.bits.rs1Data := issue.src1Data + dut.io.issue.bits.rs2Data := issue.src2Data + dut.io.issue.bits.vtype := issue.vtype + dut.io.issue.bits.vl := issue.vl + dut.io.issue.bits.vstart := issue.vstart + dut.io.issue.bits.vcsr := issue.vcsr + dut.io.issue.valid := issue.meta === 1.U when(issue.meta =/= 0.U && issue.meta =/= 1.U && issue.meta =/= 2.U) { stop(cf"""{"event":"SimulationStop","reason": ${issue.meta},"cycle":${simulationTime}}\n""") } val retire = Wire(new Retire) - retire.rd := dut.io.response.bits.rd.bits - retire.data := dut.io.response.bits.data - retire.writeRd := dut.io.response.bits.rd.valid - retire.vxsat := dut.io.response.bits.vxsat - RawClockedVoidFunctionCall("retire_vector_instruction")(clock, dut.io.response.valid, retire) + retire.rd := dut.io.retire.rd.bits.rdAddress + retire.data := dut.io.retire.rd.bits.rdData + retire.writeRd := dut.io.retire.rd.valid + retire.vxsat := dut.io.retire.csr.bits.vxsat + // TODO: + // retire.fflag := 
dut.io.retire.csr.bits.fflag + RawClockedVoidFunctionCall("retire_vector_instruction")(clock, t1Probe.retireValid, retire) val dummy = Wire(Bool()) dummy := false.B - RawClockedVoidFunctionCall("retire_vector_mem")(clock, dut.io.response.bits.mem && dut.io.response.valid, dummy) + RawClockedVoidFunctionCall("retire_vector_mem")(clock, dut.io.retire.mem.valid, dummy) // Memory Drivers Seq( @@ -201,8 +199,6 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) wire } - val t1Probe = probe.read(dut.io.t1Probe) - // vrf write laneVrfProbes.zipWithIndex.foreach { case (lane, i) => @@ -225,13 +221,13 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) ) ) // issue - when(dut.io.request.fire)( + when(dut.io.issue.fire)( printf(cf"""{"event":"Issue","idx":${t1Probe.instructionCounter},"cycle":${simulationTime}}\n""") ) // check rd - when(dut.io.response.bits.rd.valid)( + when(dut.io.retire.rd.valid)( printf( - cf"""{"event":"CheckRd","data":"${dut.io.response.bits.data}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""" + cf"""{"event":"CheckRd","data":"${dut.io.retire.rd.bits.rdData}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""" ) ) // lsu enq diff --git a/t1/src/Bundles.scala b/t1/src/Bundles.scala index fb79e2af32..bb8a36f413 100644 --- a/t1/src/Bundles.scala +++ b/t1/src/Bundles.scala @@ -10,23 +10,6 @@ import org.chipsalliance.t1.rtl.decoder.Decoder import org.chipsalliance.t1.rtl.lsu.LSUParameter import org.chipsalliance.t1.rtl.vrf.VRFParam -/** Interface from CPU. */ -class VRequest(xLen: Int) extends Bundle { - - /** instruction fetched by scalar processor. */ - val instruction: UInt = UInt(32.W) - - /** data read from scalar RF RS1. - * TODO: rename to rs1Data - */ - val src1Data: UInt = UInt(xLen.W) - - /** data read from scalar RF RS2. - * TODO: rename to rs2Data - */ - val src2Data: UInt = UInt(xLen.W) -} - /** Interface to CPU. 
*/ class VResponse(xLen: Int) extends Bundle { @@ -325,9 +308,6 @@ class CSRInterface(vlWidth: Int) extends Bundle { * we always keep the undisturbed behavior, since there is no rename here. */ val vma: Bool = Bool() - - /** TODO: remove it. */ - val ignoreException: Bool = Bool() } /** [[Lane]] -> [[T1]], response for [[LaneRequest]] */ @@ -501,20 +481,11 @@ class VRFWriteReport(param: VRFParam) extends Bundle { val state = new VRFInstructionState } -/** 为了decode, 指令需要在入口的时候打一拍, 这是需要保存的信息 */ class InstructionPipeBundle(parameter: T1Parameter) extends Bundle { - // 原始指令信息 - val request: VRequest = new VRequest(parameter.xLen) - // decode 的结果 + val issue: T1Issue = new T1Issue(parameter.xLen, parameter.vLen) val decodeResult: DecodeBundle = new DecodeBundle(Decoder.allFields(parameter.decoderParam)) - // 这条指令被vector分配的index val instructionIndex: UInt = UInt(parameter.instructionIndexBits.W) - // 指令的csr信息 - val csr = new CSRInterface(parameter.laneParam.vlMaxBits) - // 有写v0的风险 val vdIsV0: Bool = Bool() - - // How many bytes of registers will be written by one instruction? val writeByte: UInt = UInt(parameter.laneParam.vlMaxBits.W) } @@ -711,3 +682,44 @@ final class EmptyBundle extends Bundle class VRFReadPipe(size: BigInt) extends Bundle { val address: UInt = UInt(log2Ceil(size).W) } + +class T1Issue(xLen: Int, vlWidth: Int) extends Bundle { + + /** instruction fetched by scalar processor. */ + val instruction: UInt = UInt(32.W) + + /** data read from scalar RF RS1. */ + val rs1Data: UInt = UInt(xLen.W) + + /** data read from scalar RF RS2. 
*/ + val rs2Data: UInt = UInt(xLen.W) + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + val vstart: UInt = UInt(32.W) + val vcsr: UInt = UInt(32.W) +} + +object T1Issue { + def vlmul(issue: T1Issue) = issue.vtype(2, 0) + def vsew(issue: T1Issue) = issue.vtype(5, 3) + def vta(issue: T1Issue) = issue.vtype(6) + def vma(issue: T1Issue) = issue.vtype(7) + def vxrm(issue: T1Issue) = issue.vcsr(2, 1) +} + +class T1RdRetire(xLen: Int) extends Bundle { + val rdAddress: UInt = UInt(5.W) + val rdData: UInt = UInt(xLen.W) + val isFp: Bool = Bool() +} + +class T1CSRRetire extends Bundle { + val vxsat: UInt = UInt(32.W) + val fflag: UInt = UInt(32.W) +} + +class T1Retire(xLen: Int) extends Bundle { + val rd: ValidIO[T1RdRetire] = Valid(new T1RdRetire(xLen)) + val csr: ValidIO[T1CSRRetire] = Valid(new T1CSRRetire) + val mem: ValidIO[EmptyBundle] = Valid(new EmptyBundle) +} \ No newline at end of file diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 901a11f24e..64daf2ddc4 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -276,6 +276,7 @@ class T1Probe(param: T1Parameter) extends Bundle { val instructionCounter: UInt = UInt(param.instructionIndexBits.W) val instructionIssue: Bool = Bool() val issueTag: UInt = UInt(param.instructionIndexBits.W) + val retireValid: Bool = Bool() // write queue enq for mask unit val writeQueueEnq: ValidIO[UInt] = Valid(UInt(param.instructionIndexBits.W)) val writeQueueEnqMask: UInt = UInt((param.datapathWidth / 8).W) @@ -288,34 +289,22 @@ class T1Probe(param: T1Parameter) extends Bundle { class T1Interface(parameter: T1Parameter) extends Record { def clock = elements("clock").asInstanceOf[Clock] def reset = elements("reset").asInstanceOf[Bool] - /** request from CPU. - * because the interrupt and exception of previous instruction is unpredictable, - * and the `kill` logic in Vector processor is too high, - * thus the request should come from commit stage to avoid any interrupt or excepiton. 
- */ - def request = elements("request").asInstanceOf[DecoupledIO[VRequest]] - /** response to CPU. */ - def response: ValidIO[VResponse] = elements("response").asInstanceOf[ValidIO[VResponse]] - /** CSR interface from CPU. */ - def csrInterface: CSRInterface = elements("csrInterface").asInstanceOf[CSRInterface] - /** from CPU LSU, store buffer is cleared, memory can observe memory requests after this is asserted. */ - def storeBufferClear: Bool = elements("storeBufferClear").asInstanceOf[Bool] + def issue = elements("issue").asInstanceOf[DecoupledIO[T1Issue]] + def retire = elements("retire").asInstanceOf[T1Retire] def highBandwidthLoadStorePort: AXI4RWIrrevocable = elements("highBandwidthLoadStorePort").asInstanceOf[AXI4RWIrrevocable] def indexedLoadStorePort: AXI4RWIrrevocable = elements("indexedLoadStorePort").asInstanceOf[AXI4RWIrrevocable] def om: Property[ClassType] = elements("om").asInstanceOf[Property[ClassType]] + // TODO: refactor to an single Probe to avoid using Record on the [[T1Interface]]. 
def lsuProbe: LSUProbe = elements("lsuProbe").asInstanceOf[LSUProbe] def t1Probe: T1Probe = elements("t1Probe").asInstanceOf[T1Probe] def laneProbes: Seq[LaneProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}Probe").asInstanceOf[LaneProbe]) def laneVrfProbes: Seq[VRFProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}VrfProbe").asInstanceOf[VRFProbe]) - val elements: SeqMap[String, Data] = SeqMap.from( Seq( "clock" -> Input(Clock()), "reset" -> Input(Bool()), - "request" -> Flipped(Decoupled(new VRequest(parameter.xLen))), - "response" -> Valid(new VResponse(parameter.xLen)), - "csrInterface" -> Input(new CSRInterface(parameter.laneParam.vlMaxBits)), - "storeBufferClear" -> Input(Bool()), + "issue" -> Flipped(Decoupled(new T1Issue(parameter.xLen, parameter.vLen))), + "retire" -> new T1Retire(parameter.xLen), "highBandwidthLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter), "indexedLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32)), "om" -> Output(Property[AnyClassType]()), @@ -362,39 +351,46 @@ class T1(val parameter: T1Parameter) // TODO: uarch doc about the order of instructions val instructionCounter: UInt = RegInit(0.U(parameter.instructionIndexBits.W)) val nextInstructionCounter: UInt = instructionCounter + 1.U - when(io.request.fire) { instructionCounter := nextInstructionCounter } + when(io.issue.fire) { instructionCounter := nextInstructionCounter } + val retire = WireDefault(false.B) // todo: handle waw val responseCounter: UInt = RegInit(0.U(parameter.instructionIndexBits.W)) val nextResponseCounter: UInt = responseCounter + 1.U - when(io.response.fire) { responseCounter := nextResponseCounter } + when(retire) { responseCounter := nextResponseCounter } // maintained a 1 depth queue for VRequest. // TODO: directly maintain a `ready` signal /** register to latch instruction. 
*/ val requestReg: ValidIO[InstructionPipeBundle] = RegInit(0.U.asTypeOf(Valid(new InstructionPipeBundle(parameter)))) - + val requestRegCSR: CSRInterface = WireDefault(0.U.asTypeOf(new CSRInterface(parameter.laneParam.vlMaxBits))) + requestRegCSR.vlmul := requestReg.bits.issue.vtype(2, 0) + requestRegCSR.vSew := requestReg.bits.issue.vtype(5, 3) + requestRegCSR.vta := requestReg.bits.issue.vtype(6) + requestRegCSR.vma := requestReg.bits.issue.vtype(7) + requestRegCSR.vl := requestReg.bits.issue.vl + requestRegCSR.vStart := requestReg.bits.issue.vstart + requestRegCSR.vxrm := requestReg.bits.issue.vcsr(2, 1) /** maintain a [[DecoupleIO]] for [[requestReg]]. */ - val requestRegDequeue = Wire(Decoupled(new VRequest(parameter.xLen))) + val requestRegDequeue = Wire(Decoupled(new T1Issue(parameter.xLen, parameter.vLen))) // latch instruction, csr, decode result and instruction index to requestReg. - when(io.request.fire) { + when(io.issue.fire) { // The LSU only need to know the instruction, and don't need information from decoder. // Thus we latch the request here, and send it to LSU. 
- requestReg.bits.request := io.request.bits + requestReg.bits.issue := io.issue.bits requestReg.bits.decodeResult := decode.decodeResult - requestReg.bits.csr := io.csrInterface requestReg.bits.instructionIndex := instructionCounter // vd === 0 && not store type - requestReg.bits.vdIsV0 := (io.request.bits.instruction(11, 7) === 0.U) && - (io.request.bits.instruction(6) || !io.request.bits.instruction(5)) + requestReg.bits.vdIsV0 := (io.issue.bits.instruction(11, 7) === 0.U) && + (io.issue.bits.instruction(6) || !io.issue.bits.instruction(5)) requestReg.bits.writeByte := Mux( decode.decodeResult(Decoder.red), // Must be smaller than dataPath 1.U, Mux( decode.decodeResult(Decoder.maskDestination), - (io.csrInterface.vl >> 3).asUInt + io.csrInterface.vl(2, 0).orR, - io.csrInterface.vl << (io.csrInterface.vSew + decode.decodeResult(Decoder.crossWrite)) + (io.issue.bits.vl >> 3).asUInt + io.issue.bits.vl(2, 0).orR, + io.issue.bits.vl << (T1Issue.vsew(io.issue.bits) + decode.decodeResult(Decoder.crossWrite)) ) ) } @@ -402,17 +398,16 @@ class T1(val parameter: T1Parameter) // 0 1 -> update to false // 1 0 -> update to true // 1 1 -> don't update - requestReg.valid := Mux(io.request.fire ^ requestRegDequeue.fire, io.request.fire, requestReg.valid) + requestReg.valid := Mux(io.issue.fire ^ requestRegDequeue.fire, io.issue.fire, requestReg.valid) // ready when requestReg is free or it will be free in this cycle. - io.request.ready := !requestReg.valid || requestRegDequeue.ready + io.issue.ready := !requestReg.valid || requestRegDequeue.ready // manually maintain a queue for requestReg. - requestRegDequeue.bits := requestReg.bits.request + requestRegDequeue.bits := requestReg.bits.issue requestRegDequeue.valid := requestReg.valid - decode.decodeInput := io.request.bits.instruction + decode.decodeInput := io.issue.bits.instruction /** alias to [[requestReg.bits.decodeResult]], it is commonly used. 
*/ val decodeResult: DecodeBundle = requestReg.bits.decodeResult - // 这是当前正在mask unit 里面的那一条指令的csr信息,用来计算mask unit的控制信号 val csrRegForMaskUnit: CSRInterface = RegInit(0.U.asTypeOf(new CSRInterface(parameter.laneParam.vlMaxBits))) val vSewOHForMask: UInt = UIntToOH(csrRegForMaskUnit.vSew)(2, 0) @@ -428,26 +423,26 @@ class T1(val parameter: T1Parameter) // 只进mask unit的指令 val maskUnitInstruction: Bool = (decodeResult(Decoder.slid) || decodeResult(Decoder.mv)) val skipLastFromLane: Bool = isLoadStoreType || maskUnitInstruction || readOnlyInstruction - val instructionValid: Bool = requestReg.bits.csr.vl > requestReg.bits.csr.vStart + val instructionValid: Bool = requestReg.bits.issue.vl > requestReg.bits.issue.vstart // TODO: these should be decoding results /** load store that don't read offset. */ val noOffsetReadLoadStore: Bool = isLoadStoreType && (!requestRegDequeue.bits.instruction(26)) - val vSew1H: UInt = UIntToOH(requestReg.bits.csr.vSew) + val vSew1H: UInt = UIntToOH(T1Issue.vsew(requestReg.bits.issue)) val source1Extend: UInt = Mux1H( vSew1H(2, 0), Seq( - Fill(parameter.datapathWidth - 8, requestRegDequeue.bits.src1Data(7) && !decodeResult(Decoder.unsigned0)) - ## requestRegDequeue.bits.src1Data(7, 0), - Fill(parameter.datapathWidth - 16, requestRegDequeue.bits.src1Data(15) && !decodeResult(Decoder.unsigned0)) - ## requestRegDequeue.bits.src1Data(15, 0), - requestRegDequeue.bits.src1Data(31, 0) + Fill(parameter.datapathWidth - 8, requestRegDequeue.bits.rs1Data(7) && !decodeResult(Decoder.unsigned0)) + ## requestRegDequeue.bits.rs1Data(7, 0), + Fill(parameter.datapathWidth - 16, requestRegDequeue.bits.rs1Data(15) && !decodeResult(Decoder.unsigned0)) + ## requestRegDequeue.bits.rs1Data(15, 0), + requestRegDequeue.bits.rs1Data(31, 0) ) ) /** src1 from scalar core is a signed number. 
*/ val src1IsSInt: Bool = !requestReg.bits.decodeResult(Decoder.unsigned0) - val imm: UInt = requestReg.bits.request.instruction(19, 15) + val imm: UInt = requestReg.bits.issue.instruction(19, 15) // todo: spec 10.1: imm 默认是 sign-extend,但是有特殊情况 val immSignExtend: UInt = Fill(16, imm(4) && (vSew1H(2) || src1IsSInt)) ## Fill(8, imm(4) && (vSew1H(1) || vSew1H(2) || src1IsSInt)) ## @@ -647,7 +642,7 @@ class T1(val parameter: T1Parameter) control.state.wVRFWrite := true.B } - when(responseCounter === control.record.instructionIndex && io.response.fire) { + when(responseCounter === control.record.instructionIndex && retire) { control.state.sCommit := true.B } @@ -705,12 +700,12 @@ class T1(val parameter: T1Parameter) // first type instruction val firstLane = ffo(completedVec.asUInt) val firstLaneIndex: UInt = OHToUInt(firstLane)(log2Ceil(parameter.laneNumber) - 1, 0) - io.response.bits.rd.valid := lastSlotCommit && decodeResultReg(Decoder.targetRd) - io.response.bits.rd.bits := vd + io.retire.rd.valid := lastSlotCommit && decodeResultReg(Decoder.targetRd) + io.retire.rd.bits.rdAddress := vd if (parameter.fpuEnable) { - io.response.bits.float := decodeResultReg(Decoder.float) + io.retire.rd.bits.isFp := decodeResultReg(Decoder.float) } else { - io.response.bits.float := false.B + io.retire.rd.bits.isFp := false.B } when(requestRegDequeue.fire) { ffoIndexReg.valid := false.B @@ -733,7 +728,7 @@ class T1(val parameter: T1Parameter) * lmul - sew <- [-5, 3] * 选择信号 +5 -> lmul - sew + 5 <- [0, 8] */ - def largeThanVLMax(source: UInt, advance: Bool = false.B, csrInput:CSRInterface): Bool = { + def largeThanVLMax(source: UInt, advance: Bool = false.B, lmul: UInt, sew: UInt): Bool = { val vlenLog2 = log2Ceil(parameter.vLen) // 10 val cut = if (source.getWidth >= vlenLog2) source(vlenLog2 - 1, vlenLog2 - 9) @@ -745,15 +740,15 @@ class T1(val parameter: T1Parameter) largeList(i) := a a || b } - val extendVlmul = csrInput.vlmul(2) ## csrInput.vlmul - val selectWire = 
UIntToOH(5.U(4.W) + extendVlmul - csrInput.vSew)(8, 0).asBools.reverse + val extendVlmul = lmul(2) ## lmul + val selectWire = UIntToOH(5.U(4.W) + extendVlmul - sew)(8, 0).asBools.reverse Mux1H(selectWire, largeList) } // 算req上面的分开吧 val gatherWire = - Mux(decodeResult(Decoder.itype), requestRegDequeue.bits.instruction(19, 15), requestRegDequeue.bits.src1Data) + Mux(decodeResult(Decoder.itype), requestRegDequeue.bits.instruction(19, 15), requestRegDequeue.bits.rs1Data) val gatherAdvance = (gatherWire >> log2Ceil(parameter.vLen)).asUInt.orR - gatherOverlap := largeThanVLMax(gatherWire, gatherAdvance, requestReg.bits.csr) + gatherOverlap := largeThanVLMax(gatherWire, gatherAdvance, T1Issue.vlmul(requestReg.bits.issue), T1Issue.vsew(requestReg.bits.issue)) val slotValid = !control.state.idle val storeAfterSlide = isStoreType && (requestRegDequeue.bits.instruction(11, 7) === vd) instructionRAWReady := !((unOrderTypeInstruction && slotValid && @@ -783,9 +778,9 @@ class T1(val parameter: T1Parameter) vs2 := requestRegDequeue.bits.instruction(24, 20) vm := requestRegDequeue.bits.instruction(25) executeFinishReg := false.B - rs1 := requestRegDequeue.bits.src1Data + rs1 := requestRegDequeue.bits.rs1Data decodeResultReg := decodeResult - csrRegForMaskUnit := requestReg.bits.csr + csrRegForMaskUnit := requestRegCSR // todo: decode need execute control.state.sMaskUnitExecution := !maskUnitType maskTypeInstruction := maskType && !decodeResult(Decoder.maskSource) @@ -987,7 +982,7 @@ class T1(val parameter: T1Parameter) val compareWire = Mux(decodeResultReg(Decoder.slid), rs1, maskUnitData) val compareAdvance: Bool = (compareWire >> log2Ceil(parameter.vLen)).asUInt.orR - val compareResult: Bool = largeThanVLMax(compareWire, compareAdvance, csrRegForMaskUnit) + val compareResult: Bool = largeThanVLMax(compareWire, compareAdvance, csrRegForMaskUnit.vlmul, csrRegForMaskUnit.vSew) // 正在被gather使用的数据在data的那个组里 val gatherDataSelect = UIntToOH((false.B ## maskUnitDataOffset)(5 + 
(log2Ceil(parameter.laneNumber) max 1) - 1, 5)) val dataTail = Mux1H(UIntToOH(maskUnitEEW)(1, 0), Seq(3.U(2.W), 2.U(2.W))) @@ -1072,7 +1067,7 @@ class T1(val parameter: T1Parameter) // index >= vlMax 是写0 val overlapVlMax: Bool = !slideUp && (signBit || srcOversize) // select csr - val csrSelect = Mux(control.state.idle, requestReg.bits.csr, csrRegForMaskUnit) + val csrSelect = Mux(control.state.idle, requestRegCSR, csrRegForMaskUnit) // slid read val (_, readDataOffset, readLane, readOffset, readGrowth, lmulOverlap) = indexAnalysis(readIndex, csrSelect) gatherReadDataOffset := readDataOffset @@ -1434,7 +1429,7 @@ class T1(val parameter: T1Parameter) Mux( decodeResult(Decoder.nr) || decodeResult(Decoder.maskLogic), 2.U, - Mux(gather16, 1.U, Mux(decodeResult(Decoder.extend), extendDataEEW, requestReg.bits.csr.vSew)) + Mux(gather16, 1.U, Mux(decodeResult(Decoder.extend), extendDataEEW, T1Issue.vsew(requestReg.bits.issue))) ) ) @@ -1442,14 +1437,14 @@ class T1(val parameter: T1Parameter) decodeResult(Decoder.nr), // evl for Whole Vector Register Move -> vs1 * (vlen / datapathWidth) (requestRegDequeue.bits.instruction(17, 15) +& 1.U) ## 0.U(log2Ceil(parameter.vLen / parameter.datapathWidth).W), - requestReg.bits.csr.vl + requestReg.bits.issue.vl ) val vSewForLsu: UInt = Mux(lsWholeReg, 2.U, requestRegDequeue.bits.instruction(13, 12)) val evlForLsu: UInt = Mux( lsWholeReg, (requestRegDequeue.bits.instruction(31, 29) +& 1.U) ## 0.U(log2Ceil(parameter.vLen / parameter.datapathWidth).W), - requestReg.bits.csr.vl + requestReg.bits.issue.vl ) /** instantiate lanes. 
@@ -1489,7 +1484,7 @@ class T1(val parameter: T1Parameter) lane.laneRequest.bits.mask := maskType laneReady(index) := lane.laneRequest.ready - lane.csrInterface := requestReg.bits.csr + lane.csrInterface := requestRegCSR // index type EEW Decoded in the instruction lane.csrInterface.vSew := vSewSelect lane.csrInterface.vl := evlForLane @@ -1580,8 +1575,8 @@ class T1(val parameter: T1Parameter) // 连lsu lsu.request.valid := requestRegDequeue.fire && isLoadStoreType lsu.request.bits.instructionIndex := requestReg.bits.instructionIndex - lsu.request.bits.rs1Data := requestRegDequeue.bits.src1Data - lsu.request.bits.rs2Data := requestRegDequeue.bits.src2Data + lsu.request.bits.rs1Data := requestRegDequeue.bits.rs1Data + lsu.request.bits.rs2Data := requestRegDequeue.bits.rs2Data lsu.request.bits.instructionInformation.nf := requestRegDequeue.bits.instruction(31, 29) lsu.request.bits.instructionInformation.mew := requestRegDequeue.bits.instruction(28) lsu.request.bits.instructionInformation.mop := requestRegDequeue.bits.instruction(27, 26) @@ -1595,7 +1590,7 @@ class T1(val parameter: T1Parameter) lsu.maskInput.zip(lsu.maskSelect).foreach { case (data, index) => data := cutUInt(v0.asUInt, parameter.maskGroupWidth)(index) } - lsu.csrInterface := requestReg.bits.csr + lsu.csrInterface := requestRegCSR lsu.csrInterface.vl := evlForLsu lsu.writeReadyForLsu := VecInit(laneVec.map(_.writeReadyForLsu)).asUInt.andR lsu.vrfReadyToStore := VecInit(laneVec.map(_.vrfReadyToStore)).asUInt.andR @@ -1681,10 +1676,13 @@ class T1(val parameter: T1Parameter) // Ensuring commit order inst.record.instructionIndex === responseCounter }) - io.response.valid := slotCommit.asUInt.orR - io.response.bits.data := Mux(ffoType, ffoIndexReg.bits, dataResult.bits) - io.response.bits.vxsat := DontCare - io.response.bits.mem := (slotCommit.asUInt & VecInit(slots.map(_.record.isLoadStore)).asUInt).orR + retire := slotCommit.asUInt.orR + io.retire.rd.bits.rdData := Mux(ffoType, ffoIndexReg.bits, 
dataResult.bits) + // TODO: csr retire. + io.retire.csr.bits.vxsat := DontCare + io.retire.csr.bits.fflag := DontCare + io.retire.csr.valid := false.B + io.retire.mem.valid := (slotCommit.asUInt & VecInit(slots.map(_.record.isLoadStore)).asUInt).orR lastSlotCommit := slotCommit.last } @@ -1716,6 +1714,7 @@ class T1(val parameter: T1Parameter) probeWire.instructionCounter := instructionCounter probeWire.instructionIssue := requestRegDequeue.fire probeWire.issueTag := requestReg.bits.instructionIndex + probeWire.retireValid := retire // maskUnitWrite maskUnitWriteReady probeWire.writeQueueEnq.valid := maskUnitWrite.valid && maskUnitWriteReady probeWire.writeQueueEnq.bits := maskUnitWrite.bits.instructionIndex From 34f6dd61db712f789393bbcbb926e9e76f518e67 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 25 Jul 2024 11:02:15 +0000 Subject: [PATCH 05/18] [ci] update test case cycle data --- .github/cases/blastoise/default.json | 346 +++++++++++++-------------- .github/cases/machamp/default.json | 326 ++++++++++++------------- .github/cases/sandslash/default.json | 252 +++++++++---------- 3 files changed, 462 insertions(+), 462 deletions(-) diff --git a/.github/cases/blastoise/default.json b/.github/cases/blastoise/default.json index 14a4b6fd1a..8c2e502b94 100644 --- a/.github/cases/blastoise/default.json +++ b/.github/cases/blastoise/default.json @@ -13,7 +13,7 @@ "codegen.vaaddu_vv": 91859, "codegen.vaaddu_vx": 253663, "codegen.vadc_vim": 24447, - "codegen.vadc_vvm": 11643, + "codegen.vadc_vvm": 11644, "codegen.vadc_vxm": 31030, "codegen.vadd_vi": 49930, "codegen.vadd_vv": 23267, @@ -27,137 +27,137 @@ "codegen.vasubu_vx": 253663, "codegen.vcompress_vm": 22146, "codegen.vcpop_m": 1845, - "codegen.vdiv_vv": 32539, + "codegen.vdiv_vv": 32540, "codegen.vdiv_vx": 195383, - "codegen.vdivu_vv": 32635, + "codegen.vdivu_vv": 32636, "codegen.vdivu_vx": 203376, "codegen.vfirst_m": 1757, - "codegen.vid_v": 16010, + "codegen.vid_v": 16011, "codegen.viota_m": 43475, - 
"codegen.vl1re16_v": 691, - "codegen.vl1re32_v": 691, - "codegen.vl1re8_v": 691, - "codegen.vl2re16_v": 725, - "codegen.vl2re32_v": 725, - "codegen.vl2re8_v": 725, - "codegen.vl4re16_v": 793, - "codegen.vl4re32_v": 793, - "codegen.vl4re8_v": 793, - "codegen.vl8re16_v": 932, - "codegen.vl8re32_v": 929, - "codegen.vl8re8_v": 929, + "codegen.vl1re16_v": 692, + "codegen.vl1re32_v": 692, + "codegen.vl1re8_v": 692, + "codegen.vl2re16_v": 726, + "codegen.vl2re32_v": 726, + "codegen.vl2re8_v": 726, + "codegen.vl4re16_v": 794, + "codegen.vl4re32_v": 794, + "codegen.vl4re8_v": 794, + "codegen.vl8re16_v": 933, + "codegen.vl8re32_v": 930, + "codegen.vl8re8_v": 930, "codegen.vle16_v": 5080, "codegen.vle16ff_v": 9387, "codegen.vle32_v": 4308, "codegen.vle32ff_v": 6276, "codegen.vle8_v": 5838, - "codegen.vle8ff_v": 15712, + "codegen.vle8ff_v": 15713, "codegen.vlm_v": 732, - "codegen.vloxei16_v": 29312, + "codegen.vloxei16_v": 29313, "codegen.vloxei32_v": 17670, "codegen.vloxei8_v": 41487, "codegen.vloxseg2ei16_v": 27608, - "codegen.vloxseg2ei32_v": 17328, + "codegen.vloxseg2ei32_v": 17329, "codegen.vloxseg2ei8_v": 34145, - "codegen.vloxseg3ei16_v": 19259, - "codegen.vloxseg3ei32_v": 13217, - "codegen.vloxseg3ei8_v": 25302, - "codegen.vloxseg4ei16_v": 21528, - "codegen.vloxseg4ei32_v": 14212, - "codegen.vloxseg4ei8_v": 29226, - "codegen.vloxseg5ei16_v": 12343, - "codegen.vloxseg5ei32_v": 7567, - "codegen.vloxseg5ei8_v": 18277, - "codegen.vloxseg6ei16_v": 13186, - "codegen.vloxseg6ei32_v": 7967, - "codegen.vloxseg6ei8_v": 20081, - "codegen.vloxseg7ei16_v": 14029, - "codegen.vloxseg7ei32_v": 8367, - "codegen.vloxseg7ei8_v": 21885, - "codegen.vloxseg8ei16_v": 14872, - "codegen.vloxseg8ei32_v": 8767, - "codegen.vloxseg8ei8_v": 23689, + "codegen.vloxseg3ei16_v": 19260, + "codegen.vloxseg3ei32_v": 13218, + "codegen.vloxseg3ei8_v": 25303, + "codegen.vloxseg4ei16_v": 21529, + "codegen.vloxseg4ei32_v": 14213, + "codegen.vloxseg4ei8_v": 29227, + "codegen.vloxseg5ei16_v": 12344, + 
"codegen.vloxseg5ei32_v": 7568, + "codegen.vloxseg5ei8_v": 18278, + "codegen.vloxseg6ei16_v": 13187, + "codegen.vloxseg6ei32_v": 7968, + "codegen.vloxseg6ei8_v": 20082, + "codegen.vloxseg7ei16_v": 14030, + "codegen.vloxseg7ei32_v": 8368, + "codegen.vloxseg7ei8_v": 21886, + "codegen.vloxseg8ei16_v": 14873, + "codegen.vloxseg8ei32_v": 8768, + "codegen.vloxseg8ei8_v": 23690, "codegen.vlse16_v": 41373, "codegen.vlse32_v": 26608, - "codegen.vlse8_v": 72148, + "codegen.vlse8_v": 72149, "codegen.vlseg2e16_v": 4557, "codegen.vlseg2e32_v": 3682, "codegen.vlseg2e8_v": 5402, - "codegen.vlseg3e16_v": 3850, + "codegen.vlseg3e16_v": 3851, "codegen.vlseg3e32_v": 2806, - "codegen.vlseg3e8_v": 4896, + "codegen.vlseg3e8_v": 4897, "codegen.vlseg4e16_v": 3978, "codegen.vlseg4e32_v": 2896, "codegen.vlseg4e8_v": 5050, - "codegen.vlseg5e16_v": 3142, + "codegen.vlseg5e16_v": 3143, "codegen.vlseg5e32_v": 1821, - "codegen.vlseg5e8_v": 4554, + "codegen.vlseg5e8_v": 4555, "codegen.vlseg6e16_v": 3204, "codegen.vlseg6e32_v": 1863, - "codegen.vlseg6e8_v": 4644, + "codegen.vlseg6e8_v": 4645, "codegen.vlseg7e16_v": 3266, "codegen.vlseg7e32_v": 1905, - "codegen.vlseg7e8_v": 4750, - "codegen.vlseg8e16_v": 3324, - "codegen.vlseg8e32_v": 1943, - "codegen.vlseg8e8_v": 4840, + "codegen.vlseg7e8_v": 4751, + "codegen.vlseg8e16_v": 3325, + "codegen.vlseg8e32_v": 1944, + "codegen.vlseg8e8_v": 4841, "codegen.vlsseg2e16_v": 36703, - "codegen.vlsseg2e32_v": 21878, - "codegen.vlsseg2e8_v": 63758, + "codegen.vlsseg2e32_v": 21879, + "codegen.vlsseg2e8_v": 63759, "codegen.vlsseg3e16_v": 27053, - "codegen.vlsseg3e32_v": 14618, - "codegen.vlsseg3e8_v": 50953, + "codegen.vlsseg3e32_v": 14619, + "codegen.vlsseg3e8_v": 50954, "codegen.vlsseg4e16_v": 31308, - "codegen.vlsseg4e32_v": 16188, - "codegen.vlsseg4e8_v": 61548, - "codegen.vlsseg5e16_v": 19338, - "codegen.vlsseg5e32_v": 9043, - "codegen.vlsseg5e8_v": 43788, - "codegen.vlsseg6e16_v": 20953, - "codegen.vlsseg6e32_v": 9703, - "codegen.vlsseg6e8_v": 49183, - 
"codegen.vlsseg7e16_v": 22568, - "codegen.vlsseg7e32_v": 10363, - "codegen.vlsseg7e8_v": 54578, - "codegen.vlsseg8e16_v": 24183, - "codegen.vlsseg8e32_v": 11023, - "codegen.vlsseg8e8_v": 59973, - "codegen.vluxei16_v": 29312, + "codegen.vlsseg4e32_v": 16189, + "codegen.vlsseg4e8_v": 61549, + "codegen.vlsseg5e16_v": 19339, + "codegen.vlsseg5e32_v": 9044, + "codegen.vlsseg5e8_v": 43789, + "codegen.vlsseg6e16_v": 20954, + "codegen.vlsseg6e32_v": 9704, + "codegen.vlsseg6e8_v": 49184, + "codegen.vlsseg7e16_v": 22569, + "codegen.vlsseg7e32_v": 10364, + "codegen.vlsseg7e8_v": 54579, + "codegen.vlsseg8e16_v": 24184, + "codegen.vlsseg8e32_v": 11024, + "codegen.vlsseg8e8_v": 59974, + "codegen.vluxei16_v": 29313, "codegen.vluxei32_v": 17670, "codegen.vluxei8_v": 41487, "codegen.vluxseg2ei16_v": 27608, - "codegen.vluxseg2ei32_v": 17328, + "codegen.vluxseg2ei32_v": 17329, "codegen.vluxseg2ei8_v": 34145, - "codegen.vluxseg3ei16_v": 19259, - "codegen.vluxseg3ei32_v": 13217, - "codegen.vluxseg3ei8_v": 25302, - "codegen.vluxseg4ei16_v": 21528, - "codegen.vluxseg4ei32_v": 14212, - "codegen.vluxseg4ei8_v": 29226, - "codegen.vluxseg5ei16_v": 12343, - "codegen.vluxseg5ei32_v": 7567, - "codegen.vluxseg5ei8_v": 18277, - "codegen.vluxseg6ei16_v": 13186, - "codegen.vluxseg6ei32_v": 7967, - "codegen.vluxseg6ei8_v": 20081, - "codegen.vluxseg7ei16_v": 14029, - "codegen.vluxseg7ei32_v": 8367, - "codegen.vluxseg7ei8_v": 21885, - "codegen.vluxseg8ei16_v": 14872, - "codegen.vluxseg8ei32_v": 8767, - "codegen.vluxseg8ei8_v": 23689, - "codegen.vmacc_vv": 24077, + "codegen.vluxseg3ei16_v": 19260, + "codegen.vluxseg3ei32_v": 13218, + "codegen.vluxseg3ei8_v": 25303, + "codegen.vluxseg4ei16_v": 21529, + "codegen.vluxseg4ei32_v": 14213, + "codegen.vluxseg4ei8_v": 29227, + "codegen.vluxseg5ei16_v": 12344, + "codegen.vluxseg5ei32_v": 7568, + "codegen.vluxseg5ei8_v": 18278, + "codegen.vluxseg6ei16_v": 13187, + "codegen.vluxseg6ei32_v": 7968, + "codegen.vluxseg6ei8_v": 20082, + "codegen.vluxseg7ei16_v": 
14030, + "codegen.vluxseg7ei32_v": 8368, + "codegen.vluxseg7ei8_v": 21886, + "codegen.vluxseg8ei16_v": 14873, + "codegen.vluxseg8ei32_v": 8768, + "codegen.vluxseg8ei8_v": 23690, + "codegen.vmacc_vv": 24078, "codegen.vmacc_vx": 76097, "codegen.vmadc_vi": 36009, "codegen.vmadc_vim": 37359, - "codegen.vmadc_vv": 10944, - "codegen.vmadc_vvm": 12342, + "codegen.vmadc_vv": 10945, + "codegen.vmadc_vvm": 12343, "codegen.vmadc_vx": 49557, "codegen.vmadc_vxm": 50907, - "codegen.vmadd_vv": 24077, + "codegen.vmadd_vv": 24078, "codegen.vmadd_vx": 76097, - "codegen.vmand_mm": 9982, - "codegen.vmandn_mm": 9982, + "codegen.vmand_mm": 9983, + "codegen.vmandn_mm": 9983, "codegen.vmax_vv": 23267, "codegen.vmax_vx": 63718, "codegen.vmaxu_vv": 23267, @@ -169,15 +169,15 @@ "codegen.vmin_vx": 63718, "codegen.vminu_vv": 23267, "codegen.vminu_vx": 63718, - "codegen.vmnand_mm": 9982, - "codegen.vmnor_mm": 9982, - "codegen.vmor_mm": 9982, - "codegen.vmorn_mm": 9982, - "codegen.vmsbc_vv": 10944, - "codegen.vmsbc_vvm": 12294, + "codegen.vmnand_mm": 9983, + "codegen.vmnor_mm": 9983, + "codegen.vmor_mm": 9983, + "codegen.vmorn_mm": 9983, + "codegen.vmsbc_vv": 10945, + "codegen.vmsbc_vvm": 12295, "codegen.vmsbc_vx": 49557, "codegen.vmsbc_vxm": 50907, - "codegen.vmsbf_m": 1605, + "codegen.vmsbf_m": 1606, "codegen.vmseq_vi": 79796, "codegen.vmseq_vv": 24968, "codegen.vmseq_vx": 109241, @@ -187,7 +187,7 @@ "codegen.vmsgtu_vi": 79796, "codegen.vmsgtu_vv": 24848, "codegen.vmsgtu_vx": 109241, - "codegen.vmsif_m": 1605, + "codegen.vmsif_m": 1606, "codegen.vmsle_vi": 79796, "codegen.vmsle_vv": 24968, "codegen.vmsle_vx": 109241, @@ -201,14 +201,14 @@ "codegen.vmsne_vi": 79796, "codegen.vmsne_vv": 24968, "codegen.vmsne_vx": 109241, - "codegen.vmsof_m": 1605, - "codegen.vmul_vv": 23482, + "codegen.vmsof_m": 1606, + "codegen.vmul_vv": 23483, "codegen.vmul_vx": 85696, - "codegen.vmulh_vv": 23482, + "codegen.vmulh_vv": 23483, "codegen.vmulh_vx": 85696, - "codegen.vmulhsu_vv": 23482, + "codegen.vmulhsu_vv": 
23483, "codegen.vmulhsu_vx": 85696, - "codegen.vmulhu_vv": 23482, + "codegen.vmulhu_vv": 23483, "codegen.vmulhu_vx": 85696, "codegen.vmv_s_x": 1300, "codegen.vmv_v_i": 17795, @@ -217,43 +217,43 @@ "codegen.vmv_x_s": 1780, "codegen.vmv1r_v": 1699, "codegen.vmv2r_v": 1805, - "codegen.vmv4r_v": 2044, - "codegen.vmv8r_v": 2525, - "codegen.vmxnor_mm": 9982, - "codegen.vmxor_mm": 9982, + "codegen.vmv4r_v": 2045, + "codegen.vmv8r_v": 2526, + "codegen.vmxnor_mm": 9983, + "codegen.vmxor_mm": 9983, "codegen.vnclip_wi": 153531, - "codegen.vnclip_wv": 60647, + "codegen.vnclip_wv": 60648, "codegen.vnclip_wx": 198763, "codegen.vnclipu_wi": 153531, - "codegen.vnclipu_wv": 60647, + "codegen.vnclipu_wv": 60648, "codegen.vnclipu_wx": 198763, - "codegen.vnmsac_vv": 24077, + "codegen.vnmsac_vv": 24078, "codegen.vnmsac_vx": 76097, - "codegen.vnmsub_vv": 24077, + "codegen.vnmsub_vv": 24078, "codegen.vnmsub_vx": 76097, "codegen.vnsra_wi": 38685, - "codegen.vnsra_wv": 15464, + "codegen.vnsra_wv": 15465, "codegen.vnsra_wx": 49993, "codegen.vnsrl_wi": 38685, - "codegen.vnsrl_wv": 15464, + "codegen.vnsrl_wv": 15465, "codegen.vnsrl_wx": 49993, "codegen.vor_vi": 49901, "codegen.vor_vv": 23267, "codegen.vor_vx": 63692, - "codegen.vredand_vs": 28935, - "codegen.vredmax_vs": 28935, - "codegen.vredmaxu_vs": 28935, - "codegen.vredmin_vs": 28935, - "codegen.vredminu_vs": 28935, - "codegen.vredor_vs": 28935, - "codegen.vredsum_vs": 28935, - "codegen.vredxor_vs": 28935, - "codegen.vrem_vv": 32539, + "codegen.vredand_vs": 28936, + "codegen.vredmax_vs": 28936, + "codegen.vredmaxu_vs": 28936, + "codegen.vredmin_vs": 28936, + "codegen.vredminu_vs": 28936, + "codegen.vredor_vs": 28936, + "codegen.vredsum_vs": 28936, + "codegen.vredxor_vs": 28936, + "codegen.vrem_vv": 32540, "codegen.vrem_vx": 195383, - "codegen.vremu_vv": 32635, + "codegen.vremu_vv": 32636, "codegen.vremu_vx": 203376, "codegen.vrgather_vi": 92271, - "codegen.vrgather_vv": 101644, + "codegen.vrgather_vv": 101645, "codegen.vrgather_vx": 
100633, "codegen.vrgatherei16_vv": 74439, "codegen.vrsub_vi": 49930, @@ -268,16 +268,16 @@ "codegen.vsaddu_vi": 49930, "codegen.vsaddu_vv": 23267, "codegen.vsaddu_vx": 63718, - "codegen.vsbc_vvm": 11643, + "codegen.vsbc_vvm": 11644, "codegen.vsbc_vxm": 45634, "codegen.vse16_v": 3957, "codegen.vse32_v": 3347, "codegen.vse8_v": 4567, - "codegen.vsetivli": 412, - "codegen.vsetvl": 412, - "codegen.vsetvli": 412, + "codegen.vsetivli": 413, + "codegen.vsetvl": 413, + "codegen.vsetvli": 413, "codegen.vsext_vf2": 23156, - "codegen.vsext_vf4": 4184, + "codegen.vsext_vf4": 4185, "codegen.vslide1down_vx": 866459, "codegen.vslide1up_vx": 860747, "codegen.vslidedown_vi": 609497, @@ -285,10 +285,10 @@ "codegen.vslideup_vi": 605949, "codegen.vslideup_vx": 859915, "codegen.vsll_vi": 62724, - "codegen.vsll_vv": 25692, + "codegen.vsll_vv": 25693, "codegen.vsll_vx": 81160, "codegen.vsm_v": 671, - "codegen.vsmul_vv": 92719, + "codegen.vsmul_vv": 92720, "codegen.vsmul_vx": 258767, "codegen.vsoxei16_v": 27337, "codegen.vsoxei32_v": 16204, @@ -315,40 +315,40 @@ "codegen.vsoxseg8ei32_v": 8265, "codegen.vsoxseg8ei8_v": 23769, "codegen.vsra_vi": 62724, - "codegen.vsra_vv": 25692, + "codegen.vsra_vv": 25693, "codegen.vsra_vx": 81160, "codegen.vsrl_vi": 62724, - "codegen.vsrl_vv": 25692, + "codegen.vsrl_vv": 25693, "codegen.vsrl_vx": 81160, "codegen.vsse16_v": 49611, "codegen.vsse32_v": 35399, "codegen.vsse8_v": 77187, - "codegen.vsseg2e16_v": 3643, - "codegen.vsseg2e32_v": 2945, - "codegen.vsseg2e8_v": 4341, + "codegen.vsseg2e16_v": 3644, + "codegen.vsseg2e32_v": 2946, + "codegen.vsseg2e8_v": 4342, "codegen.vsseg3e16_v": 3105, "codegen.vsseg3e32_v": 2279, "codegen.vsseg3e8_v": 3931, - "codegen.vsseg4e16_v": 3281, - "codegen.vsseg4e32_v": 2407, - "codegen.vsseg4e8_v": 4155, + "codegen.vsseg4e16_v": 3282, + "codegen.vsseg4e32_v": 2408, + "codegen.vsseg4e8_v": 4156, "codegen.vsseg5e16_v": 2567, "codegen.vsseg5e32_v": 1485, "codegen.vsseg5e8_v": 3649, - "codegen.vsseg6e16_v": 2663, - 
"codegen.vsseg6e32_v": 1533, - "codegen.vsseg6e8_v": 3793, + "codegen.vsseg6e16_v": 2664, + "codegen.vsseg6e32_v": 1534, + "codegen.vsseg6e8_v": 3794, "codegen.vsseg7e16_v": 2759, "codegen.vsseg7e32_v": 1581, "codegen.vsseg7e8_v": 3937, - "codegen.vsseg8e16_v": 2855, - "codegen.vsseg8e32_v": 1629, - "codegen.vsseg8e8_v": 4081, - "codegen.vssra_vi": 249687, - "codegen.vssra_vv": 101559, + "codegen.vsseg8e16_v": 2856, + "codegen.vsseg8e32_v": 1630, + "codegen.vsseg8e8_v": 4082, + "codegen.vssra_vi": 249688, + "codegen.vssra_vv": 101560, "codegen.vssra_vx": 484543, - "codegen.vssrl_vi": 249687, - "codegen.vssrl_vv": 101559, + "codegen.vssrl_vi": 249688, + "codegen.vssrl_vv": 101560, "codegen.vssrl_vx": 484543, "codegen.vssseg2e16_v": 43771, "codegen.vssseg2e32_v": 29255, @@ -403,40 +403,40 @@ "codegen.vsuxseg8ei8_v": 23769, "codegen.vwadd_vv": 13853, "codegen.vwadd_vx": 39634, - "codegen.vwadd_wv": 15180, + "codegen.vwadd_wv": 15181, "codegen.vwadd_wx": 46485, "codegen.vwaddu_vv": 13853, "codegen.vwaddu_vx": 39634, - "codegen.vwaddu_wv": 15180, + "codegen.vwaddu_wv": 15181, "codegen.vwaddu_wx": 46485, - "codegen.vwmacc_vv": 14922, + "codegen.vwmacc_vv": 14923, "codegen.vwmacc_vx": 57321, - "codegen.vwmaccsu_vv": 14922, + "codegen.vwmaccsu_vv": 14923, "codegen.vwmaccsu_vx": 57321, - "codegen.vwmaccu_vv": 14922, + "codegen.vwmaccu_vv": 14923, "codegen.vwmaccu_vx": 57321, "codegen.vwmaccus_vx": 57321, - "codegen.vwmul_vv": 13997, + "codegen.vwmul_vv": 13998, "codegen.vwmul_vx": 53394, - "codegen.vwmulsu_vv": 13997, + "codegen.vwmulsu_vv": 13998, "codegen.vwmulsu_vx": 53394, - "codegen.vwmulu_vv": 13997, + "codegen.vwmulu_vv": 13998, "codegen.vwmulu_vx": 53394, "codegen.vwredsum_vs": 17203, "codegen.vwredsumu_vs": 17203, "codegen.vwsub_vv": 13853, "codegen.vwsub_vx": 39634, - "codegen.vwsub_wv": 15180, + "codegen.vwsub_wv": 15181, "codegen.vwsub_wx": 46485, "codegen.vwsubu_vv": 13853, "codegen.vwsubu_vx": 39634, - "codegen.vwsubu_wv": 15180, + "codegen.vwsubu_wv": 15181, 
"codegen.vwsubu_wx": 46485, "codegen.vxor_vi": 49901, "codegen.vxor_vv": 23267, "codegen.vxor_vx": 63692, "codegen.vzext_vf2": 23156, - "codegen.vzext_vf4": 4184, + "codegen.vzext_vf4": 4185, "codegen.vfadd_vv": 91907, "codegen.vfadd_vf": 322675, "codegen.vfsub_vv": 91907, @@ -445,8 +445,8 @@ "codegen.vfmul_vv": 91907, "codegen.vfmul_vf": 322675, "codegen.vfdiv_vv": 133524, - "codegen.vfdiv_vf": 668303, - "codegen.vfrdiv_vf": 668303, + "codegen.vfdiv_vf": 668304, + "codegen.vfrdiv_vf": 668304, "codegen.vfmacc_vv": 95267, "codegen.vfmacc_vf": 379027, "codegen.vfnmacc_vv": 95267, @@ -463,7 +463,7 @@ "codegen.vfmsub_vf": 379027, "codegen.vfnmsub_vv": 95267, "codegen.vfnmsub_vf": 379027, - "codegen.vfsqrt_v": 9891, + "codegen.vfsqrt_v": 9892, "codegen.vfrsqrt7_v": 6086, "codegen.vfrec7_v": 6107, "codegen.vfmin_vv": 91907, @@ -477,14 +477,14 @@ "codegen.vfsgnjx_vv": 91907, "codegen.vfsgnjx_vf": 276675, "codegen.vmfeq_vv": 99155, - "codegen.vmfeq_vf": 558627, + "codegen.vmfeq_vf": 558628, "codegen.vmfne_vv": 99155, - "codegen.vmfne_vf": 558627, + "codegen.vmfne_vf": 558628, "codegen.vmflt_vv": 99155, - "codegen.vmflt_vf": 558627, - "codegen.vmfgt_vf": 558627, - "codegen.vmfge_vf": 558627, - "codegen.vfclass_v": 6167, + "codegen.vmflt_vf": 558628, + "codegen.vmfgt_vf": 558628, + "codegen.vmfge_vf": 558628, + "codegen.vfclass_v": 6168, "codegen.vfmerge_vfm": 185579, "codegen.vfmv_v_f": 2177, "codegen.vfmv_f_s": 8035, @@ -496,9 +496,9 @@ "codegen.vfcvt_f_xu_v": 6077, "codegen.vfcvt_f_x_v": 6083, "codegen.vfredosum_vs": 160515, - "codegen.vfredusum_vs": 122275, - "codegen.vfredmax_vs": 122275, - "codegen.vfredmin_vs": 122275, + "codegen.vfredusum_vs": 122276, + "codegen.vfredmax_vs": 122276, + "codegen.vfredmin_vs": 122276, "rvv_bench.ascii_to_utf16": 1583663, "rvv_bench.ascii_to_utf32": 703954, "rvv_bench.byteswap": 3353148, diff --git a/.github/cases/machamp/default.json b/.github/cases/machamp/default.json index d7cde8503f..79c384c887 100644 --- 
a/.github/cases/machamp/default.json +++ b/.github/cases/machamp/default.json @@ -23,128 +23,128 @@ "codegen.vasub_vx": 250851, "codegen.vasubu_vv": 90595, "codegen.vasubu_vx": 250851, - "codegen.vcompress_vm": 32216, + "codegen.vcompress_vm": 32217, "codegen.vcpop_m": 1919, - "codegen.vdiv_vv": 33513, + "codegen.vdiv_vv": 33514, "codegen.vdiv_vx": 205363, - "codegen.vdivu_vv": 33523, + "codegen.vdivu_vv": 33524, "codegen.vdivu_vx": 214991, "codegen.vfirst_m": 1703, - "codegen.vid_v": 16078, + "codegen.vid_v": 16079, "codegen.viota_m": 64463, - "codegen.vl1re16_v": 691, - "codegen.vl1re32_v": 691, - "codegen.vl1re8_v": 691, - "codegen.vl2re16_v": 725, - "codegen.vl2re32_v": 725, - "codegen.vl2re8_v": 725, - "codegen.vl4re16_v": 793, - "codegen.vl4re32_v": 793, - "codegen.vl4re8_v": 793, - "codegen.vl8re16_v": 932, - "codegen.vl8re32_v": 929, - "codegen.vl8re8_v": 929, + "codegen.vl1re16_v": 692, + "codegen.vl1re32_v": 692, + "codegen.vl1re8_v": 692, + "codegen.vl2re16_v": 726, + "codegen.vl2re32_v": 726, + "codegen.vl2re8_v": 726, + "codegen.vl4re16_v": 794, + "codegen.vl4re32_v": 794, + "codegen.vl4re8_v": 794, + "codegen.vl8re16_v": 933, + "codegen.vl8re32_v": 930, + "codegen.vl8re8_v": 930, "codegen.vle16_v": 5089, - "codegen.vle16ff_v": 14480, + "codegen.vle16ff_v": 14481, "codegen.vle32_v": 4307, - "codegen.vle32ff_v": 8468, + "codegen.vle32ff_v": 8469, "codegen.vle8_v": 5808, - "codegen.vle8ff_v": 24083, - "codegen.vlm_v": 777, + "codegen.vle8ff_v": 24084, + "codegen.vlm_v": 778, "codegen.vloxei16_v": 40455, - "codegen.vloxei32_v": 21652, - "codegen.vloxei8_v": 58514, - "codegen.vloxseg2ei16_v": 40633, + "codegen.vloxei32_v": 21653, + "codegen.vloxei8_v": 58515, + "codegen.vloxseg2ei16_v": 40634, "codegen.vloxseg2ei32_v": 23028, "codegen.vloxseg2ei8_v": 54087, - "codegen.vloxseg3ei16_v": 29910, - "codegen.vloxseg3ei32_v": 17806, - "codegen.vloxseg3ei8_v": 39003, - "codegen.vloxseg4ei16_v": 35670, + "codegen.vloxseg3ei16_v": 29911, + "codegen.vloxseg3ei32_v": 
17807, + "codegen.vloxseg3ei8_v": 39004, + "codegen.vloxseg4ei16_v": 35671, "codegen.vloxseg4ei32_v": 20271, - "codegen.vloxseg4ei8_v": 47466, + "codegen.vloxseg4ei8_v": 47467, "codegen.vloxseg5ei16_v": 21294, - "codegen.vloxseg5ei32_v": 8911, - "codegen.vloxseg5ei8_v": 29571, + "codegen.vloxseg5ei32_v": 8912, + "codegen.vloxseg5ei8_v": 29572, "codegen.vloxseg6ei16_v": 23910, - "codegen.vloxseg6ei32_v": 9584, - "codegen.vloxseg6ei8_v": 33630, + "codegen.vloxseg6ei32_v": 9585, + "codegen.vloxseg6ei8_v": 33631, "codegen.vloxseg7ei16_v": 26526, - "codegen.vloxseg7ei32_v": 10257, - "codegen.vloxseg7ei8_v": 37689, + "codegen.vloxseg7ei32_v": 10258, + "codegen.vloxseg7ei8_v": 37690, "codegen.vloxseg8ei16_v": 29142, - "codegen.vloxseg8ei32_v": 10930, - "codegen.vloxseg8ei8_v": 41748, - "codegen.vlse16_v": 66788, - "codegen.vlse32_v": 37553, - "codegen.vlse8_v": 114003, + "codegen.vloxseg8ei32_v": 10931, + "codegen.vloxseg8ei8_v": 41749, + "codegen.vlse16_v": 66789, + "codegen.vlse32_v": 37554, + "codegen.vlse8_v": 114004, "codegen.vlseg2e16_v": 4565, "codegen.vlseg2e32_v": 3692, - "codegen.vlseg2e8_v": 5394, - "codegen.vlseg3e16_v": 3884, - "codegen.vlseg3e32_v": 2823, - "codegen.vlseg3e8_v": 4876, + "codegen.vlseg2e8_v": 5395, + "codegen.vlseg3e16_v": 3885, + "codegen.vlseg3e32_v": 2824, + "codegen.vlseg3e8_v": 4877, "codegen.vlseg4e16_v": 4022, "codegen.vlseg4e32_v": 2923, "codegen.vlseg4e8_v": 5026, - "codegen.vlseg5e16_v": 3206, + "codegen.vlseg5e16_v": 3207, "codegen.vlseg5e32_v": 1778, "codegen.vlseg5e8_v": 4545, - "codegen.vlseg6e16_v": 3280, - "codegen.vlseg6e32_v": 1812, + "codegen.vlseg6e16_v": 3281, + "codegen.vlseg6e32_v": 1813, "codegen.vlseg6e8_v": 4635, - "codegen.vlseg7e16_v": 3354, - "codegen.vlseg7e32_v": 1846, + "codegen.vlseg7e16_v": 3355, + "codegen.vlseg7e32_v": 1847, "codegen.vlseg7e8_v": 4741, - "codegen.vlseg8e16_v": 3428, + "codegen.vlseg8e16_v": 3429, "codegen.vlseg8e32_v": 1878, - "codegen.vlseg8e8_v": 4827, - "codegen.vlsseg2e16_v": 60643, - 
"codegen.vlsseg2e32_v": 33148, - "codegen.vlsseg2e8_v": 114518, - "codegen.vlsseg3e16_v": 46898, - "codegen.vlsseg3e32_v": 21978, - "codegen.vlsseg3e8_v": 86983, - "codegen.vlsseg4e16_v": 57783, - "codegen.vlsseg4e32_v": 26028, - "codegen.vlsseg4e8_v": 109583, - "codegen.vlsseg5e16_v": 36398, - "codegen.vlsseg5e32_v": 11263, - "codegen.vlsseg5e8_v": 76843, - "codegen.vlsseg6e16_v": 41438, - "codegen.vlsseg6e32_v": 12378, - "codegen.vlsseg6e8_v": 88883, - "codegen.vlsseg7e16_v": 46478, - "codegen.vlsseg7e32_v": 13493, - "codegen.vlsseg7e8_v": 100923, - "codegen.vlsseg8e16_v": 51518, - "codegen.vlsseg8e32_v": 14608, - "codegen.vlsseg8e8_v": 112963, + "codegen.vlseg8e8_v": 4828, + "codegen.vlsseg2e16_v": 60644, + "codegen.vlsseg2e32_v": 33149, + "codegen.vlsseg2e8_v": 114519, + "codegen.vlsseg3e16_v": 46899, + "codegen.vlsseg3e32_v": 21979, + "codegen.vlsseg3e8_v": 86984, + "codegen.vlsseg4e16_v": 57784, + "codegen.vlsseg4e32_v": 26029, + "codegen.vlsseg4e8_v": 109584, + "codegen.vlsseg5e16_v": 36399, + "codegen.vlsseg5e32_v": 11264, + "codegen.vlsseg5e8_v": 76844, + "codegen.vlsseg6e16_v": 41439, + "codegen.vlsseg6e32_v": 12379, + "codegen.vlsseg6e8_v": 88884, + "codegen.vlsseg7e16_v": 46479, + "codegen.vlsseg7e32_v": 13494, + "codegen.vlsseg7e8_v": 100924, + "codegen.vlsseg8e16_v": 51519, + "codegen.vlsseg8e32_v": 14609, + "codegen.vlsseg8e8_v": 112964, "codegen.vluxei16_v": 40455, - "codegen.vluxei32_v": 21652, - "codegen.vluxei8_v": 58514, - "codegen.vluxseg2ei16_v": 40633, + "codegen.vluxei32_v": 21653, + "codegen.vluxei8_v": 58515, + "codegen.vluxseg2ei16_v": 40634, "codegen.vluxseg2ei32_v": 23028, "codegen.vluxseg2ei8_v": 54087, - "codegen.vluxseg3ei16_v": 29910, - "codegen.vluxseg3ei32_v": 17806, - "codegen.vluxseg3ei8_v": 39003, - "codegen.vluxseg4ei16_v": 35670, + "codegen.vluxseg3ei16_v": 29911, + "codegen.vluxseg3ei32_v": 17807, + "codegen.vluxseg3ei8_v": 39004, + "codegen.vluxseg4ei16_v": 35671, "codegen.vluxseg4ei32_v": 20271, - "codegen.vluxseg4ei8_v": 
47466, + "codegen.vluxseg4ei8_v": 47467, "codegen.vluxseg5ei16_v": 21294, - "codegen.vluxseg5ei32_v": 8911, - "codegen.vluxseg5ei8_v": 29571, + "codegen.vluxseg5ei32_v": 8912, + "codegen.vluxseg5ei8_v": 29572, "codegen.vluxseg6ei16_v": 23910, - "codegen.vluxseg6ei32_v": 9584, - "codegen.vluxseg6ei8_v": 33630, + "codegen.vluxseg6ei32_v": 9585, + "codegen.vluxseg6ei8_v": 33631, "codegen.vluxseg7ei16_v": 26526, - "codegen.vluxseg7ei32_v": 10257, - "codegen.vluxseg7ei8_v": 37689, + "codegen.vluxseg7ei32_v": 10258, + "codegen.vluxseg7ei8_v": 37690, "codegen.vluxseg8ei16_v": 29142, - "codegen.vluxseg8ei32_v": 10930, - "codegen.vluxseg8ei8_v": 41748, - "codegen.vmacc_vv": 23371, + "codegen.vluxseg8ei32_v": 10931, + "codegen.vluxseg8ei8_v": 41749, + "codegen.vmacc_vv": 23372, "codegen.vmacc_vx": 76422, "codegen.vmadc_vi": 37536, "codegen.vmadc_vim": 38886, @@ -152,10 +152,10 @@ "codegen.vmadc_vvm": 12504, "codegen.vmadc_vx": 51738, "codegen.vmadc_vxm": 53088, - "codegen.vmadd_vv": 23371, + "codegen.vmadd_vv": 23372, "codegen.vmadd_vx": 76422, - "codegen.vmand_mm": 9979, - "codegen.vmandn_mm": 9979, + "codegen.vmand_mm": 9980, + "codegen.vmandn_mm": 9980, "codegen.vmax_vv": 22951, "codegen.vmax_vx": 63015, "codegen.vmaxu_vv": 22951, @@ -167,93 +167,93 @@ "codegen.vmin_vx": 63015, "codegen.vminu_vv": 22951, "codegen.vminu_vx": 63015, - "codegen.vmnand_mm": 9979, - "codegen.vmnor_mm": 9979, - "codegen.vmor_mm": 9979, - "codegen.vmorn_mm": 9979, + "codegen.vmnand_mm": 9980, + "codegen.vmnor_mm": 9980, + "codegen.vmor_mm": 9980, + "codegen.vmorn_mm": 9980, "codegen.vmsbc_vv": 11106, "codegen.vmsbc_vvm": 12456, "codegen.vmsbc_vx": 51738, "codegen.vmsbc_vxm": 53088, - "codegen.vmsbf_m": 1599, + "codegen.vmsbf_m": 1600, "codegen.vmseq_vi": 89192, - "codegen.vmseq_vv": 26198, + "codegen.vmseq_vv": 26199, "codegen.vmseq_vx": 122663, "codegen.vmsgt_vi": 89192, - "codegen.vmsgt_vv": 26131, + "codegen.vmsgt_vv": 26132, "codegen.vmsgt_vx": 122663, "codegen.vmsgtu_vi": 89192, - 
"codegen.vmsgtu_vv": 26131, + "codegen.vmsgtu_vv": 26132, "codegen.vmsgtu_vx": 122663, - "codegen.vmsif_m": 1599, + "codegen.vmsif_m": 1600, "codegen.vmsle_vi": 89192, - "codegen.vmsle_vv": 26198, + "codegen.vmsle_vv": 26199, "codegen.vmsle_vx": 122663, "codegen.vmsleu_vi": 89192, - "codegen.vmsleu_vv": 26198, + "codegen.vmsleu_vv": 26199, "codegen.vmsleu_vx": 122663, - "codegen.vmslt_vv": 26198, + "codegen.vmslt_vv": 26199, "codegen.vmslt_vx": 122663, - "codegen.vmsltu_vv": 26198, + "codegen.vmsltu_vv": 26199, "codegen.vmsltu_vx": 122663, "codegen.vmsne_vi": 89192, - "codegen.vmsne_vv": 26198, + "codegen.vmsne_vv": 26199, "codegen.vmsne_vx": 122663, - "codegen.vmsof_m": 1599, - "codegen.vmul_vv": 23254, + "codegen.vmsof_m": 1600, + "codegen.vmul_vv": 23255, "codegen.vmul_vx": 85688, - "codegen.vmulh_vv": 23254, + "codegen.vmulh_vv": 23255, "codegen.vmulh_vx": 85688, - "codegen.vmulhsu_vv": 23254, + "codegen.vmulhsu_vv": 23255, "codegen.vmulhsu_vx": 85688, - "codegen.vmulhu_vv": 23254, + "codegen.vmulhu_vv": 23255, "codegen.vmulhu_vx": 85688, "codegen.vmv_s_x": 1276, "codegen.vmv_v_i": 17636, - "codegen.vmv_v_v": 9886, + "codegen.vmv_v_v": 9887, "codegen.vmv_v_x": 7870, "codegen.vmv_x_s": 1753, "codegen.vmv1r_v": 1699, "codegen.vmv2r_v": 1805, - "codegen.vmv4r_v": 2044, - "codegen.vmv8r_v": 2525, - "codegen.vmxnor_mm": 9979, - "codegen.vmxor_mm": 9979, + "codegen.vmv4r_v": 2045, + "codegen.vmv8r_v": 2526, + "codegen.vmxnor_mm": 9980, + "codegen.vmxor_mm": 9980, "codegen.vnclip_wi": 154807, - "codegen.vnclip_wv": 60803, + "codegen.vnclip_wv": 60804, "codegen.vnclip_wx": 200311, "codegen.vnclipu_wi": 154807, - "codegen.vnclipu_wv": 60803, + "codegen.vnclipu_wv": 60804, "codegen.vnclipu_wx": 200311, - "codegen.vnmsac_vv": 23371, + "codegen.vnmsac_vv": 23372, "codegen.vnmsac_vx": 76422, - "codegen.vnmsub_vv": 23371, + "codegen.vnmsub_vv": 23372, "codegen.vnmsub_vx": 76422, "codegen.vnsra_wi": 39004, - "codegen.vnsra_wv": 15503, + "codegen.vnsra_wv": 15504, 
"codegen.vnsra_wx": 50380, "codegen.vnsrl_wi": 39004, - "codegen.vnsrl_wv": 15503, + "codegen.vnsrl_wv": 15504, "codegen.vnsrl_wx": 50380, "codegen.vor_vi": 49241, "codegen.vor_vv": 22951, "codegen.vor_vx": 63090, - "codegen.vredand_vs": 30831, - "codegen.vredmax_vs": 30831, - "codegen.vredmaxu_vs": 30831, - "codegen.vredmin_vs": 30831, - "codegen.vredminu_vs": 30831, - "codegen.vredor_vs": 30831, - "codegen.vredsum_vs": 30831, - "codegen.vredxor_vs": 30831, - "codegen.vrem_vv": 33513, + "codegen.vredand_vs": 30832, + "codegen.vredmax_vs": 30832, + "codegen.vredmaxu_vs": 30832, + "codegen.vredmin_vs": 30832, + "codegen.vredminu_vs": 30832, + "codegen.vredor_vs": 30832, + "codegen.vredsum_vs": 30832, + "codegen.vredxor_vs": 30832, + "codegen.vrem_vv": 33514, "codegen.vrem_vx": 205363, - "codegen.vremu_vv": 33523, + "codegen.vremu_vv": 33524, "codegen.vremu_vx": 214991, "codegen.vrgather_vi": 94651, "codegen.vrgather_vv": 178186, "codegen.vrgather_vx": 103069, - "codegen.vrgatherei16_vv": 133697, + "codegen.vrgatherei16_vv": 133698, "codegen.vrsub_vi": 49187, "codegen.vrsub_vx": 63015, "codegen.vs1r_v": 615, @@ -271,10 +271,10 @@ "codegen.vse16_v": 3957, "codegen.vse32_v": 3347, "codegen.vse8_v": 4567, - "codegen.vsetivli": 412, - "codegen.vsetvl": 412, - "codegen.vsetvli": 412, - "codegen.vsext_vf2": 39398, + "codegen.vsetivli": 413, + "codegen.vsetvl": 413, + "codegen.vsetvli": 413, + "codegen.vsext_vf2": 39399, "codegen.vsext_vf4": 6420, "codegen.vslide1down_vx": 1631019, "codegen.vslide1up_vx": 1625307, @@ -286,9 +286,9 @@ "codegen.vsll_vv": 25771, "codegen.vsll_vx": 81795, "codegen.vsm_v": 716, - "codegen.vsmul_vv": 91807, + "codegen.vsmul_vv": 91808, "codegen.vsmul_vx": 257803, - "codegen.vsoxei16_v": 38589, + "codegen.vsoxei16_v": 38590, "codegen.vsoxei32_v": 20261, "codegen.vsoxei8_v": 56641, "codegen.vsoxseg2ei16_v": 39058, @@ -321,33 +321,33 @@ "codegen.vsse16_v": 70079, "codegen.vsse32_v": 44219, "codegen.vsse8_v": 110603, - "codegen.vsseg2e16_v": 3643, - 
"codegen.vsseg2e32_v": 2945, - "codegen.vsseg2e8_v": 4341, + "codegen.vsseg2e16_v": 3644, + "codegen.vsseg2e32_v": 2946, + "codegen.vsseg2e8_v": 4342, "codegen.vsseg3e16_v": 3105, "codegen.vsseg3e32_v": 2279, "codegen.vsseg3e8_v": 3931, - "codegen.vsseg4e16_v": 3281, - "codegen.vsseg4e32_v": 2407, - "codegen.vsseg4e8_v": 4155, + "codegen.vsseg4e16_v": 3282, + "codegen.vsseg4e32_v": 2408, + "codegen.vsseg4e8_v": 4156, "codegen.vsseg5e16_v": 2567, "codegen.vsseg5e32_v": 1485, "codegen.vsseg5e8_v": 3649, - "codegen.vsseg6e16_v": 2663, - "codegen.vsseg6e32_v": 1533, - "codegen.vsseg6e8_v": 3793, + "codegen.vsseg6e16_v": 2664, + "codegen.vsseg6e32_v": 1534, + "codegen.vsseg6e8_v": 3794, "codegen.vsseg7e16_v": 2759, "codegen.vsseg7e32_v": 1581, "codegen.vsseg7e8_v": 3937, - "codegen.vsseg8e16_v": 2855, - "codegen.vsseg8e32_v": 1629, - "codegen.vsseg8e8_v": 4081, - "codegen.vssra_vi": 251063, + "codegen.vsseg8e16_v": 2856, + "codegen.vsseg8e32_v": 1630, + "codegen.vsseg8e8_v": 4082, + "codegen.vssra_vi": 251064, "codegen.vssra_vv": 101875, - "codegen.vssra_vx": 488655, - "codegen.vssrl_vi": 251063, + "codegen.vssra_vx": 488656, + "codegen.vssrl_vi": 251064, "codegen.vssrl_vv": 101875, - "codegen.vssrl_vx": 488655, + "codegen.vssrl_vx": 488656, "codegen.vssseg2e16_v": 63019, "codegen.vssseg2e32_v": 38351, "codegen.vssseg2e8_v": 109119, @@ -375,7 +375,7 @@ "codegen.vssubu_vx": 93444, "codegen.vsub_vv": 22951, "codegen.vsub_vx": 93444, - "codegen.vsuxei16_v": 38589, + "codegen.vsuxei16_v": 38590, "codegen.vsuxei32_v": 20261, "codegen.vsuxei8_v": 56641, "codegen.vsuxseg2ei16_v": 39058, @@ -399,19 +399,19 @@ "codegen.vsuxseg8ei16_v": 28922, "codegen.vsuxseg8ei32_v": 10443, "codegen.vsuxseg8ei8_v": 41900, - "codegen.vwadd_vv": 13856, + "codegen.vwadd_vv": 13857, "codegen.vwadd_vx": 39710, "codegen.vwadd_wv": 15217, "codegen.vwadd_wx": 46515, - "codegen.vwaddu_vv": 13856, + "codegen.vwaddu_vv": 13857, "codegen.vwaddu_vx": 39710, "codegen.vwaddu_wv": 15217, "codegen.vwaddu_wx": 
46515, - "codegen.vwmacc_vv": 14785, + "codegen.vwmacc_vv": 14786, "codegen.vwmacc_vx": 57620, - "codegen.vwmaccsu_vv": 14785, + "codegen.vwmaccsu_vv": 14786, "codegen.vwmaccsu_vx": 57620, - "codegen.vwmaccu_vv": 14785, + "codegen.vwmaccu_vv": 14786, "codegen.vwmaccu_vx": 57620, "codegen.vwmaccus_vx": 57620, "codegen.vwmul_vv": 14000, @@ -422,18 +422,18 @@ "codegen.vwmulu_vx": 53468, "codegen.vwredsum_vs": 18435, "codegen.vwredsumu_vs": 18435, - "codegen.vwsub_vv": 13856, + "codegen.vwsub_vv": 13857, "codegen.vwsub_vx": 39710, "codegen.vwsub_wv": 15217, "codegen.vwsub_wx": 46515, - "codegen.vwsubu_vv": 13856, + "codegen.vwsubu_vv": 13857, "codegen.vwsubu_vx": 39710, "codegen.vwsubu_wv": 15217, "codegen.vwsubu_wx": 46515, "codegen.vxor_vi": 49241, "codegen.vxor_vv": 22951, "codegen.vxor_vx": 63090, - "codegen.vzext_vf2": 39398, + "codegen.vzext_vf2": 39399, "codegen.vzext_vf4": 6420, "rvv_bench.ascii_to_utf16": 1460078, "rvv_bench.ascii_to_utf32": 631187, diff --git a/.github/cases/sandslash/default.json b/.github/cases/sandslash/default.json index 32e69fe3af..c745b07cb1 100644 --- a/.github/cases/sandslash/default.json +++ b/.github/cases/sandslash/default.json @@ -25,131 +25,131 @@ "codegen.vasubu_vx": 336401, "codegen.vcompress_vm": 97701, "codegen.vcpop_m": 2681, - "codegen.vdiv_vv": 47995, + "codegen.vdiv_vv": 47996, "codegen.vdiv_vx": 342920, "codegen.vdivu_vv": 48248, "codegen.vdivu_vx": 359101, "codegen.vfirst_m": 2209, - "codegen.vid_v": 22148, + "codegen.vid_v": 22149, "codegen.viota_m": 194679, - "codegen.vl1re16_v": 1109, - "codegen.vl1re32_v": 1109, - "codegen.vl1re8_v": 1109, - "codegen.vl2re16_v": 1177, - "codegen.vl2re32_v": 1177, - "codegen.vl2re8_v": 1177, - "codegen.vl4re16_v": 1313, - "codegen.vl4re32_v": 1313, - "codegen.vl4re8_v": 1313, - "codegen.vl8re16_v": 1595, - "codegen.vl8re32_v": 1585, - "codegen.vl8re8_v": 1585, + "codegen.vl1re16_v": 1110, + "codegen.vl1re32_v": 1110, + "codegen.vl1re8_v": 1110, + "codegen.vl2re16_v": 1178, + 
"codegen.vl2re32_v": 1178, + "codegen.vl2re8_v": 1178, + "codegen.vl4re16_v": 1314, + "codegen.vl4re32_v": 1314, + "codegen.vl4re8_v": 1314, + "codegen.vl8re16_v": 1596, + "codegen.vl8re32_v": 1586, + "codegen.vl8re8_v": 1586, "codegen.vle16_v": 6965, "codegen.vle16ff_v": 40957, - "codegen.vle32_v": 6080, - "codegen.vle32ff_v": 21879, - "codegen.vle8_v": 7808, + "codegen.vle32_v": 6081, + "codegen.vle32ff_v": 21880, + "codegen.vle8_v": 7809, "codegen.vle8ff_v": 75462, - "codegen.vlm_v": 1482, - "codegen.vloxei16_v": 101085, - "codegen.vloxei32_v": 47404, + "codegen.vlm_v": 1483, + "codegen.vloxei16_v": 101086, + "codegen.vloxei32_v": 47405, "codegen.vloxei8_v": 166162, - "codegen.vloxseg2ei16_v": 108952, - "codegen.vloxseg2ei32_v": 56703, - "codegen.vloxseg2ei8_v": 158373, - "codegen.vloxseg3ei16_v": 81469, - "codegen.vloxseg3ei32_v": 43712, - "codegen.vloxseg3ei8_v": 112480, - "codegen.vloxseg4ei16_v": 103196, + "codegen.vloxseg2ei16_v": 108953, + "codegen.vloxseg2ei32_v": 56704, + "codegen.vloxseg2ei8_v": 158374, + "codegen.vloxseg3ei16_v": 81470, + "codegen.vloxseg3ei32_v": 43713, + "codegen.vloxseg3ei8_v": 112481, + "codegen.vloxseg4ei16_v": 103197, "codegen.vloxseg4ei32_v": 53554, - "codegen.vloxseg4ei8_v": 144408, + "codegen.vloxseg4ei8_v": 144409, "codegen.vloxseg5ei16_v": 57224, - "codegen.vloxseg5ei32_v": 25304, - "codegen.vloxseg5ei8_v": 90179, + "codegen.vloxseg5ei32_v": 25305, + "codegen.vloxseg5ei8_v": 90180, "codegen.vloxseg6ei16_v": 66325, - "codegen.vloxseg6ei32_v": 28714, - "codegen.vloxseg6ei8_v": 105594, + "codegen.vloxseg6ei32_v": 28715, + "codegen.vloxseg6ei8_v": 105595, "codegen.vloxseg7ei16_v": 75451, - "codegen.vloxseg7ei32_v": 32133, - "codegen.vloxseg7ei8_v": 121051, + "codegen.vloxseg7ei32_v": 32134, + "codegen.vloxseg7ei8_v": 121052, "codegen.vloxseg8ei16_v": 84552, - "codegen.vloxseg8ei32_v": 35543, - "codegen.vloxseg8ei8_v": 136466, + "codegen.vloxseg8ei32_v": 35544, + "codegen.vloxseg8ei8_v": 136467, "codegen.vlse16_v": 196925, - 
"codegen.vlse32_v": 102479, + "codegen.vlse32_v": 102480, "codegen.vlse8_v": 368506, "codegen.vlseg2e16_v": 6481, "codegen.vlseg2e32_v": 5446, "codegen.vlseg2e8_v": 7528, - "codegen.vlseg3e16_v": 5737, - "codegen.vlseg3e32_v": 4320, - "codegen.vlseg3e8_v": 7067, + "codegen.vlseg3e16_v": 5738, + "codegen.vlseg3e32_v": 4321, + "codegen.vlseg3e8_v": 7068, "codegen.vlseg4e16_v": 6039, "codegen.vlseg4e32_v": 4488, "codegen.vlseg4e8_v": 7415, - "codegen.vlseg5e16_v": 5049, - "codegen.vlseg5e32_v": 2919, - "codegen.vlseg5e8_v": 7096, + "codegen.vlseg5e16_v": 5050, + "codegen.vlseg5e32_v": 2920, + "codegen.vlseg5e8_v": 7097, "codegen.vlseg6e16_v": 5199, "codegen.vlseg6e32_v": 2983, "codegen.vlseg6e8_v": 7297, "codegen.vlseg7e16_v": 5346, - "codegen.vlseg7e32_v": 3047, - "codegen.vlseg7e8_v": 7498, - "codegen.vlseg8e16_v": 5497, + "codegen.vlseg7e32_v": 3048, + "codegen.vlseg7e8_v": 7499, + "codegen.vlseg8e16_v": 5498, "codegen.vlseg8e32_v": 3111, "codegen.vlseg8e8_v": 7699, - "codegen.vlsseg2e16_v": 184369, - "codegen.vlsseg2e32_v": 97273, + "codegen.vlsseg2e16_v": 184370, + "codegen.vlsseg2e32_v": 97274, "codegen.vlsseg2e8_v": 374215, - "codegen.vlsseg3e16_v": 142348, - "codegen.vlsseg3e32_v": 62352, + "codegen.vlsseg3e16_v": 142349, + "codegen.vlsseg3e32_v": 62353, "codegen.vlsseg3e8_v": 283284, "codegen.vlsseg4e16_v": 183288, - "codegen.vlsseg4e32_v": 78237, - "codegen.vlsseg4e8_v": 370769, + "codegen.vlsseg4e32_v": 78238, + "codegen.vlsseg4e8_v": 370770, "codegen.vlsseg5e16_v": 105177, - "codegen.vlsseg5e32_v": 37051, + "codegen.vlsseg5e32_v": 37052, "codegen.vlsseg5e8_v": 258898, "codegen.vlsseg6e16_v": 122687, - "codegen.vlsseg6e32_v": 42661, + "codegen.vlsseg6e32_v": 42662, "codegen.vlsseg6e8_v": 305253, - "codegen.vlsseg7e16_v": 140252, - "codegen.vlsseg7e32_v": 48271, + "codegen.vlsseg7e16_v": 140253, + "codegen.vlsseg7e32_v": 48272, "codegen.vlsseg7e8_v": 351748, - "codegen.vlsseg8e16_v": 157762, - "codegen.vlsseg8e32_v": 53881, + "codegen.vlsseg8e16_v": 157763, 
+ "codegen.vlsseg8e32_v": 53882, "codegen.vlsseg8e8_v": 398103, - "codegen.vluxei16_v": 101085, - "codegen.vluxei32_v": 47404, + "codegen.vluxei16_v": 101086, + "codegen.vluxei32_v": 47405, "codegen.vluxei8_v": 166162, - "codegen.vluxseg2ei16_v": 108952, - "codegen.vluxseg2ei32_v": 56703, - "codegen.vluxseg2ei8_v": 158373, - "codegen.vluxseg3ei16_v": 81469, - "codegen.vluxseg3ei32_v": 43712, - "codegen.vluxseg3ei8_v": 112480, - "codegen.vluxseg4ei16_v": 103196, + "codegen.vluxseg2ei16_v": 108953, + "codegen.vluxseg2ei32_v": 56704, + "codegen.vluxseg2ei8_v": 158374, + "codegen.vluxseg3ei16_v": 81470, + "codegen.vluxseg3ei32_v": 43713, + "codegen.vluxseg3ei8_v": 112481, + "codegen.vluxseg4ei16_v": 103197, "codegen.vluxseg4ei32_v": 53554, - "codegen.vluxseg4ei8_v": 144408, + "codegen.vluxseg4ei8_v": 144409, "codegen.vluxseg5ei16_v": 57224, - "codegen.vluxseg5ei32_v": 25304, - "codegen.vluxseg5ei8_v": 90179, + "codegen.vluxseg5ei32_v": 25305, + "codegen.vluxseg5ei8_v": 90180, "codegen.vluxseg6ei16_v": 66325, - "codegen.vluxseg6ei32_v": 28714, - "codegen.vluxseg6ei8_v": 105594, + "codegen.vluxseg6ei32_v": 28715, + "codegen.vluxseg6ei8_v": 105595, "codegen.vluxseg7ei16_v": 75451, - "codegen.vluxseg7ei32_v": 32133, - "codegen.vluxseg7ei8_v": 121051, + "codegen.vluxseg7ei32_v": 32134, + "codegen.vluxseg7ei8_v": 121052, "codegen.vluxseg8ei16_v": 84552, - "codegen.vluxseg8ei32_v": 35543, - "codegen.vluxseg8ei8_v": 136466, + "codegen.vluxseg8ei32_v": 35544, + "codegen.vluxseg8ei8_v": 136467, "codegen.vmacc_vv": 31543, "codegen.vmacc_vx": 96318, "codegen.vmadc_vi": 57227, "codegen.vmadc_vim": 58847, - "codegen.vmadc_vv": 16562, - "codegen.vmadc_vvm": 18230, + "codegen.vmadc_vv": 16563, + "codegen.vmadc_vvm": 18231, "codegen.vmadc_vx": 78968, "codegen.vmadc_vxm": 80588, "codegen.vmadd_vv": 31543, @@ -171,8 +171,8 @@ "codegen.vmnor_mm": 13190, "codegen.vmor_mm": 13190, "codegen.vmorn_mm": 13190, - "codegen.vmsbc_vv": 16562, - "codegen.vmsbc_vvm": 18182, + "codegen.vmsbc_vv": 
16563, + "codegen.vmsbc_vvm": 18183, "codegen.vmsbc_vx": 78968, "codegen.vmsbc_vxm": 80588, "codegen.vmsbf_m": 2105, @@ -180,10 +180,10 @@ "codegen.vmseq_vv": 42520, "codegen.vmseq_vx": 229999, "codegen.vmsgt_vi": 165628, - "codegen.vmsgt_vv": 42472, + "codegen.vmsgt_vv": 42473, "codegen.vmsgt_vx": 229999, "codegen.vmsgtu_vi": 165628, - "codegen.vmsgtu_vv": 42472, + "codegen.vmsgtu_vv": 42473, "codegen.vmsgtu_vx": 229999, "codegen.vmsif_m": 2105, "codegen.vmsle_vi": 165628, @@ -200,40 +200,40 @@ "codegen.vmsne_vv": 42520, "codegen.vmsne_vx": 229999, "codegen.vmsof_m": 2105, - "codegen.vmul_vv": 30739, + "codegen.vmul_vv": 30740, "codegen.vmul_vx": 114869, - "codegen.vmulh_vv": 30739, + "codegen.vmulh_vv": 30740, "codegen.vmulh_vx": 114869, - "codegen.vmulhsu_vv": 30739, + "codegen.vmulhsu_vv": 30740, "codegen.vmulhsu_vx": 114869, - "codegen.vmulhu_vv": 30739, + "codegen.vmulhu_vv": 30740, "codegen.vmulhu_vx": 114869, "codegen.vmv_s_x": 1718, "codegen.vmv_v_i": 27443, - "codegen.vmv_v_v": 13789, + "codegen.vmv_v_v": 13790, "codegen.vmv_v_x": 10940, "codegen.vmv_x_s": 2270, "codegen.vmv1r_v": 2189, - "codegen.vmv2r_v": 2428, + "codegen.vmv2r_v": 2429, "codegen.vmv4r_v": 3107, - "codegen.vmv8r_v": 4342, + "codegen.vmv8r_v": 4343, "codegen.vmxnor_mm": 13190, "codegen.vmxor_mm": 13190, "codegen.vnclip_wi": 216789, - "codegen.vnclip_wv": 78417, + "codegen.vnclip_wv": 78418, "codegen.vnclip_wx": 287441, "codegen.vnclipu_wi": 216789, - "codegen.vnclipu_wv": 78417, + "codegen.vnclipu_wv": 78418, "codegen.vnclipu_wx": 287441, "codegen.vnmsac_vv": 31543, "codegen.vnmsac_vx": 96318, "codegen.vnmsub_vv": 31543, "codegen.vnmsub_vx": 96318, "codegen.vnsra_wi": 54786, - "codegen.vnsra_wv": 20193, + "codegen.vnsra_wv": 20194, "codegen.vnsra_wx": 72449, "codegen.vnsrl_wi": 54786, - "codegen.vnsrl_wv": 20193, + "codegen.vnsrl_wv": 20194, "codegen.vnsrl_wx": 72449, "codegen.vor_vi": 65588, "codegen.vor_vv": 30354, @@ -246,14 +246,14 @@ "codegen.vredor_vs": 43949, "codegen.vredsum_vs": 
43949, "codegen.vredxor_vs": 43949, - "codegen.vrem_vv": 47995, + "codegen.vrem_vv": 47996, "codegen.vrem_vx": 342920, "codegen.vremu_vv": 48248, "codegen.vremu_vx": 359101, "codegen.vrgather_vi": 132675, "codegen.vrgather_vv": 636093, "codegen.vrgather_vx": 154632, - "codegen.vrgatherei16_vv": 465563, + "codegen.vrgatherei16_vv": 465564, "codegen.vrsub_vi": 65413, "codegen.vrsub_vx": 84689, "codegen.vs1r_v": 1015, @@ -271,11 +271,11 @@ "codegen.vse16_v": 5499, "codegen.vse32_v": 4793, "codegen.vse8_v": 6205, - "codegen.vsetivli": 796, - "codegen.vsetvl": 796, - "codegen.vsetvli": 796, + "codegen.vsetivli": 797, + "codegen.vsetvl": 797, + "codegen.vsetvli": 797, "codegen.vsext_vf2": 134873, - "codegen.vsext_vf4": 19800, + "codegen.vsext_vf4": 19801, "codegen.vslide1down_vx": 6151877, "codegen.vslide1up_vx": 6146211, "codegen.vslidedown_vi": 4310885, @@ -283,10 +283,10 @@ "codegen.vslideup_vi": 4307448, "codegen.vslideup_vx": 6145386, "codegen.vsll_vi": 93945, - "codegen.vsll_vv": 35297, + "codegen.vsll_vv": 35298, "codegen.vsll_vx": 124250, "codegen.vsm_v": 1403, - "codegen.vsmul_vv": 120601, + "codegen.vsmul_vv": 120602, "codegen.vsmul_vx": 345113, "codegen.vsoxei16_v": 100017, "codegen.vsoxei32_v": 46463, @@ -313,10 +313,10 @@ "codegen.vsoxseg8ei32_v": 34741, "codegen.vsoxseg8ei8_v": 136706, "codegen.vsra_vi": 93945, - "codegen.vsra_vv": 35297, + "codegen.vsra_vv": 35298, "codegen.vsra_vx": 124250, "codegen.vsrl_vi": 93945, - "codegen.vsrl_vv": 35297, + "codegen.vsrl_vv": 35298, "codegen.vsrl_vx": 124250, "codegen.vsse16_v": 181937, "codegen.vsse32_v": 103133, @@ -327,26 +327,26 @@ "codegen.vsseg3e16_v": 4523, "codegen.vsseg3e32_v": 3437, "codegen.vsseg3e8_v": 5609, - "codegen.vsseg4e16_v": 4795, - "codegen.vsseg4e32_v": 3629, - "codegen.vsseg4e8_v": 5961, + "codegen.vsseg4e16_v": 4796, + "codegen.vsseg4e32_v": 3630, + "codegen.vsseg4e8_v": 5962, "codegen.vsseg5e16_v": 3917, "codegen.vsseg5e32_v": 2351, "codegen.vsseg5e8_v": 5483, - "codegen.vsseg6e16_v": 4077, - 
"codegen.vsseg6e32_v": 2431, - "codegen.vsseg6e8_v": 5723, + "codegen.vsseg6e16_v": 4078, + "codegen.vsseg6e32_v": 2432, + "codegen.vsseg6e8_v": 5724, "codegen.vsseg7e16_v": 4237, "codegen.vsseg7e32_v": 2511, "codegen.vsseg7e8_v": 5963, - "codegen.vsseg8e16_v": 4397, - "codegen.vsseg8e32_v": 2591, - "codegen.vsseg8e8_v": 6203, + "codegen.vsseg8e16_v": 4398, + "codegen.vsseg8e32_v": 2592, + "codegen.vsseg8e8_v": 6204, "codegen.vssra_vi": 373425, - "codegen.vssra_vv": 138833, + "codegen.vssra_vv": 138834, "codegen.vssra_vx": 748409, "codegen.vssrl_vi": 373425, - "codegen.vssrl_vv": 138833, + "codegen.vssrl_vv": 138834, "codegen.vssrl_vx": 748409, "codegen.vssseg2e16_v": 169369, "codegen.vssseg2e32_v": 96001, @@ -401,40 +401,40 @@ "codegen.vsuxseg8ei8_v": 136706, "codegen.vwadd_vv": 17925, "codegen.vwadd_vx": 53921, - "codegen.vwadd_wv": 19879, + "codegen.vwadd_wv": 19880, "codegen.vwadd_wx": 63094, "codegen.vwaddu_vv": 17925, "codegen.vwaddu_vx": 53921, - "codegen.vwaddu_wv": 19879, + "codegen.vwaddu_wv": 19880, "codegen.vwaddu_wx": 63094, "codegen.vwmacc_vv": 19365, - "codegen.vwmacc_vx": 72673, + "codegen.vwmacc_vx": 72674, "codegen.vwmaccsu_vv": 19365, - "codegen.vwmaccsu_vx": 72673, + "codegen.vwmaccsu_vx": 72674, "codegen.vwmaccu_vv": 19365, - "codegen.vwmaccu_vx": 72673, - "codegen.vwmaccus_vx": 72673, - "codegen.vwmul_vv": 18092, + "codegen.vwmaccu_vx": 72674, + "codegen.vwmaccus_vx": 72674, + "codegen.vwmul_vv": 18093, "codegen.vwmul_vx": 72112, - "codegen.vwmulsu_vv": 18092, + "codegen.vwmulsu_vv": 18093, "codegen.vwmulsu_vx": 72112, - "codegen.vwmulu_vv": 18092, + "codegen.vwmulu_vv": 18093, "codegen.vwmulu_vx": 72112, "codegen.vwredsum_vs": 26261, "codegen.vwredsumu_vs": 26261, "codegen.vwsub_vv": 17925, "codegen.vwsub_vx": 53921, - "codegen.vwsub_wv": 19879, + "codegen.vwsub_wv": 19880, "codegen.vwsub_wx": 63094, "codegen.vwsubu_vv": 17925, "codegen.vwsubu_vx": 53921, - "codegen.vwsubu_wv": 19879, + "codegen.vwsubu_wv": 19880, "codegen.vwsubu_wx": 63094, 
"codegen.vxor_vi": 65588, "codegen.vxor_vv": 30354, "codegen.vxor_vx": 84837, "codegen.vzext_vf2": 134873, - "codegen.vzext_vf4": 19800, + "codegen.vzext_vf4": 19801, "rvv_bench.ascii_to_utf16": 1371550, "rvv_bench.ascii_to_utf32": 583318, "rvv_bench.byteswap": 3556315, From 3fa58cbb41668e98702062f56aab6878e7424578 Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Sun, 7 Jul 2024 19:32:14 +0800 Subject: [PATCH 06/18] support and-not --- t1/src/T1.scala | 12 ++++++++++-- t1/src/decoder/Decoder.scala | 6 +++++- t1/src/decoder/InstructionDocumentation.scala | 17 +++++++++++++++++ t1/src/decoder/attribute/isLogic.scala | 3 +++ t1/src/decoder/attribute/isMasklogic.scala | 3 +++ t1/src/decoder/attribute/isScheduler.scala | 5 ++++- t1/src/decoder/attribute/isSreadvd.scala | 3 +++ t1/src/decoder/attribute/isVtype.scala | 2 ++ t1/src/decoder/attribute/logicUop.scala | 3 +++ 9 files changed, 50 insertions(+), 4 deletions(-) diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 64daf2ddc4..b0aefdc9f4 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -106,7 +106,12 @@ case class T1Parameter( val allInstructions: Seq[Instruction] = { org.chipsalliance.rvdecoderdb.instructions(org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)) - .filter(instruction => instruction.instructionSet.name == "rv_v")++ + .filter{ + instruction => instruction.instructionSet.name match { + case "rv_v" => true + case "rv_zvbb" => if (zvbbEnable) true else false + case _ => false + }} ++ t1customInstructions.map(_.instruction) }.toSeq.sortBy(_.instructionSet.name).filter{ insn => insn.name match { @@ -132,6 +137,9 @@ case class T1Parameter( /** does t1 has floating datapath? */ val fpuEnable: Boolean = extensions.contains("Zve32f") + /** support of zvbb */ + lazy val zvbbEnable: Boolean = extensions.contains("Zvbb") + /** how many chaining does T1 support, this is not a parameter yet. 
*/ val chainingSize: Int = 4 @@ -205,7 +213,7 @@ case class T1Parameter( // and the values are their respective delays. val crossLaneConnectCycles: Seq[Seq[Int]] = Seq.tabulate(laneNumber)(_ => Seq(1, 1)) - val decoderParam: DecoderParam = DecoderParam(fpuEnable, allInstructions) + val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, allInstructions) /** paraemter for AXI4. */ val axi4BundleParameter: AXI4BundleParameter = AXI4BundleParameter( diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 3ebe07df3b..80a85b51ea 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -13,7 +13,7 @@ import org.chipsalliance.t1.rtl.decoder.attribute._ object DecoderParam { implicit def rwP: upickle.default.ReadWriter[DecoderParam] = upickle.default.macroRW } -case class DecoderParam(fpuEnable: Boolean, allInstructions: Seq[Instruction]) +case class DecoderParam(fpuEnable: Boolean, zvbbEnable: Boolean, allInstructions: Seq[Instruction]) trait T1DecodeFiled[D <: Data] extends DecodeField[T1DecodePattern, D] with FieldName @@ -399,6 +399,10 @@ object Decoder { orderReduce ) else Seq() + } ++ { + if (param.zvbbEnable) + Seq() + else Seq() } def allDecodePattern(param: DecoderParam): Seq[T1DecodePattern] = param.allInstructions.map(T1DecodePattern(_, param)).toSeq.sortBy(_.instruction.name) diff --git a/t1/src/decoder/InstructionDocumentation.scala b/t1/src/decoder/InstructionDocumentation.scala index 22cf95823a..86c5a7e358 100644 --- a/t1/src/decoder/InstructionDocumentation.scala +++ b/t1/src/decoder/InstructionDocumentation.scala @@ -422,5 +422,22 @@ case class InstructionDocumentation(instruction: Instruction, param: DecoderPara case "vzext.vf2" => "TODO!" case "vzext.vf4" => "TODO!" case "vzext.vf8" => "TODO!" + // rv_zvbb + case "vandn.vv" => "TODO!" + case "vandn.vx" => "TODO!" + case "vbrev.v" => "TODO!" + case "vbrev8.v" => "TODO!" + case "vrev8.v" => "TODO!" + case "vclz.v" => "TODO!" 
+ case "vctz.v" => "TODO!" + case "vcpop.v" => "TODO!" + case "vrol.vv" => "TODO!" + case "vrol.vx" => "TODO!" + case "vror.vv" => "TODO!" + case "vror.vx" => "TODO!" + case "vror.vi" => "TODO!" + case "vwsll.vv" => "TODO!" + case "vwsll.vx" => "TODO!" + case "vwsll.vi" => "TODO!" } } diff --git a/t1/src/decoder/attribute/isLogic.scala b/t1/src/decoder/attribute/isLogic.scala index 96cc6c57f0..93e1bbb14a 100644 --- a/t1/src/decoder/attribute/isLogic.scala +++ b/t1/src/decoder/attribute/isLogic.scala @@ -37,6 +37,9 @@ object isLogic { "vxor.vi", "vxor.vv", "vxor.vx", + // rv_zvbb + "vandn.vv", + "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isMasklogic.scala b/t1/src/decoder/attribute/isMasklogic.scala index d9f1a35994..4455a7efba 100644 --- a/t1/src/decoder/attribute/isMasklogic.scala +++ b/t1/src/decoder/attribute/isMasklogic.scala @@ -31,6 +31,9 @@ object isMasklogic { "vmsof.m", "vmxnor.mm", "vmxor.mm", + // rv_zvbb + "vandn.vv", + "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index 229c45575a..8803a440df 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -260,6 +260,9 @@ object isScheduler { "vxor.vi", "vxor.vv", "vxor.vx", + // rv_zvbb + "vandn.vv", + "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } @@ -274,5 +277,5 @@ object isScheduler { } case class isScheduler(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. 
*/ " + override val description: String = " lane will send request to Sequencer and wait ack from Sequencer.\n instructions that will communicate with T1 top module.*/ " } diff --git a/t1/src/decoder/attribute/isSreadvd.scala b/t1/src/decoder/attribute/isSreadvd.scala index bf9fc68379..c9e639df18 100644 --- a/t1/src/decoder/attribute/isSreadvd.scala +++ b/t1/src/decoder/attribute/isSreadvd.scala @@ -293,6 +293,9 @@ object isSreadvd { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index 605588b088..4b3a6a098e 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -181,6 +181,8 @@ object isVtype { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/logicUop.scala b/t1/src/decoder/attribute/logicUop.scala index dcda5a7d05..4f0f60d8c0 100644 --- a/t1/src/decoder/attribute/logicUop.scala +++ b/t1/src/decoder/attribute/logicUop.scala @@ -63,6 +63,9 @@ object LogicUop { def t4(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( "vmandn.mm", + // rv_zvbb + "vandn.vv", + "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } From 51825fe05b3ba47c9690e8faf3d1fba70d8ab02d Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Wed, 10 Jul 2024 23:04:57 +0800 Subject: [PATCH 07/18] [rtl] add uop for zvbb reverse --- t1/src/decoder/Decoder.scala | 17 +++++- t1/src/decoder/T1DecodePattern.scala | 1 + t1/src/decoder/attribute/isScheduler.scala | 14 +++++ t1/src/decoder/attribute/isSreadvd.scala | 2 +- t1/src/decoder/attribute/isVtype.scala | 9 ++++ t1/src/decoder/attribute/logicUop.scala | 6 +-- .../decoder/attribute/zvbbExecutionType.scala | 53 +++++++++++++++++++ 7 files changed, 97 insertions(+), 5 
deletions(-) create mode 100644 t1/src/decoder/attribute/zvbbExecutionType.scala diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 80a85b51ea..8bda84cbf8 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -40,6 +40,10 @@ trait T1fpExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { def chiselType: UInt = UInt(2.W) } +trait T1zvbbExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { + def chiselType: UInt = UInt(2.W) +} + object Decoder { object logic extends BoolField { override def getTriState(pattern: T1DecodePattern): TriState = pattern.isLogic.value @@ -341,6 +345,15 @@ object Decoder { } } + object zvbbExecutionType extends T1zvbbExecutionTypeUopField { + override def genTable(pattern: T1DecodePattern): BitPat = pattern.zvbbExecutionType match { + case ZvbbExecutionType.Brev => BitPat("b00") + case ZvbbExecutionType.Brev8 => BitPat("b01") + case ZvbbExecutionType.Rev8 => BitPat("b10") + case ZvbbExecutionType.Nil => BitPat.dontCare(2) + } + } + def allFields(param: DecoderParam): Seq[T1DecodeFiled[_ >: Bool <: UInt]] = Seq( logic, adder, @@ -401,7 +414,9 @@ object Decoder { else Seq() } ++ { if (param.zvbbEnable) - Seq() + Seq( + zvbbExecutionType, + ) else Seq() } def allDecodePattern(param: DecoderParam): Seq[T1DecodePattern] = param.allInstructions.map(T1DecodePattern(_, param)).toSeq.sortBy(_.instruction.name) diff --git a/t1/src/decoder/T1DecodePattern.scala b/t1/src/decoder/T1DecodePattern.scala index d1bb849309..3ba5d9906a 100644 --- a/t1/src/decoder/T1DecodePattern.scala +++ b/t1/src/decoder/T1DecodePattern.scala @@ -110,6 +110,7 @@ case class T1DecodePattern(instruction: Instruction, param: DecoderParam) extend def fpExecutionType: FpExecutionType.Type = attribute.FpExecutionType(this) def topUop: TopUop = attribute.TopUop(this) def decoderUop: DecoderUop = attribute.DecoderUop(this) + def zvbbExecutionType: zvbbExecutionType.Type = attribute.ZvbbExecutionType(this) 
private def documentation: String = InstructionDocumentation(instruction, param).toString diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index 8803a440df..0c045dc87e 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -263,6 +263,20 @@ object isScheduler { // rv_zvbb "vandn.vv", "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vcpop.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isSreadvd.scala b/t1/src/decoder/attribute/isSreadvd.scala index c9e639df18..655923307d 100644 --- a/t1/src/decoder/attribute/isSreadvd.scala +++ b/t1/src/decoder/attribute/isSreadvd.scala @@ -310,5 +310,5 @@ object isSreadvd { } case class isSreadvd(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sReadVD -> !(ma || maskLogic) instruction need to read vd as operator. " + override val description: String = "sReadVD -> !(ma || maskLogic): instructions that need to read vd as the operator. 
" } diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index 4b3a6a098e..f1a0468c4a 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -183,6 +183,15 @@ object isVtype { "vzext.vf8", // rv_zvbb "vandn.vv", + "vandn.vx", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/logicUop.scala b/t1/src/decoder/attribute/logicUop.scala index 4f0f60d8c0..63e6dc2b1c 100644 --- a/t1/src/decoder/attribute/logicUop.scala +++ b/t1/src/decoder/attribute/logicUop.scala @@ -63,9 +63,6 @@ object LogicUop { def t4(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( "vmandn.mm", - // rv_zvbb - "vandn.vv", - "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } @@ -84,6 +81,9 @@ object LogicUop { def t8(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( "vmnand.mm", + // rv_zvbb + "vandn.vv", + "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/zvbbExecutionType.scala b/t1/src/decoder/attribute/zvbbExecutionType.scala new file mode 100644 index 0000000000..6529f0c59f --- /dev/null +++ b/t1/src/decoder/attribute/zvbbExecutionType.scala @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +object ZvbbExecutionType { + trait Type extends Uop { + def apply(t1DecodePattern: T1DecodePattern): Boolean + } + case object Brev extends Type { + def apply(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + "vbrev.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + } + case object Brev8 extends 
Type { + def apply(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + "vbrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + } + case object Rev8 extends Type { + def apply(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + "vrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + } + case object NIL extends Type { + def apply(t1DecodePattern: T1DecodePattern): Boolean = { + require(requirement = false, "unreachable") + false + } + } + def apply(t1DecodePattern: T1DecodePattern): Type = { + val tpe = Seq(Brev, Brev8, Rev8).filter(tpe => + tpe(t1DecodePattern) + ) + require(tpe.size <= 1) + tpe.headOption.getOrElse(Nil) + } +} + +case class ZvbbExecutionType(value: ZvbbExecutionType.Type) extends UopDecodeAttribute[ZvbbExecutionType.Type] { + override val description: String = "zvbb uop" +} From aec14b4851fe00a28e3e8397f21aaca619a3c26e Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Thu, 11 Jul 2024 17:39:26 +0800 Subject: [PATCH 08/18] add vfu for zvbb --- configgen/src/Main.scala | 4 ++- t1/src/LaneZvbb.scala | 61 +++++++++++++++++++++++++++++++++ t1/src/VectorFunctionUnit.scala | 6 ++-- t1/src/decoder/Decoder.scala | 10 +++--- 4 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 t1/src/LaneZvbb.scala diff --git a/configgen/src/Main.scala b/configgen/src/Main.scala index c48760fefb..e0ce690041 100644 --- a/configgen/src/Main.scala +++ b/configgen/src/Main.scala @@ -99,7 +99,9 @@ object Main { ), Seq(0, 1, 2, 3))), floatModuleParameters = - Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))) + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3))) ) ) if (doEmit) param.emit(targetFile) diff 
--git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala new file mode 100644 index 0000000000..b521472f02 --- /dev/null +++ b/t1/src/LaneZvbb.scala @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl + +import chisel3.experimental.hierarchy.instantiable +import chisel3.{UInt, _} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +import hardfloat._ +import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder} + +object LaneZvbbParam { + implicit def rw: upickle.default.ReadWriter[LaneZvbbParam] = upickle.default.macroRW +} + +case class LaneZvbbParam(datapathWidth: Int, latency: Int) extends VFUParameter with SerializableModuleParameter { + val inputBundle = new LaneZvbbRequest(datapathWidth) + val outputBundle = new LaneZvbbResponse(datapathWidth) + override val NeedSplit: Boolean = false +} + +class LaneZvbbRequest(datapathWidth: Int) extends VFUPipeBundle { + val src = Vec(3, UInt(datapathWidth.W)) // TODO: what is the order of vs1, vs2, vd + val opcode = UInt(3.W) +} + +class LaneZvbbResponse(datapathWidth: Int) extends VFUPipeBundle { + val data = UInt(datapathWidth.W) +} + +@instantiable +class LaneZvbb(val parameter: LaneZvbbParam) + extends VFUModule(parameter) with SerializableModule[LaneZvbbParam]{ + val response: LaneZvbbResponse = Wire(new LaneZvbbResponse(parameter.datapathWidth)) + val request : LaneZvbbRequest = connectIO(response).asTypeOf(parameter.inputBundle) + + val zvbbSrc: UInt = request.src(0) // vs2 + val zvbbBRev = UInt(parameter.datapathWidth.W) // element's bit reverse + for (i <- 0 until parameter.datapathWidth) { + zvbbBRev:= zvbbBRev ## zvbbSrc(i) + } + val zvbbBRev8 = UInt(parameter.datapathWidth.W) // byte's bit reverse + for (i <- 0 until parameter.datapathWidth/8) { + for (j <- 0 until 8) { + zvbbBRev8 := zvbbBRev8 ## zvbbSrc(i * 8 + j) + } + } + val zvbbRev8 = UInt(parameter.datapathWidth.W) // 
element's byte reverse + for (i <- 0 until parameter.datapathWidth/8) { + zvbbRev8:= zvbbRev8 ## zvbbSrc(parameter.datapathWidth - i * 8 - 1, parameter.datapathWidth - i * 8 - 1 - 8) + } + response.data := Mux(request.opcode(0), zvbbBRev, + Mux(request.opcode(1), zvbbBRev8, + Mux(reques.opcode(2), zvbbRev8, + zvbbSrc + ) + ) + ) +} + diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index 25ff98a49c..f1f3ffc62e 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -105,7 +105,8 @@ case class VFUInstantiateParameter( divModuleParameters: Seq[(SerializableModuleGenerator[LaneDiv, LaneDivParam], Seq[Int])], divfpModuleParameters: Seq[(SerializableModuleGenerator[LaneDivFP, LaneDivFPParam], Seq[Int])], otherModuleParameters: Seq[(SerializableModuleGenerator[OtherUnit, OtherUnitParam], Seq[Int])], - floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])] + floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])], + zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])] ) { val genVec: Seq[(SerializableModuleGenerator[_ <: VFUModule, _ <: VFUParameter], Seq[Int])] = logicModuleParameters ++ @@ -115,7 +116,8 @@ case class VFUInstantiateParameter( divModuleParameters ++ divfpModuleParameters ++ otherModuleParameters ++ - floatModuleParameters + floatModuleParameters ++ + zvbbModuleParameters genVec.foreach { case (_, connect) => connect.foreach(connectIndex => require(connectIndex < slotCount)) diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 8bda84cbf8..8f4bf9a31f 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -41,7 +41,7 @@ trait T1fpExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { } trait T1zvbbExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { - def chiselType: UInt = UInt(2.W) + def chiselType: UInt 
= UInt(3.W) } object Decoder { @@ -347,10 +347,10 @@ object Decoder { object zvbbExecutionType extends T1zvbbExecutionTypeUopField { override def genTable(pattern: T1DecodePattern): BitPat = pattern.zvbbExecutionType match { - case ZvbbExecutionType.Brev => BitPat("b00") - case ZvbbExecutionType.Brev8 => BitPat("b01") - case ZvbbExecutionType.Rev8 => BitPat("b10") - case ZvbbExecutionType.Nil => BitPat.dontCare(2) + case ZvbbExecutionType.Brev => BitPat("b001") + case ZvbbExecutionType.Brev8 => BitPat("b010") + case ZvbbExecutionType.Rev8 => BitPat("b100") + case ZvbbExecutionType.Nil => BitPat.dontCare(3) } } From df24e7901f432c2bf5e6f76414e12370a876b871 Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Fri, 12 Jul 2024 08:47:15 +0800 Subject: [PATCH 09/18] fix errors --- t1/src/LaneZvbb.scala | 3 +- t1/src/decoder/Decoder.scala | 5 +++ t1/src/decoder/T1DecodePattern.scala | 3 +- t1/src/decoder/attribute/isZvbb.scala | 38 +++++++++++++++++++ .../decoder/attribute/zvbbExecutionType.scala | 14 +++---- 5 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 t1/src/decoder/attribute/isZvbb.scala diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala index b521472f02..f5a49db74b 100644 --- a/t1/src/LaneZvbb.scala +++ b/t1/src/LaneZvbb.scala @@ -16,6 +16,7 @@ object LaneZvbbParam { case class LaneZvbbParam(datapathWidth: Int, latency: Int) extends VFUParameter with SerializableModuleParameter { val inputBundle = new LaneZvbbRequest(datapathWidth) + val decodeField: BoolField = Decoder.zvbb val outputBundle = new LaneZvbbResponse(datapathWidth) override val NeedSplit: Boolean = false } @@ -52,7 +53,7 @@ class LaneZvbb(val parameter: LaneZvbbParam) } response.data := Mux(request.opcode(0), zvbbBRev, Mux(request.opcode(1), zvbbBRev8, - Mux(reques.opcode(2), zvbbRev8, + Mux(request.opcode(2), zvbbRev8, zvbbSrc ) ) diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 8f4bf9a31f..aa7924253c 100644 --- a/t1/src/decoder/Decoder.scala +++ 
b/t1/src/decoder/Decoder.scala @@ -225,6 +225,10 @@ object Decoder { override def getTriState(pattern: T1DecodePattern): TriState = pattern.isOrderreduce.value } + object zvbb extends BoolField { + override def getTriState(pattern: T1DecodePattern): TriState = pattern.isZvbb.value + } + object topUop extends T1TopUopField { override def genTable(pattern: T1DecodePattern): BitPat = pattern.topUop.value match { case _: TopT0.type => BitPat("b000") @@ -415,6 +419,7 @@ object Decoder { } ++ { if (param.zvbbEnable) Seq( + zvbb, zvbbExecutionType, ) else Seq() diff --git a/t1/src/decoder/T1DecodePattern.scala b/t1/src/decoder/T1DecodePattern.scala index 3ba5d9906a..7185b0af87 100644 --- a/t1/src/decoder/T1DecodePattern.scala +++ b/t1/src/decoder/T1DecodePattern.scala @@ -107,10 +107,11 @@ case class T1DecodePattern(instruction: Instruction, param: DecoderParam) extend def isVtype: isVtype = attribute.isVtype(this) def isVwmacc: isVwmacc = attribute.isVwmacc(this) def isWidenreduce: isWidenreduce = attribute.isWidenreduce(this) + def isZvbb: isZvbb = attribute.isZvbb(this) def fpExecutionType: FpExecutionType.Type = attribute.FpExecutionType(this) def topUop: TopUop = attribute.TopUop(this) def decoderUop: DecoderUop = attribute.DecoderUop(this) - def zvbbExecutionType: zvbbExecutionType.Type = attribute.ZvbbExecutionType(this) + def zvbbExecutionType: ZvbbExecutionType.Type = attribute.ZvbbExecutionType(this) private def documentation: String = InstructionDocumentation(instruction, param).toString diff --git a/t1/src/decoder/attribute/isZvbb.scala b/t1/src/decoder/attribute/isZvbb.scala new file mode 100644 index 0000000000..f7f0ef4c4f --- /dev/null +++ b/t1/src/decoder/attribute/isZvbb.scala @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +object isZvbb { + def apply(t1DecodePattern: T1DecodePattern): 
isZvbb = + Seq( + y _ -> Y, + n _ -> N, + dc _ -> DC + ).collectFirst { + case (fn, tri) if fn(t1DecodePattern) => isZvbb(tri) + }.get + + def y(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + "vbrev.v", + "vbrev8.v", + "vrev8.v", + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + def n(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = t1DecodePattern.param.allInstructions.filter(i => + !(y(t1DecodePattern) || dc(t1DecodePattern)) + ) + allMatched.contains(t1DecodePattern.instruction) + } + + def dc(t1DecodePattern: T1DecodePattern): Boolean = false +} + +case class isZvbb(value: TriState) extends BooleanDecodeAttribute { + override val description: String = "goes to [[org.chipsalliance.t1.rtl.LaneZvbb]]." +} diff --git a/t1/src/decoder/attribute/zvbbExecutionType.scala b/t1/src/decoder/attribute/zvbbExecutionType.scala index 6529f0c59f..07b2d13fe2 100644 --- a/t1/src/decoder/attribute/zvbbExecutionType.scala +++ b/t1/src/decoder/attribute/zvbbExecutionType.scala @@ -11,29 +11,29 @@ object ZvbbExecutionType { } case object Brev extends Type { def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( "vbrev.v" - ) + ) else Seq() allMatched.contains(t1DecodePattern.instruction.name) } } case object Brev8 extends Type { def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( "vbrev8.v" - ) + ) else Seq() allMatched.contains(t1DecodePattern.instruction.name) } } case object Rev8 extends Type { def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( "vrev8.v" - ) + ) else Seq() 
allMatched.contains(t1DecodePattern.instruction.name) } } - case object NIL extends Type { + case object Nil extends Type { def apply(t1DecodePattern: T1DecodePattern): Boolean = { require(requirement = false, "unreachable") false From 6a4e3ed6aa535db1021fe79a4a83c30afd40cdb3 Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Fri, 12 Jul 2024 10:50:38 +0800 Subject: [PATCH 10/18] support rot, clz, ctz --- configgen/src/Main.scala | 6 ++- t1/src/LaneZvbb.scala | 32 ++++++++++++---- t1/src/VectorFunctionUnit.scala | 2 +- t1/src/decoder/Decoder.scala | 14 ++++--- t1/src/decoder/attribute/isPopcount.scala | 1 + t1/src/decoder/attribute/isSreadvd.scala | 3 -- t1/src/decoder/attribute/isZvbb.scala | 7 ++++ .../decoder/attribute/zvbbExecutionType.scala | 37 ++++++++++++++++++- 8 files changed, 83 insertions(+), 19 deletions(-) diff --git a/configgen/src/Main.scala b/configgen/src/Main.scala index e0ce690041..d1ac6df306 100644 --- a/configgen/src/Main.scala +++ b/configgen/src/Main.scala @@ -150,7 +150,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) @@ -199,7 +200,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala index f5a49db74b..57c3c886c6 100644 --- a/t1/src/LaneZvbb.scala +++ b/t1/src/LaneZvbb.scala @@ -22,8 +22,8 @@ case class LaneZvbbParam(datapathWidth: Int, latency: Int) extends VFUParameter } class LaneZvbbRequest(datapathWidth: Int) extends VFUPipeBundle { - val src = Vec(3, UInt(datapathWidth.W)) // TODO: what is the order of vs1, vs2, vd - val opcode = 
UInt(3.W) + val src = Vec(3, UInt(datapathWidth.W)) + val opcode = UInt(4.W) } class LaneZvbbResponse(datapathWidth: Int) extends VFUPipeBundle { @@ -37,6 +37,7 @@ class LaneZvbb(val parameter: LaneZvbbParam) val request : LaneZvbbRequest = connectIO(response).asTypeOf(parameter.inputBundle) val zvbbSrc: UInt = request.src(0) // vs2 + val zvbbRs: UInt = request.src(1) // vs1 or rs1 val zvbbBRev = UInt(parameter.datapathWidth.W) // element's bit reverse for (i <- 0 until parameter.datapathWidth) { zvbbBRev:= zvbbBRev ## zvbbSrc(i) @@ -51,11 +52,28 @@ class LaneZvbb(val parameter: LaneZvbbParam) for (i <- 0 until parameter.datapathWidth/8) { zvbbRev8:= zvbbRev8 ## zvbbSrc(parameter.datapathWidth - i * 8 - 1, parameter.datapathWidth - i * 8 - 1 - 8) } - response.data := Mux(request.opcode(0), zvbbBRev, - Mux(request.opcode(1), zvbbBRev8, - Mux(request.opcode(2), zvbbRev8, - zvbbSrc - ) + val zvbbCLZ = UInt(parameter.datapathWidth.W) + for (i <- 0 until parameter.datapathWidth) { + when(zvbbSrc(parameter.datapathWidth-i-1) === 1.U) { + zvbbCLZ := zvbbCLZ + 1.U + } + } + val zvbbCTZ = UInt(parameter.datapathWidth.W) + for (i <- 0 until parameter.datapathWidth) { + when(zvbbSrc(i) === 1.U) { + zvbbCTZ := zvbbCTZ + 1.U + } + } + val zvbbROL = zvbbSrc.rotateLeft(zvbbRs) + val zvbbROR = zvbbSrc.rotateRight(zvbbRs) + response.data := Mux1H(UIntToOH(request.opcode), Seq( + zvbbBRev, + zvbbBRev8, + zvbbRev8, + zvbbCLZ, + zvbbCTZ, + zvbbROL, + zvbbROR, ) ) } diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index f1f3ffc62e..cf06a66afe 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -106,7 +106,7 @@ case class VFUInstantiateParameter( divfpModuleParameters: Seq[(SerializableModuleGenerator[LaneDivFP, LaneDivFPParam], Seq[Int])], otherModuleParameters: Seq[(SerializableModuleGenerator[OtherUnit, OtherUnitParam], Seq[Int])], floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], 
Seq[Int])], - zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])] + zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneZvbb, LaneZvbbParam], Seq[Int])] ) { val genVec: Seq[(SerializableModuleGenerator[_ <: VFUModule, _ <: VFUParameter], Seq[Int])] = logicModuleParameters ++ diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index aa7924253c..4bd781dcf2 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -41,7 +41,7 @@ trait T1fpExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { } trait T1zvbbExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { - def chiselType: UInt = UInt(3.W) + def chiselType: UInt = UInt(4.W) } object Decoder { @@ -351,10 +351,14 @@ object Decoder { object zvbbExecutionType extends T1zvbbExecutionTypeUopField { override def genTable(pattern: T1DecodePattern): BitPat = pattern.zvbbExecutionType match { - case ZvbbExecutionType.Brev => BitPat("b001") - case ZvbbExecutionType.Brev8 => BitPat("b010") - case ZvbbExecutionType.Rev8 => BitPat("b100") - case ZvbbExecutionType.Nil => BitPat.dontCare(3) + case ZvbbExecutionType.Brev => BitPat("b0000") + case ZvbbExecutionType.Brev8 => BitPat("b0001") + case ZvbbExecutionType.Rev8 => BitPat("b0010") + case ZvbbExecutionType.CLZ => BitPat("b0011") + case ZvbbExecutionType.CTZ => BitPat("b0100") + case ZvbbExecutionType.ROL => BitPat("b0101") + case ZvbbExecutionType.ROR => BitPat("b0110") + case ZvbbExecutionType.Nil => BitPat.dontCare(4) } } diff --git a/t1/src/decoder/attribute/isPopcount.scala b/t1/src/decoder/attribute/isPopcount.scala index 0137b77b0c..3a949c4367 100644 --- a/t1/src/decoder/attribute/isPopcount.scala +++ b/t1/src/decoder/attribute/isPopcount.scala @@ -18,6 +18,7 @@ object isPopcount { def y(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched = Seq( "vcpop.m", + "vcpop.v", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git 
a/t1/src/decoder/attribute/isSreadvd.scala b/t1/src/decoder/attribute/isSreadvd.scala index 655923307d..e6fa9bb765 100644 --- a/t1/src/decoder/attribute/isSreadvd.scala +++ b/t1/src/decoder/attribute/isSreadvd.scala @@ -293,9 +293,6 @@ object isSreadvd { "vzext.vf2", "vzext.vf4", "vzext.vf8", - // rv_zvbb - "vandn.vv", - "vandn.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isZvbb.scala b/t1/src/decoder/attribute/isZvbb.scala index f7f0ef4c4f..2fb8802e64 100644 --- a/t1/src/decoder/attribute/isZvbb.scala +++ b/t1/src/decoder/attribute/isZvbb.scala @@ -20,6 +20,13 @@ object isZvbb { "vbrev.v", "vbrev8.v", "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", ) else Seq() allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/zvbbExecutionType.scala b/t1/src/decoder/attribute/zvbbExecutionType.scala index 07b2d13fe2..74d186bf7f 100644 --- a/t1/src/decoder/attribute/zvbbExecutionType.scala +++ b/t1/src/decoder/attribute/zvbbExecutionType.scala @@ -33,6 +33,41 @@ object ZvbbExecutionType { allMatched.contains(t1DecodePattern.instruction.name) } } + case object CLZ extends Type { + def apply(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( + "vclz.v" + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + } + case object CTZ extends Type { + def apply(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( + "vctz.v" + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + } + case object ROL extends Type { + def apply(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( + "vrol.vv", + "vrol.vx", + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + } + case object ROR extends Type { + def apply(t1DecodePattern: 
T1DecodePattern): Boolean = { + val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( + "vror.vv", + "vror.vx", + "vror.vi", + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + } case object Nil extends Type { def apply(t1DecodePattern: T1DecodePattern): Boolean = { require(requirement = false, "unreachable") @@ -40,7 +75,7 @@ object ZvbbExecutionType { } } def apply(t1DecodePattern: T1DecodePattern): Type = { - val tpe = Seq(Brev, Brev8, Rev8).filter(tpe => + val tpe = Seq(Brev, Brev8, Rev8, CLZ, CTZ, ROL, ROR).filter(tpe => tpe(t1DecodePattern) ) require(tpe.size <= 1) From cd2abe9c956dd990734759b0b25a0f874d4cbaf3 Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Fri, 12 Jul 2024 15:56:05 +0800 Subject: [PATCH 11/18] add new pockerman and fix errors --- configgen/src/Main.scala | 73 +++++++++++++++ t1/src/LaneZvbb.scala | 19 ++-- t1/src/T1.scala | 2 +- t1/src/decoder/Decoder.scala | 28 +++--- t1/src/decoder/T1DecodePattern.scala | 1 - t1/src/decoder/attribute/isScheduler.scala | 17 ---- t1/src/decoder/attribute/isVtype.scala | 6 -- t1/src/decoder/attribute/uop.scala | 3 +- .../decoder/attribute/zvbbExecutionType.scala | 88 ------------------- t1/src/decoder/attribute/zvbbUop.scala | 76 ++++++++++++++++ 10 files changed, 168 insertions(+), 145 deletions(-) delete mode 100644 t1/src/decoder/attribute/zvbbExecutionType.scala create mode 100644 t1/src/decoder/attribute/zvbbUop.scala diff --git a/configgen/src/Main.scala b/configgen/src/Main.scala index d1ac6df306..43e2c2dac2 100644 --- a/configgen/src/Main.scala +++ b/configgen/src/Main.scala @@ -71,6 +71,79 @@ object Main { t1customInstructions = Nil, vrfBankSize = 1, vrfRamType = RamType.p0rwp1rw, + vfuInstantiateParameter = VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), 
Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq(), + divfpModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = Seq() + ) + ) + if (doEmit) param.emit(targetFile) + param + } + + // DLEN256 VLEN512; FP; VRF p0rw,p1rw bank1; LSU bank8 beatbyte 8; Zvbb + @main def psyduck( + @arg(name = "target-file", short = 't') targetFile: os.Path, + @arg(name = "emit", short = 'e', doc = "emit config") doEmit: Boolean = true + ): T1Parameter = { + val vLen = 512 + val dLen = 256 + val param = T1Parameter( + vLen, + dLen, + extensions = Seq("Zve32f", "Zvbb"), + t1customInstructions = Nil, + lsuBankParameters = + // scalar bank 0-1G + Seq( + BitSet(BitPat("b00??????????????????????????????")) + ).map(bs => LSUBankParameter("scalar", bs, 8, true)) ++ + // ddr bank 1G-3G 512M/bank + Seq( + BitSet(BitPat("b01???????????????????????00?????"), BitPat("b10???????????????????????00?????")), + BitSet(BitPat("b01???????????????????????01?????"), BitPat("b10???????????????????????01?????")), + BitSet(BitPat("b01???????????????????????10?????"), 
BitPat("b10???????????????????????10?????")), + BitSet(BitPat("b01???????????????????????11?????"), BitPat("b10???????????????????????11?????")) + ).zipWithIndex.map { case (bs: BitSet, idx: Int) => LSUBankParameter(s"ddrBank$idx", bs, 8, false) } ++ + // sRam bank 3G+ 256K/bank, 8banks + Seq( + BitSet(BitPat("b11000000000?????????????000?????")), + BitSet(BitPat("b11000000000?????????????001?????")), + BitSet(BitPat("b11000000000?????????????010?????")), + BitSet(BitPat("b11000000000?????????????011?????")), + BitSet(BitPat("b11000000000?????????????100?????")), + BitSet(BitPat("b11000000000?????????????101?????")), + BitSet(BitPat("b11000000000?????????????110?????")), + BitSet(BitPat("b11000000000?????????????111?????")) + ).zipWithIndex.map { case (bs: BitSet, idx: Int) => LSUBankParameter(s"sramBank$idx", bs, 8, false) }, + vrfBankSize = 1, + vrfRamType = RamType.p0rwp1rw, vfuInstantiateParameter = VFUInstantiateParameter( slotCount = 4, logicModuleParameters = Seq( diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala index 57c3c886c6..6d17bd2f71 100644 --- a/t1/src/LaneZvbb.scala +++ b/t1/src/LaneZvbb.scala @@ -38,20 +38,11 @@ class LaneZvbb(val parameter: LaneZvbbParam) val zvbbSrc: UInt = request.src(0) // vs2 val zvbbRs: UInt = request.src(1) // vs1 or rs1 - val zvbbBRev = UInt(parameter.datapathWidth.W) // element's bit reverse - for (i <- 0 until parameter.datapathWidth) { - zvbbBRev:= zvbbBRev ## zvbbSrc(i) - } - val zvbbBRev8 = UInt(parameter.datapathWidth.W) // byte's bit reverse - for (i <- 0 until parameter.datapathWidth/8) { - for (j <- 0 until 8) { - zvbbBRev8 := zvbbBRev8 ## zvbbSrc(i * 8 + j) - } - } - val zvbbRev8 = UInt(parameter.datapathWidth.W) // element's byte reverse - for (i <- 0 until parameter.datapathWidth/8) { - zvbbRev8:= zvbbRev8 ## zvbbSrc(parameter.datapathWidth - i * 8 - 1, parameter.datapathWidth - i * 8 - 1 - 8) - } + + val zvbbBRev = VecInit(zvbbSrc(parameter.datapathWidth-1, 0).asBools.reverse).asUInt // element's bit 
reverse + val zvbbBRev8 = VecInit(zvbbSrc(parameter.datapathWidth-1, 0).asBools.grouped(8).map(s => VecInit(s.reverse)).toSeq).asUInt // byte's bit reverse + val zvbbRev8 = VecInit(zvbbSrc(parameter.datapathWidth-1, 0).asBools.grouped(8).map(s => VecInit(s)).toSeq.reverse).asUInt // element's byte reverse + val zvbbCLZ = UInt(parameter.datapathWidth.W) for (i <- 0 until parameter.datapathWidth) { when(zvbbSrc(parameter.datapathWidth-i-1) === 1.U) { diff --git a/t1/src/T1.scala b/t1/src/T1.scala index b0aefdc9f4..bd41a84e4c 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -120,7 +120,7 @@ case class T1Parameter( } } - require(extensions.forall(Seq("Zve32x", "Zve32f").contains), "unsupported extension.") + require(extensions.forall(Seq("Zve32x", "Zve32f", "Zvbb").contains), "unsupported extension.") // TODO: require bank not overlap /** xLen of T1, we currently only support 32. */ val xLen: Int = 32 diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 4bd781dcf2..a9760eca49 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -40,10 +40,6 @@ trait T1fpExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { def chiselType: UInt = UInt(2.W) } -trait T1zvbbExecutionTypeUopField extends T1DecodeFiled[UInt] with FieldName { - def chiselType: UInt = UInt(4.W) -} - object Decoder { object logic extends BoolField { override def getTriState(pattern: T1DecodePattern): TriState = pattern.isLogic.value @@ -336,6 +332,17 @@ object Decoder { case _: zeroUop0.type => BitPat("b0000") case _ => BitPat.dontCare(4) } + case zvbbCase: ZvbbUOPType => + zvbbCase match { + case _: zvbbUop0.type => BitPat("b0000") + case _: zvbbUop1.type => BitPat("b0001") + case _: zvbbUop2.type => BitPat("b0010") + case _: zvbbUop3.type => BitPat("b0011") + case _: zvbbUop4.type => BitPat("b0100") + case _: zvbbUop5.type => BitPat("b0101") + case _: zvbbUop6.type => BitPat("b0110") + case _ => BitPat.dontCare(4) + } case _ => 
BitPat.dontCare(4) } } @@ -349,18 +356,6 @@ object Decoder { } } - object zvbbExecutionType extends T1zvbbExecutionTypeUopField { - override def genTable(pattern: T1DecodePattern): BitPat = pattern.zvbbExecutionType match { - case ZvbbExecutionType.Brev => BitPat("b0000") - case ZvbbExecutionType.Brev8 => BitPat("b0001") - case ZvbbExecutionType.Rev8 => BitPat("b0010") - case ZvbbExecutionType.CLZ => BitPat("b0011") - case ZvbbExecutionType.CTZ => BitPat("b0100") - case ZvbbExecutionType.ROL => BitPat("b0101") - case ZvbbExecutionType.ROR => BitPat("b0110") - case ZvbbExecutionType.Nil => BitPat.dontCare(4) - } - } def allFields(param: DecoderParam): Seq[T1DecodeFiled[_ >: Bool <: UInt]] = Seq( logic, @@ -424,7 +419,6 @@ object Decoder { if (param.zvbbEnable) Seq( zvbb, - zvbbExecutionType, ) else Seq() } diff --git a/t1/src/decoder/T1DecodePattern.scala b/t1/src/decoder/T1DecodePattern.scala index 7185b0af87..5c7d107339 100644 --- a/t1/src/decoder/T1DecodePattern.scala +++ b/t1/src/decoder/T1DecodePattern.scala @@ -111,7 +111,6 @@ case class T1DecodePattern(instruction: Instruction, param: DecoderParam) extend def fpExecutionType: FpExecutionType.Type = attribute.FpExecutionType(this) def topUop: TopUop = attribute.TopUop(this) def decoderUop: DecoderUop = attribute.DecoderUop(this) - def zvbbExecutionType: ZvbbExecutionType.Type = attribute.ZvbbExecutionType(this) private def documentation: String = InstructionDocumentation(instruction, param).toString diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index 0c045dc87e..b64514e645 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -260,23 +260,6 @@ object isScheduler { "vxor.vi", "vxor.vv", "vxor.vx", - // rv_zvbb - "vandn.vv", - "vandn.vx", - "vbrev.v", - "vbrev8.v", - "vrev8.v", - "vclz.v", - "vctz.v", - "vcpop.v", - "vrol.vv", - "vrol.vx", - "vror.vv", - "vror.vx", - "vror.vi", - "vwsll.vv", - "vwsll.vx", 
- "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index f1a0468c4a..7649d715a2 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -183,15 +183,9 @@ object isVtype { "vzext.vf8", // rv_zvbb "vandn.vv", - "vandn.vx", "vrol.vv", - "vrol.vx", "vror.vv", - "vror.vx", - "vror.vi", "vwsll.vv", - "vwsll.vx", - "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/uop.scala b/t1/src/decoder/attribute/uop.scala index 66d8dbf02c..97d49365cb 100644 --- a/t1/src/decoder/attribute/uop.scala +++ b/t1/src/decoder/attribute/uop.scala @@ -15,7 +15,8 @@ object DecoderUop { isLogic.y(t1DecodePattern) -> LogicUop(t1DecodePattern), isShift.y(t1DecodePattern) -> ShiftUop(t1DecodePattern), isOther.y(t1DecodePattern) -> OtherUop(t1DecodePattern), - isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern) + isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern), + isZvbb.y(t1DecodePattern) -> ZvbbUOP(t1DecodePattern), ).collectFirst { case (fn, tpe) if fn => DecoderUop(tpe) } diff --git a/t1/src/decoder/attribute/zvbbExecutionType.scala b/t1/src/decoder/attribute/zvbbExecutionType.scala deleted file mode 100644 index 74d186bf7f..0000000000 --- a/t1/src/decoder/attribute/zvbbExecutionType.scala +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: 2022 Jiuyang Liu - -package org.chipsalliance.t1.rtl.decoder.attribute - -import org.chipsalliance.t1.rtl.decoder.T1DecodePattern - -object ZvbbExecutionType { - trait Type extends Uop { - def apply(t1DecodePattern: T1DecodePattern): Boolean - } - case object Brev extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( - "vbrev.v" - ) else Seq() - allMatched.contains(t1DecodePattern.instruction.name) - } - } - case object 
Brev8 extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( - "vbrev8.v" - ) else Seq() - allMatched.contains(t1DecodePattern.instruction.name) - } - } - case object Rev8 extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( - "vrev8.v" - ) else Seq() - allMatched.contains(t1DecodePattern.instruction.name) - } - } - case object CLZ extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( - "vclz.v" - ) else Seq() - allMatched.contains(t1DecodePattern.instruction.name) - } - } - case object CTZ extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( - "vctz.v" - ) else Seq() - allMatched.contains(t1DecodePattern.instruction.name) - } - } - case object ROL extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( - "vrol.vv", - "vrol.vx", - ) else Seq() - allMatched.contains(t1DecodePattern.instruction.name) - } - } - case object ROR extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - val allMatched = if(isZvbb.y(t1DecodePattern)) Seq( - "vror.vv", - "vror.vx", - "vror.vi", - ) else Seq() - allMatched.contains(t1DecodePattern.instruction.name) - } - } - case object Nil extends Type { - def apply(t1DecodePattern: T1DecodePattern): Boolean = { - require(requirement = false, "unreachable") - false - } - } - def apply(t1DecodePattern: T1DecodePattern): Type = { - val tpe = Seq(Brev, Brev8, Rev8, CLZ, CTZ, ROL, ROR).filter(tpe => - tpe(t1DecodePattern) - ) - require(tpe.size <= 1) - tpe.headOption.getOrElse(Nil) - } -} - -case class ZvbbExecutionType(value: ZvbbExecutionType.Type) extends UopDecodeAttribute[ZvbbExecutionType.Type] { - override val description: String = "zvbb uop" -} diff --git 
a/t1/src/decoder/attribute/zvbbUop.scala b/t1/src/decoder/attribute/zvbbUop.scala new file mode 100644 index 0000000000..ad9a45403f --- /dev/null +++ b/t1/src/decoder/attribute/zvbbUop.scala @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +trait ZvbbUOPType extends Uop +object zvbbUop0 extends ZvbbUOPType +object zvbbUop1 extends ZvbbUOPType +object zvbbUop2 extends ZvbbUOPType +object zvbbUop3 extends ZvbbUOPType +object zvbbUop4 extends ZvbbUOPType +object zvbbUop5 extends ZvbbUOPType +object zvbbUop6 extends ZvbbUOPType + +object ZvbbUOP { + def apply(t1DecodePattern: T1DecodePattern): Uop = { + Seq( + t0 _ -> zvbbUop0, + t1 _ -> zvbbUop1, + t2 _ -> zvbbUop2, + t3 _ -> zvbbUop3, + t4 _ -> zvbbUop4, + t5 _ -> zvbbUop5, + t6 _ -> zvbbUop6, + ).collectFirst { + case (fn, tpe) if fn(t1DecodePattern) => tpe + }.getOrElse(UopDC) + } + def t0(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t1(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t2(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t3(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vclz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t4(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vctz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t5(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrol.vv", + "vrol.vx", + ) + 
allMatched.contains(t1DecodePattern.instruction.name) + } + def t6(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vror.vv", + "vror.vx", + "vror.vi", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } +} From 4a44f914eab84c162e063b35ac726508447d4964 Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Mon, 15 Jul 2024 16:24:12 +0800 Subject: [PATCH 12/18] add scanLeft/Right --- t1/src/LaneZvbb.scala | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala index 6d17bd2f71..a122848aec 100644 --- a/t1/src/LaneZvbb.scala +++ b/t1/src/LaneZvbb.scala @@ -43,20 +43,10 @@ class LaneZvbb(val parameter: LaneZvbbParam) val zvbbBRev8 = VecInit(zvbbSrc(parameter.datapathWidth-1, 0).asBools.grouped(8).map(s => VecInit(s.reverse)).toSeq).asUInt // byte's bit reverse val zvbbRev8 = VecInit(zvbbSrc(parameter.datapathWidth-1, 0).asBools.grouped(8).map(s => VecInit(s)).toSeq.reverse).asUInt // element's byte reverse - val zvbbCLZ = UInt(parameter.datapathWidth.W) - for (i <- 0 until parameter.datapathWidth) { - when(zvbbSrc(parameter.datapathWidth-i-1) === 1.U) { - zvbbCLZ := zvbbCLZ + 1.U - } - } - val zvbbCTZ = UInt(parameter.datapathWidth.W) - for (i <- 0 until parameter.datapathWidth) { - when(zvbbSrc(i) === 1.U) { - zvbbCTZ := zvbbCTZ + 1.U - } - } - val zvbbROL = zvbbSrc.rotateLeft(zvbbRs) - val zvbbROR = zvbbSrc.rotateRight(zvbbRs) + val zvbbCLZ = (PopCount(scanLeftOr(zvbbBRev)) - 1.U).asUInt + val zvbbCTZ = (PopCount(scanRightOr(zvbbBRev)) - 1.U).asUInt + val zvbbROL = zvbbSrc.rotateLeft(zvbbRs(4, 0)).asUInt + val zvbbROR = zvbbSrc.rotateRight(zvbbRs(4, 0)).asUInt response.data := Mux1H(UIntToOH(request.opcode), Seq( zvbbBRev, zvbbBRev8, From ab412718078a3d349211d95666d7c487830ed475 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Tue, 16 Jul 2024 18:45:11 +0800 Subject: [PATCH 13/18] add config --- configgen/generated/psyduck.json | 271 
+++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 configgen/generated/psyduck.json diff --git a/configgen/generated/psyduck.json b/configgen/generated/psyduck.json new file mode 100644 index 0000000000..97d364b3d7 --- /dev/null +++ b/configgen/generated/psyduck.json @@ -0,0 +1,271 @@ +{ + "parameter": { + "vLen": 512, + "dLen": 256, + "extensions": [ + "Zve32f", + "Zvbb" + ], + "t1customInstructions": [], + "lsuBankParameters": [ + { + "name": "scalar", + "region": "b00??????????????????????????????", + "beatbyte": 8, + "accessScalar": true + }, + { + "name": "ddrBank0", + "region": "b01???????????????????????00?????\nb10???????????????????????00?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "ddrBank1", + "region": "b01???????????????????????01?????\nb10???????????????????????01?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "ddrBank2", + "region": "b01???????????????????????10?????\nb10???????????????????????10?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "ddrBank3", + "region": "b01???????????????????????11?????\nb10???????????????????????11?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank0", + "region": "b11000000000?????????????000?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank1", + "region": "b11000000000?????????????001?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank2", + "region": "b11000000000?????????????010?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank3", + "region": "b11000000000?????????????011?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank4", + "region": "b11000000000?????????????100?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank5", + "region": "b11000000000?????????????101?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank6", + "region": 
"b11000000000?????????????110?????", + "beatbyte": 8, + "accessScalar": false + }, + { + "name": "sramBank7", + "region": "b11000000000?????????????111?????", + "beatbyte": 8, + "accessScalar": false + } + ], + "vrfBankSize": 1, + "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rwp1rw", + "vfuInstantiateParameter": { + "slotCount": 4, + "logicModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.MaskedLogic" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "aluModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 0 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 1 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 2 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 3 + ] + ] + ], + "shifterModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneShifter" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "mulModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 2 + }, + "generator": "org.chipsalliance.t1.rtl.LaneMul" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "divModuleParameters": [], + "divfpModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneDivFP" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "otherModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "vlMaxBits": 10, + "groupNumberBits": 4, + "laneNumberBits": 3, + "dataPathByteWidth": 4, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.OtherUnit" + }, + [ + 0, + 1, + 2, + 
3 + ] + ] + ], + "floatModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneFloat" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "zvbbModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneZvbb" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ] + } + }, + "generator": "org.chipsalliance.t1.rtl.T1" +} \ No newline at end of file From c570323f68025620d7ee972f776bec83ea9873a6 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Tue, 16 Jul 2024 18:48:12 +0800 Subject: [PATCH 14/18] fix om escape --- t1/src/decoder/attribute/isScheduler.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index b64514e645..d12f8e581c 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -274,5 +274,5 @@ object isScheduler { } case class isScheduler(value: TriState) extends BooleanDecodeAttribute { - override val description: String = " lane will send request to Sequencer and wait ack from Sequencer.\n instructions that will communicate with T1 top module.*/ " + override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. 
instructions that will communicate with T1 top module.*/ " } From 3514d2d19afc118b98860e1ad820ba9889cbecfc Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Tue, 23 Jul 2024 22:15:43 +0800 Subject: [PATCH 15/18] add attribute for vwsll --- t1/src/decoder/attribute/isCrossread.scala | 4 +++ t1/src/decoder/attribute/isCrosswrite.scala | 4 +++ t1/src/decoder/attribute/isScheduler.scala | 2 +- t1/src/decoder/attribute/isSwrite.scala | 2 +- t1/src/decoder/attribute/isVtype.scala | 1 + t1/src/decoder/attribute/isZvbb.scala | 3 +++ t1/src/decoder/attribute/zvbbUop.scala | 28 +++++++++++++-------- 7 files changed, 32 insertions(+), 12 deletions(-) diff --git a/t1/src/decoder/attribute/isCrossread.scala b/t1/src/decoder/attribute/isCrossread.scala index 34c11bed3d..5787aa1361 100644 --- a/t1/src/decoder/attribute/isCrossread.scala +++ b/t1/src/decoder/attribute/isCrossread.scala @@ -56,6 +56,10 @@ object isCrossread { "vwsub.wx", "vwsubu.wv", "vwsubu.wx", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isCrosswrite.scala b/t1/src/decoder/attribute/isCrosswrite.scala index cbe920dbba..bddbc3818a 100644 --- a/t1/src/decoder/attribute/isCrosswrite.scala +++ b/t1/src/decoder/attribute/isCrosswrite.scala @@ -46,6 +46,10 @@ object isCrosswrite { "vwsubu.vx", "vwsubu.wv", "vwsubu.wx", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index d12f8e581c..423b59a358 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -274,5 +274,5 @@ object isScheduler { } case class isScheduler(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. 
instructions that will communicate with T1 top module.*/ " + override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. Instructions that will communicate with T1 top module.*/ " } diff --git a/t1/src/decoder/attribute/isSwrite.scala b/t1/src/decoder/attribute/isSwrite.scala index cfddf2e04f..01325fe1f8 100644 --- a/t1/src/decoder/attribute/isSwrite.scala +++ b/t1/src/decoder/attribute/isSwrite.scala @@ -224,5 +224,5 @@ object isSwrite { } case class isSwrite(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore instruction will write vd or rd(scalar) from outside of lane. It will request vrf wait, and lane will not write. " + override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore instruction will write vd or rd(scalar) from outside of lane. It will request vrf wait, and lane will not write. No write to vd when isSwrite is True!!!" 
} diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index 7649d715a2..708b3f4187 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -186,6 +186,7 @@ object isVtype { "vrol.vv", "vror.vv", "vwsll.vv", + "vwsll.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isZvbb.scala b/t1/src/decoder/attribute/isZvbb.scala index 2fb8802e64..b77617ca38 100644 --- a/t1/src/decoder/attribute/isZvbb.scala +++ b/t1/src/decoder/attribute/isZvbb.scala @@ -27,6 +27,9 @@ object isZvbb { "vror.vv", "vror.vx", "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) else Seq() allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/zvbbUop.scala b/t1/src/decoder/attribute/zvbbUop.scala index ad9a45403f..c95cd2ea6a 100644 --- a/t1/src/decoder/attribute/zvbbUop.scala +++ b/t1/src/decoder/attribute/zvbbUop.scala @@ -30,46 +30,54 @@ object ZvbbUOP { } def t0(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( - "vbrev.v" + "vbrev.v" ) allMatched.contains(t1DecodePattern.instruction.name) } def t1(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( - "vbrev8.v" + "vbrev8.v" ) allMatched.contains(t1DecodePattern.instruction.name) } def t2(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( - "vrev8.v" + "vrev8.v" ) allMatched.contains(t1DecodePattern.instruction.name) } def t3(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( - "vclz.v" + "vclz.v" ) allMatched.contains(t1DecodePattern.instruction.name) } def t4(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( - "vctz.v" + "vctz.v" ) allMatched.contains(t1DecodePattern.instruction.name) } def t5(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( - "vrol.vv", - "vrol.vx", + "vrol.vv", + 
"vrol.vx", ) allMatched.contains(t1DecodePattern.instruction.name) } def t6(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched: Seq[String] = Seq( - "vror.vv", - "vror.vx", - "vror.vi", + "vror.vv", + "vror.vx", + "vror.vi", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t7(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } From 6e285f2c7fd07732b886d05617a32618fd828160 Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Thu, 25 Jul 2024 15:16:54 +0800 Subject: [PATCH 16/18] add support for vwsll --- t1/src/LaneZvbb.scala | 8 ++++++++ t1/src/decoder/Decoder.scala | 1 + t1/src/decoder/attribute/isCrossread.scala | 4 ---- t1/src/decoder/attribute/zvbbUop.scala | 2 ++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala index a122848aec..d412210844 100644 --- a/t1/src/LaneZvbb.scala +++ b/t1/src/LaneZvbb.scala @@ -28,6 +28,7 @@ class LaneZvbbRequest(datapathWidth: Int) extends VFUPipeBundle { class LaneZvbbResponse(datapathWidth: Int) extends VFUPipeBundle { val data = UInt(datapathWidth.W) + val source2 = UInt(datapathWidth.W) } @instantiable @@ -47,6 +48,11 @@ class LaneZvbb(val parameter: LaneZvbbParam) val zvbbCTZ = (PopCount(scanRightOr(zvbbBRev)) - 1.U).asUInt val zvbbROL = zvbbSrc.rotateLeft(zvbbRs(4, 0)).asUInt val zvbbROR = zvbbSrc.rotateRight(zvbbRs(4, 0)).asUInt + + val zvbbSLL64 = (0.U((parameter.datapathWidth-1).W) ## zvbbSrc.zext).asUInt << zvbbRs(5, 0) + val zvbbSLL = zvbbSLL64(parameter.datapathWidth-1, 0) + val zvbbSLLMSB = zvbbSLL64(2*parameter.datapathWidth-1, parameter.datapathWidth) + response.data := Mux1H(UIntToOH(request.opcode), Seq( zvbbBRev, zvbbBRev8, @@ -55,7 +61,9 @@ class LaneZvbb(val parameter: LaneZvbbParam) zvbbCTZ, zvbbROL, zvbbROR, + zvbbSLL, ) ) + response.source2 = Mux(request.opcode == 7.U, zvbbSLLMSB, 0.U) } diff 
--git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index a9760eca49..861478d411 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -341,6 +341,7 @@ object Decoder { case _: zvbbUop4.type => BitPat("b0100") case _: zvbbUop5.type => BitPat("b0101") case _: zvbbUop6.type => BitPat("b0110") + case _: zvbbUop7.type => BitPat("b0111") case _ => BitPat.dontCare(4) } case _ => BitPat.dontCare(4) diff --git a/t1/src/decoder/attribute/isCrossread.scala b/t1/src/decoder/attribute/isCrossread.scala index 5787aa1361..34c11bed3d 100644 --- a/t1/src/decoder/attribute/isCrossread.scala +++ b/t1/src/decoder/attribute/isCrossread.scala @@ -56,10 +56,6 @@ object isCrossread { "vwsub.wx", "vwsubu.wv", "vwsubu.wx", - // rv_zvbb - "vwsll.vv", - "vwsll.vx", - "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/zvbbUop.scala b/t1/src/decoder/attribute/zvbbUop.scala index c95cd2ea6a..4fdfbba09d 100644 --- a/t1/src/decoder/attribute/zvbbUop.scala +++ b/t1/src/decoder/attribute/zvbbUop.scala @@ -13,6 +13,7 @@ object zvbbUop3 extends ZvbbUOPType object zvbbUop4 extends ZvbbUOPType object zvbbUop5 extends ZvbbUOPType object zvbbUop6 extends ZvbbUOPType +object zvbbUop7 extends ZvbbUOPType object ZvbbUOP { def apply(t1DecodePattern: T1DecodePattern): Uop = { @@ -24,6 +25,7 @@ object ZvbbUOP { t4 _ -> zvbbUop4, t5 _ -> zvbbUop5, t6 _ -> zvbbUop6, + t7 _ -> zvbbUop7, ).collectFirst { case (fn, tpe) if fn(t1DecodePattern) => tpe }.getOrElse(UopDC) From 89921c3ab6a8b3dc2cba154272449b5c723284ba Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Thu, 25 Jul 2024 15:22:14 +0800 Subject: [PATCH 17/18] fix error --- t1/src/LaneZvbb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala index d412210844..d38f0a53af 100644 --- a/t1/src/LaneZvbb.scala +++ b/t1/src/LaneZvbb.scala @@ -64,6 +64,6 @@ class LaneZvbb(val parameter: LaneZvbbParam) 
zvbbSLL, ) ) - response.source2 = Mux(request.opcode == 7.U, zvbbSLLMSB, 0.U) + response.source2 := Mux(request.opcode === 7.U, zvbbSLLMSB, 0.U) } From e965109b8c08c1b7e175e96772caad21872ef4bd Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 26 Jul 2024 13:37:05 +0800 Subject: [PATCH 18/18] [nix] bump rvv-codegen Signed-off-by: Avimitin --- nix/pkgs/rvv-codegen.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/pkgs/rvv-codegen.nix b/nix/pkgs/rvv-codegen.nix index 4e6d3ed7b3..bffddec26f 100644 --- a/nix/pkgs/rvv-codegen.nix +++ b/nix/pkgs/rvv-codegen.nix @@ -11,10 +11,10 @@ buildGoModule { pname = "riscv-vector-test"; version = "unstable-2023-04-12"; src = fetchFromGitHub { - owner = "ksco"; + owner = "chipsalliance"; repo = "riscv-vector-tests"; - rev = "bafa717d37b9bef3e80b66a50b01c22f532306bc"; - hash = "sha256-C91HUDyMykS3qM9h+rJ2uKAJcKHkoakw9I+wwtco0m8="; + rev = "caae5c8fcf465be73266f9b3bd672f71a362548e"; + hash = "sha256-388MKOO+g4PjR3BcxiA8vNY7itDcIhz88vZmMZkbsj8="; }; doCheck = false; vendorHash = "sha256-9cQlivpHg6IDYpmgBp34n6BR/I0FIYnmrXCuiGmAhNE=";