diff --git a/t1/src/Bundles.scala b/t1/src/Bundles.scala index 90410e9b7..f9a19239a 100644 --- a/t1/src/Bundles.scala +++ b/t1/src/Bundles.scala @@ -279,9 +279,6 @@ class InstructionControlRecord(param: LaneParameter) extends Bundle { /** 存 mask */ val mask: ValidIO[UInt] = Valid(UInt(param.datapathWidth.W)) - /** 把mask按每四个分一个组,然后看orR */ - val maskGroupedOrR: UInt = UInt((param.datapathWidth / param.sewMin).W) - /** 这一组写vrf的mask */ val vrfWriteMask: UInt = UInt(4.W) } diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index b4fccec51..83b5ec30d 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -709,7 +709,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ stage1.dequeue.ready := stage2.enqueue.ready && executionUnit.enqueue.ready executionUnit.enqueue.valid := stage1.dequeue.valid && stage2.enqueue.ready - // todo: connect state from stage1 stage2.enqueue.bits.elements.foreach { case (k ,d) => stage1.dequeue.bits.elements.get(k).foreach( pipeData => d := pipeData) } @@ -988,8 +987,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ entranceControl.mask.valid := laneRequest.bits.mask // assign mask from [[V]] entranceControl.mask.bits := maskInput - // TODO: remove it. - entranceControl.maskGroupedOrR := maskGroupedOrR // mask used for VRF write in this group. entranceControl.vrfWriteMask := 0.U diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 7fe828e87..83f17af72 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -76,7 +76,7 @@ case class LSUBankParameter(name: String, region: BitSet, beatbyte: Int, accessS * * @note * Chaining: - * - limited by VRF Memory Port. TODO: add bank in VRF. + * - limited by VRF Memory Port. * - the chaining size is decided by logic units. if the bandwidth is limited by the logic units, we should increase lane size. * TODO: sort a machine-readable chaining matrix for test case generation. 
*/ @@ -183,9 +183,7 @@ case class T1Parameter( /** Used in memory bundle parameter. */ val memoryDataWidthBytes: Int = lsuBankParameters.head.beatbyte - /** LSU MSHR Size, from experience, we use 3 for 2R1W,this is also limited by the number of memory ports. - * TODO: in vector design, there are some instructions which have 3R1W, this may decrease performance. we need perf it. - */ + /** LSU MSHR size: contains a load unit, a store unit and one other unit. */ val lsuMSHRSize: Int = 3 /** 2 for 3 MSHR(read + write + otherUnit) */ @@ -204,7 +202,6 @@ case class T1Parameter( /** for TileLink `mask` element. */ val maskWidth: Int = lsuBankParameters.head.beatbyte - // todo val vrfReadLatency = 2 // each element: Each lane will be connected to the other two lanes, @@ -295,9 +292,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa /** from CPU LSU, store buffer is cleared, memory can observe memory requests after this is asserted. */ val storeBufferClear: Bool = IO(Input(Bool())) - /** TileLink memory ports. - * TODO: Multiple LSU support - */ + /** TileLink memory ports. */ val memoryPorts: Vec[TLBundle] = IO(Vec(parameter.lsuBankParameters.size, parameter.tlParam.bundle())) // TODO: this is an example of adding a new Probe @@ -365,8 +360,6 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // manually maintain a queue for requestReg. requestRegDequeue.bits := requestReg.bits.request requestRegDequeue.valid := requestReg.valid - // TODO: decode the 7 bits in LSB, to get the instruction type. - // we only need to use it to find if it's a load/store instruction. decode.decodeInput := request.bits.instruction /** alias to [[requestReg.bits.decodeResult]], it is commonly used. 
*/ @@ -554,7 +547,6 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa ) }) val dataResult: ValidIO[UInt] = RegInit(0.U.asTypeOf(Valid(UInt(parameter.datapathWidth.W)))) - // todo: viota & compress & reduce val executeForLastLaneFire: Bool = WireDefault(false.B) @@ -581,7 +573,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa val lsuFinished: Bool = ohCheck(lsu.lastReport, control.record.instructionIndex, parameter.chainingSize) val busClear: Bool = !ohCheck(dataInCrossBus, control.record.instructionIndex, parameter.chainingSize) // instruction is allocated to this slot. - when(requestRegDequeue.fire && instructionToSlotOH(index)) { + when(instructionToSlotOH(index)) { // instruction metadata control.record.instructionIndex := requestReg.bits.instructionIndex // TODO: remove @@ -725,7 +717,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa (unOrderType && !allSlotFree) || (requestReg.bits.vdIsV0 && existMaskType)) || (vd === 0.U && maskType && slotValid)) - when(requestRegDequeue.fire && instructionToSlotOH(index)) { + when(instructionToSlotOH(index)) { writeBackCounter := 0.U groupCounter := 0.U executeCounter := 0.U @@ -1323,7 +1315,6 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa val executeFinish: Bool = (lastReduceCounter || !(reduce || popCount) || orderedReduce) && maskUnitIdle val schedulerWrite = decodeResultReg(Decoder.maskDestination) || (reduce && !popCount) || writeMv - // todo: decode val groupSync = decodeResultReg(Decoder.ffo) // 写回 when(readFinish && (executeFinish || writeMv || executeFinishReg)) { @@ -1627,8 +1618,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa requestRegDequeue.ready := executionReady && slotReady && (!gatherNeedRead || gatherReadFinish) && instructionRAWReady && instructionIndexFree - // TODO: change to `requestRegDequeue.fire`. 
- instructionToSlotOH := Mux(requestRegDequeue.ready, slotToEnqueue, 0.U) + instructionToSlotOH := Mux(requestRegDequeue.fire, slotToEnqueue, 0.U) // instruction commit { diff --git a/t1/src/VectorWrapper.scala b/t1/src/VectorWrapper.scala index ec10d8a35..e12515097 100644 --- a/t1/src/VectorWrapper.scala +++ b/t1/src/VectorWrapper.scala @@ -17,7 +17,6 @@ class VectorWrapper(parameter: T1Parameter) extends Module { val response: ValidIO[VResponse] = IO(Valid(new VResponse(parameter.xLen))) val csrInterface: CSRInterface = IO(Input(new CSRInterface(parameter.laneParam.vlMaxBits))) val storeBufferClear: Bool = IO(Input(Bool())) - // TODO: multiple LSU support. val memoryPorts: Vec[TLBundle] = IO(Vec(parameter.lsuBankParameters.size, parameter.tlParam.bundle())) // v主体 diff --git a/t1/src/laneStage/LaneExecutionBridge.scala b/t1/src/laneStage/LaneExecutionBridge.scala index f36b6f5c2..5b21674ac 100644 --- a/t1/src/laneStage/LaneExecutionBridge.scala +++ b/t1/src/laneStage/LaneExecutionBridge.scala @@ -105,7 +105,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd // data in executionRecord is narrow type val narrowInRecord: Bool = !executionRecord.decodeResult(Decoder.crossWrite) && executionRecord.decodeResult(Decoder.crossRead) - // todo: Need to collapse the results of combined calculations + // reduceReady is false: Need to collapse the results of combined calculations val reduceReady: Bool = WireDefault(true.B) val sendFoldReduce: Option[Bool] = Option.when(isLastSlot)(Wire(Bool())) val recordQueueReadyForNoExecute = Wire(Bool()) diff --git a/t1/src/laneStage/LaneStage0.scala b/t1/src/laneStage/LaneStage0.scala index 1eee6573f..87300d1df 100644 --- a/t1/src/laneStage/LaneStage0.scala +++ b/t1/src/laneStage/LaneStage0.scala @@ -119,7 +119,6 @@ class LaneStage0(parameter: LaneParameter, isLastSlot: Boolean) extends enqueue.bits.maskForMaskGroup, (-1.S(parameter.datapathWidth.W)).asUInt ) - // TODO: use 'record.maskGroupedOrR' & 
update it val maskForDataGroup: UInt = VecInit(maskCorrection.asBools.grouped(dataGroupSize).map(_.reduce(_ || _)).toSeq).asUInt val groupFilterByMask = maskForDataGroup & groupFilter diff --git a/t1/src/laneStage/LaneStage3.scala b/t1/src/laneStage/LaneStage3.scala index f2cc5a5ef..fb68529f8 100644 --- a/t1/src/laneStage/LaneStage3.scala +++ b/t1/src/laneStage/LaneStage3.scala @@ -23,7 +23,7 @@ class LaneStage3Enqueue(parameter: LaneParameter, isLastSlot: Boolean) extends B // pipe state val decodeResult: DecodeBundle = Decoder.bundle(parameter.fpuEnable) val instructionIndex: UInt = UInt(parameter.instructionIndexBits.W) - // todo: Need real-time status + // Need real-time status, no pipe val ffoByOtherLanes: Bool = Bool() val loadStore: Bool = Bool() /** vd or rd */ diff --git a/t1/src/lsu/SimpleAccessUnit.scala b/t1/src/lsu/SimpleAccessUnit.scala index 4fa89307e..9bd2dad48 100644 --- a/t1/src/lsu/SimpleAccessUnit.scala +++ b/t1/src/lsu/SimpleAccessUnit.scala @@ -295,8 +295,7 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic { 1.U, (1.U << csrInterface.vlmul(1, 0)).asUInt(3, 0) ), - // TODO: reset to 0.U - 1.U, + 0.U, lsuRequest.valid ) @@ -367,7 +366,7 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic { segmentIndex := Mux(segmentEnd || lsuRequest.valid, 0.U, segmentIndexNext) } - // TODO: why [[!isSegmentLoadStore]]? alias segmentEnd + // [[!isSegmentLoadStore]]: segSize = 1 -> always segmentEnd val lastElementForSegment = !isSegmentLoadStore || segmentEnd /** signal indicates this is the last transaction for the element(with handshake) */ @@ -394,11 +393,8 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic { /** unsent memory transactions to s0. 
*/ val unsentMemoryRequests: UInt = (~sentMemoryRequests).asUInt - /** mask [[unsentMemoryRequests]] - * TODO: maskFilter = maskReg & unsentMemoryRequests - */ - val maskedUnsentMemoryRequests: UInt = Wire(UInt(param.maskGroupWidth.W)) - maskedUnsentMemoryRequests := maskReg & unsentMemoryRequests + /** mask [[unsentMemoryRequests]] */ + val maskedUnsentMemoryRequests: UInt = (maskReg & unsentMemoryRequests).asUInt(param.maskGroupWidth - 1, 0) /** the find the next [[maskedUnsentMemoryRequests]] */ val findFirstMaskedUnsentMemoryRequests: UInt = ffo(maskedUnsentMemoryRequests) diff --git a/t1/src/lsu/StrideBase.scala b/t1/src/lsu/StrideBase.scala index 36940df6a..e0c406d76 100644 --- a/t1/src/lsu/StrideBase.scala +++ b/t1/src/lsu/StrideBase.scala @@ -170,8 +170,7 @@ abstract class StrideBase(param: MSHRParam) extends Module { 1.U, (1.U << csrInterface.vlmul(1, 0)).asUInt(3, 0) ), - // TODO: reset to 0.U - 1.U, + 0.U, lsuRequest.valid ) diff --git a/t1/src/vfu/ReduceAdder.scala b/t1/src/vfu/ReduceAdder.scala index c9f48c72d..a94a4d0da 100644 --- a/t1/src/vfu/ReduceAdder.scala +++ b/t1/src/vfu/ReduceAdder.scala @@ -26,7 +26,6 @@ class ReduceAdder(datapathWidth: Int) extends Module { @public val response = IO(Output(new ReduceAdderResponse(datapathWidth))) - // todo: decode // ["add", "sub", "slt", "sle", "sgt", "sge", "max", "min", "seq", "sne", "adc", "sbc"] val uopOH: UInt = UIntToOH(request.opcode)(11, 0) val isSub: Bool = !(uopOH(0) || uopOH(10)) diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index e48d0345a..7433ff630 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -40,9 +40,6 @@ object VRFParam { * - we can have more memory ports. * - a big VRF memory is split into small memories, the shell of memory contributes more area... 
* - * TODO: change to use 32bits memory + mask, - * use portFactor to increase port number - * * TODO: add ECC cc @sharzyL * 8bits -> 5bits * 16bits -> 6bits @@ -126,7 +123,6 @@ class VRFProbe(regNumBits: Int, offsetBits: Int, instructionIndexSize: Int, data * - out of order chaining hazard detection: * TODO: move to Top. * - * TODO: implement [[parameter.portFactor]] for increasing VRF bandwidth. * TODO: probe each ports to benchmark the bandwidth. */ @instantiable