From fa6eb55fa35f043d8c43b03b4a5156b07ffae142 Mon Sep 17 00:00:00 2001 From: "Saw, Xue Zheng" Date: Wed, 9 Oct 2024 14:55:18 -0700 Subject: [PATCH 1/7] add float conversion for arbitrary mantissa width and exponent width --- .../floating_point/floating_point_value.dart | 138 ++++++++++++++++-- 1 file changed, 124 insertions(+), 14 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index 59f8d74b9..e3bb3f69b 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -526,20 +526,17 @@ class FloatingPointValue implements Comparable { (mantissa == other.mantissa); } - /// Return true if the represented floating point number is considered - /// NaN or 'Not a Number' due to overflow - // TODO(desmonddak): figure out the difference with Infinity - bool isNaN() { - if ((exponent.width == 4) & (mantissa.width == 3)) { - // FP8 E4M3 does not support infinities - final cond1 = (1 + exponent.toInt()) == pow(2, exponent.width).toInt(); - final cond2 = (1 + mantissa.toInt()) == pow(2, mantissa.width).toInt(); - return cond1 & cond2; - } else { - return exponent.toInt() == - computeMaxExponent(exponent.width) + computeBias(exponent.width) + 1; - } - } + bool isExponentAllOnes() => + exponent.toInt() == + computeMaxExponent(exponent.width) + computeBias(exponent.width) + 1; + + bool isNaN() => isExponentAllOnes() && mantissa.toInt() == 0; + + bool isInf() => isExponentAllOnes() && mantissa.toInt() != 0; + + bool isZero() => exponent.toInt() == 0 && mantissa.toInt() == 0; + + bool isSubnormal() => exponent.toInt() == 0 && mantissa.toInt() != 0; /// Return the value of the floating point number in a Dart [double] type. 
double toDouble() { @@ -907,4 +904,117 @@ class FloatingPoint8Value extends FloatingPointValue { return FloatingPoint8Value( sign: fpv.sign, exponent: fpv.exponent, mantissa: fpv.mantissa); } + + factory FloatingPointValue.fromFloatingPointValue( + {required FloatingPointValue ingress_fpVal, + required int egressExponentWidth, + required int egressMantissaWidth, + required FloatingPointRoundingMode roundingMode}) { + // Preserve the sign in all cases + final egressSign = ingress_fpVal.sign; + + var egressExponent; + var egressMantissa; + + if (ingress_fpVal.isNaN()) { + // Return NaN with change in scale + egressExponent = computeMaxExponent(egressExponentWidth) + + computeBias(egressExponentWidth) + + 1; + + if (ingress_fpVal.mantissa.width <= egressMantissaWidth) { + // Zero pad the mantissa in the case of precision gain + egressMantissa = ingress_fpVal.mantissa << + (egressMantissaWidth - ingress_fpVal.mantissa.width); + } else { + // Trim the mantissa in the case of precision loss from the right end, preserving the left end + egressMantissa = ingress_fpVal.mantissa.slice( + ingress_fpVal.mantissa.width - 1, + egressMantissaWidth - ingress_fpVal.mantissa.width); + } + + return FloatingPointValue( + sign: egressSign, exponent: egressExponent, mantissa: egressMantissa); + } + + if (ingress_fpVal.isInf()) { + // Return Infinity + egressExponent = computeMaxExponent(egressExponentWidth) + + computeBias(egressExponentWidth) + + 1; + egressMantissa <= LogicValue.zero(egressMantissaWidth); + + return FloatingPointValue( + sign: egressSign, exponent: egressExponent, mantissa: egressMantissa); + } + + // Zero cases + if (ingress_fpVal.isZero()) { + return FloatingPointValue( + sign: egressSign, + exponent: LogicValue.zero(egressExponentWidth), + mantissa: LogicValue.zero(egressMantissaWidth)); + } + + // Subnormal cases + // In subnormal cases, the ingress exponent is all zeroes + // So we are really only concerned about the mantissa + if (ingress_fpVal.isSubnormal()) { 
+ if (egressMantissa < ingress_fpVal.mantissa) { + var mantissaWidthDiff = + egressMantissa.width - ingress_fpVal.mantissa.width; + var mask = ingress_fpVal.mantissa & + LogicValue.filled(mantissaWidthDiff, LogicValue.one); + + // The lower bits of the mantissa and extracted and rounded using the sticky bit from the mask + var lowMantissaBits = + (ingress_fpVal.mantissa >>> mantissaWidthDiff) | (mask != 0); + exgressExponent = + LogicValue.filled(egressExponentWidth, LogicValue.zero); + } else if (egressMantissa > ingress_fpVal.mantissa) { + // Normalize the mantissa by counting the leading zeroes until it hit a non-zero bit + int leadingZeroes = countLeadingZeroes(ingress_fpVal.mantissa); + egressMantissa = ingress_fpVal.mantissa >>> leadingZeroes; + egressExponent = ingress_fpVal.exponent - leadingZeroes; + } else if (egressMantissa == ingress_fpVal.mantissa) {} + } + + // Normal cases + + if (ingress_fpVal.mantissa.width <= egressMantissaWidth) { + // Zero pad the mantissa in the case of precision gain + egressMantissa = ingress_fpVal.mantissa << + (egressMantissaWidth - ingress_fpVal.mantissa.width); + } else { + // Trim the mantissa in the case of precision loss from the right end, preserving the left end + egressMantissa = ingress_fpVal.mantissa.slice( + ingress_fpVal.mantissa.width - 1, + egressMantissaWidth - ingress_fpVal.mantissa.width); + } + + egressExponent = (ingress_fpVal.exponent.toInt() - ingress_fpVal.bias()) + + FloatingPointValue.computeBias(egressExponentWidth); + } +} + +/// Count the number of leading zeroes in a LogicValue. +/// +/// This is a binary count of the number of leading zeroes in the +/// [LogicValue]. +/// +/// The logic value is treated as a binary number, with the rightmost +/// bit being the least significant bit (bit 0). The leading zeroes +/// are counted from the most significant bit (bit width-1) towards +/// the least significant bit. 
+/// +/// For example, the binary number 0b00101010 would return 3, as +/// there are 3 leading zeroes. +int countLeadingZeroes(LogicValue value) { + var myVal = value.toInt(); + var myCnt = 0; + while ((myVal & 1) == 0) { + myVal = myVal >> 1; + myCnt++; + } + return myCnt; } From 05c77dadb79613ee8170bf5943293b7e5b611f7e Mon Sep 17 00:00:00 2001 From: "Saw, Xue Zheng" Date: Tue, 15 Oct 2024 12:24:22 -0700 Subject: [PATCH 2/7] float_conv hardware module --- .../floating_point/floating_point.dart | 1 + .../floating_point_converter.dart | 287 ++++++++++++++++++ .../floating_point/floating_point_logic.dart | 35 ++- .../floating_point/floating_point_value.dart | 113 ------- .../floating_point_conversion_test.dart | 39 +++ 5 files changed, 361 insertions(+), 114 deletions(-) create mode 100644 lib/src/arithmetic/floating_point/floating_point_converter.dart create mode 100644 test/arithmetic/floating_point/floating_point_conversion_test.dart diff --git a/lib/src/arithmetic/floating_point/floating_point.dart b/lib/src/arithmetic/floating_point/floating_point.dart index 231569572..6264f199f 100644 --- a/lib/src/arithmetic/floating_point/floating_point.dart +++ b/lib/src/arithmetic/floating_point/floating_point.dart @@ -2,5 +2,6 @@ // SPDX-License-Identifier: BSD-3-Clause export 'floating_point_adder.dart'; +export 'floating_point_converter.dart'; export 'floating_point_logic.dart'; export 'floating_point_value.dart'; diff --git a/lib/src/arithmetic/floating_point/floating_point_converter.dart b/lib/src/arithmetic/floating_point/floating_point_converter.dart new file mode 100644 index 000000000..5d7e14b13 --- /dev/null +++ b/lib/src/arithmetic/floating_point/floating_point_converter.dart @@ -0,0 +1,287 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_converter.dart +// A Floating-point format converter component. 
+// +// 2024 August 30 +// Author: AI Assistant + +import 'dart:ffi'; +import 'dart:math'; + +import 'package:meta/meta.dart'; +import 'package:rohd/rohd.dart'; +import 'package:rohd_hcl/rohd_hcl.dart'; + +/// A converter module for FloatingPoint values +class FloatingPointConverter extends Module { + /// Source exponent width + final int sourceExponentWidth; + + /// Source mantissa width + final int sourceMantissaWidth; + + /// Destination exponent width + final int destExponentWidth; + + /// Destination mantissa width + final int destMantissaWidth; + + /// Output [FloatingPoint] computed + late final FloatingPoint result = FloatingPoint( + exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth) + ..gets(output('result')); + + /// The result of [FloatingPoint] conversion + @protected + late final FloatingPoint _result = FloatingPoint( + exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); + + /// Convert a floating point number from one format to another + FloatingPointConverter(FloatingPoint source, + {required this.destExponentWidth, + required this.destMantissaWidth, + super.name}) + : sourceExponentWidth = source.exponent.width, + sourceMantissaWidth = source.mantissa.width { + source = source.clone() + ..gets(addInput('source', source, width: source.width)); + addOutput('result', width: _result.width) <= _result; + + // Handle sign + _result.sign <= source.sign; + + Logic normalizedExponent = Logic(name: 'normalizedExponent'); + Logic normalizedMantissa = Logic(name: 'normalizedMantissa'); + + normalizedExponent < _normalizeSubnormalExponent(); + normalizedMantissa < _normalizeSubnormalMantissa(); + + final normalizedFP = FloatingPoint(exponentWidth: 0, mantissaWidth: 0); + normalizedFP < + FloatingPointValue( + sign: source.sign.value, + exponent: normalizedExponent.value, + mantissa: normalizedMantissa.value, + ); + + If.block([ + Iff(source.isNaN(), [ + _result < _handleNaN(source, destExponentWidth, destMantissaWidth), + ]), + 
Iff(source.isInfinity(), [ + _result < _handleInfinity(source, destExponentWidth, destMantissaWidth), + ]), + Iff(source.isZero(), [ + _result < _handleZero(source, destExponentWidth, destMantissaWidth), + ]), + Iff(source.isSubnormal() | source.isNormal(), [ + _result < + mux( + source.isNormal(), + _convertNormalNumber( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth), + normalizedFP), + ]), + ]); + } + + /// Handle the case where the [sourceFP] is a NaN. + /// + /// The resulting [FloatingPoint] is a NaN with the same sign as the [sourceFP]. + /// + /// [destExponentWidth] and [destMantissaWidth] are the widths of the exponent and + /// mantissa of the destination [FloatingPoint] respectively. + FloatingPoint _handleNaN(FloatingPoint sourceFP, int destExponentWidth, + int destMantissaWidth) => + _packSpecial( + source: sourceFP, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth, + isNaN: true); + + FloatingPoint _handleInfinity(FloatingPoint sourceFP, int destExponentWidth, + int destMantissaWidth) => + _packSpecial( + source: sourceFP, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth, + isNaN: false); + + FloatingPoint _handleZero(FloatingPoint sourceFP, int destExponentWidth, + int destMantissaWidth) => + _packZero( + source: sourceFP, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth); + + FloatingPoint _convertNormalNumber( + {required FloatingPoint source, + required int destExponentWidth, + required int destMantissaWidth}) { + final adjustedExponent = + _adjustExponent(source.exponent, destExponentWidth); + + final adjustedMantissa = + Logic(name: 'adjustedMantissa', width: destMantissaWidth); + + adjustedMantissa < + _adjustMantissaPrecision(source.mantissa, destMantissaWidth, + FloatingPointRoundingMode.roundNearestEven); + + final isOverflow = adjustedExponent + 
.gte(FloatingPointValue.computeMaxExponent(destExponentWidth)); + final isUnderflow = adjustedExponent.lte(0); + + final packNormal = FloatingPoint( + exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); + + If.block([ + Iff(isOverflow, [ + packNormal < + _handleOverflow( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth), + ]), + ElseIf(isUnderflow, [ + packNormal < _handleUnderflow(), + ]), + Else([ + packNormal < + FloatingPointValue( + sign: source.sign.value, + exponent: adjustedExponent.value, + mantissa: adjustedMantissa.value, + ) + ]), + ]); + + return packNormal; + } + + Logic _normalizeSubnormalExponent() => Const(0); + Logic _normalizeSubnormalMantissa() => Const(0); + + FloatingPoint _handleOverflow( + {required FloatingPoint source, + required int destExponentWidth, + required int destMantissaWidth}) => + _packInfinity( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth, + isNaN: false); + + FloatingPoint _handleUnderflow() => + FloatingPoint(exponentWidth: 0, mantissaWidth: 0); + + /// Pack a special floating point number into a target. + /// + /// The target [FloatingPoint] is modified to represent the given special + /// number. The exponent is set to all ones, the sign is set to the given [sign] + /// value, and the mantissa is set to all zeros for an infinity or all ones for + /// a NaN. + /// + /// [sign] is the sign bit of the special number. + /// + /// [isNaN] is true if the special number is a NaN, false if it is an infinity. 
+ FloatingPoint _packSpecial( + {required FloatingPoint source, + required int destExponentWidth, + required int destMantissaWidth, + required bool isNaN}) { + final pack = FloatingPoint( + exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); + + pack.exponent <= Const(1, width: destExponentWidth, fill: true); + pack.sign <= source.sign; + + if (isNaN) { + pack.mantissa <= Const(1) << (destMantissaWidth - 1); + } else { + pack.mantissa <= Const(0, width: destMantissaWidth, fill: true); + } + + return pack; + } + + FloatingPoint _packZero( + {required FloatingPoint source, + required int destExponentWidth, + required int destMantissaWidth}) { + final pack = FloatingPoint( + exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); + + pack.exponent <= Const(0, width: destExponentWidth, fill: true); + pack.mantissa <= Const(0, width: destMantissaWidth, fill: true); + pack.sign <= source.sign; + return pack; + } + + FloatingPoint _packInfinity( + {required FloatingPoint source, + required int destExponentWidth, + required int destMantissaWidth, + required bool isNaN}) => + _packSpecial( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth, + isNaN: false); + + Logic _adjustExponent(Logic sourceExponent, int destExponentWidth) { + if (sourceExponent.width == destExponentWidth) { + return sourceExponent; + } else { + final sourceBias = FloatingPointValue.computeBias(sourceExponent.width); + final destBias = FloatingPointValue.computeBias(destExponentWidth); + + final biasedExponent = + Logic(name: 'biasedExponent', width: destExponentWidth); + biasedExponent <= sourceExponent - (sourceBias + destBias); + return biasedExponent; + } + } + + Logic _adjustMantissaPrecision(Logic sourceMantissa, int destMantissaWidth, + FloatingPointRoundingMode roundingMode) { + final adjustedMantissa = + Logic(name: 'adjustedMantissa', width: destMantissaWidth); + + // In the case where precision is increased, we just 
need to zero pad or shift the source mantissa bits + if (destMantissaWidth > sourceMantissa.width) { + adjustedMantissa <= + sourceMantissa << (destMantissaWidth - sourceMantissa.width); + } else if (destMantissaWidth < sourceMantissa.width) { + adjustedMantissa <= + _roundMantissa(sourceMantissa, destMantissaWidth, roundingMode); + } else { + adjustedMantissa <= sourceMantissa; + } + + return adjustedMantissa; + } + + Logic _roundMantissa(Logic sourceMantissa, int destMantissaWidth, + FloatingPointRoundingMode roundingMode) { + final shift = sourceMantissa.width - destMantissaWidth; + final roundBit = Const(1) << (shift - 1); + final mask = roundBit - 1; + final roundCondition = (sourceMantissa & roundBit) & + ((sourceMantissa & mask) | (roundBit << 1)); + + final roundedMantissa = (sourceMantissa + roundBit) & ~(roundBit - 1); + final shiftedMantissa = roundedMantissa >> shift; + + final result = Logic(name: 'roundedMantissa', width: destMantissaWidth); + result <= mux(roundCondition, roundedMantissa, shiftedMantissa); + + // TODO : Add If block for rounding modes + + return result; + } +} diff --git a/lib/src/arithmetic/floating_point/floating_point_logic.dart b/lib/src/arithmetic/floating_point/floating_point_logic.dart index fcdca3107..c1260f109 100644 --- a/lib/src/arithmetic/floating_point/floating_point_logic.dart +++ b/lib/src/arithmetic/floating_point/floating_point_logic.dart @@ -48,7 +48,20 @@ class FloatingPoint extends LogicStructure { /// Return a Logic true if this FloatingPoint contains a normal number, /// defined as having mantissa in the range [1,2) - Logic isNormal() => exponent.neq(LogicValue.zero.zeroExtend(exponent.width)); + /// + /// IF exponent != 0 AND exponent != ALL_ONES THEN + Logic isNormal() => + exponent.neq(LogicValue.filled(exponent.width, LogicValue.zero)) & + exponent.neq(LogicValue.filled(exponent.width, LogicValue.one)); + + /// Return a Logic true if this FloatingPoint contains a subnormal number, + /// defined as having an 
exponent of zero and a mantissa that is non-zero. + /// + /// In the IEEE floating point representation, subnormal numbers are used to + /// represent very small numbers that are close to zero. + Logic isSubnormal() => + exponent.eq(LogicValue.filled(exponent.width, LogicValue.zero)) & + mantissa.neq(LogicValue.filled(exponent.width, LogicValue.zero)); /// Return the zero exponent representation for this type of FloatingPoint Logic zeroExponent() => Const(LogicValue.zero).zeroExtend(exponent.width); @@ -56,6 +69,26 @@ class FloatingPoint extends LogicStructure { /// Return the one exponent representation for this type of FloatingPoint Logic oneExponent() => Const(LogicValue.one).zeroExtend(exponent.width); + /// Return a Logic true if the exponent is all ones and mantissa is zero, indicating an + /// infinite value. + Logic isInfinity() => + exponent.eq(LogicValue.filled(exponent.width, LogicValue.one)) & + mantissa.eq(LogicValue.filled(exponent.width, LogicValue.zero)); + + /// Return a Logic true if the exponent is all ones and mantissa is non-zero, + /// indicating an infinite value. + Logic isNaN() => + exponent.eq(LogicValue.filled(exponent.width, LogicValue.one)) & + mantissa.neq(LogicValue.filled(exponent.width, LogicValue.zero)); + + /// Return a Logic true if the FloatingPoint contains a zero value. + /// + /// Zero values for FloatingPoint are defined as having an exponent of zero + /// and a mantissa of zero. 
+ Logic isZero() => + exponent.eq(LogicValue.filled(exponent.width, LogicValue.zero)) & + mantissa.eq(LogicValue.filled(exponent.width, LogicValue.zero)); + @override void put(dynamic val, {bool fill = false}) { if (val is FloatingPointValue) { diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index e3bb3f69b..adf065626 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -904,117 +904,4 @@ class FloatingPoint8Value extends FloatingPointValue { return FloatingPoint8Value( sign: fpv.sign, exponent: fpv.exponent, mantissa: fpv.mantissa); } - - factory FloatingPointValue.fromFloatingPointValue( - {required FloatingPointValue ingress_fpVal, - required int egressExponentWidth, - required int egressMantissaWidth, - required FloatingPointRoundingMode roundingMode}) { - // Preserve the sign in all cases - final egressSign = ingress_fpVal.sign; - - var egressExponent; - var egressMantissa; - - if (ingress_fpVal.isNaN()) { - // Return NaN with change in scale - egressExponent = computeMaxExponent(egressExponentWidth) + - computeBias(egressExponentWidth) + - 1; - - if (ingress_fpVal.mantissa.width <= egressMantissaWidth) { - // Zero pad the mantissa in the case of precision gain - egressMantissa = ingress_fpVal.mantissa << - (egressMantissaWidth - ingress_fpVal.mantissa.width); - } else { - // Trim the mantissa in the case of precision loss from the right end, preserving the left end - egressMantissa = ingress_fpVal.mantissa.slice( - ingress_fpVal.mantissa.width - 1, - egressMantissaWidth - ingress_fpVal.mantissa.width); - } - - return FloatingPointValue( - sign: egressSign, exponent: egressExponent, mantissa: egressMantissa); - } - - if (ingress_fpVal.isInf()) { - // Return Infinity - egressExponent = computeMaxExponent(egressExponentWidth) + - computeBias(egressExponentWidth) + - 1; - egressMantissa <= 
LogicValue.zero(egressMantissaWidth); - - return FloatingPointValue( - sign: egressSign, exponent: egressExponent, mantissa: egressMantissa); - } - - // Zero cases - if (ingress_fpVal.isZero()) { - return FloatingPointValue( - sign: egressSign, - exponent: LogicValue.zero(egressExponentWidth), - mantissa: LogicValue.zero(egressMantissaWidth)); - } - - // Subnormal cases - // In subnormal cases, the ingress exponent is all zeroes - // So we are really only concerned about the mantissa - if (ingress_fpVal.isSubnormal()) { - if (egressMantissa < ingress_fpVal.mantissa) { - var mantissaWidthDiff = - egressMantissa.width - ingress_fpVal.mantissa.width; - var mask = ingress_fpVal.mantissa & - LogicValue.filled(mantissaWidthDiff, LogicValue.one); - - // The lower bits of the mantissa and extracted and rounded using the sticky bit from the mask - var lowMantissaBits = - (ingress_fpVal.mantissa >>> mantissaWidthDiff) | (mask != 0); - exgressExponent = - LogicValue.filled(egressExponentWidth, LogicValue.zero); - } else if (egressMantissa > ingress_fpVal.mantissa) { - // Normalize the mantissa by counting the leading zeroes until it hit a non-zero bit - int leadingZeroes = countLeadingZeroes(ingress_fpVal.mantissa); - egressMantissa = ingress_fpVal.mantissa >>> leadingZeroes; - egressExponent = ingress_fpVal.exponent - leadingZeroes; - } else if (egressMantissa == ingress_fpVal.mantissa) {} - } - - // Normal cases - - if (ingress_fpVal.mantissa.width <= egressMantissaWidth) { - // Zero pad the mantissa in the case of precision gain - egressMantissa = ingress_fpVal.mantissa << - (egressMantissaWidth - ingress_fpVal.mantissa.width); - } else { - // Trim the mantissa in the case of precision loss from the right end, preserving the left end - egressMantissa = ingress_fpVal.mantissa.slice( - ingress_fpVal.mantissa.width - 1, - egressMantissaWidth - ingress_fpVal.mantissa.width); - } - - egressExponent = (ingress_fpVal.exponent.toInt() - ingress_fpVal.bias()) + - 
FloatingPointValue.computeBias(egressExponentWidth); - } -} - -/// Count the number of leading zeroes in a LogicValue. -/// -/// This is a binary count of the number of leading zeroes in the -/// [LogicValue]. -/// -/// The logic value is treated as a binary number, with the rightmost -/// bit being the least significant bit (bit 0). The leading zeroes -/// are counted from the most significant bit (bit width-1) towards -/// the least significant bit. -/// -/// For example, the binary number 0b00101010 would return 3, as -/// there are 3 leading zeroes. -int countLeadingZeroes(LogicValue value) { - var myVal = value.toInt(); - var myCnt = 0; - while ((myVal & 1) == 0) { - myVal = myVal >> 1; - myCnt++; - } - return myCnt; } diff --git a/test/arithmetic/floating_point/floating_point_conversion_test.dart b/test/arithmetic/floating_point/floating_point_conversion_test.dart new file mode 100644 index 000000000..b53416e97 --- /dev/null +++ b/test/arithmetic/floating_point/floating_point_conversion_test.dart @@ -0,0 +1,39 @@ +import 'dart:math'; +import 'package:rohd_hcl/rohd_hcl.dart'; +import 'package:test/test.dart'; +import 'package:rohd/rohd.dart'; + +void main() { + test('FP: FP16 to FP32 conversion test', () { + final fp32 = FloatingPoint32() + ..put(FloatingPoint32Value.fromDouble(1.5).value); + + print(Const(0, width: 3).eq(Const(0, width: 1))); + + // final converter = FloatingPointConverter(fp32, + // Declare a constant for exponent width for FP16 + // const ingress_exponentWidth = 5; + // const ingress_mantissaWidth = 11; + + // const egress_exponentWidth = 8; + // const egress_mantissaWidth = 23; + + // Get FP16 value from a double, we will feed this FP16 value to both the software and hardware model, and then compare the results + // var fp16val = FloatingPointValue.fromDouble(val, + // exponentWidth: ingress_exponentWidth, + // mantissaWidth: ingress_mantissaWidth); + + // // First get the exponent and rebias it + // var fp16_exponent = fp16val.exponent; + 
+ // // Re-bias the exponent for 32 + // var fp32_exponent = (fp16_exponent - fp16.bias()) + + // FloatingPointValue.computeBias(egress_exponentWidth); + + // // Zero extend the mantissa + // var mantissa64 = fp16val.mantissa; + + // // Compare FP16 values + // expect(fp16val, fp16val2); + }); +} From d75a53ef7a03ac76d76a42807ff4e362fc122e92 Mon Sep 17 00:00:00 2001 From: "Saw, Xue Zheng" Date: Tue, 15 Oct 2024 17:23:38 -0700 Subject: [PATCH 3/7] some fixes to the converter --- .../floating_point_converter.dart | 38 ++++++++++--------- .../floating_point/floating_point_logic.dart | 8 ++-- .../floating_point_conversion_test.dart | 37 +++++++++++++++++- 3 files changed, 59 insertions(+), 24 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_converter.dart b/lib/src/arithmetic/floating_point/floating_point_converter.dart index 5d7e14b13..200892d34 100644 --- a/lib/src/arithmetic/floating_point/floating_point_converter.dart +++ b/lib/src/arithmetic/floating_point/floating_point_converter.dart @@ -52,19 +52,19 @@ class FloatingPointConverter extends Module { // Handle sign _result.sign <= source.sign; - Logic normalizedExponent = Logic(name: 'normalizedExponent'); - Logic normalizedMantissa = Logic(name: 'normalizedMantissa'); + Logic normalizedExponent = + Logic(name: 'normalizedExponent', width: destExponentWidth); + Logic normalizedMantissa = + Logic(name: 'normalizedMantissa', width: destMantissaWidth); normalizedExponent < _normalizeSubnormalExponent(); normalizedMantissa < _normalizeSubnormalMantissa(); - final normalizedFP = FloatingPoint(exponentWidth: 0, mantissaWidth: 0); - normalizedFP < - FloatingPointValue( - sign: source.sign.value, - exponent: normalizedExponent.value, - mantissa: normalizedMantissa.value, - ); + final normalizedFP = FloatingPoint(exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); + + normalizedFP.sign <= source.sign; + normalizedFP.exponent <= normalizedExponent; + normalizedFP.mantissa <= 
normalizedMantissa; If.block([ Iff(source.isNaN(), [ @@ -97,7 +97,7 @@ class FloatingPointConverter extends Module { /// mantissa of the destination [FloatingPoint] respectively. FloatingPoint _handleNaN(FloatingPoint sourceFP, int destExponentWidth, int destMantissaWidth) => - _packSpecial( + packSpecial( source: sourceFP, destExponentWidth: destExponentWidth, destMantissaWidth: destMantissaWidth, @@ -105,7 +105,7 @@ class FloatingPointConverter extends Module { FloatingPoint _handleInfinity(FloatingPoint sourceFP, int destExponentWidth, int destMantissaWidth) => - _packSpecial( + packSpecial( source: sourceFP, destExponentWidth: destExponentWidth, destMantissaWidth: destMantissaWidth, @@ -163,8 +163,10 @@ class FloatingPointConverter extends Module { return packNormal; } - Logic _normalizeSubnormalExponent() => Const(0); - Logic _normalizeSubnormalMantissa() => Const(0); + Logic _normalizeSubnormalExponent() => + Const(0, width: destExponentWidth, fill: true); + Logic _normalizeSubnormalMantissa() => + Const(0, width: destMantissaWidth, fill: true); FloatingPoint _handleOverflow( {required FloatingPoint source, @@ -189,7 +191,8 @@ class FloatingPointConverter extends Module { /// [sign] is the sign bit of the special number. /// /// [isNaN] is true if the special number is a NaN, false if it is an infinity. 
- FloatingPoint _packSpecial( + @visibleForTesting + FloatingPoint packSpecial( {required FloatingPoint source, required int destExponentWidth, required int destMantissaWidth, @@ -198,10 +201,9 @@ class FloatingPointConverter extends Module { exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); pack.exponent <= Const(1, width: destExponentWidth, fill: true); - pack.sign <= source.sign; if (isNaN) { - pack.mantissa <= Const(1) << (destMantissaWidth - 1); + pack.mantissa <= Const(1, width: destMantissaWidth, fill: true) << (destMantissaWidth - 1); } else { pack.mantissa <= Const(0, width: destMantissaWidth, fill: true); } @@ -227,7 +229,7 @@ class FloatingPointConverter extends Module { required int destExponentWidth, required int destMantissaWidth, required bool isNaN}) => - _packSpecial( + packSpecial( source: source, destExponentWidth: destExponentWidth, destMantissaWidth: destMantissaWidth, @@ -269,7 +271,7 @@ class FloatingPointConverter extends Module { Logic _roundMantissa(Logic sourceMantissa, int destMantissaWidth, FloatingPointRoundingMode roundingMode) { final shift = sourceMantissa.width - destMantissaWidth; - final roundBit = Const(1) << (shift - 1); + final roundBit = Const(1, width: sourceMantissa.width) << (shift - 1); final mask = roundBit - 1; final roundCondition = (sourceMantissa & roundBit) & ((sourceMantissa & mask) | (roundBit << 1)); diff --git a/lib/src/arithmetic/floating_point/floating_point_logic.dart b/lib/src/arithmetic/floating_point/floating_point_logic.dart index c1260f109..9dbc2ba6f 100644 --- a/lib/src/arithmetic/floating_point/floating_point_logic.dart +++ b/lib/src/arithmetic/floating_point/floating_point_logic.dart @@ -61,7 +61,7 @@ class FloatingPoint extends LogicStructure { /// represent very small numbers that are close to zero. 
Logic isSubnormal() => exponent.eq(LogicValue.filled(exponent.width, LogicValue.zero)) & - mantissa.neq(LogicValue.filled(exponent.width, LogicValue.zero)); + mantissa.neq(LogicValue.filled(mantissa.width, LogicValue.zero)); /// Return the zero exponent representation for this type of FloatingPoint Logic zeroExponent() => Const(LogicValue.zero).zeroExtend(exponent.width); @@ -73,13 +73,13 @@ class FloatingPoint extends LogicStructure { /// infinite value. Logic isInfinity() => exponent.eq(LogicValue.filled(exponent.width, LogicValue.one)) & - mantissa.eq(LogicValue.filled(exponent.width, LogicValue.zero)); + mantissa.eq(LogicValue.filled(mantissa.width, LogicValue.zero)); /// Return a Logic true if the exponent is all ones and mantissa is non-zero, /// indicating an infinite value. Logic isNaN() => exponent.eq(LogicValue.filled(exponent.width, LogicValue.one)) & - mantissa.neq(LogicValue.filled(exponent.width, LogicValue.zero)); + mantissa.neq(LogicValue.filled(mantissa.width, LogicValue.zero)); /// Return a Logic true if the FloatingPoint contains a zero value. /// @@ -87,7 +87,7 @@ class FloatingPoint extends LogicStructure { /// and a mantissa of zero. 
Logic isZero() => exponent.eq(LogicValue.filled(exponent.width, LogicValue.zero)) & - mantissa.eq(LogicValue.filled(exponent.width, LogicValue.zero)); + mantissa.eq(LogicValue.filled(mantissa.width, LogicValue.zero)); @override void put(dynamic val, {bool fill = false}) { diff --git a/test/arithmetic/floating_point/floating_point_conversion_test.dart b/test/arithmetic/floating_point/floating_point_conversion_test.dart index b53416e97..f40b1c6f5 100644 --- a/test/arithmetic/floating_point/floating_point_conversion_test.dart +++ b/test/arithmetic/floating_point/floating_point_conversion_test.dart @@ -1,14 +1,47 @@ +import 'dart:convert'; import 'dart:math'; import 'package:rohd_hcl/rohd_hcl.dart'; import 'package:test/test.dart'; import 'package:rohd/rohd.dart'; + void main() { - test('FP: FP16 to FP32 conversion test', () { + test('FP: packSpecial test', () { final fp32 = FloatingPoint32() ..put(FloatingPoint32Value.fromDouble(1.5).value); - print(Const(0, width: 3).eq(Const(0, width: 1))); + const fp16MantissaWidth = 11; + const fp16ExponentWidth = 5; + + final converter_fp32_fp16 = FloatingPointConverter(fp32, + destExponentWidth: fp16ExponentWidth, + destMantissaWidth: fp16MantissaWidth, + name: 'FP32_to_FP16_Converter'); + + final result = converter_fp32_fp16.result; + final packedFP = converter_fp32_fp16.packSpecial(source: fp32, destExponentWidth: fp16ExponentWidth, destMantissaWidth: fp16MantissaWidth, isNaN: false); + + expect(packedFP.isInfinity(), true); + }); + + test('FP: FP64 to FP32 conversion test', () { + // final fp64 = FloatingPoint64() + // ..put(FloatingPoint64Value.fromDouble(1.5).value); + + // const fp16MantissaWidth = 11; + // const fp16ExponentWidth = 5; + + // final converter = FloatingPointConverter(fp64, + // destExponentWidth: fp16ExponentWidth, + // destMantissaWidth: fp16MantissaWidth, + // name: 'FP64_to_FP32_Converter'); + + // final result = converter.result.value; + // expect(result, 
equals(FloatingPoint32Value.fromDouble(1.5).value)); + + // final packed = converter.packSpecial(source: fp64, destExponentWidth: fp16ExponentWidth, destMantissaWidth: fp16MantissaWidth, isNaN: false); + // // expect(packed.exponent.width, matcher) + // final converter = FloatingPointConverter(fp32, // Declare a constant for exponent width for FP16 From db6137a390cbd0b7cecef250c51500fbc4a99110 Mon Sep 17 00:00:00 2001 From: "Saw, Xue Zheng" Date: Wed, 16 Oct 2024 00:40:30 -0700 Subject: [PATCH 4/7] Unit tests --- .../floating_point_converter.dart | 100 ++++---- .../floating_point_conversion_test.dart | 215 ++++++++++++++---- 2 files changed, 226 insertions(+), 89 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_converter.dart b/lib/src/arithmetic/floating_point/floating_point_converter.dart index 200892d34..8cb786197 100644 --- a/lib/src/arithmetic/floating_point/floating_point_converter.dart +++ b/lib/src/arithmetic/floating_point/floating_point_converter.dart @@ -49,9 +49,6 @@ class FloatingPointConverter extends Module { ..gets(addInput('source', source, width: source.width)); addOutput('result', width: _result.width) <= _result; - // Handle sign - _result.sign <= source.sign; - Logic normalizedExponent = Logic(name: 'normalizedExponent', width: destExponentWidth); Logic normalizedMantissa = @@ -60,32 +57,37 @@ class FloatingPointConverter extends Module { normalizedExponent < _normalizeSubnormalExponent(); normalizedMantissa < _normalizeSubnormalMantissa(); - final normalizedFP = FloatingPoint(exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); - + final normalizedFP = FloatingPoint( + exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); + normalizedFP.sign <= source.sign; normalizedFP.exponent <= normalizedExponent; normalizedFP.mantissa <= normalizedMantissa; - If.block([ - Iff(source.isNaN(), [ - _result < _handleNaN(source, destExponentWidth, destMantissaWidth), - ]), - Iff(source.isInfinity(), [ - 
_result < _handleInfinity(source, destExponentWidth, destMantissaWidth), - ]), - Iff(source.isZero(), [ - _result < _handleZero(source, destExponentWidth, destMantissaWidth), - ]), - Iff(source.isSubnormal() | source.isNormal(), [ - _result < - mux( - source.isNormal(), - _convertNormalNumber( - source: source, - destExponentWidth: destExponentWidth, - destMantissaWidth: destMantissaWidth), - normalizedFP), + Combinational([ + If.block([ + Iff(source.isNaN(), [ + _result < _handleNaN(source, destExponentWidth, destMantissaWidth), + ]), + Iff(source.isInfinity(), [ + _result < + _handleInfinity(source, destExponentWidth, destMantissaWidth), + ]), + Iff(source.isZero(), [ + _result < _handleZero(source, destExponentWidth, destMantissaWidth), + ]), + Iff(source.isSubnormal() | source.isNormal(), [ + _result < + mux( + source.isNormal(), + convertNormalNumber( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth), + normalizedFP), + ]), ]), + _result.sign < source.sign ]); } @@ -118,18 +120,17 @@ class FloatingPointConverter extends Module { destExponentWidth: destExponentWidth, destMantissaWidth: destMantissaWidth); - FloatingPoint _convertNormalNumber( + static FloatingPoint convertNormalNumber( {required FloatingPoint source, required int destExponentWidth, required int destMantissaWidth}) { - final adjustedExponent = - _adjustExponent(source.exponent, destExponentWidth); + final adjustedExponent = adjustExponent(source.exponent, destExponentWidth); final adjustedMantissa = Logic(name: 'adjustedMantissa', width: destMantissaWidth); adjustedMantissa < - _adjustMantissaPrecision(source.mantissa, destMantissaWidth, + adjustMantissaPrecision(source.mantissa, destMantissaWidth, FloatingPointRoundingMode.roundNearestEven); final isOverflow = adjustedExponent @@ -142,13 +143,13 @@ class FloatingPointConverter extends Module { If.block([ Iff(isOverflow, [ packNormal < - _handleOverflow( + handleOverflow( source: source, 
destExponentWidth: destExponentWidth, destMantissaWidth: destMantissaWidth), ]), ElseIf(isUnderflow, [ - packNormal < _handleUnderflow(), + packNormal < handleUnderflow(), ]), Else([ packNormal < @@ -168,17 +169,17 @@ class FloatingPointConverter extends Module { Logic _normalizeSubnormalMantissa() => Const(0, width: destMantissaWidth, fill: true); - FloatingPoint _handleOverflow( + static FloatingPoint handleOverflow( {required FloatingPoint source, required int destExponentWidth, required int destMantissaWidth}) => - _packInfinity( + packInfinity( source: source, destExponentWidth: destExponentWidth, destMantissaWidth: destMantissaWidth, isNaN: false); - FloatingPoint _handleUnderflow() => + static FloatingPoint handleUnderflow() => FloatingPoint(exponentWidth: 0, mantissaWidth: 0); /// Pack a special floating point number into a target. @@ -191,8 +192,7 @@ class FloatingPointConverter extends Module { /// [sign] is the sign bit of the special number. /// /// [isNaN] is true if the special number is a NaN, false if it is an infinity. 
- @visibleForTesting - FloatingPoint packSpecial( + static FloatingPoint packSpecial( {required FloatingPoint source, required int destExponentWidth, required int destMantissaWidth, @@ -203,7 +203,9 @@ class FloatingPointConverter extends Module { pack.exponent <= Const(1, width: destExponentWidth, fill: true); if (isNaN) { - pack.mantissa <= Const(1, width: destMantissaWidth, fill: true) << (destMantissaWidth - 1); + pack.mantissa <= + Const(1, width: destMantissaWidth, fill: true) << + (destMantissaWidth - 1); } else { pack.mantissa <= Const(0, width: destMantissaWidth, fill: true); } @@ -224,7 +226,7 @@ class FloatingPointConverter extends Module { return pack; } - FloatingPoint _packInfinity( + static FloatingPoint packInfinity( {required FloatingPoint source, required int destExponentWidth, required int destMantissaWidth, @@ -235,7 +237,11 @@ class FloatingPointConverter extends Module { destMantissaWidth: destMantissaWidth, isNaN: false); - Logic _adjustExponent(Logic sourceExponent, int destExponentWidth) { + /// Adjust the exponent of a floating-point number to fit the new exponent width. + /// + /// The exponent is biased according to the source and destination exponent widths. + /// If the exponent widths are the same, the exponent is returned unchanged. 
+ static Logic adjustExponent(Logic sourceExponent, int destExponentWidth) { if (sourceExponent.width == destExponentWidth) { return sourceExponent; } else { @@ -249,18 +255,19 @@ class FloatingPointConverter extends Module { } } - Logic _adjustMantissaPrecision(Logic sourceMantissa, int destMantissaWidth, - FloatingPointRoundingMode roundingMode) { + static Logic adjustMantissaPrecision(Logic sourceMantissa, + int destMantissaWidth, FloatingPointRoundingMode roundingMode) { final adjustedMantissa = Logic(name: 'adjustedMantissa', width: destMantissaWidth); // In the case where precision is increased, we just need to zero pad or shift the source mantissa bits if (destMantissaWidth > sourceMantissa.width) { adjustedMantissa <= - sourceMantissa << (destMantissaWidth - sourceMantissa.width); + sourceMantissa.zeroExtend(destMantissaWidth) << + (destMantissaWidth - sourceMantissa.width); } else if (destMantissaWidth < sourceMantissa.width) { adjustedMantissa <= - _roundMantissa(sourceMantissa, destMantissaWidth, roundingMode); + roundMantissa(sourceMantissa, destMantissaWidth, roundingMode); } else { adjustedMantissa <= sourceMantissa; } @@ -268,16 +275,17 @@ class FloatingPointConverter extends Module { return adjustedMantissa; } - Logic _roundMantissa(Logic sourceMantissa, int destMantissaWidth, + static Logic roundMantissa(Logic sourceMantissa, int destMantissaWidth, FloatingPointRoundingMode roundingMode) { final shift = sourceMantissa.width - destMantissaWidth; final roundBit = Const(1, width: sourceMantissa.width) << (shift - 1); final mask = roundBit - 1; - final roundCondition = (sourceMantissa & roundBit) & - ((sourceMantissa & mask) | (roundBit << 1)); + final roundCondition = (sourceMantissa & roundBit).neq(0) & + ((sourceMantissa & mask).neq(0) | (roundBit << 1).neq(0)); final roundedMantissa = (sourceMantissa + roundBit) & ~(roundBit - 1); - final shiftedMantissa = roundedMantissa >> shift; + final shiftedMantissa = + (roundedMantissa >> 
shift).slice(destMantissaWidth - 1, 0); final result = Logic(name: 'roundedMantissa', width: destMantissaWidth); result <= mux(roundCondition, roundedMantissa, shiftedMantissa); diff --git a/test/arithmetic/floating_point/floating_point_conversion_test.dart b/test/arithmetic/floating_point/floating_point_conversion_test.dart index f40b1c6f5..c6b1f5cc6 100644 --- a/test/arithmetic/floating_point/floating_point_conversion_test.dart +++ b/test/arithmetic/floating_point/floating_point_conversion_test.dart @@ -4,69 +4,198 @@ import 'package:rohd_hcl/rohd_hcl.dart'; import 'package:test/test.dart'; import 'package:rohd/rohd.dart'; - void main() { - test('FP: packSpecial test', () { + const fp32ExponentWidth = 8; + const fp32MantissaWidth = 23; + + const bf19ExponentWidth = 8; + const bf19MantissaWidth = 10; + + const bf16ExponentWidth = 8; + const bf16MantissaWidth = 7; + + const fp16ExponentWidth = 5; + const fp16MantissaWidth = 10; + + const bf8ExponentWidth = 5; + const bf8MantissaWidth = 2; + + const hf8ExponentWidth = 4; + const hf8MantissaWidth = 3; + + test('FP: pack infinity test', () { final fp32 = FloatingPoint32() ..put(FloatingPoint32Value.fromDouble(1.5).value); - const fp16MantissaWidth = 11; - const fp16ExponentWidth = 5; + final packedFPbf19 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: bf19ExponentWidth, + destMantissaWidth: bf19MantissaWidth, + isNaN: false); + + final packedFPbf16 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: bf16ExponentWidth, + destMantissaWidth: bf16MantissaWidth, + isNaN: false); - final converter_fp32_fp16 = FloatingPointConverter(fp32, + final packedFPfp16 = FloatingPointConverter.packSpecial( + source: fp32, destExponentWidth: fp16ExponentWidth, destMantissaWidth: fp16MantissaWidth, - name: 'FP32_to_FP16_Converter'); - - final result = converter_fp32_fp16.result; - final packedFP = converter_fp32_fp16.packSpecial(source: fp32, destExponentWidth: fp16ExponentWidth, 
destMantissaWidth: fp16MantissaWidth, isNaN: false); - - expect(packedFP.isInfinity(), true); + isNaN: false); + + final packedFPbf8 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: bf8ExponentWidth, + destMantissaWidth: bf8MantissaWidth, + isNaN: false); + + final packedFPhf8 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: hf8ExponentWidth, + destMantissaWidth: hf8MantissaWidth, + isNaN: false); + + expect(packedFPbf19.isInfinity().value.toBool(), true); + expect(packedFPbf16.isInfinity().value.toBool(), true); + expect(packedFPfp16.isInfinity().value.toBool(), true); + expect(packedFPbf8.isInfinity().value.toBool(), true); + expect(packedFPhf8.isInfinity().value.toBool(), true); + + expect(packedFPbf19.isNaN().value.toBool(), false); + expect(packedFPbf16.isNaN().value.toBool(), false); + expect(packedFPfp16.isNaN().value.toBool(), false); + expect(packedFPbf8.isNaN().value.toBool(), false); + expect(packedFPhf8.isNaN().value.toBool(), false); }); - test('FP: FP64 to FP32 conversion test', () { - // final fp64 = FloatingPoint64() - // ..put(FloatingPoint64Value.fromDouble(1.5).value); + test('FP: pack nan test', () { + final fp32 = FloatingPoint32() + ..put(FloatingPoint32Value.fromDouble(1.5).value); + + final packedFPbf19 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: bf19ExponentWidth, + destMantissaWidth: bf19MantissaWidth, + isNaN: true); - // const fp16MantissaWidth = 11; - // const fp16ExponentWidth = 5; + final packedFPbf16 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: bf16ExponentWidth, + destMantissaWidth: bf16MantissaWidth, + isNaN: true); - // final converter = FloatingPointConverter(fp64, - // destExponentWidth: fp16ExponentWidth, - // destMantissaWidth: fp16MantissaWidth, - // name: 'FP64_to_FP32_Converter'); + final packedFPfp16 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: fp16ExponentWidth, + 
destMantissaWidth: fp16MantissaWidth, + isNaN: true); + + final packedFPbf8 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: bf8ExponentWidth, + destMantissaWidth: bf8MantissaWidth, + isNaN: true); + + final packedFPhf8 = FloatingPointConverter.packSpecial( + source: fp32, + destExponentWidth: hf8ExponentWidth, + destMantissaWidth: hf8MantissaWidth, + isNaN: true); + + expect(packedFPbf19.isNaN().value.toBool(), true); + expect(packedFPbf16.isNaN().value.toBool(), true); + expect(packedFPfp16.isNaN().value.toBool(), true); + expect(packedFPbf8.isNaN().value.toBool(), true); + expect(packedFPhf8.isNaN().value.toBool(), true); + + expect(packedFPbf19.isInfinity().value.toBool(), false); + expect(packedFPbf16.isInfinity().value.toBool(), false); + expect(packedFPfp16.isInfinity().value.toBool(), false); + expect(packedFPbf8.isInfinity().value.toBool(), false); + expect(packedFPhf8.isInfinity().value.toBool(), false); + }); - // final result = converter.result.value; - // expect(result, equals(FloatingPoint32Value.fromDouble(1.5).value)); + test('FP: adjust mantissa increase test', () { + final sourceMantissa = + Logic(name: 'sourceMantissa', width: fp16MantissaWidth); - // final packed = converter.packSpecial(source: fp64, destExponentWidth: fp16ExponentWidth, destMantissaWidth: fp16MantissaWidth, isNaN: false); - // // expect(packed.exponent.width, matcher) + sourceMantissa <= + Const(int.parse('1010101010', radix: 2), width: fp16MantissaWidth); + + final fp16Tofp32AdjustedMantissa = + FloatingPointConverter.adjustMantissaPrecision(sourceMantissa, + fp32MantissaWidth, FloatingPointRoundingMode.roundNearestEven); + + expect(fp16Tofp32AdjustedMantissa.value.toInt(), + int.parse('10101010100000000000000', radix: 2)); + }); + test('FP: adjust mantissa decrease test', () { + final sourceMantissa = + Logic(name: 'sourceMantissa', width: fp32MantissaWidth); - // final converter = FloatingPointConverter(fp32, - // Declare a constant for exponent 
width for FP16 - // const ingress_exponentWidth = 5; - // const ingress_mantissaWidth = 11; + sourceMantissa <= + Const(int.parse('10101010101101001101011', radix: 2), + width: fp32MantissaWidth); - // const egress_exponentWidth = 8; - // const egress_mantissaWidth = 23; + final fp32Tofp16AdjustedMantissa = + FloatingPointConverter.adjustMantissaPrecision(sourceMantissa, + fp16MantissaWidth, FloatingPointRoundingMode.roundNearestEven); - // Get FP16 value from a double, we will feed this FP16 value to both the software and hardware model, and then compare the results - // var fp16val = FloatingPointValue.fromDouble(val, - // exponentWidth: ingress_exponentWidth, - // mantissaWidth: ingress_mantissaWidth); + expect(fp32Tofp16AdjustedMantissa.value.toInt(), + int.parse('1010101010', radix: 2)); + }); - // // First get the exponent and rebias it - // var fp16_exponent = fp16val.exponent; + test('FP: convert normal test', () { + final fp32 = FloatingPoint32() + ..put(FloatingPoint32Value.fromDouble(1.5).value); - // // Re-bias the exponent for 32 - // var fp32_exponent = (fp16_exponent - fp16.bias()) + - // FloatingPointValue.computeBias(egress_exponentWidth); + final packedFPbf19 = FloatingPointConverter.convertNormalNumber( + source: fp32, + destExponentWidth: bf19ExponentWidth, + destMantissaWidth: bf19MantissaWidth, + ); + + final packedFPbf16 = FloatingPointConverter.convertNormalNumber( + source: fp32, + destExponentWidth: bf16ExponentWidth, + destMantissaWidth: bf16MantissaWidth, + ); + + final packedFPfp16 = FloatingPointConverter.convertNormalNumber( + source: fp32, + destExponentWidth: fp16ExponentWidth, + destMantissaWidth: fp16MantissaWidth, + ); + + final packedFPbf8 = FloatingPointConverter.convertNormalNumber( + source: fp32, + destExponentWidth: bf8ExponentWidth, + destMantissaWidth: bf8MantissaWidth, + ); + + final packedFPhf8 = FloatingPointConverter.convertNormalNumber( + source: fp32, + destExponentWidth: hf8ExponentWidth, + destMantissaWidth: 
hf8MantissaWidth, + ); + + expect(packedFPbf19.isNormal().value.toBool(), true); + expect(packedFPbf16.isNormal().value.toBool(), true); + expect(packedFPfp16.isNormal().value.toBool(), true); + expect(packedFPbf8.isNormal().value.toBool(), true); + expect(packedFPhf8.isNormal().value.toBool(), true); + }); - // // Zero extend the mantissa - // var mantissa64 = fp16val.mantissa; + test('FP: converter builds', () { + final fp32 = FloatingPoint32() + ..put(FloatingPoint32Value.fromDouble(1.5).value); - // // Compare FP16 values - // expect(fp16val, fp16val2); + final converter = FloatingPointConverter(fp32, + destExponentWidth: bf19ExponentWidth, + destMantissaWidth: bf19MantissaWidth, + name: 'fp32_to_bf19'); }); } From b72200bc0e8087f81969ff9027e003372142bb19 Mon Sep 17 00:00:00 2001 From: "Saw, Xue Zheng" Date: Tue, 22 Oct 2024 18:18:33 -0700 Subject: [PATCH 5/7] Resolve all errors for floating point converter --- .../floating_point_converter.dart | 195 ++++++++++++++---- .../floating_point_conversion_test.dart | 32 ++- 2 files changed, 181 insertions(+), 46 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_converter.dart b/lib/src/arithmetic/floating_point/floating_point_converter.dart index 8cb786197..d68301c6a 100644 --- a/lib/src/arithmetic/floating_point/floating_point_converter.dart +++ b/lib/src/arithmetic/floating_point/floating_point_converter.dart @@ -5,10 +5,7 @@ // A Floating-point format converter component. 
// // 2024 August 30 -// Author: AI Assistant - -import 'dart:ffi'; -import 'dart:math'; +// Author: Xue Zheng Saw (Alan) import 'package:meta/meta.dart'; import 'package:rohd/rohd.dart'; @@ -115,11 +112,29 @@ class FloatingPointConverter extends Module { FloatingPoint _handleZero(FloatingPoint sourceFP, int destExponentWidth, int destMantissaWidth) => - _packZero( + packZero( source: sourceFP, destExponentWidth: destExponentWidth, destMantissaWidth: destMantissaWidth); + /// Convert a normal [FloatingPoint] number to a new format. + /// + /// The output [FloatingPoint] has the given [destExponentWidth] and + /// [destMantissaWidth]. + /// + /// The [source] is a normal [FloatingPoint] number. + /// + /// The output [FloatingPoint] is computed as follows: + /// + /// 1. The exponent is adjusted according to the given [destExponentWidth]. + /// 2. The mantissa is adjusted according to the given [destMantissaWidth] and + /// the rounding mode [FloatingPointRoundingMode.roundNearestEven]. + /// 3. If the exponent is all ones, the output [FloatingPoint] is an infinity + /// with the same sign as the [source]. + /// 4. If the exponent is all zeros, the output [FloatingPoint] is a zero with + /// the same sign as the [source]. + /// 5. Otherwise, the output [FloatingPoint] is the result of the adjusted + /// exponent and the adjusted mantissa. 
static FloatingPoint convertNormalNumber( {required FloatingPoint source, required int destExponentWidth, @@ -129,9 +144,9 @@ class FloatingPointConverter extends Module { final adjustedMantissa = Logic(name: 'adjustedMantissa', width: destMantissaWidth); - adjustedMantissa < + adjustedMantissa <= adjustMantissaPrecision(source.mantissa, destMantissaWidth, - FloatingPointRoundingMode.roundNearestEven); + Const(FloatingPointRoundingMode.roundNearestEven.index)); final isOverflow = adjustedExponent .gte(FloatingPointValue.computeMaxExponent(destExponentWidth)); @@ -140,24 +155,27 @@ class FloatingPointConverter extends Module { final packNormal = FloatingPoint( exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); - If.block([ - Iff(isOverflow, [ - packNormal < - handleOverflow( - source: source, - destExponentWidth: destExponentWidth, - destMantissaWidth: destMantissaWidth), - ]), - ElseIf(isUnderflow, [ - packNormal < handleUnderflow(), - ]), - Else([ - packNormal < - FloatingPointValue( - sign: source.sign.value, - exponent: adjustedExponent.value, - mantissa: adjustedMantissa.value, - ) + Combinational([ + If.block([ + Iff(isOverflow, [ + packNormal < + handleOverflow( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth), + ]), + ElseIf(isUnderflow, [ + packNormal < + handleUnderflow( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth), + ]), + Else([ + packNormal.sign < source.sign.value, + packNormal.exponent < adjustedExponent.value, + packNormal.mantissa < adjustedMantissa.value + ]) ]), ]); @@ -179,8 +197,14 @@ class FloatingPointConverter extends Module { destMantissaWidth: destMantissaWidth, isNaN: false); - static FloatingPoint handleUnderflow() => - FloatingPoint(exponentWidth: 0, mantissaWidth: 0); + static FloatingPoint handleUnderflow( + {required FloatingPoint source, + required int destExponentWidth, + required int destMantissaWidth}) => + 
packZero( + source: source, + destExponentWidth: destExponentWidth, + destMantissaWidth: destMantissaWidth); /// Pack a special floating point number into a target. /// @@ -213,7 +237,7 @@ class FloatingPointConverter extends Module { return pack; } - FloatingPoint _packZero( + static FloatingPoint packZero( {required FloatingPoint source, required int destExponentWidth, required int destMantissaWidth}) { @@ -255,8 +279,14 @@ class FloatingPointConverter extends Module { } } - static Logic adjustMantissaPrecision(Logic sourceMantissa, - int destMantissaWidth, FloatingPointRoundingMode roundingMode) { + /// Adjust the mantissa of a floating-point number to fit the new mantissa width. + /// + /// If the mantissa width is increased, the mantissa is zero-extended. + /// If the mantissa width is decreased, the mantissa is rounded according to the + /// given rounding mode. + /// If the mantissa widths are the same, the mantissa is returned unchanged. + static Logic adjustMantissaPrecision( + Logic sourceMantissa, int destMantissaWidth, Logic roundingMode) { final adjustedMantissa = Logic(name: 'adjustedMantissa', width: destMantissaWidth); @@ -275,22 +305,101 @@ class FloatingPointConverter extends Module { return adjustedMantissa; } - static Logic roundMantissa(Logic sourceMantissa, int destMantissaWidth, - FloatingPointRoundingMode roundingMode) { - final shift = sourceMantissa.width - destMantissaWidth; - final roundBit = Const(1, width: sourceMantissa.width) << (shift - 1); - final mask = roundBit - 1; - final roundCondition = (sourceMantissa & roundBit).neq(0) & - ((sourceMantissa & mask).neq(0) | (roundBit << 1).neq(0)); - - final roundedMantissa = (sourceMantissa + roundBit) & ~(roundBit - 1); - final shiftedMantissa = - (roundedMantissa >> shift).slice(destMantissaWidth - 1, 0); + /// Rounds a mantissa to a target width. + /// + /// The mantissa is rounded according to the given [roundingMode]. 
+ /// If the mantissa width is increased, the mantissa is zero-extended. + /// If the mantissa width is decreased, the mantissa is rounded according to the + /// given [roundingMode]. + /// If the mantissa widths are the same, the mantissa is returned unchanged. + /// + /// [roundingMode] is a [Logic] value that represents the rounding mode to use. + /// The value should be one of the following: + /// - [FloatingPointRoundingMode.truncate.index] to truncate the mantissa. + /// - [FloatingPointRoundingMode.roundNearestEven.index] to round the mantissa + /// to the nearest even value. + /// - [FloatingPointRoundingMode.roundTowardsZero.index] to round the mantissa + /// towards zero. + /// - [FloatingPointRoundingMode.roundTowardsInfinity.index] to round the + /// mantissa towards positive infinity. + /// - [FloatingPointRoundingMode.roundTowardsNegativeInfinity.index] to round + /// the mantissa towards negative infinity. + /// - [FloatingPointRoundingMode.roundNearestTiesAway.index] to round the + /// mantissa to the nearest value, rounding away from zero in case of a tie. 
+ static Logic roundMantissa( + Logic sourceMantissa, int destMantissaWidth, Logic roundingMode) { + if (sourceMantissa.width <= destMantissaWidth) { + throw StateError( + 'Cannot round a mantissa to a width that is not smaller.'); + } + // First figure out what is the significant number to round to + // Note that we are assuming that sourceMantissa.width > destMantissaWidth here + + // Significant number = desMantissaWidth + // The number of bits to throw away or round = sourceMantissa.width - destMantissaWidth + + final significantNumber = destMantissaWidth; + final numberOfBitsToThrowAway = sourceMantissa.width - destMantissaWidth; + + final throwAwayBits = sourceMantissa.slice(numberOfBitsToThrowAway - 1, 0); + final significantBits = + sourceMantissa.slice(sourceMantissa.width - 1, numberOfBitsToThrowAway); + + // Use the throw away bits to calculate whether to round up or down + // if the most significant bit of the guardbits is 0, just truncate + // if the most significant bit of the guardbits is 1, and there is at least one bit in the rest, then round up + // if the most significant bit of the guardbits is 1, and there is not at least one bit in the rest, check the least significant bit in the mantissa, if it is 1 round up, else truncate + + // Truncate if MSB Guard bit is 0 + // or when MSB Guard bit is 1 and LSB of mantissa is 0 + final atLeastOneBitInGuardBits = + throwAwayBits.slice(throwAwayBits.width - 2, 0).or(); + final truncateCondition = throwAwayBits[-1].eq(0) | + (throwAwayBits[-1].eq(1) & + ~atLeastOneBitInGuardBits & + significantBits[0].eq(0)); + final roundCondition = throwAwayBits[-1].eq(1) & + (atLeastOneBitInGuardBits | + (~atLeastOneBitInGuardBits & significantBits[0].eq(1))); final result = Logic(name: 'roundedMantissa', width: destMantissaWidth); - result <= mux(roundCondition, roundedMantissa, shiftedMantissa); - // TODO : Add If block for rounding modes + final truncatedResult = significantBits; + final roundedResult = 
significantBits + 1; + + Combinational([ + If.block([ + Iff(roundingMode.eq(FloatingPointRoundingMode.roundNearestEven.index), [ + result < mux(roundCondition, roundedResult, truncatedResult), + ]), + ElseIf(roundingMode.eq(FloatingPointRoundingMode.truncate.index), [ + result < truncatedResult, + ]), + ElseIf( + roundingMode.eq(FloatingPointRoundingMode.roundTowardsZero.index), [ + result < truncatedResult, // TODO: Implement round towards zero + ]), + ElseIf( + roundingMode + .eq(FloatingPointRoundingMode.roundTowardsInfinity.index), + [ + result < truncatedResult, // TODO: Implement + ]), + ElseIf( + roundingMode.eq( + FloatingPointRoundingMode.roundTowardsNegativeInfinity.index), + [ + result < truncatedResult, // TODO: Implement + ]), + ElseIf( + roundingMode + .eq(FloatingPointRoundingMode.roundNearestTiesAway.index), + [ + result < truncatedResult, // TODO: Implement + ]), + Else([result < truncatedResult]) + ]) + ]); return result; } diff --git a/test/arithmetic/floating_point/floating_point_conversion_test.dart b/test/arithmetic/floating_point/floating_point_conversion_test.dart index c6b1f5cc6..317c90208 100644 --- a/test/arithmetic/floating_point/floating_point_conversion_test.dart +++ b/test/arithmetic/floating_point/floating_point_conversion_test.dart @@ -125,8 +125,10 @@ void main() { Const(int.parse('1010101010', radix: 2), width: fp16MantissaWidth); final fp16Tofp32AdjustedMantissa = - FloatingPointConverter.adjustMantissaPrecision(sourceMantissa, - fp32MantissaWidth, FloatingPointRoundingMode.roundNearestEven); + FloatingPointConverter.adjustMantissaPrecision( + sourceMantissa, + fp32MantissaWidth, + Const(FloatingPointRoundingMode.roundNearestEven.index)); expect(fp16Tofp32AdjustedMantissa.value.toInt(), int.parse('10101010100000000000000', radix: 2)); @@ -141,11 +143,35 @@ void main() { width: fp32MantissaWidth); final fp32Tofp16AdjustedMantissa = + FloatingPointConverter.adjustMantissaPrecision( + sourceMantissa, + fp16MantissaWidth, + 
Const(FloatingPointRoundingMode.roundNearestEven.index)); + + final fp32Tofp16AdjustedMantissaTruncate = + FloatingPointConverter.adjustMantissaPrecision(sourceMantissa, + fp16MantissaWidth, Const(FloatingPointRoundingMode.truncate.index)); + + final fp32Tobf8AdjustedMantissa = + FloatingPointConverter.adjustMantissaPrecision( + sourceMantissa, + bf8MantissaWidth, + Const(FloatingPointRoundingMode.roundNearestEven.index)); + + final fp32Tobf8AdjustedMantissaTruncate = FloatingPointConverter.adjustMantissaPrecision(sourceMantissa, - fp16MantissaWidth, FloatingPointRoundingMode.roundNearestEven); + bf8MantissaWidth, Const(FloatingPointRoundingMode.truncate.index)); expect(fp32Tofp16AdjustedMantissa.value.toInt(), + int.parse('1010101011', radix: 2)); + + expect(fp32Tofp16AdjustedMantissaTruncate.value.toInt(), int.parse('1010101010', radix: 2)); + + expect(fp32Tobf8AdjustedMantissa.value.toInt(), int.parse('11', radix: 2)); + + expect(fp32Tobf8AdjustedMantissaTruncate.value.toInt(), + int.parse('10', radix: 2)); }); test('FP: convert normal test', () { From f925684694e7caf85dcb71c097bd0f1e7f46f72a Mon Sep 17 00:00:00 2001 From: "Saw, Xue Zheng" Date: Thu, 24 Oct 2024 01:09:48 -0700 Subject: [PATCH 6/7] Fixes assignment issues with floating point converter. 
Added more tests --- .../floating_point_converter.dart | 34 +++++++---- .../floating_point_conversion_test.dart | 59 +++++++++++++++++-- 2 files changed, 79 insertions(+), 14 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_converter.dart b/lib/src/arithmetic/floating_point/floating_point_converter.dart index d68301c6a..8422baf2c 100644 --- a/lib/src/arithmetic/floating_point/floating_point_converter.dart +++ b/lib/src/arithmetic/floating_point/floating_point_converter.dart @@ -51,8 +51,8 @@ class FloatingPointConverter extends Module { Logic normalizedMantissa = Logic(name: 'normalizedMantissa', width: destMantissaWidth); - normalizedExponent < _normalizeSubnormalExponent(); - normalizedMantissa < _normalizeSubnormalMantissa(); + normalizedExponent <= _normalizeSubnormalExponent(); + normalizedMantissa <= _normalizeSubnormalMantissa(); final normalizedFP = FloatingPoint( exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); @@ -66,14 +66,14 @@ class FloatingPointConverter extends Module { Iff(source.isNaN(), [ _result < _handleNaN(source, destExponentWidth, destMantissaWidth), ]), - Iff(source.isInfinity(), [ + ElseIf(source.isInfinity(), [ _result < _handleInfinity(source, destExponentWidth, destMantissaWidth), ]), - Iff(source.isZero(), [ + ElseIf(source.isZero(), [ _result < _handleZero(source, destExponentWidth, destMantissaWidth), ]), - Iff(source.isSubnormal() | source.isNormal(), [ + ElseIf(source.isSubnormal() | source.isNormal(), [ _result < mux( source.isNormal(), @@ -149,12 +149,15 @@ class FloatingPointConverter extends Module { Const(FloatingPointRoundingMode.roundNearestEven.index)); final isOverflow = adjustedExponent - .gte(FloatingPointValue.computeMaxExponent(destExponentWidth)); - final isUnderflow = adjustedExponent.lte(0); + .gt(FloatingPointValue.computeMaxExponent(destExponentWidth)); + final isUnderflow = adjustedExponent.lt(0); final packNormal = FloatingPoint( exponentWidth: destExponentWidth, 
mantissaWidth: destMantissaWidth); + print("isOverflow:"); + print(isOverflow.value.toString()); + Combinational([ If.block([ Iff(isOverflow, [ @@ -172,13 +175,14 @@ class FloatingPointConverter extends Module { destMantissaWidth: destMantissaWidth), ]), Else([ - packNormal.sign < source.sign.value, - packNormal.exponent < adjustedExponent.value, - packNormal.mantissa < adjustedMantissa.value + packNormal.sign < source.sign, + packNormal.exponent < adjustedExponent, + packNormal.mantissa < adjustedMantissa ]) ]), ]); + print(packNormal.value.toString()); return packNormal; } @@ -187,6 +191,15 @@ class FloatingPointConverter extends Module { Logic _normalizeSubnormalMantissa() => Const(0, width: destMantissaWidth, fill: true); + /// Handle the case where the [source] is too large to be represented by + /// a [FloatingPoint] with the given [destExponentWidth] and + /// [destMantissaWidth]. + /// + /// The resulting [FloatingPoint] is a non-NaN infinity with the same sign as + /// the [source]. + /// + /// [destExponentWidth] and [destMantissaWidth] are the widths of the exponent + /// and mantissa of the destination [FloatingPoint] respectively. 
static FloatingPoint handleOverflow( {required FloatingPoint source, required int destExponentWidth, @@ -224,6 +237,7 @@ class FloatingPointConverter extends Module { final pack = FloatingPoint( exponentWidth: destExponentWidth, mantissaWidth: destMantissaWidth); + pack.sign <= source.sign; pack.exponent <= Const(1, width: destExponentWidth, fill: true); if (isNaN) { diff --git a/test/arithmetic/floating_point/floating_point_conversion_test.dart b/test/arithmetic/floating_point/floating_point_conversion_test.dart index 317c90208..26323fa03 100644 --- a/test/arithmetic/floating_point/floating_point_conversion_test.dart +++ b/test/arithmetic/floating_point/floating_point_conversion_test.dart @@ -1,8 +1,7 @@ -import 'dart:convert'; -import 'dart:math'; +import 'dart:io'; +import 'package:rohd/rohd.dart'; import 'package:rohd_hcl/rohd_hcl.dart'; import 'package:test/test.dart'; -import 'package:rohd/rohd.dart'; void main() { const fp32ExponentWidth = 8; @@ -217,11 +216,63 @@ void main() { test('FP: converter builds', () { final fp32 = FloatingPoint32() - ..put(FloatingPoint32Value.fromDouble(1.5).value); + ..put(FloatingPoint32Value.fromDouble(1.512312312312312).value); final converter = FloatingPointConverter(fp32, destExponentWidth: bf19ExponentWidth, destMantissaWidth: bf19MantissaWidth, name: 'fp32_to_bf19'); + + expect(converter.result.floatingPointValue.toString().contains('x'), false); + expect(converter.result.floatingPointValue.toString().contains('z'), false); + }); + + test('FP: converter processes TestFloat vectors', () async { + // Run testfloat_gen and capture its output + // Get the current test file path + final currentFilePath = Platform.script.toFilePath(); + print('Current test file: $currentFilePath'); + + // Get the directory containing the test file + final testDir = File(currentFilePath).parent.path; + print('Test directory: $testDir'); + + // Run testfloat_gen from a specific directory + final result = await Process.run( + 'testfloat_gen', + 
['f32_to_f16'], + workingDirectory: testDir, + ); + if (result.exitCode != 0) { + throw Exception('TestFloat failed: ${result.stderr}'); + } + + final testVectors = (result.stdout as String).split('\n'); + + for (final line in testVectors) { + if (line.trim().isEmpty) continue; + + final parts = line.trim().split(RegExp(r'\s+')); + final inputHex = parts[0]; + final expectedHex = parts[1]; + final flags = parts[2]; + + final inputBits = int.parse(inputHex, radix: 16); + + final fp32 = FloatingPoint32()..put(inputBits); + + final converter = FloatingPointConverter(fp32, + destExponentWidth: fp16ExponentWidth, + destMantissaWidth: fp16MantissaWidth, + name: 'fp32_to_bf19'); + + print('Test case:'); + print(' Input (hex): $inputHex'); + print(' Input (float): ${fp32.floatingPointValue}'); + print(' Expected (hex): $expectedHex'); + print(' Got: ${converter.result.floatingPointValue}'); + print(' Flags: $flags'); + print('---'); + } }); } From 1c1fff926a415b0516e56f1838d16cf4022292e2 Mon Sep 17 00:00:00 2001 From: "Saw, Xue Zheng" Date: Thu, 24 Oct 2024 12:13:02 -0700 Subject: [PATCH 7/7] testfloat vector generation readme --- test/arithmetic/floating_point/README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 test/arithmetic/floating_point/README.md diff --git a/test/arithmetic/floating_point/README.md b/test/arithmetic/floating_point/README.md new file mode 100644 index 000000000..6a42acf7f --- /dev/null +++ b/test/arithmetic/floating_point/README.md @@ -0,0 +1,22 @@ + +# How to generate test vectors using testfloat + +```bash +git clone https://github.com/ucb-bar/berkeley-testfloat-3.git +git clone https://github.com/ucb-bar/berkeley-softfloat-3.git + +cd berkeley-softfloat-3/build/Linux-x86_64-GCC/ +make + +cd ../../../berkeley-testfloat-3/build/Linux-x86_64-GCC/ +make + +./testfloat_gen f32_to_f16 + +## 4FFFFDFE 7C00 05 +## 5F7FFFFF 7C00 05 +## CBFFF800 FC00 05 +## 3AFFDEFF 17FF 01 +## 5F7FFFFE 7C00 05 +## C27FDFFB D3FF 01 +```