diff --git a/test/TestArithmetic.cpp b/test/TestArithmetic.cpp new file mode 100644 index 00000000..4c2ecc0e --- /dev/null +++ b/test/TestArithmetic.cpp @@ -0,0 +1,890 @@ +/* + * Author: doe300 + * + * See the file "LICENSE" for the full license governing this code. + */ + +#include "TestArithmetic.h" + +#include "Compiler.h" +#include "VC4C.h" +#include "tools.h" + +#include +#include +#include + +template +static std::array generateInput(bool allowNull) +{ + std::array arr; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); + + for(std::size_t i = 0; i < N; ++i) + { + T tmp; + do + { + // to prevent division by zero + tmp = dis(gen); + } while(!allowNull && tmp == 0); + arr[i] = tmp; + } + + return arr; +} + +template +static std::array generateInput(bool allowNull) +{ + std::array arr; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); + + for(std::size_t i = 0; i < N; ++i) + { + float tmp; + do + { + // to prevent division by zero + tmp = dis(gen); + } while(!allowNull && tmp == 0.0f); + arr[i] = tmp; + } + return arr; +} + +static const std::string BINARY_OPERATION = R"( +__kernel void test(__global TYPE* out, const __global TYPE* in0, const __global TYPE* in1) { + size_t gid = get_global_id(0); + out[gid] = in0[gid] OP in1[gid]; +} +)"; + +static const std::string RELATIONAL_OPERATION = R"( +__kernel void test(__global int16* out, const __global TYPE* in0, const __global TYPE* in1) { + size_t gid = get_global_id(0); + out[gid] = convert_int16(in0[gid] OP in1[gid]); +} +)"; + +static const std::string UNARY_OPERATION = R"( +__kernel void test(__global int16* out, const __global TYPE* in) { + size_t gid = get_global_id(0); + out[gid] = convert_int16(OP in[gid]); +} +)"; + +static const std::string CONVERSION_OPERATION = R"( +// trick to allow concatenating macro (content!!) to symbol +#define CONCAT(A, B) CONCAT_(A, B) +#define CONCAT_(A, B) A##B + +__kernel void test(__global OUT* out, const __global IN* in) { + size_t gid = get_global_id(0); + out[gid] = CONCAT(convert_,OUT)(in[gid]); +} +)"; + +static const std::string CAST_OPERATION = R"( +__kernel void test(__global OUT* out, const __global IN* in) { + size_t gid = get_global_id(0); + out[gid] = as_##OUT(in[gid]); +} +)"; + +template +static void checkBinaryResults(const std::array& input0, const std::array& input1, + const std::array& output, const std::function& op, const std::string& opName, + const std::function& onError) +{ + for(std::size_t i = 0; i < N; ++i) + { + if(output[i] != op(input0[i], input1[i])) + { + auto result = std::to_string(output[i]); + auto expected = std::to_string(input0[i]) + " " + opName + " " + std::to_string(input1[i]) + " = " + + std::to_string(op(input0[i], input1[i])); + onError(expected, result); + } + } +} + +template +static void checkUnaryResults(const std::array& input, const std::array& output, + const std::function& op, const std::string& opName, + const std::function& onError) +{ + for(std::size_t i = 0; i < N; ++i) + { + if(output[i] != op(input[i])) + { + auto result = std::to_string(output[i]); + auto expected = opName + " " + std::to_string(input[i]) + " = " + std::to_string(op(input[i])); + onError(expected, result); + } + } +} + +TestArithmetic::TestArithmetic(const vc4c::Configuration& config) : config(config) +{ + TEST_ADD(TestArithmetic::testSignedIntMultiplication); + TEST_ADD(TestArithmetic::testSignedShortMultiplication); + TEST_ADD(TestArithmetic::testSignedIntMultiplication); + TEST_ADD(TestArithmetic::testSignedShortMultiplication); + TEST_ADD(TestArithmetic::testSignedCharMultiplication); + TEST_ADD(TestArithmetic::testUnsignedIntMultiplication); + TEST_ADD(TestArithmetic::testUnsignedShortMultiplication); + TEST_ADD(TestArithmetic::testUnsignedCharMultiplication); + TEST_ADD(TestArithmetic::testSignedIntDivision); + TEST_ADD(TestArithmetic::testSignedShortDivision); + TEST_ADD(TestArithmetic::testSignedCharDivision); + TEST_ADD(TestArithmetic::testUnsignedIntDivision); + TEST_ADD(TestArithmetic::testUnsignedShortDivision); + TEST_ADD(TestArithmetic::testUnsignedCharDivision); + TEST_ADD(TestArithmetic::testSignedIntModulo); + TEST_ADD(TestArithmetic::testSignedShortModulo); + TEST_ADD(TestArithmetic::testSignedCharModulo); + TEST_ADD(TestArithmetic::testUnsignedIntModulo); + TEST_ADD(TestArithmetic::testUnsignedShortModulo); + TEST_ADD(TestArithmetic::testUnsignedCharModulo); + TEST_ADD(TestArithmetic::testFloatingPointDivision); + + TEST_ADD(TestArithmetic::testIntegerEquality); + TEST_ADD(TestArithmetic::testShortEquality); + TEST_ADD(TestArithmetic::testCharEquality); + TEST_ADD(TestArithmetic::testFloatEquality); + TEST_ADD(TestArithmetic::testIntegerInequality); + TEST_ADD(TestArithmetic::testShortInequality); + TEST_ADD(TestArithmetic::testCharInequality); + TEST_ADD(TestArithmetic::testFloatInequality); + TEST_ADD(TestArithmetic::testSignedIntGreater); + TEST_ADD(TestArithmetic::testSignedShortGreater); + TEST_ADD(TestArithmetic::testSignedCharGreater); + TEST_ADD(TestArithmetic::testUnsignedIntGreater); + TEST_ADD(TestArithmetic::testUnsignedShortGreater); + TEST_ADD(TestArithmetic::testUnsignedCharGreater); + TEST_ADD(TestArithmetic::testFloatGreater); + TEST_ADD(TestArithmetic::testSignedIntLess); + TEST_ADD(TestArithmetic::testSignedShortLess); + TEST_ADD(TestArithmetic::testSignedCharLess); + TEST_ADD(TestArithmetic::testUnsignedIntLess); + TEST_ADD(TestArithmetic::testUnsignedShortLess); + TEST_ADD(TestArithmetic::testUnsignedCharLess); + TEST_ADD(TestArithmetic::testFloatLess); + TEST_ADD(TestArithmetic::testSignedIntGreaterEquals); + TEST_ADD(TestArithmetic::testSignedShortGreaterEquals); + TEST_ADD(TestArithmetic::testSignedCharGreaterEquals); + TEST_ADD(TestArithmetic::testUnsignedIntGreaterEquals); + TEST_ADD(TestArithmetic::testUnsignedShortGreaterEquals); + TEST_ADD(TestArithmetic::testUnsignedCharGreaterEquals); + TEST_ADD(TestArithmetic::testFloatGreaterEquals); + TEST_ADD(TestArithmetic::testSignedIntLessEquals); + TEST_ADD(TestArithmetic::testSignedShortLessEquals); + TEST_ADD(TestArithmetic::testSignedCharLessEquals); + TEST_ADD(TestArithmetic::testUnsignedIntLessEquals); + TEST_ADD(TestArithmetic::testUnsignedShortLessEquals); + TEST_ADD(TestArithmetic::testUnsignedCharLessEquals); + TEST_ADD(TestArithmetic::testFloatLessEquals); + + TEST_ADD(TestArithmetic::testSignedIntAnd); + TEST_ADD(TestArithmetic::testSignedShortAnd); + TEST_ADD(TestArithmetic::testSignedCharAnd); + TEST_ADD(TestArithmetic::testUnsignedIntAnd); + TEST_ADD(TestArithmetic::testUnsignedShortAnd); + TEST_ADD(TestArithmetic::testUnsignedCharAnd); + TEST_ADD(TestArithmetic::testFloatAnd); + TEST_ADD(TestArithmetic::testSignedIntOr); + TEST_ADD(TestArithmetic::testSignedShortOr); + TEST_ADD(TestArithmetic::testSignedCharOr); + TEST_ADD(TestArithmetic::testUnsignedIntOr); + TEST_ADD(TestArithmetic::testUnsignedShortOr); + TEST_ADD(TestArithmetic::testUnsignedCharOr); + TEST_ADD(TestArithmetic::testFloatOr); + + TEST_ADD(TestArithmetic::testSignedTruncation); + TEST_ADD(TestArithmetic::testUnsignedTruncation); + TEST_ADD(TestArithmetic::testSignExtension); + TEST_ADD(TestArithmetic::testZeroExtension); + TEST_ADD(TestArithmetic::testSignedToFloat); + TEST_ADD(TestArithmetic::testUnsignedToFloat); + TEST_ADD(TestArithmetic::testFloatToSigned); + TEST_ADD(TestArithmetic::testFloatToUnsigned); +} + +void TestArithmetic::onMismatch(const std::string& expected, const std::string& result) +{ + TEST_ASSERT_EQUALS(expected, result); +} + +static void compileBuffer( + vc4c::Configuration& config, std::stringstream& buffer, const std::string& source, const std::string& options) +{ + config.outputMode = vc4c::OutputMode::BINARY; + config.writeKernelInfo = true; + std::istringstream input(source); + vc4c::Compiler::compile(input, buffer, config, options); +} + +template +static void copyConvert(const I& in, O& out) +{ + if(out.size() < N) + throw vc4c::CompilationError(vc4c::CompilationStep::GENERAL, "Invalid container size for copy"); + auto base = reinterpret_cast(in.data()); + std::copy(base, base + N, out.data()); +} + +template +static std::array runEmulation( + std::stringstream& codeBuffer, const std::vector>& inputs) +{ + using namespace vc4c::tools; + + std::vector>>> parameter; + parameter.emplace_back(std::make_pair(0, std::vector(V * L * G * sizeof(R) / sizeof(uint32_t)))); + for(const auto& input : inputs) + { + parameter.emplace_back(std::make_pair(0, std::vector(V * L * G * sizeof(T) / sizeof(uint32_t)))); + copyConvert(input, parameter.back().second.value()); + } + + WorkGroupConfig workGroups; + workGroups.dimensions = 1; + workGroups.localSizes[0] = L; + workGroups.numGroups[0] = G; + + EmulationData data(codeBuffer, "test", parameter, workGroups); + + auto result = emulate(data); + + if(!result.executionSuccessful) + throw vc4c::CompilationError(vc4c::CompilationStep::GENERAL, "Kernel execution failed"); + + std::array output; + copyConvert(result.results[0].second.value(), output); + return output; +} + +template +static void testBinaryOperation(vc4c::Configuration& config, const std::string& options, + const std::function& op, const std::function& onError) +{ + std::stringstream code; + compileBuffer(config, code, BINARY_OPERATION, options); + + auto in0 = generateInput(false); + auto in1 = generateInput(false); + + auto out = runEmulation(code, {in0, in1}); + auto pos = options.find("-DOP=") + std::string("-DOP=").size(); + checkBinaryResults(in0, in1, out, op, options.substr(pos, options.find(' ', pos) - pos), onError); +} + +template <> +void testBinaryOperation(vc4c::Configuration& config, const std::string& options, + const std::function& op, + const std::function& onError) +{ + std::stringstream code; + compileBuffer(config, code, BINARY_OPERATION, options); + + auto in0 = generateInput<16 * 12>(false); + auto in1 = generateInput<16 * 12>(false); + + auto out = runEmulation(code, {in0, in1}); + auto pos = options.find("-DOP=") + std::string("-DOP=").size(); + checkBinaryResults(in0, in1, out, op, options.substr(pos, options.find(' ', pos) - pos), onError); +} + +template +static void testRelationalOperation(vc4c::Configuration& config, const std::string& options, + const std::function& op, const std::function& onError) +{ + std::stringstream code; + compileBuffer(config, code, RELATIONAL_OPERATION, options); + + auto in0 = generateInput(true); + auto in1 = generateInput(true); + + auto out = runEmulation(code, {in0, in1}); + auto pos = options.find("-DOP=") + std::string("-DOP=").size(); + checkBinaryResults(in0, in1, out, op, options.substr(pos, options.find(' ', pos) - pos), onError); +} + +template <> +void testRelationalOperation(vc4c::Configuration& config, const std::string& options, + const std::function& op, + const std::function& onError) +{ + std::stringstream code; + compileBuffer(config, code, RELATIONAL_OPERATION, options); + + auto in0 = generateInput<16 * 12>(true); + auto in1 = generateInput<16 * 12>(true); + + auto out = runEmulation(code, {in0, in1}); + auto pos = options.find("-DOP=") + std::string("-DOP=").size(); + checkBinaryResults( + in0, in1, out, op, options.substr(pos, options.find(' ', pos) - pos), onError); +} + +template +static void testConversionOperation(vc4c::Configuration& config, const std::string& options, + const std::function& op, const std::function& onError) +{ + std::stringstream code; + compileBuffer(config, code, CONVERSION_OPERATION, options); + + auto in = generateInput(true); + + auto out = runEmulation(code, {in}); + checkUnaryResults(in, out, op, "convert", onError); +} + +template +static void testConversionOperation(vc4c::Configuration& config, const std::string& options, + const std::function& op, const std::function& onError) +{ + std::stringstream code; + compileBuffer(config, code, CONVERSION_OPERATION, options); + + auto in = generateInput<16 * 12>(true); + + auto out = runEmulation(code, {in}); + checkUnaryResults(in, out, op, "convert", onError); +} + +template +static int checkRelation(T arg1, T arg2) +{ + C c{}; + return c(arg1, arg2) ? -1 : 0; +} + +template +static int checkAnd(T arg1, T arg2) +{ + return (arg1 != 0 && arg2 != 0) ? -1 : 0; +} + +template +static int checkOr(T arg1, T arg2) +{ + return (arg1 != 0 || arg2 != 0) ? -1 : 0; +} + +template +static T convert(S s) +{ + return static_cast(s); +} + +void TestArithmetic::testSignedIntMultiplication() +{ + testBinaryOperation(config, "-DTYPE=int16 -DOP=*", std::multiplies{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortMultiplication() +{ + testBinaryOperation(config, "-DTYPE=short16 -DOP=*", std::multiplies{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharMultiplication() +{ + testBinaryOperation(config, "-DTYPE=char16 -DOP=*", std::multiplies{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntMultiplication() +{ + testBinaryOperation(config, "-DTYPE=uint16 -DOP=*", std::multiplies{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortMultiplication() +{ + testBinaryOperation(config, "-DTYPE=ushort16 -DOP=*", std::multiplies{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharMultiplication() +{ + testBinaryOperation(config, "-DTYPE=uchar16 -DOP=*", std::multiplies{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntDivision() +{ + testBinaryOperation(config, "-DTYPE=int16 -DOP=/", std::divides{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortDivision() +{ + testBinaryOperation(config, "-DTYPE=short16 -DOP=/", std::divides{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharDivision() +{ + testBinaryOperation(config, "-DTYPE=char16 -DOP=/", std::divides{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntDivision() +{ + testBinaryOperation(config, "-DTYPE=uint16 -DOP=/", std::divides{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortDivision() +{ + testBinaryOperation(config, "-DTYPE=ushort16 -DOP=/", std::divides{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharDivision() +{ + testBinaryOperation(config, "-DTYPE=uchar16 -DOP=/", std::divides{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntModulo() +{ + testBinaryOperation(config, "-DTYPE=int16 -DOP=%", std::modulus{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortModulo() +{ + testBinaryOperation(config, "-DTYPE=short16 -DOP=%", std::modulus{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharModulo() +{ + testBinaryOperation(config, "-DTYPE=char16 -DOP=%", std::modulus{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntModulo() +{ + testBinaryOperation(config, "-DTYPE=uint16 -DOP=%", std::modulus{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortModulo() +{ + testBinaryOperation(config, "-DTYPE=ushort16 -DOP=%", std::modulus{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharModulo() +{ + testBinaryOperation(config, "-DTYPE=uchar16 -DOP=%", std::modulus{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatingPointDivision() +{ + testBinaryOperation(config, "-DTYPE=float16 -DOP=/", std::divides{}, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testIntegerEquality() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP===", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testShortEquality() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP===", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testCharEquality() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP===", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatEquality() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP===", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testIntegerInequality() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP=!=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testShortInequality() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP=!=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testCharInequality() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP=!=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatInequality() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP=!=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntGreater() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP=>", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortGreater() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP=>", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharGreater() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP=>", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntGreater() +{ + testRelationalOperation(config, "-DTYPE=uint16 -DOP=>", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortGreater() +{ + testRelationalOperation(config, "-DTYPE=ushort16 -DOP=>", + checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharGreater() +{ + testRelationalOperation(config, "-DTYPE=uchar16 -DOP=>", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatGreater() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP=>", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntLess() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP=<", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortLess() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP=<", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharLess() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP=<", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntLess() +{ + testRelationalOperation(config, "-DTYPE=uint16 -DOP=<", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortLess() +{ + testRelationalOperation(config, "-DTYPE=ushort16 -DOP=<", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharLess() +{ + testRelationalOperation(config, "-DTYPE=uchar16 -DOP=<", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatLess() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP=<", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntGreaterEquals() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP=>=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortGreaterEquals() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP=>=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharGreaterEquals() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP=>=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntGreaterEquals() +{ + testRelationalOperation(config, + "-DTYPE=uint16 -DOP=>=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortGreaterEquals() +{ + testRelationalOperation(config, + "-DTYPE=ushort16 -DOP=>=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharGreaterEquals() +{ + testRelationalOperation(config, + "-DTYPE=uchar16 -DOP=>=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatGreaterEquals() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP=>=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntLessEquals() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP=<=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortLessEquals() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP=<=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharLessEquals() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP=<=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntLessEquals() +{ + testRelationalOperation(config, "-DTYPE=uint16 -DOP=<=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortLessEquals() +{ + testRelationalOperation(config, + "-DTYPE=ushort16 -DOP=<=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharLessEquals() +{ + testRelationalOperation(config, + "-DTYPE=uchar16 -DOP=<=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatLessEquals() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP=<=", checkRelation>, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntAnd() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP=&&", checkAnd, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortAnd() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP=&&", checkAnd, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharAnd() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP=&&", checkAnd, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntAnd() +{ + testRelationalOperation(config, "-DTYPE=uint16 -DOP=&&", checkAnd, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortAnd() +{ + testRelationalOperation(config, "-DTYPE=ushort16 -DOP=&&", checkAnd, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharAnd() +{ + testRelationalOperation(config, "-DTYPE=uchar16 -DOP=&&", checkAnd, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatAnd() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP=&&", checkAnd, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedIntOr() +{ + testRelationalOperation(config, "-DTYPE=int16 -DOP=||", checkOr, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedShortOr() +{ + testRelationalOperation(config, "-DTYPE=short16 -DOP=||", checkOr, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedCharOr() +{ + testRelationalOperation(config, "-DTYPE=char16 -DOP=||", checkOr, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedIntOr() +{ + testRelationalOperation(config, "-DTYPE=uint16 -DOP=||", checkOr, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedShortOr() +{ + testRelationalOperation(config, "-DTYPE=ushort16 -DOP=||", checkOr, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedCharOr() +{ + testRelationalOperation(config, "-DTYPE=uchar16 -DOP=||", checkOr, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatOr() +{ + testRelationalOperation(config, "-DTYPE=float16 -DOP=||", checkOr, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedTruncation() +{ + testConversionOperation(config, "-DIN=int16 -DOUT=short16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=int16 -DOUT=char16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=short16 -DOUT=char16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedTruncation() +{ + testConversionOperation(config, "-DIN=uint16 -DOUT=ushort16", + convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=uint16 -DOUT=uchar16", + convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=ushort16 -DOUT=uchar16", + convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignExtension() +{ + testConversionOperation(config, "-DIN=short16 -DOUT=int16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=char16 -DOUT=int16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=char16 -DOUT=short16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testZeroExtension() +{ + testConversionOperation(config, "-DIN=ushort16 -DOUT=uint16", + convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=uchar16 -DOUT=uint16", + convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=uchar16 -DOUT=ushort16", + convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testSignedToFloat() +{ + testConversionOperation(config, "-DIN=int16 -DOUT=float16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=short16 -DOUT=float16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=char16 -DOUT=float16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testUnsignedToFloat() +{ + testConversionOperation(config, "-DIN=uint16 -DOUT=float16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=ushort16 -DOUT=float16", + convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=uchar16 -DOUT=float16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} + +void TestArithmetic::testFloatToSigned() +{ + testConversionOperation(config, "-DIN=float16 -DOUT=int16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=float16 -DOUT=short16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=float16 -DOUT=char16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} +void TestArithmetic::testFloatToUnsigned() +{ + testConversionOperation(config, "-DIN=float16 -DOUT=uint16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=float16 -DOUT=ushort16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); + testConversionOperation(config, "-DIN=float16 -DOUT=uchar16", convert, + std::bind(&TestArithmetic::onMismatch, this, std::placeholders::_1, std::placeholders::_2)); +} \ No newline at end of file diff --git a/test/TestArithmetic.h b/test/TestArithmetic.h new file mode 100644 index 00000000..f6390861 --- /dev/null +++ b/test/TestArithmetic.h @@ -0,0 +1,127 @@ +/* + * Author: doe300 + * + * See the file "LICENSE" for the full license governing this code. + */ + +#ifndef VC4C_TEST_ARITHMETIC_H +#define VC4C_TEST_ARITHMETIC_H + +#include "cpptest.h" + +#include "config.h" + +class TestArithmetic : public Test::Suite +{ + // TODO write tests for all relevant (e.g. not directly mapped to machine code) instructions from LLVM IR + // (https://llvm.org/docs/LangRef.html) +public: + TestArithmetic(const vc4c::Configuration& config = {}); + + // arithmetic operators + void testSignedIntMultiplication(); + void testSignedShortMultiplication(); + void testSignedCharMultiplication(); + void testUnsignedIntMultiplication(); + void testUnsignedShortMultiplication(); + void testUnsignedCharMultiplication(); + void testSignedIntDivision(); + void testSignedShortDivision(); + void testSignedCharDivision(); + void testUnsignedIntDivision(); + void testUnsignedShortDivision(); + void testUnsignedCharDivision(); + void testSignedIntModulo(); + void testSignedShortModulo(); + void testSignedCharModulo(); + void testUnsignedIntModulo(); + void testUnsignedShortModulo(); + void testUnsignedCharModulo(); + void testFloatingPointDivision(); + + // relational operators + void testIntegerEquality(); + void testShortEquality(); + void testCharEquality(); + void testFloatEquality(); + void testIntegerInequality(); + void testShortInequality(); + void testCharInequality(); + void testFloatInequality(); + void testSignedIntGreater(); + void testSignedShortGreater(); + void testSignedCharGreater(); + void testUnsignedIntGreater(); + void testUnsignedShortGreater(); + void testUnsignedCharGreater(); + void testFloatGreater(); + void testSignedIntLess(); + void testSignedShortLess(); + void testSignedCharLess(); + void testUnsignedIntLess(); + void testUnsignedShortLess(); + void testUnsignedCharLess(); + void testFloatLess(); + void testSignedIntGreaterEquals(); + void testSignedShortGreaterEquals(); + void testSignedCharGreaterEquals(); + void testUnsignedIntGreaterEquals(); + void testUnsignedShortGreaterEquals(); + void testUnsignedCharGreaterEquals(); + void testFloatGreaterEquals(); + void testSignedIntLessEquals(); + void testSignedShortLessEquals(); + void testSignedCharLessEquals(); + void testUnsignedIntLessEquals(); + void testUnsignedShortLessEquals(); + void testUnsignedCharLessEquals(); + void testFloatLessEquals(); + + // logical operators + void testSignedIntAnd(); + void testSignedShortAnd(); + void testSignedCharAnd(); + void testUnsignedIntAnd(); + void testUnsignedShortAnd(); + void testUnsignedCharAnd(); + void testFloatAnd(); + void testSignedIntOr(); + void testSignedShortOr(); + void testSignedCharOr(); + void testUnsignedIntOr(); + void testUnsignedShortOr(); + void testUnsignedCharOr(); + void testFloatOr(); + //XXX + void testSignedNot(); + void testUnsignedNot(); + void testFloatNot(); + void testSelection(); + + //XXX vector instructions + void testExtractElement(); + void testInsertElement(); + void testShuffleVector(); + + //conversion instructions + void testSignedTruncation(); + void testUnsignedTruncation(); + void testSignExtension(); + void testZeroExtension(); + void testSignedToFloat(); + void testUnsignedToFloat(); + void testFloatToSigned(); + void testFloatToUnsigned(); + //XXX + void testVectorBitcastTruncation4To1(); + void testVectorBitcastTruncation2To1(); + void testVectorBitcastExtension1To4(); + void testVectorBitcastExtension1To2(); + +private: + vc4c::Configuration config; + + void onMismatch(const std::string& expected, const std::string& result); +}; + +#endif /* VC4C_TEST_ARITHMETIC_H */ \ No newline at end of file diff --git a/test/sources.list b/test/sources.list index 4aa3df3e..40f67d1a 100644 --- a/test/sources.list +++ b/test/sources.list @@ -4,6 +4,8 @@ target_sources(TestVC4C RegressionTest.h test_cases.h test.cpp + TestArithmetic.cpp + TestArithmetic.h TestEmulator.cpp TestEmulator.h TestGraph.cpp diff --git a/test/test.cpp b/test/test.cpp index d487f5b9..1266141e 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -9,6 +9,7 @@ #include "cpptest.h" #include "cpptest-main.h" +#include "TestArithmetic.h" #include "TestEmulator.h" #include "TestGraph.h" #include "TestInstructions.h" @@ -78,6 +79,7 @@ int main(int argc, char** argv) Test::registerSuite(newEmulatorTest, "test-emulator", "Runs selected code-samples through the emulator"); Test::registerSuite(newStdLibTest, "test-stdlib", "Runs most of the VC4CL std-lib functions in emulator"); Test::registerSuite(Test::newInstance, "test-graph", "Runs basic test for the graph data structure"); + Test::registerSuite(Test::newInstance, "test-arithmetic", "Runs emulation tests for various kind of operations"); for(auto i = 1; i < argc; ++i) { diff --git a/testing/OpenCV/color_lab.cl b/testing/OpenCV/color_lab.cl new file mode 100644 index 00000000..16a96d25 --- /dev/null +++ b/testing/OpenCV/color_lab.cl @@ -0,0 +1,735 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if depth == 0 + #define DATA_TYPE uchar + #define MAX_NUM 255 + #define HALF_MAX_NUM 128 + #define COEFF_TYPE int + #define SAT_CAST(num) convert_uchar_sat(num) + #define DEPTH_0 +#elif depth == 2 + #define DATA_TYPE ushort + #define MAX_NUM 65535 + #define HALF_MAX_NUM 32768 + #define COEFF_TYPE int + #define SAT_CAST(num) convert_ushort_sat(num) + #define DEPTH_2 +#elif depth == 5 + #define DATA_TYPE float + #define MAX_NUM 1.0f + #define HALF_MAX_NUM 0.5f + #define COEFF_TYPE float + #define SAT_CAST(num) (num) + #define DEPTH_5 +#else + #error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)" +#endif + +#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) + +enum +{ + xyz_shift = 12, +}; + +#define scnbytes ((int)sizeof(DATA_TYPE)*scn) +#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn) + +#define __CAT(x, y) x##y +#define CAT(x, y) __CAT(x, y) + +#define DATA_TYPE_4 CAT(DATA_TYPE, 4) +#define DATA_TYPE_3 CAT(DATA_TYPE, 3) + +///////////////////////////////////// RGB <-> XYZ ////////////////////////////////////// + +__kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offset, + __global uchar * dstptr, int dst_step, int dst_offset, + int rows, int cols, __constant COEFF_TYPE * coeffs) +{ + int dx = get_global_id(0); + int dy = get_global_id(1) * PIX_PER_WI_Y; + + if (dx < cols) + { + int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset)); + int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (dy < rows) + { + __global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index); + __global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index); + + DATA_TYPE_4 src_pix = vload4(0, src); + DATA_TYPE r = src_pix.x, g = src_pix.y, b = src_pix.z; + +#ifdef DEPTH_5 + float x = fma(r, coeffs[0], fma(g, coeffs[1], b * coeffs[2])); + float y = fma(r, coeffs[3], fma(g, coeffs[4], b * coeffs[5])); + float z = fma(r, coeffs[6], fma(g, coeffs[7], b * coeffs[8])); +#else + int x = CV_DESCALE(mad24(r, coeffs[0], mad24(g, coeffs[1], b * coeffs[2])), xyz_shift); + int y = CV_DESCALE(mad24(r, coeffs[3], mad24(g, coeffs[4], b * coeffs[5])), xyz_shift); + int z = CV_DESCALE(mad24(r, coeffs[6], mad24(g, coeffs[7], b * coeffs[8])), xyz_shift); +#endif + dst[0] = SAT_CAST(x); + dst[1] = SAT_CAST(y); + dst[2] = SAT_CAST(z); + + ++dy; + dst_index += dst_step; + src_index += src_step; + } + } + } +} + +__kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offset, + __global uchar * dstptr, int dst_step, int dst_offset, + int rows, int cols, __constant COEFF_TYPE * coeffs) +{ + int dx = get_global_id(0); + int dy = get_global_id(1) * PIX_PER_WI_Y; + + if (dx < cols) + { + int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset)); + int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (dy < rows) + { + __global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index); + __global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index); + + DATA_TYPE_4 src_pix = vload4(0, src); + DATA_TYPE x = src_pix.x, y = src_pix.y, z = src_pix.z; + +#ifdef DEPTH_5 + float b = fma(x, coeffs[0], fma(y, coeffs[1], z * coeffs[2])); + float g = fma(x, coeffs[3], fma(y, coeffs[4], z * coeffs[5])); + float r = fma(x, coeffs[6], fma(y, coeffs[7], z * coeffs[8])); +#else + int b = CV_DESCALE(mad24(x, coeffs[0], mad24(y, coeffs[1], z * coeffs[2])), xyz_shift); + int g = CV_DESCALE(mad24(x, coeffs[3], mad24(y, coeffs[4], z * coeffs[5])), xyz_shift); + int r = CV_DESCALE(mad24(x, coeffs[6], mad24(y, coeffs[7], z * coeffs[8])), xyz_shift); +#endif + + DATA_TYPE dst0 = SAT_CAST(b); + DATA_TYPE dst1 = SAT_CAST(g); + DATA_TYPE dst2 = SAT_CAST(r); +#if dcn == 3 || defined DEPTH_5 + dst[0] = dst0; + dst[1] = dst1; + dst[2] = dst2; +#if dcn == 4 + dst[3] = MAX_NUM; +#endif +#else + *(__global DATA_TYPE_4 *)dst = (DATA_TYPE_4)(dst0, dst1, dst2, MAX_NUM); +#endif + + ++dy; + dst_index += dst_step; + src_index += src_step; + } + } + } +} + +/////////////////////////////////// [l|s]RGB <-> Lab /////////////////////////// + +#define lab_shift xyz_shift +#define gamma_shift 3 +#define lab_shift2 (lab_shift + gamma_shift) +#define GAMMA_TAB_SIZE 1024 +#define GammaTabScale (float)GAMMA_TAB_SIZE + +inline float splineInterpolate(float x, __global const float * tab, int n) +{ + int ix = clamp(convert_int_sat_rtn(x), 0, n-1); + x -= ix; + tab += ix << 2; + return fma(fma(fma(tab[3], x, tab[2]), x, tab[1]), x, tab[0]); +} + +#ifdef DEPTH_0 + +__kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset, + __global uchar * dst, int dst_step, int dst_offset, int rows, int cols, + __global const ushort * gammaTab, __global ushort * LabCbrtTab_b, + __constant int * coeffs, int Lscale, int Lshift) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols) + { + int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset)); + int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows) + { + __global const uchar* src_ptr = src + src_index; + __global uchar* dst_ptr = dst + dst_index; + uchar4 src_pix = vload4(0, src_ptr); + + int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], + C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], + C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; + + int R = gammaTab[src_pix.x], G = gammaTab[src_pix.y], B = gammaTab[src_pix.z]; + int fX = LabCbrtTab_b[CV_DESCALE(mad24(R, C0, mad24(G, C1, B*C2)), lab_shift)]; + int fY = LabCbrtTab_b[CV_DESCALE(mad24(R, C3, mad24(G, C4, B*C5)), lab_shift)]; + int fZ = LabCbrtTab_b[CV_DESCALE(mad24(R, C6, mad24(G, C7, B*C8)), lab_shift)]; + + int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 ); + int a = CV_DESCALE( mad24(500, fX - fY, 128*(1 << lab_shift2)), lab_shift2 ); + int b = CV_DESCALE( mad24(200, fY - fZ, 128*(1 << lab_shift2)), lab_shift2 ); + + dst_ptr[0] = SAT_CAST(L); + dst_ptr[1] = SAT_CAST(a); + dst_ptr[2] = SAT_CAST(b); + + ++y; + dst_index += dst_step; + src_index += src_step; + } + } + } +} + +#elif defined DEPTH_5 + +__kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offset, + __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, +#ifdef SRGB + __global const float * gammaTab, +#endif + __constant float * coeffs, float _1_3, float _a) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols) + { + int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset)); + int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows) + { + __global const float * src = (__global const float *)(srcptr + src_index); + __global float * dst = (__global float *)(dstptr + dst_index); + float4 src_pix = vload4(0, src); + + float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], + C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], + C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; + + float R = clamp(src_pix.x, 0.0f, 1.0f); + float G = clamp(src_pix.y, 0.0f, 1.0f); + float B = clamp(src_pix.z, 0.0f, 1.0f); + +#ifdef SRGB + R = splineInterpolate(R * GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G * GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + B = splineInterpolate(B * GammaTabScale, gammaTab, GAMMA_TAB_SIZE); +#endif + + // 7.787f = (29/3)^3/(29*4), 0.008856f = (6/29)^3, 903.3 = (29/3)^3 + float X = fma(R, C0, fma(G, C1, B*C2)); + float Y = fma(R, C3, fma(G, C4, B*C5)); + float Z = fma(R, C6, fma(G, C7, B*C8)); + + float FX = X > 0.008856f ? rootn(X, 3) : fma(7.787f, X, _a); + float FY = Y > 0.008856f ? rootn(Y, 3) : fma(7.787f, Y, _a); + float FZ = Z > 0.008856f ? rootn(Z, 3) : fma(7.787f, Z, _a); + + float L = Y > 0.008856f ? fma(116.f, FY, -16.f) : (903.3f * Y); + float a = 500.f * (FX - FY); + float b = 200.f * (FY - FZ); + + dst[0] = L; + dst[1] = a; + dst[2] = b; + + ++y; + dst_index += dst_step; + src_index += src_step; + } + } + } +} + +#endif + +inline void Lab2BGR_f(const float * srcbuf, float * dstbuf, +#ifdef SRGB + __global const float * gammaTab, +#endif + __constant float * coeffs, float lThresh, float fThresh) +{ + float li = srcbuf[0], ai = srcbuf[1], bi = srcbuf[2]; + + float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], + C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], + C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; + + float y, fy; + // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4) + if (li <= lThresh) + { + y = li / 903.3f; + fy = fma(7.787f, y, 16.0f / 116.0f); + } + else + { + fy = (li + 16.0f) / 116.0f; + y = fy * fy * fy; + } + + float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f }; + + #pragma unroll + for (int j = 0; j < 2; j++) + if (fxz[j] <= fThresh) + fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f; + else + fxz[j] = fxz[j] * fxz[j] * fxz[j]; + + float x = fxz[0], z = fxz[1]; + float ro = clamp(fma(C0, x, fma(C1, y, C2 * z)), 0.0f, 1.0f); + float go = clamp(fma(C3, x, fma(C4, y, C5 * z)), 0.0f, 1.0f); + float bo = clamp(fma(C6, x, fma(C7, y, C8 * z)), 0.0f, 1.0f); + +#ifdef SRGB + ro = splineInterpolate(ro * GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + go = splineInterpolate(go * GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + bo = splineInterpolate(bo * GammaTabScale, gammaTab, GAMMA_TAB_SIZE); +#endif + + dstbuf[0] = ro, dstbuf[1] = go, dstbuf[2] = bo; +} + +#ifdef DEPTH_0 + +__kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset, + __global uchar * dst, int dst_step, int dst_offset, int rows, int cols, +#ifdef SRGB + __global const float * gammaTab, +#endif + __constant float * coeffs, float lThresh, float fThresh) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols) + { + int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset)); + int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows) + { + __global const uchar* src_ptr = src + src_index; + __global uchar * dst_ptr = dst + dst_index; + uchar4 src_pix = vload4(0, src_ptr); + + float srcbuf[3], dstbuf[3]; + srcbuf[0] = src_pix.x*(100.f/255.f); + srcbuf[1] = convert_float(src_pix.y - 128); + srcbuf[2] = convert_float(src_pix.z - 128); + + Lab2BGR_f(&srcbuf[0], &dstbuf[0], +#ifdef SRGB + gammaTab, +#endif + coeffs, lThresh, fThresh); + +#if dcn == 3 + dst_ptr[0] = SAT_CAST(dstbuf[0] * 255.0f); + dst_ptr[1] = SAT_CAST(dstbuf[1] * 255.0f); + dst_ptr[2] = SAT_CAST(dstbuf[2] * 255.0f); +#else + *(__global uchar4 *)dst_ptr = (uchar4)(SAT_CAST(dstbuf[0] * 255.0f), + SAT_CAST(dstbuf[1] * 255.0f), SAT_CAST(dstbuf[2] * 255.0f), MAX_NUM); +#endif + ++y; + dst_index += dst_step; + src_index += src_step; + } + } + } +} + +#elif defined DEPTH_5 + +__kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offset, + __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, +#ifdef SRGB + __global const float * gammaTab, +#endif + __constant float * coeffs, float lThresh, float fThresh) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols) + { + int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset)); + int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + { + if (y < rows) + { + __global const float * src = (__global const float *)(srcptr + src_index); + __global float * dst = (__global float *)(dstptr + dst_index); + float4 src_pix = vload4(0, src); + + float srcbuf[3], dstbuf[3]; + srcbuf[0] = src_pix.x, srcbuf[1] = src_pix.y, srcbuf[2] = src_pix.z; + + Lab2BGR_f(&srcbuf[0], &dstbuf[0], +#ifdef SRGB + gammaTab, +#endif + coeffs, lThresh, fThresh); + + dst[0] = dstbuf[0], dst[1] = dstbuf[1], dst[2] = dstbuf[2]; +#if dcn == 4 + dst[3] = MAX_NUM; +#endif + ++y; + dst_index += dst_step; + src_index += src_step; + } + } + } +} + +#endif + +/////////////////////////////////// [l|s]RGB <-> Luv /////////////////////////// + +#define LAB_CBRT_TAB_SIZE 1024 +#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<= 8) + { + Y = fma(L, 1.f/116.f, 16.f/116.f); + Y = Y*Y*Y; + } + else + { + Y = L * (1.0f/903.3f); // L*(3./29.)^3 + } + float up = 3.f*fma(L, _un, u); + float vp = 0.25f/fma(L, _vn, v); + vp = clamp(vp, -0.25f, 0.25f); + X = 3.f*Y*up*vp; + Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f); + + float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2])); + float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5])); + float B = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8])); + + R = clamp(R, 0.f, 1.f); + G = clamp(G, 0.f, 1.f); + B = clamp(B, 0.f, 1.f); + +#ifdef SRGB + R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + B = splineInterpolate(B*GammaTabScale, gammaTab, GAMMA_TAB_SIZE); +#endif + + dst[0] = R; + dst[1] = G; + dst[2] = B; +#if dcn == 4 + dst[3] = MAX_NUM; +#endif + ++y; + dst_index += dst_step; + src_index += src_step; + } + } +} + +#elif defined DEPTH_0 + +__kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset, + __global uchar * dst, int dst_step, int dst_offset, int rows, int cols, +#ifdef SRGB + __global const float * gammaTab, +#endif + __constant float * coeffs, float _un, float _vn) +{ + int x = get_global_id(0); + int y = get_global_id(1) * PIX_PER_WI_Y; + + if (x < cols) + { + src += mad24(y, src_step, mad24(x, scnbytes, src_offset)); + dst += mad24(y, dst_step, mad24(x, dcnbytes, dst_offset)); + + #pragma unroll + for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) + if (y < rows) + { + float d, X, Y, Z; + float L = src[0]*(100.f/255.f); + // 1.388235294117647 = (220+134)/255 + float u = fma(convert_float(src[1]), 1.388235294117647f, -134.f); + // 1.027450980392157 = (140+122)/255 + float v = fma(convert_float(src[2]), 1.027450980392157f, - 140.f); + if(L >= 8) + { + Y = fma(L, 1.f/116.f, 16.f/116.f); + Y = Y*Y*Y; + } + else + { + Y = L * (1.0f/903.3f); // L*(3./29.)^3 + } + float up = 3.f*fma(L, _un, u); + float vp = 0.25f/fma(L, _vn, v); + vp = clamp(vp, -0.25f, 0.25f); + X = 3.f*Y*up*vp; + Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f); + + //limit X, Y, Z to [0, 2] to fit white point + X = clamp(X, 0.f, 2.f); Z = clamp(Z, 0.f, 2.f); + + float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2])); + float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5])); + float B = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8])); + + R = clamp(R, 0.f, 1.f); + G = clamp(G, 0.f, 1.f); + B = clamp(B, 0.f, 1.f); + +#ifdef SRGB + R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE); + B = splineInterpolate(B*GammaTabScale, gammaTab, GAMMA_TAB_SIZE); +#endif + + uchar dst0 = SAT_CAST(R * 255.0f); + uchar dst1 = SAT_CAST(G * 255.0f); + uchar dst2 = SAT_CAST(B * 255.0f); + +#if dcn == 4 + *(__global uchar4 *)dst = (uchar4)(dst0, dst1, dst2, MAX_NUM); +#else + dst[0] = dst0; + dst[1] = dst1; + dst[2] = dst2; +#endif + + ++y; + dst += dst_step; + src += src_step; + } + } +} + +#endif diff --git a/testing/bugs/vc4cl_41_hang.cl b/testing/bugs/vc4cl_41_hang.cl new file mode 100644 index 00000000..9e9790fb --- /dev/null +++ b/testing/bugs/vc4cl_41_hang.cl @@ -0,0 +1,18 @@ +__kernel void TestMagic(const int total, const int is_deeper_magic, const float alpha_s, const float fore_th, __global const float* gradx, __global const float* grady, + //in/out + __global float* BSx, __global float* BSy, __global int* mapRes) +{ + private const size_t i = get_global_id(0); + private const size_t gpu_used = get_global_size(0); + + private const size_t elements_count = total / (gpu_used * 16); + private const size_t offset = i * total / gpu_used; + + for (size_t k = 0; k < elements_count; ++k) + { + int16 mr = vload16( k , mapRes + offset); + const int16 twos = 2; + mr += twos; + vstore16(mr, k, mapRes + offset); + } +}