From 707e55baf2ad9cc4a502ed98417773758fe8c79e Mon Sep 17 00:00:00 2001 From: doe300 Date: Sat, 31 Aug 2019 08:55:30 +0200 Subject: [PATCH] Cleans up CMake and fixes some warnings/bugs - Also fixes a few bugs - Fixes some TSAN findings --- CMakeLists.txt | 4 +++- cmake/sanitizers.cmake | 23 ++++++++++++++-------- src/Bitfield.h | 9 ++++++++- src/Compiler.cpp | 3 ++- src/Types.cpp | 7 ++++++- src/Units.h | 2 ++ src/Values.cpp | 4 ++-- src/asm/OpCodes.cpp | 4 ++-- src/helper.h | 2 +- src/intermediate/VectorHelper.cpp | 7 ++++--- src/intermediate/operators.h | 14 +++++++++++++ src/intrinsics/Images.cpp | 1 - src/intrinsics/Intrinsics.cpp | 19 +++++++++++------- src/llvm/BitcodeReader.cpp | 4 ++-- src/main.cpp | 2 +- src/normalization/AddressCalculation.h | 2 +- src/normalization/MemoryAccess.cpp | 15 +++++++------- src/normalization/MemoryMapChecks.cpp | 2 +- src/normalization/Normalizer.cpp | 8 +++++--- src/optimization/ControlFlow.cpp | 15 +++++++------- src/optimization/InstructionScheduler.cpp | 2 +- src/optimization/Optimizer.cpp | 3 ++- src/periphery/VPM.h | 6 ++++++ src/tools/Emulator.cpp | 24 +++++++++++++---------- test/TestMemoryAccess.cpp | 4 ++-- test/emulation_helper.h | 5 +++-- 26 files changed, 125 insertions(+), 66 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6663ea9e..fa2ffa52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,8 @@ option(LLVMLIB_FRONTEND "Enables the front-end using the LLVM library to read LL option(ENABLE_COVERAGE "Enables collection of code coverage via gcov" OFF) # Option whether to enable use of the CLang library (EXPERIMENTAL) option(CLANG_LIBRARY "Uses the libclang library for compilation, uses the clang executable otherwise" OFF) +# Option whether to enable more compile-time checks +option(ADVANCED_CHECKS "Enable advanced compile-time checks" OFF) # Path to the VC4CL standard library # NOTE: Resolving ~ (for home directory) is currently not supported @@ -78,7 +80,7 @@ endif() # clang-tidy find_program(CLANG_TIDY NAMES clang-tidy clang-tidy-5.0 clang-tidy-6.0 clang-tidy-7 clang-tidy-8) -if(FALSE AND BUILD_DEBUG AND CLANG_TIDY) +if(ADVANCED_CHECKS AND CLANG_TIDY) message(STATUS "Enabling clang-tidy compile time checks: ${CLANG_TIDY}") set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY}") endif() diff --git a/cmake/sanitizers.cmake b/cmake/sanitizers.cmake index 761e3bec..0539f318 100644 --- a/cmake/sanitizers.cmake +++ b/cmake/sanitizers.cmake @@ -14,17 +14,24 @@ elseif("${CMAKE_BUILD_TYPE}" STREQUAL "ubsan") message(STATUS "Build will have UBSAN enabled!") endif() +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + set(SANITIZERS_ADDITIONAL_FLAGS "-Og") +else() + # -Og is not supported by CLang + set(SANITIZERS_ADDITIONAL_FLAGS "") +endif() + # AddressSanitize -set(CMAKE_C_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer -g -Og") -set(CMAKE_CXX_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer -g -Og") +set(CMAKE_C_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") +set(CMAKE_CXX_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") # ThreadSanitizer -set(CMAKE_C_FLAGS_TSAN "-fsanitize=thread -fno-omit-frame-pointer -g -Og") -set(CMAKE_CXX_FLAGS_TSAN "-fsanitize=thread -fno-omit-frame-pointer -g -Og") +set(CMAKE_C_FLAGS_TSAN "-fsanitize=thread -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") +set(CMAKE_CXX_FLAGS_TSAN "-fsanitize=thread -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") # LeakSanitizer -set(CMAKE_C_FLAGS_LSAN 
"-fsanitize=leak -fno-omit-frame-pointer -g -Og") -set(CMAKE_CXX_FLAGS_LSAN "-fsanitize=leak -fno-omit-frame-pointer -g -Og") +set(CMAKE_C_FLAGS_LSAN "-fsanitize=leak -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") +set(CMAKE_CXX_FLAGS_LSAN "-fsanitize=leak -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") # MemorySanitizer (Clang only!) set(CMAKE_C_FLAGS_MSAN "-fsanitize=memory -fno-optimize-sibling-calls -fsanitize-memory-track-origins=2 -fno-omit-frame-pointer -g -O2") @@ -32,5 +39,5 @@ set(CMAKE_CXX_FLAGS_MSAN "-fsanitize=memory -fno-optimize-sibling-calls -fsaniti # UndefinedBehaviour # Run with: UBSAN_OPTIONS=print_stacktrace=1 environment variable -set(CMAKE_C_FLAGS_UBSAN "-fsanitize=undefined -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-omit-frame-pointer -g -Og") -set(CMAKE_CXX_FLAGS_UBSAN "-fsanitize=undefined -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-omit-frame-pointer -g -Og") +set(CMAKE_C_FLAGS_UBSAN "-fsanitize=undefined -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") +set(CMAKE_CXX_FLAGS_UBSAN "-fsanitize=undefined -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-omit-frame-pointer -g ${SANITIZERS_ADDITIONAL_FLAGS}") diff --git a/src/Bitfield.h b/src/Bitfield.h index 8b933d2e..f34d3a9c 100644 --- a/src/Bitfield.h +++ b/src/Bitfield.h @@ -19,6 +19,11 @@ namespace vc4c { + template + using underlying_type = typename std::conditional::value, std::underlying_type, + typename std::conditional::value || std::is_same::value, std::remove_reference, + T>::type>::type; + /* * A bit-field is an integral type with a specific binary layout where different parts of the binary code have * different meanings. @@ -102,7 +107,8 @@ namespace vc4c template constexpr inline T getEntry(UnderlyingType pos, UnderlyingType mask) const noexcept { - return static_cast(mask & getValue(pos)); + auto tmp = static_cast::type>(mask & getValue(pos)); + return static_cast(tmp); } template @@ -193,6 +199,7 @@ namespace vc4c */ struct InstructionPart { + using type = unsigned char; unsigned char value; explicit constexpr InstructionPart(unsigned char val) noexcept : value(val) {} diff --git a/src/Compiler.cpp b/src/Compiler.cpp index 845932ae..a56a84be 100644 --- a/src/Compiler.cpp +++ b/src/Compiler.cpp @@ -108,8 +108,9 @@ std::size_t Compiler::convert() norm.adjust(module); PROFILE_END(SecondNormalizer); + auto kernels = module.getKernels(); const auto f = [&codeGen](Method* kernelFunc) -> void { codeGen.toMachineCode(*kernelFunc); }; - ThreadPool{"CodeGenerator"}.scheduleAll(module.getKernels(), f); + ThreadPool{"CodeGenerator"}.scheduleAll(kernels, f); // TODO could discard unused globals // since they are exported, they are still in the intermediate code, even if not used (e.g. optimized away) diff --git a/src/Types.cpp b/src/Types.cpp index 5e537c71..bb539717 100644 --- a/src/Types.cpp +++ b/src/Types.cpp @@ -449,7 +449,12 @@ bool StructType::operator==(const ComplexType& other) const const StructType* right = dynamic_cast(&other); if(right == nullptr) return false; - return isPacked == right->isPacked && elementTypes == right->elementTypes; + // NOTE: comparing by name is technically not necessary, since we don't actually care whether two struct types with + // the same layout are the exact same types or not. 
It is required, however, to work around a quirk in + // BitcodeReader#toDataType: struct types are first inserted into the TypeHolder without their elements, and the + // element types are only resolved afterwards. If the first element type is itself a newly created struct type, both + // types have no elements yet and are therefore considered equal. This occurs e.g. in testing/bullet/jointSolver.cl. + return name == right->name && isPacked == right->isPacked && elementTypes == right->elementTypes; } unsigned int StructType::getStructSize(const int index) const diff --git a/src/Units.h b/src/Units.h index c30b2d2c..74dd4aa4 100644 --- a/src/Units.h +++ b/src/Units.h @@ -17,6 +17,7 @@ namespace vc4c struct Byte { public: + using type = uint64_t; constexpr explicit Byte(uint64_t val) : value(val) {} constexpr explicit operator uint64_t() const @@ -65,6 +66,7 @@ namespace vc4c struct Word { public: + using type = uint64_t; constexpr explicit Word(uint64_t val) : value(val) {} constexpr explicit Word(Byte bytes) : value(bytes.value / sizeof(uint64_t)) {} diff --git a/src/Values.cpp b/src/Values.cpp index 40abcde4..f52662ac 100644 --- a/src/Values.cpp +++ b/src/Values.cpp @@ -540,7 +540,7 @@ SIMDVector SIMDVector::rotate(uint8_t offset) const & SIMDVector copy(*this); //"Rotates the order of the elements in the range [first,last), in such a way that the element pointed by middle // becomes the new first element." - offset = (NATIVE_VECTOR_SIZE - offset); + offset = static_cast(NATIVE_VECTOR_SIZE - offset); std::rotate(copy.begin(), copy.begin() + offset, copy.end()); return copy; } @@ -549,7 +549,7 @@ SIMDVector SIMDVector::rotate(uint8_t offset) && { //"Rotates the order of the elements in the range [first,last), in such a way that the element pointed by middle // becomes the new first element." 
- offset = (NATIVE_VECTOR_SIZE - offset); + offset = static_cast(NATIVE_VECTOR_SIZE - offset); std::rotate(begin(), begin() + offset, end()); return std::move(*this); } diff --git a/src/asm/OpCodes.cpp b/src/asm/OpCodes.cpp index 5ca6298e..0674cca1 100644 --- a/src/asm/OpCodes.cpp +++ b/src/asm/OpCodes.cpp @@ -754,7 +754,7 @@ static PrecalculatedLiteral calcLiteral(const OpCode& code, Literal firstLit, Li // Tests have shown that on VC4 all shifts (asr, shr, shl) only take the last 5 bits of the offset (modulo 32) auto offset = secondLit.unsignedInt() & 0x1F; // carry is set if bits set are shifted out of the register: val & (2^shift-offset-1) != 0 - auto shiftLoss = firstLit.unsignedInt() & ((1 << offset) - 1); + auto shiftLoss = firstLit.unsignedInt() & ((1u << offset) - 1u); return setFlags(intermediate::asr(firstLit, secondLit), shiftLoss != 0, false); } if(code == OP_CLZ) @@ -851,7 +851,7 @@ static PrecalculatedLiteral calcLiteral(const OpCode& code, Literal firstLit, Li // Tests have shown that on VC4 all shifts (asr, shr, shl) only take the last 5 bits of the offset (modulo 32) auto offset = secondLit.unsignedInt() & 0x1F; // carry is set if bits set are shifted out of the register: val & (2^shift-offset-1) != 0 - auto shiftLoss = firstLit.unsignedInt() & ((1 << offset) - 1); + auto shiftLoss = firstLit.unsignedInt() & ((1u << offset) - 1u); return setFlags(Literal(firstLit.unsignedInt() >> offset), shiftLoss != 0); } if(code == OP_SUB) diff --git a/src/helper.h b/src/helper.h index dff93bbd..c9a03442 100644 --- a/src/helper.h +++ b/src/helper.h @@ -21,7 +21,7 @@ #if __cplusplus > 201402L #define FALL_THROUGH [[fallthrough]]; -#elif defined(__clang_major__) && __clang_major__ >= 4 +#elif defined(__clang_major__) && (__clang_major__ >= 4 || (__clang_major__ == 3 && __clang_minor__ == 9)) #define FALL_THROUGH [[clang::fallthrough]]; #elif defined(__GNUC__) && __GNUC__ >= 7 #define FALL_THROUGH __attribute__((fallthrough)); diff --git a/src/intermediate/VectorHelper.cpp b/src/intermediate/VectorHelper.cpp index de9ff4d4..aa39489e 100644 --- a/src/intermediate/VectorHelper.cpp +++ b/src/intermediate/VectorHelper.cpp @@ -172,7 +172,7 @@ InstructionWalker intermediate::insertVectorInsertion( // other unchanged // we use the mask version of loads to set the elements we want to insert to and then use flags to insert only // those - unsigned maskLit = (1 << value.type.getVectorWidth()) - 1; + unsigned maskLit = (1u << value.type.getVectorWidth()) - 1u; auto shiftedMask = method.addNewLocal(TYPE_INT32, "%vector_mask"); if(auto lit = index.getLiteralValue()) { @@ -392,7 +392,8 @@ InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method else { it = insertVectorConcatenation(it, method, source0, source1, destination); - numCorrespondingIndices = source0.type.getVectorWidth() + source1.type.getVectorWidth(); + numCorrespondingIndices = + static_cast(source0.type.getVectorWidth() + source1.type.getVectorWidth()); } } @@ -413,7 +414,7 @@ InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method std::make_pair(rotateElementNumberDown(maskContainer[i].unsignedInt() - firstVectorSize, i), 1) : // source is from first vector std::make_pair(rotateElementNumberDown(maskContainer[i].unsignedInt(), i), 0); - relativeSources[source] |= (1 << i); + relativeSources[source] |= (1u << i); } for(const auto& sources : relativeSources) diff --git a/src/intermediate/operators.h b/src/intermediate/operators.h index 91d3a1f5..27a4940d 100644 --- 
a/src/intermediate/operators.h +++ b/src/intermediate/operators.h @@ -432,6 +432,12 @@ namespace vc4c it.nextInBlock(); } + void operator=(Value&& src) && + { + it.emplace(new intermediate::MoveOperation(Value(result), std::move(src))); + it.nextInBlock(); + } + NODISCARD ConditionCode operator=(ComparisonWrapper&& op) && { return op(it, result, op.arg0, op.arg1); @@ -483,6 +489,14 @@ namespace vc4c it.nextInBlock(); return result; } + + NODISCARD Value operator=(Value&& src) && + { + auto result = method.addNewLocal(type, name); + it.emplace(new intermediate::MoveOperation(Value(result), std::move(src))); + it.nextInBlock(); + return result; + } }; /** diff --git a/src/intrinsics/Images.cpp b/src/intrinsics/Images.cpp index e7719e55..3f33073b 100644 --- a/src/intrinsics/Images.cpp +++ b/src/intrinsics/Images.cpp @@ -320,5 +320,4 @@ InstructionWalker intermediate::insertQueryMeasurements( else throw CompilationError(CompilationStep::GENERAL, "Unsupported image dimensions", std::to_string(static_cast(imageType->dimensions))); - throw CompilationError(CompilationStep::GENERAL, "Unimplemented image-query function", it->to_string()); } diff --git a/src/intrinsics/Intrinsics.cpp b/src/intrinsics/Intrinsics.cpp index 0b02e601..451d51f7 100644 --- a/src/intrinsics/Intrinsics.cpp +++ b/src/intrinsics/Intrinsics.cpp @@ -644,7 +644,8 @@ static NODISCARD InstructionWalker intrinsifyArithmetic(Method& method, Instruct op->conditional, op->setFlags)) ->copyExtrasFrom(it.get())); } - else if(arg0.getLiteralValue() && isPowerTwo(arg0.getLiteralValue()->signedInt())) + else if(arg0.getLiteralValue() && arg0.getLiteralValue()->signedInt() > 0 && + isPowerTwo(arg0.getLiteralValue()->unsignedInt())) { // a * 2^n = a << n CPPLOG_LAZY(logging::Level::DEBUG, @@ -655,7 +656,8 @@ static NODISCARD InstructionWalker intrinsifyArithmetic(Method& method, Instruct op->conditional, op->setFlags)) ->copyExtrasFrom(it.get())); } - else if(arg1.getLiteralValue() && isPowerTwo(arg1.getLiteralValue()->signedInt())) + else if(arg1.getLiteralValue() && arg1.getLiteralValue()->signedInt() > 0 && + isPowerTwo(arg1.getLiteralValue()->unsignedInt())) { // a * 2^n = a << n CPPLOG_LAZY(logging::Level::DEBUG, @@ -674,7 +676,8 @@ static NODISCARD InstructionWalker intrinsifyArithmetic(Method& method, Instruct op->conditional, op->setFlags)) ->copyExtrasFrom(it.get())); } - else if(arg0.getLiteralValue() && isPowerTwo(arg0.getLiteralValue()->signedInt() + 1)) + else if(arg0.getLiteralValue() && arg0.getLiteralValue()->signedInt() > 0 && + isPowerTwo(arg0.getLiteralValue()->unsignedInt() + 1)) { // x * (2^k - 1) = x * 2^k - x = x << k - x CPPLOG_LAZY(logging::Level::DEBUG, @@ -686,7 +689,8 @@ static NODISCARD InstructionWalker intrinsifyArithmetic(Method& method, Instruct it.reset((new Operation(OP_SUB, op->getOutput().value(), tmp, arg1, op->conditional, op->setFlags)) ->copyExtrasFrom(it.get())); } - else if(arg1.getLiteralValue() && isPowerTwo(arg1.getLiteralValue()->signedInt() + 1)) + else if(arg1.getLiteralValue() && arg1.getLiteralValue()->signedInt() > 0 && + isPowerTwo(arg1.getLiteralValue()->unsignedInt() + 1)) { // x * (2^k - 1) = x * 2^k - x = x << k - x CPPLOG_LAZY(logging::Level::DEBUG, @@ -734,7 +738,8 @@ static NODISCARD InstructionWalker intrinsifyArithmetic(Method& method, Instruct it->addDecorations(InstructionDecorations::UNSIGNED_RESULT); } // a / 2^n = a >> n - else if(arg1.getLiteralValue() && isPowerTwo(arg1.getLiteralValue()->signedInt())) + else if(arg1.getLiteralValue() && arg1.getLiteralValue()->signedInt() 
> 0 && + isPowerTwo(arg1.getLiteralValue()->unsignedInt())) { CPPLOG_LAZY(logging::Level::DEBUG, log << "Intrinsifying division with right-shift: " << op->to_string() << logging::endl); @@ -772,7 +777,7 @@ static NODISCARD InstructionWalker intrinsifyArithmetic(Method& method, Instruct } // a / 2^n = (abs(a) >> n) * sign(a) else if(arg1.isLiteralValue() && arg1.getLiteralValue()->signedInt() > 0 && - isPowerTwo(arg1.getLiteralValue()->signedInt())) + isPowerTwo(arg1.getLiteralValue()->unsignedInt())) { CPPLOG_LAZY(logging::Level::DEBUG, log << "Intrinsifying signed division with right-shift and sign-copy: " << op->to_string() @@ -874,7 +879,7 @@ static NODISCARD InstructionWalker intrinsifyArithmetic(Method& method, Instruct } // a % 2^n = (abs(a) & 2^n-1) * sign(a) else if(arg1.isLiteralValue() && arg1.getLiteralValue()->signedInt() > 0 && - isPowerTwo(arg1.getLiteralValue()->signedInt())) + isPowerTwo(arg1.getLiteralValue()->unsignedInt())) { CPPLOG_LAZY(logging::Level::DEBUG, log << "Intrinsifying signed modulo by power of two: " << op->to_string() << logging::endl); diff --git a/src/llvm/BitcodeReader.cpp b/src/llvm/BitcodeReader.cpp index cbf2be25..995cb73d 100644 --- a/src/llvm/BitcodeReader.cpp +++ b/src/llvm/BitcodeReader.cpp @@ -690,10 +690,10 @@ Method& BitcodeReader::parseFunction(Module& module, const llvm::Function& func) toParameterDecorations(arg, type, func.getCallingConv() == llvm::CallingConv::SPIR_KERNEL))); auto& param = method->parameters.back(); if(arg.getDereferenceableBytes() != 0) - param.maxByteOffset = arg.getDereferenceableBytes(); + param.maxByteOffset = static_cast(arg.getDereferenceableBytes()); #if LLVM_LIBRARY_VERSION >= 39 if(arg.getDereferenceableOrNullBytes() != 0) - param.maxByteOffset = arg.getDereferenceableOrNullBytes(); + param.maxByteOffset = static_cast(arg.getDereferenceableOrNullBytes()); #endif CPPLOG_LAZY(logging::Level::DEBUG, log << "Reading parameter " << param.to_string(true) << logging::endl); if(param.type.getImageType() && func.getCallingConv() == llvm::CallingConv::SPIR_KERNEL) diff --git a/src/main.cpp b/src/main.cpp index 84d217c7..6b536a14 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -100,7 +100,7 @@ static void printHelp() static std::string toVersionString(unsigned version) { std::stringstream s; - s << (version / 10.0f); + s << (static_cast(version) / 10.0f); return s.str(); } diff --git a/src/normalization/AddressCalculation.h b/src/normalization/AddressCalculation.h index b8560c9b..dd7a69d1 100644 --- a/src/normalization/AddressCalculation.h +++ b/src/normalization/AddressCalculation.h @@ -122,4 +122,4 @@ namespace vc4c }; } /* namespace normalization */ } /* namespace vc4c */ -#endif /* VC4C_NORMALIZATION_ADDRESS_CALCULATION_H */ \ No newline at end of file +#endif /* VC4C_NORMALIZATION_ADDRESS_CALCULATION_H */ diff --git a/src/normalization/MemoryAccess.cpp b/src/normalization/MemoryAccess.cpp index 6386e7ba..30a6fb50 100644 --- a/src/normalization/MemoryAccess.cpp +++ b/src/normalization/MemoryAccess.cpp @@ -87,11 +87,12 @@ static BaseAndOffset findBaseAndOffset(const Value& val) ->local() ->getBase(false) ->createReference(), - static_cast((*std::find_if(args.begin(), args.end(), - [](const Value& arg) -> bool { return arg.getLiteralValue().has_value(); })) - .getLiteralValue() - ->signedInt() / - val.type.getElementType().getInMemoryWidth())); + (*std::find_if( + args.begin(), args.end(), [](const Value& arg) -> bool { return arg.getLiteralValue().has_value(); })) + .getLiteralValue() + ->signedInt() / + /* in-memory 
width can't be > 2^30 anyway */ + static_cast(val.type.getElementType().getInMemoryWidth())); } // 3. an addition with two locals -> one is the base, the other the calculation of the literal @@ -103,10 +104,10 @@ static BaseAndOffset findBaseAndOffset(const Value& val) const auto offset1 = findOffset(args[1]); if(offset0.offset && args[1].checkLocal()) return BaseAndOffset(args[1].local()->getBase(false)->createReference(), - static_cast(offset0.offset.value() / val.type.getElementType().getInMemoryWidth())); + offset0.offset.value() / static_cast(val.type.getElementType().getInMemoryWidth())); if(offset1.offset && args[0].checkLocal()) return BaseAndOffset(args[0].local()->getBase(false)->createReference(), - static_cast(offset1.offset.value() / val.type.getElementType().getInMemoryWidth())); + offset1.offset.value() / static_cast(val.type.getElementType().getInMemoryWidth())); } /* if(writers.size() == 1) diff --git a/src/normalization/MemoryMapChecks.cpp b/src/normalization/MemoryMapChecks.cpp index 3a48e5a3..ec5dd209 100644 --- a/src/normalization/MemoryMapChecks.cpp +++ b/src/normalization/MemoryMapChecks.cpp @@ -60,7 +60,7 @@ Optional normalization::getConstantValue(const Value& source) return globalContainer->at(0).toValue(); if(globalContainer && source.local()->reference.second >= 0) // fixed index - return globalContainer->at(source.local()->reference.second).toValue(); + return globalContainer->at(static_cast(source.local()->reference.second)).toValue(); if(auto val = global->initialValue.toValue()) { if(auto vector = val->checkVector()) diff --git a/src/normalization/Normalizer.cpp b/src/normalization/Normalizer.cpp index ea9bab47..78725660 100644 --- a/src/normalization/Normalizer.cpp +++ b/src/normalization/Normalizer.cpp @@ -169,8 +169,9 @@ void Normalizer::normalize(Module& module) const PROFILE_COUNTER_WITH_PREV(vc4c::profiler::COUNTER_NORMALIZATION + 2, "Eliminate Phi-nodes (after)", method->countInstructions(), vc4c::profiler::COUNTER_NORMALIZATION + 1); } + auto kernels = module.getKernels(); // 2. inline kernel-functions - for(Method* kernelFunc : module.getKernels()) + for(Method* kernelFunc : kernels) { Method& kernel = *kernelFunc; @@ -183,14 +184,15 @@ void Normalizer::normalize(Module& module) const } // 3. 
run other normalization steps on kernel functions const auto f = [&module, this](Method* kernelFunc) -> void { normalizeMethod(module, *kernelFunc); }; - ThreadPool{"Normalization"}.scheduleAll(module.getKernels(), f); + ThreadPool{"Normalization"}.scheduleAll(kernels, f); } void Normalizer::adjust(Module& module) const { // run adjustment steps on kernel functions + auto kernels = module.getKernels(); const auto f = [&module, this](Method* kernelFunc) -> void { adjustMethod(module, *kernelFunc); }; - ThreadPool{"Adjustment"}.scheduleAll(module.getKernels(), f); + ThreadPool{"Adjustment"}.scheduleAll(kernels, f); } void Normalizer::normalizeMethod(Module& module, Method& method) const diff --git a/src/optimization/ControlFlow.cpp b/src/optimization/ControlFlow.cpp index 19534c50..74c0867b 100644 --- a/src/optimization/ControlFlow.cpp +++ b/src/optimization/ControlFlow.cpp @@ -458,9 +458,9 @@ static std::size_t fixVPMSetups(ControlFlowLoop& loop, unsigned vectorizationFac { periphery::VPWSetupWrapper vpwSetup(static_cast(nullptr)); if(auto load = it.get()) - vpwSetup = periphery::VPWSetupWrapper(it.get()); + vpwSetup = periphery::VPWSetupWrapper(load); else if(auto move = it.get()) - vpwSetup = periphery::VPWSetupWrapper(it.get()); + vpwSetup = periphery::VPWSetupWrapper(move); else throw CompilationError(CompilationStep::OPTIMIZER, "Unsupported instruction to write VPM for vectorized value", it->to_string()); @@ -479,9 +479,9 @@ static std::size_t fixVPMSetups(ControlFlowLoop& loop, unsigned vectorizationFac { periphery::VPRSetupWrapper vprSetup(static_cast(nullptr)); if(auto load = it.get()) - vprSetup = periphery::VPRSetupWrapper(it.get()); + vprSetup = periphery::VPRSetupWrapper(load); else if(auto move = it.get()) - vprSetup = periphery::VPRSetupWrapper(it.get()); + vprSetup = periphery::VPRSetupWrapper(move); else throw CompilationError(CompilationStep::OPTIMIZER, "Unsupported instruction to write VPM for vectorized value", it->to_string()); @@ -611,7 +611,7 @@ static void vectorize(ControlFlowLoop& loop, InductionVariable& inductionVariabl FastSet openInstructions; const_cast(inductionVariable.local->type) = inductionVariable.local->type.toVectorType( - inductionVariable.local->type.getVectorWidth() * static_cast(vectorizationFactor)); + static_cast(inductionVariable.local->type.getVectorWidth() * vectorizationFactor)); scheduleForVectorization(inductionVariable.local, openInstructions, loop); std::size_t numVectorized = 0; @@ -649,7 +649,8 @@ static void vectorize(ControlFlowLoop& loop, InductionVariable& inductionVariabl // since we are in the process of vectorization, the local type is already updated, but the argument // type is pending. For the next steps, we need to update the argument type. arg.type = arg.local()->type; - auto origVectorType = arg.type.toVectorType(arg.type.getVectorWidth() / vectorizationFactor); + auto origVectorType = + arg.type.toVectorType(static_cast(arg.type.getVectorWidth() / vectorizationFactor)); Optional accumulationOp{}; if(auto writer = arg.getSingleWriter()) @@ -1622,7 +1623,7 @@ static FastAccessList findIfElseBlocks(ControlFlowGraph& graph) if(candidateBlock.successor != nullptr && candidateBlock.conditionalBlocks.size() > 1) blocks.emplace_back(std::move(candidateBlock)); - else if(false) // TODO needs testing! + else if(/* DISABLES CODE */ (false)) // TODO needs testing! { // TODO also needs extension in rewriting, special handling for successor which is conditional! 
// we failed with simple version above, recheck for more complex version: diff --git a/src/optimization/InstructionScheduler.cpp b/src/optimization/InstructionScheduler.cpp index 927e8fe2..ea019b60 100644 --- a/src/optimization/InstructionScheduler.cpp +++ b/src/optimization/InstructionScheduler.cpp @@ -223,7 +223,7 @@ static OpenSet::const_iterator selectInstruction(OpenSet& openNodes, DependencyG // select first entry (with highest priority) for which all dependencies are fulfilled (with latency) int priority = checkDependenciesMet(graph.assertNode(*it), block, openNodes); if(priority < MIN_PRIORITY) - priority -= successiveMandatoryDelays.at(*it); + priority -= static_cast(successiveMandatoryDelays.at(*it)); // TODO use preferred delays? // TODO remove adding/removing priorities in calculateSchedulingPriority? // TODO remove extra cases here? diff --git a/src/optimization/Optimizer.cpp b/src/optimization/Optimizer.cpp index 6233ed91..dd11da03 100644 --- a/src/optimization/Optimizer.cpp +++ b/src/optimization/Optimizer.cpp @@ -227,10 +227,11 @@ static void runOptimizationPasses(const Module& module, Method& method, const Co void Optimizer::optimize(Module& module) const { + auto kernels = module.getKernels(); const auto f = [&](Method* kernelFunc) { runOptimizationPasses(module, *kernelFunc, config, initialPasses, repeatingPasses, finalPasses); }; - ThreadPool{"Optimizer"}.scheduleAll(module.getKernels(), f); + ThreadPool{"Optimizer"}.scheduleAll(kernels, f); } const std::vector Optimizer::ALL_PASSES = { diff --git a/src/periphery/VPM.h b/src/periphery/VPM.h index 2b5274e4..bbac7a7e 100644 --- a/src/periphery/VPM.h +++ b/src/periphery/VPM.h @@ -573,6 +573,9 @@ namespace vc4c Base::value = move->getSource().getLiteralValue().value().toImmediate(); } + SetupWrapper(const SetupWrapper&) = default; + SetupWrapper(SetupWrapper&&) noexcept = default; + ~SetupWrapper() { if(load != nullptr) @@ -581,6 +584,9 @@ namespace vc4c move->setSource(Value(Literal(Base::value), TYPE_INT32)); } + SetupWrapper& operator=(const SetupWrapper&) = default; + SetupWrapper& operator=(SetupWrapper&&) noexcept = default; + inline void resetSetup() const { if(load != nullptr) diff --git a/src/tools/Emulator.cpp b/src/tools/Emulator.cpp index 58d04bee..b07afa8b 100644 --- a/src/tools/Emulator.cpp +++ b/src/tools/Emulator.cpp @@ -153,7 +153,8 @@ void Memory::setUniforms(const std::vector& uniforms, MemoryAddress addres { PROFILE_COUNTER(vc4c::profiler::COUNTER_EMULATOR + 10, "setUniforms", 1); std::size_t offset = address / sizeof(Word); - std::copy_n(uniforms.begin(), uniforms.size(), VariantNamespace::get(data).begin() + offset); + std::copy_n(uniforms.begin(), uniforms.size(), + VariantNamespace::get(data).begin() + static_cast::difference_type>(offset)); } bool Mutex::isLocked() const @@ -1140,12 +1141,12 @@ bool QPU::execute(std::vector::const_iterator firstInstruc { ++instrumentation[inst].numBranchTaken; int32_t offset = 4 /* Branch starts at PC + 4 */ + - static_cast(br->getImmediate() / sizeof(uint64_t)) /* immediate offset is in bytes */; + (br->getImmediate() / static_cast(sizeof(uint64_t))) /* immediate offset is in bytes */; if(br->getAddRegister() == BranchReg::BRANCH_REG || br->getBranchRelative() == BranchRel::BRANCH_ABSOLUTE) throw CompilationError( CompilationStep::GENERAL, "This kind of branch is not yet implemented", br->toASMString()); - nextPC += offset; + nextPC += static_cast(offset); // see Broadcom specification, page 34 registers.writeRegister(toRegister(br->getAddOut(), br->getWriteSwap() 
== WriteSwap::SWAP), @@ -1290,10 +1291,10 @@ static std::pair applyVectorRotation(std::pair(tmp[0].unsignedInt() & 0xF); + distance = static_cast(16 - (tmp[0].unsignedInt() & 0xF)); } else - distance = 16 - offset.getRotationOffset().value(); + distance = static_cast(16 - offset.getRotationOffset().value()); SIMDVector result(std::move(input.first)); @@ -1882,7 +1883,7 @@ static void dumpMemory(const Memory& memory, const std::string& fileName, Memory f << " " << std::hex << std::setfill('0') << std::setw(8) << memory.readWord(addr); if(addr % (sizeof(tools::Word) * 8) == (sizeof(tools::Word) * 7)) f << std::endl; - addr += sizeof(tools::Word); + addr += static_cast(sizeof(tools::Word)); } f << std::endl; CPPLOG_LAZY(logging::Level::DEBUG, @@ -1971,9 +1972,10 @@ EmulationResult tools::emulate(const EmulationData& data) dumpMemory(mem, data.memoryDump, uniformAddress, true); InstrumentationResults instrumentation; - bool status = - emulate(instructions.begin() + (kernelInfo->getOffset() - module.kernelInfos.front().getOffset()).getValue(), - mem, uniformAddresses, instrumentation, data.maxEmulationCycles); + bool status = emulate(instructions.begin() + + static_cast::difference_type>( + (kernelInfo->getOffset() - module.kernelInfos.front().getOffset()).getValue()), + mem, uniformAddresses, instrumentation, data.maxEmulationCycles); if(!data.memoryDump.empty()) dumpMemory(mem, data.memoryDump, uniformAddress, false); @@ -1998,7 +2000,9 @@ EmulationResult tools::emulate(const EmulationData& data) std::unique_ptr dumpInstrumentation; if(!data.instrumentationDump.empty()) dumpInstrumentation.reset(new std::ofstream(data.instrumentationDump)); - auto it = instructions.begin() + (kernelInfo->getOffset() - module.kernelInfos.front().getOffset()).getValue(); + auto it = instructions.begin() + + static_cast::difference_type>( + (kernelInfo->getOffset() - module.kernelInfos.front().getOffset()).getValue()); result.instrumentation.reserve(kernelInfo->getLength().getValue()); while(true) { diff --git a/test/TestMemoryAccess.cpp b/test/TestMemoryAccess.cpp index 92ad400e..d302c7c2 100644 --- a/test/TestMemoryAccess.cpp +++ b/test/TestMemoryAccess.cpp @@ -229,9 +229,9 @@ void TestMemoryAccess::testLocalStorage() // actual results are indeterministic, since the order of the loads/stores across work-items is not guaranteed for(auto res : result.results.at(1).second.value()) { - if(res % 7 != 0) + if(res % 7u != 0) { - TEST_ASSERT_EQUALS(7, res) + TEST_ASSERT_EQUALS(7u, res) } } } diff --git a/test/emulation_helper.h b/test/emulation_helper.h index 9284dfff..4b2ec362 100644 --- a/test/emulation_helper.h +++ b/test/emulation_helper.h @@ -69,7 +69,7 @@ struct NormalDistribution : public std::normal_distribution }; template -struct InfNaNWrapper : private Distribution +struct InfNaNWrapper : public Distribution { explicit InfNaNWrapper(double min, double max) : Distribution(min, max), infNaNDistribution(0.0, 1.0) {} @@ -111,7 +111,8 @@ std::array generateInput(bool allowNull, std::mt19937 gen(rd()); // NOTE: double here allows for float::lowest() and float::max() to be used, see also // https://stackoverflow.com/a/36826730/8720655 - Distribution dis(min, max); + Distribution dis( + static_cast(min), static_cast(max)); for(std::size_t i = 0; i < N; ++i) {