diff --git a/CMakeLists.txt b/CMakeLists.txt index 45299188b..7056689db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,6 +147,7 @@ add_subdirectory(debugging/kernel-logger) # Profilers if(NOT WIN32) + add_subdirectory(profiling/functor-size) add_subdirectory(profiling/simple-kernel-timer) add_subdirectory(profiling/memory-hwm) if(KokkosTools_ENABLE_MPI) diff --git a/README.md b/README.md index 73f1a902a..a0b8c0d33 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,10 @@ The following provides an overview of the tools available in the set of Kokkos T Prints Kokkos Kernel and Region events during runtime. ++ [**Functor Size**](https://github.com/kokkos/kokkos-tools/wiki/FunctorSize) + + Prints information about the size of the functor objects passed to Kokkos parallel regions. + ### 3rd Party Profiling Tool Hooks + [**VTuneConnector:**](https://github.com/kokkos/kokkos-tools/wiki/VTuneConnector) diff --git a/profiling/all/impl/Kokkos_Profiling_C_Interface.h b/profiling/all/impl/Kokkos_Profiling_C_Interface.h index c4aa9cce3..2c0e0a9a2 100644 --- a/profiling/all/impl/Kokkos_Profiling_C_Interface.h +++ b/profiling/all/impl/Kokkos_Profiling_C_Interface.h @@ -1,3 +1,4 @@ +/* //@HEADER // ************************************************************************ // @@ -9,10 +10,11 @@ // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
+// // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER +*/ #ifndef KOKKOS_PROFILING_C_INTERFACE_HPP #define KOKKOS_PROFILING_C_INTERFACE_HPP @@ -26,7 +28,7 @@ #include #endif -#define KOKKOSP_INTERFACE_VERSION 20210623 +#define KOKKOSP_INTERFACE_VERSION 20211015 // Profiling @@ -38,6 +40,13 @@ struct Kokkos_Profiling_SpaceHandle { char name[64]; }; +#define KOKKOS_PROFILING_KERNEL_STATIC_INFO_SIZE 512 +struct Kokkos_Profiling_Kernel_Static_Info { + uint64_t functor_size; // sizeof the functor + + char padding[KOKKOS_PROFILING_KERNEL_STATIC_INFO_SIZE - sizeof(uint64_t)]; +}; + // NOLINTNEXTLINE(modernize-use-using): C compatibility typedef void (*Kokkos_Profiling_initFunction)( const int, const uint64_t, const uint32_t, @@ -54,6 +63,10 @@ typedef void (*Kokkos_Profiling_beginFunction)(const char*, const uint32_t, // NOLINTNEXTLINE(modernize-use-using): C compatibility typedef void (*Kokkos_Profiling_endFunction)(uint64_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_markKernelStaticInfoFunction)( + uint64_t, const struct Kokkos_Profiling_Kernel_Static_Info*); + // NOLINTNEXTLINE(modernize-use-using): C compatibility typedef void (*Kokkos_Profiling_pushFunction)(const char*); // NOLINTNEXTLINE(modernize-use-using): C compatibility @@ -247,6 +260,7 @@ struct Kokkos_Profiling_EventSet { Kokkos_Profiling_dualViewSyncFunction sync_dual_view; Kokkos_Profiling_dualViewModifyFunction modify_dual_view; Kokkos_Profiling_declareMetadataFunction declare_metadata; + Kokkos_Profiling_markKernelStaticInfoFunction mark_kernel_static_info; Kokkos_Tools_provideToolProgrammingInterfaceFunction provide_tool_programming_interface; Kokkos_Tools_requestToolSettingsFunction request_tool_settings; @@ -257,7 +271,8 @@ struct Kokkos_Profiling_EventSet { Kokkos_Tools_contextBeginFunction begin_tuning_context; Kokkos_Tools_contextEndFunction end_tuning_context; Kokkos_Tools_optimizationGoalDeclarationFunction 
declare_optimization_goal; - char padding[232 * + + char padding[231 * sizeof( Kokkos_Tools_functionPointer)]; // allows us to add another // 256 events to the Tools diff --git a/profiling/all/impl/Kokkos_Profiling_Interface.hpp b/profiling/all/impl/Kokkos_Profiling_Interface.hpp index 82ba15d19..22f8601b1 100644 --- a/profiling/all/impl/Kokkos_Profiling_Interface.hpp +++ b/profiling/all/impl/Kokkos_Profiling_Interface.hpp @@ -19,6 +19,7 @@ #include #include +#include #include @@ -45,6 +46,7 @@ enum struct DeviceType { HPX, Threads, SYCL, + OpenACC, Unknown }; @@ -53,6 +55,12 @@ struct ExecutionSpaceIdentifier { uint32_t device_id; uint32_t instance_id; }; + +constexpr const uint32_t num_type_bits = 8; +constexpr const uint32_t num_device_bits = 7; +constexpr const uint32_t num_instance_bits = 17; +constexpr const uint32_t num_avail_bits = sizeof(uint32_t) * CHAR_BIT; + inline DeviceType devicetype_from_uint32t(const uint32_t in) { switch (in) { case 0: return DeviceType::Serial; @@ -63,37 +71,35 @@ inline DeviceType devicetype_from_uint32t(const uint32_t in) { case 5: return DeviceType::HPX; case 6: return DeviceType::Threads; case 7: return DeviceType::SYCL; + case 8: return DeviceType::OpenACC; default: return DeviceType::Unknown; // TODO: error out? 
} } inline ExecutionSpaceIdentifier identifier_from_devid(const uint32_t in) { - // ExecutionSpaceIdentifier out; - // out.type = in >> 24; - // out.device_id = in >> 17; - // out.instance_id = ((uint32_t(-1)) << 17 ) & in; - return {devicetype_from_uint32t(in >> 24), - (~((uint32_t(-1)) << 24)) & (in >> 17), - (~((uint32_t(-1)) << 17)) & in}; + constexpr const uint32_t shift = num_avail_bits - num_type_bits; + + return {devicetype_from_uint32t(in >> shift), /*First 8 bits*/ + (~((uint32_t(-1)) << num_device_bits)) & + (in >> num_instance_bits), /*Next 7 bits */ + (~((uint32_t(-1)) << num_instance_bits)) & in}; /*Last 17 bits*/ } template struct DeviceTypeTraits; -constexpr const size_t device_type_bits = 8; -constexpr const size_t instance_bits = 24; template constexpr uint32_t device_id_root() { - /** uncomment when C++14 is enabled constexpr auto device_id = static_cast(DeviceTypeTraits::id); - return (device_id << instance_bits); - */ - return 0; + return (device_id << (num_instance_bits + num_device_bits)); } template inline uint32_t device_id(ExecutionSpace const& space) noexcept { - return device_id_root() + space.impl_instance_id(); + return device_id_root() + + (DeviceTypeTraits::device_id(space) + << num_instance_bits) + + space.impl_instance_id(); } } // namespace Experimental } // namespace Tools @@ -116,6 +122,13 @@ using SpaceHandle = Kokkos_Profiling_SpaceHandle; namespace Tools { +using KernelStaticInfo = Kokkos_Profiling_Kernel_Static_Info; + +static_assert(sizeof(KernelStaticInfo) == + KOKKOS_PROFILING_KERNEL_STATIC_INFO_SIZE, + "Internal kokkos developer error. 
Please report this error, and " + "provide information about your compiler and target platform."); + namespace Experimental { using EventSet = Kokkos_Profiling_EventSet; static_assert(sizeof(EventSet) / sizeof(Kokkos_Tools_functionPointer) == 275, @@ -162,6 +175,8 @@ using endFenceFunction = Kokkos_Profiling_endFenceFunction; using dualViewSyncFunction = Kokkos_Profiling_dualViewSyncFunction; using dualViewModifyFunction = Kokkos_Profiling_dualViewModifyFunction; using declareMetadataFunction = Kokkos_Profiling_declareMetadataFunction; +using markKernelStaticInfoFunction = + Kokkos_Profiling_markKernelStaticInfoFunction; } // namespace Tools diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp index c63db1863..17e3f3b41 100644 --- a/profiling/all/kp_core.hpp +++ b/profiling/all/kp_core.hpp @@ -49,6 +49,7 @@ using Kokkos::Tools::SpaceHandle; #define EXPOSE_BEGIN_FENCE(FUNC_NAME) #define EXPOSE_END_FENCE(FUNC_NAME) #define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) +#define EXPOSE_MARK_KERNEL_STATIC_INFO(FUNC_NAME) #else @@ -197,5 +198,13 @@ using Kokkos::Tools::SpaceHandle; const char* name, const void* const ptr, bool is_device) { \ FUNC_NAME(name, ptr, is_device); \ } + +#define EXPOSE_MARK_KERNEL_STATIC_INFO(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_mark_kernel_static_info( \ + const uint64_t kernelID, \ + const Kokkos_Profiling_Kernel_Static_Info* info) { \ + FUNC_NAME(kernelID, info); \ + } + #endif #endif // KOKKOSTOOLS_KOKKOSINTERFACE_HPP diff --git a/profiling/functor-size/CMakeLists.txt b/profiling/functor-size/CMakeLists.txt new file mode 100644 index 000000000..018512a80 --- /dev/null +++ b/profiling/functor-size/CMakeLists.txt @@ -0,0 +1 @@ +kp_add_library(kp_functor_size kp_functor_size.cpp) \ No newline at end of file diff --git a/profiling/functor-size/Makefile b/profiling/functor-size/Makefile new file mode 100644 index 000000000..bb3b0c2f8 --- /dev/null +++ b/profiling/functor-size/Makefile @@ -0,0 +1,14 @@ + + 
+CXX=g++
+CXXFLAGS=-shared -O3 -fPIC -std=c++17
+
+MAKEFILE_PATH := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+
+CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}../../common/makefile-only -I${MAKEFILE_PATH}../all
+
+kp_functor_size.so: ${MAKEFILE_PATH}kp_functor_size.cpp
+	$(CXX) $(CXXFLAGS) -o $@ $<
+
+clean: # -f: succeed even when nothing has been built yet
+	rm -f *.so
diff --git a/profiling/functor-size/kp_functor_size.cpp b/profiling/functor-size/kp_functor_size.cpp
new file mode 100644
index 000000000..4d9ca2c5c
--- /dev/null
+++ b/profiling/functor-size/kp_functor_size.cpp
@@ -0,0 +1,182 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "kp_core.hpp"
+
+namespace KokkosTools {
+namespace FunctorSize {
+
+// Gates WARN output; nothing in this file ever changes it.
+bool show_warnings = true;
+
+// Diagnostics go to stderr. do/while(0) makes each macro expand to a single
+// statement, so `if (c) WARN(x); else ...` parses as intended.
+#define WARN(x)                                                          \
+  do {                                                                   \
+    if (show_warnings) {                                                 \
+      std::cerr << "KokkosP: Functor Size: WARNING: " << x << std::endl; \
+    }                                                                    \
+  } while (0)
+#define ERROR(x)                                                      \
+  do {                                                                \
+    std::cerr << "KokkosP: Functor Size: ERROR: " << x << std::endl; \
+  } while (0)
+
+// NOTE(review): sizes are uint64_t (taken from functor_size); the counter
+// type is assumed to be uint64_t as well -- confirm.
+// Sizes seen for kernel IDs with no recorded name: [size] = count
+std::unordered_map<uint64_t, uint64_t> anonCount;
+// Sizes seen per kernel name: [name][size] = count
+std::unordered_map<std::string, std::unordered_map<uint64_t, uint64_t>>
+    nameCounts;
+// Kernel name indexed by the ID handed out in begin_parallel().
+std::vector<std::string> names;
+// Next kernel ID to hand out.
+uint64_t uniqueID = 0;
+
+/// Init hook: logs the load sequence and interface version to stderr.
+void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
+                          const uint32_t /*devInfoCount*/,
+                          Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) {
+  std::cerr << "KokkosP: FunctorSize Library Initialized (sequence is "
+            << loadSeq << ", interface version: " << interfaceVer << ")"
+            << std::endl;
+}
+
+/// Writes a header row, then one `size<delim>count<delim>name` row per
+/// recorded (name, size) pair, then the rows for IDs without a recorded name
+/// under a fixed placeholder name. `name` is written last and unescaped.
+void dump_csv(std::ostream& os, const std::string_view delim = ",") {
+  // '\n' instead of std::endl: do not flush the stream after every row.
+  os << "size" << delim << "count" << delim << "name" << '\n';
+
+  for (const auto& [name, counts] : nameCounts) {
+    for (const auto& [size, count] : counts) {
+      os << size << delim << count << delim << name << '\n';
+    }
+  }
+  for (const auto& [size, count] : anonCount) {
+    os << size << delim << count << delim
+       << "KOKKOSP_FUNCTOR_SIZE_ANONYMOUS_FUNCTION" << '\n';
+  }
+  os.flush();  // one flush once the whole table has been written
+}
+
+/// Finalize hook: always dumps the table to stdout; additionally writes it
+/// to the file named by KOKKOSP_FUNCTOR_SIZE_OUTPUT_CSV_PATH when that
+/// variable is set and non-empty, reporting an error if it cannot be opened.
+void kokkosp_finalize_library() {
+  std::cout << std::endl
+            << "KokkosP: Finalization of Functor Size profiling library."
+            << std::endl;
+
+  const char* output_csv_path =
+      std::getenv("KOKKOSP_FUNCTOR_SIZE_OUTPUT_CSV_PATH");
+
+  if (output_csv_path && !std::string_view(output_csv_path).empty()) {
+    std::ofstream os(output_csv_path);
+    if (os) {
+      dump_csv(os);
+    } else {
+      ERROR(output_csv_path << " couldn't be opened");
+    }
+  }
+  dump_csv(std::cout, ",");
+}
+
+/// Shared body of the three begin_parallel_* hooks: stores the next kernel ID
+/// in `*kID` and records `name` under it. A null `name` still consumes an ID
+/// but records nothing.
+void begin_parallel(const char* name, uint64_t* kID) {
+  *kID = uniqueID++;
+  if (nullptr == name) {
+    WARN("Ignoring kernel ID "
+         << *kID << " with null name. Results may be incomplete");
+    return;
+  }
+
+  if (*kID < names.size()) {
+    WARN("set new name \"" << name << "\" for previously-seen kernel ID "
+                           << *kID);
+  } else {
+    names.resize((*kID) + 1);  // may have skipped if name was null previously
+  }
+  names[*kID] = name;
+}
+
+/// parallel_for hook; forwards to begin_parallel().
+void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/,
+                                uint64_t* kID) {
+  begin_parallel(name, kID);
+}
+
+/// parallel_reduce hook; forwards to begin_parallel().
+void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/,
+                                   uint64_t* kID) {
+  begin_parallel(name, kID);
+}
+
+/// parallel_scan hook; forwards to begin_parallel().
+void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/,
+                                 uint64_t* kID) {
+  begin_parallel(name, kID);
+}
+
+/// Counts one occurrence of `info->functor_size` for `kernelID`: under the
+/// recorded kernel name when the ID indexes into `names`, otherwise in the
+/// anonymous table. A null `info` is reported and ignored.
+void kokkosp_mark_kernel_static_info(
+    const uint64_t kernelID, const Kokkos_Profiling_Kernel_Static_Info* info) {
+  if (!info) {
+    WARN("Kokkos provided null info");
+    return;
+  }
+  const uint64_t size = info->functor_size;
+
+  // operator[] value-initializes a missing counter to 0, so a single chained
+  // lookup both creates and increments the entry.
+  if (kernelID < names.size()) {
+    ++nameCounts[names[kernelID]][size];
+  } else {
+    WARN("never-before seen kernel ID \"" << kernelID << "\".");
+    ++anonCount[size];
+  }
+}
+
+}  // namespace FunctorSize
+}  // namespace KokkosTools
+
+extern "C" {
+
+namespace impl = KokkosTools::FunctorSize;
+
+EXPOSE_INIT(impl::kokkosp_init_library)
+EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
+EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
+EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
+EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
+EXPOSE_MARK_KERNEL_STATIC_INFO(impl::kokkosp_mark_kernel_static_info)
+
+}  // extern "C"