From b4b6177deec930d008d8bb42f87f944c8c4a2d73 Mon Sep 17 00:00:00 2001
From: marty1885
Date: Thu, 21 Nov 2024 09:39:32 +0800
Subject: [PATCH 1/4] avoid using host when reshaping from 1D to ND when possible

---
 ggml/src/ggml-metalium/ggml-metalium.cpp | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-metalium/ggml-metalium.cpp b/ggml/src/ggml-metalium/ggml-metalium.cpp
index 240601457b093..096c5de878f96 100644
--- a/ggml/src/ggml-metalium/ggml-metalium.cpp
+++ b/ggml/src/ggml-metalium/ggml-metalium.cpp
@@ -614,11 +614,21 @@ static std::shared_ptr<tt::tt_metal::Tensor> realize_ggml_view_impl(const ggml_t
     else if(ggml_n_dims(src0) == 1 && ggml_n_dims(tensor) > 1) {
         // slow: grab the source tensor and unpad it
         uint32_t offset_elements = offset / ggml_type_size(src0->type);
-        ttnn::SimpleShape start{0, 0, 0, offset_elements};
         auto dst_volume = ggml_nelements(tensor);
-        ttnn::SimpleShape end({1, 1, 1, uint32_t(dst_volume) + offset_elements});
-        auto t = ttnn::untilize(*parent).cpu().unpad(start, end);
-        res = reshape_host_tt_tensor_into_ggml(t, parent->device(), tensor);
+        if(offset_elements % tt::constants::TILE_WIDTH == 0 && dst_volume % tt::constants::TILE_HEIGHT == 0) {
+            std::array<uint32_t, 4> start = {0, 0, 0, offset_elements}, end = {1, 1, 1, uint32_t(dst_volume) + offset_elements};
+            std::array<uint32_t, 4> step = {1, 1, 1, 1};
+            auto t = ttnn::slice(*parent, start, end, step, tt::tt_metal::MemoryConfig());
+            res = reshape_tt_tensor_into_ggml(t, tensor);
+        }
+        else {
+            // THIS is EXTREMELY SLOW. But it works
+            ttnn::SimpleShape start{0, 0, 0, offset_elements};
+            ttnn::SimpleShape end({1, 1, 1, uint32_t(dst_volume) + offset_elements});
+            tt::tt_metal::Tensor tmp = ttnn::untilize(*parent).cpu().unpad(start, end);
+            tmp = ttnn::tilize_with_zero_padding(tmp.to(bufctx->device));
+            res = reshape_host_tt_tensor_into_ggml(tmp, parent->device(), tensor);
+        }
     }
     // The fast path, this is what TTNN is designed for
    else if(dst_size[0] % tt::constants::TILE_WIDTH == 0 && dst_size[1] % tt::constants::TILE_HEIGHT == 0 &&
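Why patch 1 is gated the way it is: TT-NN stores device tensors in 32x32 tiles, so ttnn::slice can carve a 1D view out on device only when the view lands on tile boundaries; unaligned views still pay the untilize/copy-to-host round trip in the else branch. A minimal standalone sketch of that gate, assuming TILE_WIDTH == TILE_HEIGHT == 32 as in tt::constants; device_sliceable is an illustrative helper, not part of the backend:

    #include <cstdint>
    #include <cstdio>

    // Stand-ins for tt::constants::TILE_WIDTH / TILE_HEIGHT (32 on current parts).
    constexpr uint32_t TILE_W = 32;
    constexpr uint32_t TILE_H = 32;

    // Mirrors the patch's gate: the 1D view can be sliced on device only when
    // its start offset and its element count both land on tile boundaries.
    static bool device_sliceable(uint32_t offset_elements, int64_t dst_volume) {
        return offset_elements % TILE_W == 0 && dst_volume % TILE_H == 0;
    }

    int main() {
        std::printf("%d\n", device_sliceable(64, 4096)); // 1 -> ttnn::slice path
        std::printf("%d\n", device_sliceable(5, 4097));  // 0 -> host round trip
    }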
From a2f81b84e21a8c71bff5396af170cac71a1e4d84 Mon Sep 17 00:00:00 2001
From: marty1885
Date: Thu, 21 Nov 2024 11:02:37 +0800
Subject: [PATCH 2/4] support FP32 conversion

---
 ggml/src/ggml-metalium/ggml-metalium.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ggml/src/ggml-metalium/ggml-metalium.cpp b/ggml/src/ggml-metalium/ggml-metalium.cpp
index 096c5de878f96..ebeb40ac366cf 100644
--- a/ggml/src/ggml-metalium/ggml-metalium.cpp
+++ b/ggml/src/ggml-metalium/ggml-metalium.cpp
@@ -400,7 +400,8 @@ void tensor2ggml(const tt::tt_metal::Tensor& tensor, void* dst, [[maybe_unused]]
     }
     // Just putting the integer types here to remind me TT tensors can have integer types
     // But not supported on Grayskull.
-    else if ((std::is_same_v<DstType, bfloat16> && dst_ggtype == GGML_TYPE_BF16) ||
+    else if ((std::is_same_v<DstType, float> && dst_ggtype == GGML_TYPE_F32) ||
+             (std::is_same_v<DstType, bfloat16> && dst_ggtype == GGML_TYPE_BF16) ||
              (std::is_same_v<DstType, int32_t> && dst_ggtype == GGML_TYPE_I32) ||
              (std::is_same_v<DstType, int16_t> && dst_ggtype == GGML_TYPE_I16) ||
              (std::is_same_v<DstType, int8_t> && dst_ggtype == GGML_TYPE_I8)) {
@@ -1455,8 +1456,6 @@ static void ggml_backend_metalium_outer_product(ggml_backend_metalium_context *
     auto src0 = realize_ggml_view(dst->src[0]);
     auto src1 = realize_ggml_view(dst->src[1]);
 
-    std::cout << "src0: " << src0->shape() << " src1: " << src1->shape() << std::endl;
-
     auto res = ttnn::outer(*src0, *src1);
     *dst_meta = {
         .tensor = std::make_shared<tt::tt_metal::Tensor>(res),
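How patch 2's dispatch works: tensor2ggml appears to be templated on the destination element type (the template arguments were lost in transit; DstType above is a reconstruction), and the condition chain short-circuits to a plain copy whenever that compile-time type already matches the requested ggml type. The new line adds the float/GGML_TYPE_F32 pairing so FP32 output takes the direct path too. A self-contained sketch of the pattern, with stand-in names (fake_ggml_type, store, and DstType are illustrative, not the backend's declarations):

    #include <cstdint>
    #include <cstring>
    #include <type_traits>

    enum fake_ggml_type { FAKE_F32, FAKE_BF16, FAKE_I32 };

    // When the compile-time destination type agrees with the runtime ggml
    // type, the "conversion" is just a memcpy; mismatches would fall through
    // to an element-wise converting loop (omitted here).
    template <typename DstType>
    void store(const DstType* src, void* dst, size_t n, fake_ggml_type t) {
        if ((std::is_same_v<DstType, float>   && t == FAKE_F32) ||
            (std::is_same_v<DstType, int32_t> && t == FAKE_I32)) {
            std::memcpy(dst, src, n * sizeof(DstType));
        }
    }

    int main() {
        float in[4] = {1, 2, 3, 4}, out[4];
        store(in, out, 4, FAKE_F32); // types agree: raw copy, no conversion
    }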
From 20e76d1bdc9fbda8790b2959b3ff53264c7407d6 Mon Sep 17 00:00:00 2001
From: marty1885
Date: Thu, 21 Nov 2024 11:58:43 +0800
Subject: [PATCH 3/4] update PCH

---
 ggml/src/ggml-metalium/metalium-pch.hpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-metalium/metalium-pch.hpp b/ggml/src/ggml-metalium/metalium-pch.hpp
index 3cc32d902c4cb..422a4f95218bf 100644
--- a/ggml/src/ggml-metalium/metalium-pch.hpp
+++ b/ggml/src/ggml-metalium/metalium-pch.hpp
@@ -1,7 +1,9 @@
 #include
 #ifdef __cplusplus
+#include "common/base_types.hpp"
 #include "common/bfloat16.hpp"
 #include "common/constants.hpp"
+#include "common/logger.hpp"
 #include "device/tt_arch_types.h"
 #include "ggml-backend-impl.h"
 #include "ggml-backend.h"
@@ -12,18 +14,24 @@
 #include "host_api.hpp"
 #include "impl/dispatch/command_queue.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/operations/data_movement/untilize_with_unpadding/untilize_with_unpadding.hpp"
+#include "ttnn/operations/eltwise/binary/binary_composite.hpp"
 #include "ttnn/operations/eltwise/unary/unary.hpp"
-#include "ttnn/operations/experimental/auto_format/auto_format.hpp"
+#include "ttnn/operations/moreh/moreh_group_norm/moreh_group_norm.hpp"
 #include "ttnn/operations/normalization/softmax/device/softmax_op.hpp"
+#include "ttnn/tensor/tensor.hpp"
 #include "ttnn/tensor/types.hpp"
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
@@ -36,13 +44,12 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
-#include
 #include
-#include
-#include
+#include
 #include
 #include
 #include
@@ -51,4 +58,5 @@
 #include
 #include
 #include
+#include
 #endif

From 9ff79cb764b452daee119ef96dd9b2d13d97c63e Mon Sep 17 00:00:00 2001
From: marty1885
Date: Thu, 21 Nov 2024 12:34:27 +0800
Subject: [PATCH 4/4] update

---
 ggml/src/ggml-metalium/ggml-metalium.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/ggml/src/ggml-metalium/ggml-metalium.cpp b/ggml/src/ggml-metalium/ggml-metalium.cpp
index ebeb40ac366cf..3a0a4e9bea97a 100644
--- a/ggml/src/ggml-metalium/ggml-metalium.cpp
+++ b/ggml/src/ggml-metalium/ggml-metalium.cpp
@@ -13,8 +13,6 @@
 #include "host_api.hpp"
 #include "impl/dispatch/command_queue.hpp"
 #include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
-#include "ttnn/operations/data_movement/reshape_on_device/reshape.hpp"
-#include "ttnn/operations/data_movement/untilize_with_unpadding/untilize_with_unpadding.hpp"
 #include "ttnn/operations/eltwise/binary/binary_composite.hpp"
 #include "ttnn/operations/eltwise/unary/unary.hpp"
 #include "ttnn/operations/moreh/moreh_group_norm/moreh_group_norm.hpp"
@@ -41,7 +39,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -1622,8 +1619,6 @@ static void ggml_backend_metalium_buffer_set_tensor(ggml_backend_buffer_t buffer
 
     tt::tt_metal::Tensor t(std::move(storage), ttnn::Shape(shape)
         , tt::tt_metal::DataType::BFLOAT16, tt::tt_metal::Layout::ROW_MAJOR);
-    // I think we can allow this.. right?
-    // GGML_ASSERT(!bufctx->tensors.contains(offset));
     tt::ARCH processor_class = bufctx->device->arch();
     t = ttnn::tilize_with_zero_padding(t.to(bufctx->device));
     tt::tt_metal::DataType final_type = ggml2tt_type(ggtype, processor_class);
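On the set_tensor context in the final hunk: the host buffer is wrapped as a ROW_MAJOR BFLOAT16 tensor, moved to the device, and ttnn::tilize_with_zero_padding rounds the trailing dimensions up to the 32x32 tile grid (filling with zeros) before the data is converted to its final on-device type for the architecture at hand. A standalone sketch of that padding arithmetic; pad_up is a local helper for illustration, not a tt-metal call:

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t TILE = 32; // tile edge per tt::constants on current parts

    // Round a dimension up to the next tile multiple, as
    // ttnn::tilize_with_zero_padding does with zero fill.
    static uint32_t pad_up(uint32_t n) { return (n + TILE - 1) / TILE * TILE; }

    int main() {
        // e.g. a 3x100 innermost plane is stored as 32x128 once tilized
        std::printf("%u x %u\n", pad_up(3), pad_up(100)); // 32 x 128
    }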