Revert "Optimize transpose copy on CPU using fbgemm transpose (pytorc…
Browse files Browse the repository at this point in the history
…h#83327)"

This reverts commit f56720e.

Reverted pytorch#83327 on behalf of https://github.com/janeyx99 due to Sorry, reverting as this breaks mac functorch tests on trunk https://hud.pytorch.org/pytorch/pytorch/commit/f56720ea7c7ad0bcb4c5af669e28bf7de8122cb6
pytorchmergebot committed Aug 22, 2022
1 parent d1be36c commit 53cda90
Showing 2 changed files with 0 additions and 67 deletions.
54 changes: 0 additions & 54 deletions aten/src/ATen/native/Copy.cpp
@@ -16,7 +16,6 @@
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
#include <torch/library.h>
#include <ATen/native/cpu/utils.h>

#ifdef USE_FBGEMM
#include <fbgemm/Fbgemm.h>
@@ -27,53 +26,6 @@ namespace {

using namespace at;

bool fbgemm_copy_transpose_valid(const Tensor& self, const Tensor& src) {
  const int MIN_SZ = 16 * 32;
  if ((self.device().is_cpu() && src.device().is_cpu()) &&
      (self.layout() == c10::kStrided) && (src.layout() == c10::kStrided) &&
      !self.is_sparse() && !src.is_sparse() && self.is_contiguous() &&
      (self.is_conj() == src.is_conj()) && (self.is_neg() == src.is_neg()) &&
      !self.is_complex() && !src.is_complex() &&
      self.sizes().equals(src.sizes()) && self.dim() >= 2 &&
      src.size(src.dim() - 1) * src.size(src.dim() - 2) >= MIN_SZ &&
      src.stride(src.dim() - 2) == 1 && src.stride(src.dim() - 1) == src.size(src.dim() - 2) &&
      !(src.size(src.dim() - 2) == 1 && src.size(src.dim() - 1) == 1)) {
    // Check src is in contiguous block
    for (long i = 0; i < src.dim() - 2; i++) {
      if (!(src.stride(i) == ((i + 1) == (src.dim() - 2)) ?
          src.stride(src.dim() - 1) * src.size(src.dim() - 1) : src.stride(i + 1) * src.size(i + 1))){
        return false;
      }
    }
  } else {
    return false;
  }
  return true;
}
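For context, a minimal sketch (not from this diff; shapes and names are illustrative) of a source layout that would have satisfied the predicate above: a contiguous 2-D float tensor viewed through t(), copied into a contiguous destination of the same shape.

#include <ATen/ATen.h>

// Sketch only: a layout the removed check would have accepted.
void transposed_copy_example() {
  at::Tensor base = at::rand({64, 64});   // contiguous, strides {64, 1}
  at::Tensor src  = base.t();             // same storage, strides {1, 64}
  at::Tensor dst  = at::empty({64, 64});  // contiguous destination
  // src.stride(dim-2) == 1, src.stride(dim-1) == src.size(dim-2) == 64,
  // and 64 * 64 >= MIN_SZ (16 * 32), so dst.copy_(src) would have taken
  // the fbgemm transpose path before this revert.
  dst.copy_(src);
}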

void fbgemm_copy_transpose_same_type(Tensor& self, const Tensor& src) {
  auto block_size = src.size(src.dim() - 1) * src.size(src.dim() - 2);
  auto ntrans = src.numel() / block_size;
  AT_DISPATCH_ALL_TYPES_AND(kBFloat16, src.scalar_type(),
    "fbgemm_transpose_copy_same_type", [&] {
      at::parallel_for(
          0,
          ntrans,
          at::internal::GRAIN_SIZE / block_size,
          [&](int64_t begin, int64_t end) {
            for (int64_t i = begin; i < end; i++) {
              native::utils::transpose(
                  src.size(src.dim() - 1),
                  src.size(src.dim() - 2),
                  src.data_ptr<scalar_t>() + i * block_size,
                  src.stride(src.dim() - 1),
                  self.data_ptr<scalar_t>() + i * block_size,
                  self.stride(self.dim() - 2));
            }
          });
  });
}
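For a src of logical shape {B, M, N} with the last two dims swapped in memory, block_size is M * N and ntrans is B, so each parallel iteration i transposes the i-th contiguous block. A scalar reference sketch (my own, under the same M/N/leading-dimension convention as native::utils::transpose; fbgemm's transpose_simd does this work with vectorized kernels):

#include <cstdint>

// Sketch only: naive equivalent of one blocked transpose iteration.
// Reads an M x N source with leading dimension ld_src and writes the
// N x M transpose with leading dimension ld_dst.
void transpose_block_reference(int64_t M, int64_t N,
                               const float* src, int64_t ld_src,
                               float* dst, int64_t ld_dst) {
  for (int64_t m = 0; m < M; ++m) {
    for (int64_t n = 0; n < N; ++n) {
      dst[n * ld_dst + m] = src[m * ld_src + n];
    }
  }
}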

bool copy_transpose_valid(const Tensor& self, const Tensor& src) {
const int MIN_SZ = 60 * 60;
return self.is_contiguous() && src.numel() != 0 && src.dim() == 2 &&
@@ -206,12 +158,6 @@ static Tensor & copy_impl(Tensor & self, const Tensor & src, bool non_blocking)
    }
    return self;
  }

  if (fbgemm_copy_transpose_valid(self, src) && src.dtype() == self.dtype() &&
      (src.dtype() == at::kFloat || src.dtype() == at::kBFloat16)) {
    fbgemm_copy_transpose_same_type(self, src);
    return self;
  }
#endif

  if (self.is_same(src)) {
13 changes: 0 additions & 13 deletions aten/src/ATen/native/cpu/utils.h
@@ -93,19 +93,6 @@ inline void transpose<float>(int64_t M, int64_t N, const float* src, int64_t ld_
  TORCH_CHECK(fbgemm::fbgemmSupportedCPU(), "Your CPU does not support FBGEMM.");
  fbgemm::transpose_simd<float>(M, N, src, ld_src, dst, ld_dst);
}

template <>
inline void transpose<BFloat16>(int64_t M, int64_t N, const BFloat16* src, int64_t ld_src, BFloat16* dst, int64_t ld_dst) {
  TORCH_CHECK(fbgemm::fbgemmSupportedCPU(), "Your CPU does not support FBGEMM.");
  fbgemm::transpose_simd<uint16_t>(M, N, reinterpret_cast<const uint16_t*>(src), ld_src, reinterpret_cast<uint16_t*>(dst), ld_dst);
}

template <>
inline void transpose<uint8_t>(int64_t M, int64_t N, const uint8_t* src, int64_t ld_src, uint8_t* dst, int64_t ld_dst) {
  TORCH_CHECK(fbgemm::fbgemmSupportedCPU(), "Your CPU does not support FBGEMM.");
  fbgemm::transpose_simd<uint8_t>(M, N, src, ld_src, dst, ld_dst);
}

#endif

} // namespace utils
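The removed BFloat16 specialization works because a transpose only moves bytes and never interprets them, so any 16-bit element type can ride on the uint16_t SIMD kernel. A generic sketch of that reinterpret trick (names are mine, not from the tree; the kernel parameter stands in for fbgemm::transpose_simd<uint16_t>):

#include <cstdint>
#include <type_traits>

// Sketch only: route any 16-bit trivially-copyable element type through
// a uint16_t transpose kernel, as the BFloat16 specialization did.
template <typename T, typename Kernel>
void transpose_as_u16(int64_t M, int64_t N, const T* src, int64_t ld_src,
                      T* dst, int64_t ld_dst, Kernel kernel) {
  static_assert(sizeof(T) == sizeof(uint16_t), "16-bit elements only");
  static_assert(std::is_trivially_copyable<T>::value, "bitwise moves only");
  kernel(M, N, reinterpret_cast<const uint16_t*>(src), ld_src,
         reinterpret_cast<uint16_t*>(dst), ld_dst);
}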
