From 0a07488ed2c47765e337e290bd138c0e6e459cbd Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Tue, 19 Oct 2021 03:25:14 -0700 Subject: [PATCH] use irange for loops 1 (#66741) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/66741 Modified loops in files under fbsource/fbcode/caffe2/ from the format `for(TYPE var=x0;var<x_max;x++)` to the format `for(const auto var : c10::irange(x_max))`. #include +#include #include #include @@ -97,7 +98,7 @@ static at::Tensor newAtTensor( std::vector shapeVec{}; shapeVec.reserve(rank); auto numel = 1; - for (auto i = 0; i < rank; ++i) { + for (const auto i : c10::irange(rank)) { shapeVec.push_back(shapeArr[i]); numel *= shapeArr[i]; } @@ -521,7 +522,7 @@ at::IValue JIValue::JIValueToAtIValue( std::vector elements; elements.reserve(n); - for (auto i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { auto jivalue_element = jarray->getElement(i); auto element = JIValue::JIValueToAtIValue(jivalue_element); elements.push_back(std::move(element)); } @@ -535,7 +536,7 @@ at::IValue JIValue::JIValueToAtIValue( size_t n = jArrayPinned.size(); c10::List list{}; list.reserve(n); - for (size_t i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { list.push_back(jArrayPinned[i]); } return at::IValue{std::move(list)}; @@ -547,7 +548,7 @@ at::IValue JIValue::JIValueToAtIValue( size_t n = jArrayPinned.size(); c10::List list{}; list.reserve(n); - for (size_t i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { list.push_back(jArrayPinned[i]); } return at::IValue{std::move(list)}; @@ -559,7 +560,7 @@ at::IValue JIValue::JIValueToAtIValue( size_t n = jArrayPinned.size(); c10::List list{}; list.reserve(n); - for (size_t i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { list.push_back(jArrayPinned[i]); } return at::IValue{std::move(list)}; @@ -572,7 +573,7 @@ at::IValue JIValue::JIValueToAtIValue( size_t n = jArray->size(); c10::List list{}; list.reserve(n); - for (size_t i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { list.push_back( TensorHybrid::newAtTensorFromJTensor(jArray->getElement(i))); } @@ -594,7 +595,7 @@ at::IValue JIValue::JIValueToAtIValue( c10::impl::GenericList list{c10::unshapedType(first_element.type())}; list.reserve(n); list.push_back(first_element); - for (auto i = 1; i < n; ++i) { + for (const auto i : c10::irange(1, n)) { auto jivalue_element = jarray->getElement(i); auto element = JIValue::JIValueToAtIValue(jivalue_element); list.push_back(element); diff --git a/android/pytorch_android/src/main/cpp/pytorch_jni_lite.cpp b/android/pytorch_android/src/main/cpp/pytorch_jni_lite.cpp index b67799672ec291..86fd1e2260f9ca 100644 --- a/android/pytorch_android/src/main/cpp/pytorch_jni_lite.cpp +++ b/android/pytorch_android/src/main/cpp/pytorch_jni_lite.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -157,7 +158,7 @@ class PytorchJni : public facebook::jni::HybridClass { std::vector inputs{}; size_t n = jinputs->size(); inputs.reserve(n); - for (size_t i = 0; i < n; i++) { + for (const auto i : c10::irange(n)) { at::IValue atIValue = JIValue::JIValueToAtIValue(jinputs->getElement(i)); if (at::kVulkan == deviceType_) { inputs.push_back( @@ -186,7 +187,7 @@ class PytorchJni : public facebook::jni::HybridClass { std::vector inputs{}; size_t n = jinputs->size(); inputs.reserve(n); - for (size_t i = 0; i < n; i++) { + for (const auto i : c10::irange(n)) { at::IValue atIValue = JIValue::JIValueToAtIValue(jinputs->getElement(i)); if (at::kVulkan == deviceType_) { inputs.push_back( diff --git a/aten/src/ATen/BatchingRegistrations.cpp
b/aten/src/ATen/BatchingRegistrations.cpp index b2dcaa04b12c7d..e2292e1964e029 100644 --- a/aten/src/ATen/BatchingRegistrations.cpp +++ b/aten/src/ATen/BatchingRegistrations.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace at { @@ -329,7 +330,7 @@ Tensor permute_batching_rule(const Tensor& self, IntArrayRef dims) { VmapDimVector all_dims_physical; all_dims_physical.reserve(self_physical.tensor().dim()); - for (int64_t bdim = 0; bdim < self_physical.numBatchDims(); bdim++) { + for (const auto bdim : c10::irange(self_physical.numBatchDims())) { all_dims_physical.push_back(bdim); } all_dims_physical.insert( diff --git a/aten/src/ATen/CPUApplyUtils.h b/aten/src/ATen/CPUApplyUtils.h index 229eaf48be5020..d2fb3ac96305fa 100644 --- a/aten/src/ATen/CPUApplyUtils.h +++ b/aten/src/ATen/CPUApplyUtils.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -130,7 +131,7 @@ inline Tensor sort_strides(Tensor& tensor_) { IntArrayRef strides = tensor_.strides(); std::vector indices; indices.reserve(tensor_.ndimension()); - for (int64_t i = 0; i < tensor_.ndimension(); i++) { + for (const auto i : c10::irange(tensor_.ndimension())) { indices.push_back(i); } std::sort(indices.begin(), indices.end(), [&strides](int64_t i1, int64_t i2) { @@ -196,7 +197,7 @@ inline bool _all_equal_numel(at::ArrayRef tensors) { if (tensors.size() == 0) return true; int64_t all_numel = tensors[0].numel(); - for (size_t i = 1; i < tensors.size(); i++) { + for (const auto i : c10::irange(1, tensors.size())) { if (tensors[i].numel() != all_numel) return false; } diff --git a/aten/src/ATen/Context.h b/aten/src/ATen/Context.h index bbd021ef5504c2..28d7ea35094a35 100644 --- a/aten/src/ATen/Context.h +++ b/aten/src/ATen/Context.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -351,7 +352,7 @@ static inline void manual_seed(uint64_t seed) { // available. In that case, we must not seed CUDA; it will fail! 
const auto num_gpus = detail::getCUDAHooks().getNumGPUs(); if (hasCUDA() && num_gpus > 0) { - for (int i = 0; i < num_gpus; i++) { + for (const auto i : c10::irange(num_gpus)) { auto cuda_gen = globalContext().defaultGenerator( Device(at::kCUDA, static_cast(i)) ); diff --git a/aten/src/ATen/ExpandUtils.cpp b/aten/src/ATen/ExpandUtils.cpp index 710c27170958af..35588ac62a29cc 100644 --- a/aten/src/ATen/ExpandUtils.cpp +++ b/aten/src/ATen/ExpandUtils.cpp @@ -197,7 +197,7 @@ std::vector infer_dense_strides(IntArrayRef tensor_sizes, IntArrayRef t // compute output strides which preserves the input tensor's memory layout std::vector out_strides(ndim); int64_t curr_stride = 1; - for (size_t i = 0; i < ndim; ++i) { + for (const auto i : c10::irange(ndim)) { int64_t idx = perm[i]; out_strides[idx] = curr_stride; // Note: for size 0, we simply treated it as 1, it really doesn't matter here diff --git a/aten/src/ATen/ExpandUtils.h b/aten/src/ATen/ExpandUtils.h index 3f15e778618b68..55a392c8d9cc3a 100644 --- a/aten/src/ATen/ExpandUtils.h +++ b/aten/src/ATen/ExpandUtils.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -266,7 +267,7 @@ inline std::vector expand_outplace(TensorList to_expand) { // expands a list of Tensors; ignores undefined (null) tensors bool first = true; DimVector sizes; - for (size_t i = 0; i < to_expand.size(); ++i) { + for (const auto i : c10::irange(to_expand.size())) { if (!to_expand[i].defined()) { continue; } else if (first) { @@ -278,7 +279,7 @@ inline std::vector expand_outplace(TensorList to_expand) { } std::vector result(to_expand.size()); - for (size_t i = 0; i < to_expand.size(); ++i) { + for (const auto i : c10::irange(to_expand.size())) { if (!to_expand[i].defined()) { continue; } else if (to_expand[i].sizes().equals(sizes)) { @@ -299,7 +300,7 @@ static inline Tensor sum_to(Tensor tensor, const IntArrayRef shape) { c10::SmallVector reduce_dims; const at::IntArrayRef sizes = tensor.sizes(); const int64_t leading_dims = sizes.size() - shape.size(); - for (int64_t i = 0; i < leading_dims; ++i) { + for (const auto i : c10::irange(leading_dims)) { reduce_dims.push_back(i); } for (int64_t i = leading_dims; i < static_cast(sizes.size()); ++i) { @@ -320,7 +321,7 @@ static inline bool is_expandable_to(IntArrayRef shape, IntArrayRef desired) { if (ndim > target_dim) { return false; } - for (size_t i = 0; i < ndim; i++) { + for (const auto i : c10::irange(ndim)) { int64_t size = shape[ndim - i - 1]; int64_t target = desired[target_dim - i - 1]; if (size != target && size != 1) { diff --git a/aten/src/ATen/MemoryOverlap.cpp b/aten/src/ATen/MemoryOverlap.cpp index 232fda6bac10f0..edeca5e4bac1a4 100644 --- a/aten/src/ATen/MemoryOverlap.cpp +++ b/aten/src/ATen/MemoryOverlap.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace at { @@ -17,7 +18,7 @@ MemOverlap has_internal_overlap(TensorImpl* t) { auto strides = t->strides(); auto sizes = t->sizes(); - for (size_t i = 0; i < strides.size(); ++i) { + for (const auto i : c10::irange(strides.size())) { if (strides[i] == 0 && sizes[i] > 1) { return MemOverlap::YES; } diff --git a/aten/src/ATen/NamedTensorUtils.cpp b/aten/src/ATen/NamedTensorUtils.cpp index 782a266a8aeda3..24a85b4ce7085a 100644 --- a/aten/src/ATen/NamedTensorUtils.cpp +++ b/aten/src/ATen/NamedTensorUtils.cpp @@ -225,7 +225,7 @@ std::vector compute_squeeze_outnames(const Tensor& tensor) { } std::vector outnames; auto tensor_names = tensor.names(); - for (int64_t d = 0; d < tensor.dim(); d++) { + for (const auto d : 
c10::irange(tensor.dim())) { if (tensor.sizes()[d] != 1) { outnames.push_back(tensor_names[d]); } @@ -242,7 +242,7 @@ std::vector compute_diagonal_outnames( } std::vector outnames; auto tensor_names = tensor.names(); - for (int64_t d = 0; d < tensor.dim(); d++) { + for (const auto d : c10::irange(tensor.dim())) { if (d == dim1 || d == dim2) { continue; } diff --git a/aten/src/ATen/ParallelNative.cpp b/aten/src/ATen/ParallelNative.cpp index 753dbdb751e68e..bade0b26d54d81 100644 --- a/aten/src/ATen/ParallelNative.cpp +++ b/aten/src/ATen/ParallelNative.cpp @@ -6,6 +6,7 @@ #ifndef C10_MOBILE #include +#include #else #include #endif // C10_MOBILE @@ -87,7 +88,7 @@ TaskThreadPoolBase& _get_intraop_pool() { // `fn` will be called with params: (thread_pool_task_id, task_id). void _run_with_pool(const std::function& fn, size_t range) { #ifndef C10_MOBILE - for (size_t i = 1; i < range; ++i) { + for (const auto i : c10::irange(1, range)) { _get_intraop_pool().run([fn, i]() { fn((int)i, i); }); } // Run the first task on the current thread directly. diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h index e2fc89a9db8498..78ebb25e15b1f5 100644 --- a/aten/src/ATen/SparseTensorImpl.h +++ b/aten/src/ATen/SparseTensorImpl.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace at { struct TORCH_API SparseTensorImpl : public TensorImpl { @@ -109,7 +110,7 @@ struct TORCH_API SparseTensorImpl : public TensorImpl { bool shrinking_dense_dim = false; auto sparse_size_original = sizes().slice(0, sparse_dim); auto sparse_size_new = size.slice(0, sparse_dim); - for (int64_t i = 0; i < sparse_dim; i++) { + for (const auto i : c10::irange(sparse_dim)) { if (sparse_size_new[i] < sparse_size_original[i]) { shrinking_sparse_dims = true; break; @@ -117,7 +118,7 @@ struct TORCH_API SparseTensorImpl : public TensorImpl { } auto dense_size_original = sizes().slice(sparse_dim); auto dense_size_new = size.slice(sparse_dim); - for (int64_t i = 0; i < dense_dim; i++) { + for (const auto i : c10::irange(dense_dim)) { if (dense_size_new[i] < dense_size_original[i]) { shrinking_dense_dim = true; break; diff --git a/aten/src/ATen/SparseTensorUtils.cpp b/aten/src/ATen/SparseTensorUtils.cpp index 564eeda03c3daa..d5811b933e7ca5 100644 --- a/aten/src/ATen/SparseTensorUtils.cpp +++ b/aten/src/ATen/SparseTensorUtils.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace at { namespace sparse { @@ -98,7 +99,7 @@ Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz) { at::parallel_for(0, nnz, 10000, [&](int64_t start, int64_t end) { // NOLINTNEXTLINE(cppcoreguidelines-init-variables) int64_t h, hp0, hp1; - for (auto i = start; i < end; i++) { + for (const auto i : c10::irange(start, end)) { hp0 = indices[i]; hp1 = (i+1 == nnz) ? 
dim : indices[i+1]; if (hp0 != hp1) { diff --git a/aten/src/ATen/TensorIndexing.cpp b/aten/src/ATen/TensorIndexing.cpp index 5c402155c6feaa..95d70132f43f95 100644 --- a/aten/src/ATen/TensorIndexing.cpp +++ b/aten/src/ATen/TensorIndexing.cpp @@ -1,6 +1,7 @@ #include #include +#include namespace at { namespace indexing { @@ -31,7 +32,7 @@ std::ostream& operator<<(std::ostream& stream, const TensorIndex& tensor_index) std::ostream& operator<<(std::ostream& stream, const std::vector& tensor_indices) { stream << "("; - for (size_t i = 0; i < tensor_indices.size(); i++) { + for (const auto i : c10::irange(tensor_indices.size())) { stream << tensor_indices[i]; if (i < tensor_indices.size() - 1) stream << ", "; } diff --git a/aten/src/ATen/TensorIndexing.h b/aten/src/ATen/TensorIndexing.h index 263f4914e0d5e2..71c9c3feb9e76b 100644 --- a/aten/src/ATen/TensorIndexing.h +++ b/aten/src/ATen/TensorIndexing.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -335,7 +336,7 @@ static inline Tensor scalarToTensor(const Scalar& v, const TensorOptions& option // strip away unit dimensions from the left of 'src' static inline IntArrayRef slicePrefix1sSize(const IntArrayRef& sizes) { size_t first_non1_src = sizes.size(); - for (size_t i = 0; i < sizes.size(); ++i) { + for (const auto i : c10::irange(sizes.size())) { if (sizes[i] != 1) { first_non1_src = i; break; @@ -439,7 +440,7 @@ static inline Tensor applySlicing( "too many indices for tensor of dimension ", (int)self_sizes.size()); Tensor result = self; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { auto& obj = indices[i]; result = handleDimInMultiDimIndexing( /*prev_dim_result=*/result, diff --git a/aten/src/ATen/TensorIterator.cpp b/aten/src/ATen/TensorIterator.cpp index 84298660aedfa1..cea4805bc080af 100644 --- a/aten/src/ATen/TensorIterator.cpp +++ b/aten/src/ATen/TensorIterator.cpp @@ -36,8 +36,8 @@ inline void get_base_ptrs(char** ptrs, ArrayRef operands) { } inline void get_strides(int64_t* strides, ArrayRef operands, int64_t ndim) { - for (int64_t dim = 0; dim < ndim; ++dim) { - for (size_t arg = 0; arg < operands.size(); ++arg) { + for (const auto dim : c10::irange(ndim)) { + for (const auto arg : c10::irange(operands.size())) { *strides++ = operands[arg].stride_bytes[dim]; } } @@ -214,7 +214,7 @@ void TensorIteratorBase::reorder_dimensions() { // returns 1 if the dim0 should come after dim1, -1 if dim0 should come // before dim1, and 0 if the comparison is ambiguous. 
auto should_swap = [&](size_t dim0, size_t dim1) { - for (int arg = 0; arg < ntensors(); arg++) { + for (const auto arg : c10::irange(ntensors())) { // ignore undefined or incorrectly sized tensors if (operands_[arg].stride_bytes.empty() || operands_[arg].will_resize) { continue; @@ -251,7 +251,7 @@ void TensorIteratorBase::reorder_dimensions() { }; // insertion sort with support for ambiguous comparisons - for (int i = 1; i < ndim(); i++) { + for (const auto i : c10::irange(1, ndim())) { int dim1 = i; for (int dim0 = i - 1; dim0 >= 0; dim0--) { int comparison = should_swap(perm_[dim0], perm_[dim1]); @@ -497,7 +497,7 @@ void TensorIteratorBase::compute_types(const TensorIteratorConfig& config) { StrideVector TensorIteratorBase::compatible_stride(int element_size) const { auto stride = StrideVector(); int64_t next_stride = element_size; - for (int dim = 0; dim < ndim(); dim++) { + for (const auto dim : c10::irange(ndim())) { stride.push_back(next_stride); next_stride *= shape_[dim]; } @@ -510,14 +510,14 @@ DimVector TensorIteratorBase::invert_perm(IntArrayRef input) const { TORCH_INTERNAL_ASSERT(!has_coalesced_dimensions_); TORCH_INTERNAL_ASSERT(input.size()==perm_.size()); auto res = DimVector(input.size()); //no initialization needed, every value in res should be written to. - for (int dim = 0; dim < ndim(); dim++) { + for (const auto dim : c10::irange(ndim())) { res[perm_[dim]] = input[dim]; } return res; } void TensorIteratorBase::allocate_or_resize_outputs() { - for (int i = 0; i < num_outputs_; i++) { + for (const auto i : c10::irange(num_outputs_)) { auto& op = operands_[i]; if (!op.tensor_base().defined() || op.will_resize) { TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i); @@ -525,8 +525,8 @@ void TensorIteratorBase::allocate_or_resize_outputs() { op.stride_bytes = compatible_stride(element_size); // check if permutation is just an inverted order bool inverted = true; - for (int i = 0; i < ndim(); i++) { - if (perm_[i] != ndim() - i - 1) { + for (const auto j : c10::irange(ndim())) { + if (perm_[j] != ndim() - j - 1) { inverted = false; break; } @@ -539,7 +539,7 @@ void TensorIteratorBase::allocate_or_resize_outputs() { set_output(i, tensor_shape, {}, original_options(op), names_); } else { auto tensor_stride = invert_perm(op.stride_bytes); - for (int dim = 0; dim < ndim(); dim++) { + for (const auto dim : c10::irange(ndim())) { tensor_stride[dim] /= element_size; } set_output(i, tensor_shape, tensor_stride, original_options(op), names_); @@ -593,7 +593,7 @@ void TensorIteratorBase::coalesce_dimensions() { if (shape0 == 1 || shape1 == 1) { return true; } - for (int i = 0; i < ntensors(); i++) { + for (const auto i : c10::irange(ntensors())) { auto& stride = operands_[i].stride_bytes; if (shape0 * stride[dim0] != stride[dim1]) { return false; @@ -604,14 +604,14 @@ void TensorIteratorBase::coalesce_dimensions() { // replace each operands stride at dim0 with its stride at dim1 auto replace_stride = [&](int dim0, int dim1) { - for (int i = 0; i < ntensors(); i++) { + for (const auto i : c10::irange(ntensors())) { auto& stride = operands_[i].stride_bytes; stride[dim0] = stride[dim1]; } }; int prev_dim = 0; - for (int dim = 1; dim < ndim(); dim++) { + for (const auto dim : c10::irange(1, ndim())) { if (can_coalesce(prev_dim, dim)) { if (shape_[prev_dim] == 1) { replace_stride(prev_dim, dim); @@ -627,7 +627,7 @@ void TensorIteratorBase::coalesce_dimensions() { } shape_.resize(prev_dim + 1); - for (int i = 0; i < ntensors(); i++) { + for (const auto i : 
c10::irange(ntensors())) { operands_[i].stride_bytes.resize(ndim()); } has_coalesced_dimensions_ = true; @@ -670,7 +670,7 @@ void TensorIteratorBase::permute_dimensions(IntArrayRef perm) { auto reorder = [perm](IntArrayRef data) { auto res = DimVector(data.size(), 0); - for (size_t i = 0; i < perm.size(); i++) { + for (const auto i : c10::irange(perm.size())) { res[i] = data[perm[i]]; } return res; @@ -687,7 +687,7 @@ void TensorIteratorBase::permute_dimensions(IntArrayRef perm) { int64_t TensorIteratorBase::num_output_elements() const { int64_t elem = 1; - for (int dim = 0; dim < ndim(); dim++) { + for (const auto dim : c10::irange(ndim())) { if (operands_[0].stride_bytes[dim] != 0 || shape_[dim] == 0) { elem *= shape_[dim]; } @@ -697,7 +697,7 @@ int64_t TensorIteratorBase::num_output_elements() const { int TensorIteratorBase::num_reduce_dims() const { int count = 0; - for (int dim = 0; dim < ndim(); dim++) { + for (const auto dim : c10::irange(ndim())) { if (operands_[0].stride_bytes[dim] == 0) { count++; } @@ -760,7 +760,7 @@ bool TensorIteratorBase::is_contiguous() const { bool TensorIteratorBase::is_scalar(int arg) const { const auto& stride = operands_[arg].stride_bytes; - for (int i = 0; i < ndim(); i++) { + for (const auto i : c10::irange(ndim())) { if (stride[i] != 0 && shape_[i] != 1) { return false; } @@ -815,7 +815,7 @@ void TensorIteratorBase::narrow(int dim, int64_t start, int64_t size) { void TensorIteratorBase::select_all_keeping_dim(int start_dim, IntArrayRef indices) { TORCH_INTERNAL_ASSERT(start_dim <= ndim()); - for (int i = start_dim; i < ndim(); ++i) { + for (const auto i : c10::irange(start_dim, ndim())) { for (auto& op : operands_) { op.data = ((char*)op.data) + op.stride_bytes[i] * indices[i - start_dim]; } @@ -1063,13 +1063,13 @@ void TensorIteratorBase::populate_operands(TensorIteratorConfig& config) { void TensorIteratorBase::mark_outputs() { // TODO: merge this into populate_operands - for (int i = 0; i < num_outputs_; i++) { + for (const auto i : c10::irange(num_outputs_)) { operands_[i].is_output = true; const auto& output = tensor(i); if (!output.defined()) continue; // check if output is also an input - for (int arg = num_outputs_; arg < ntensors(); arg++) { + for (const auto arg : c10::irange(num_outputs_, ntensors())) { const auto& input = tensor(arg); if (output.is_same(input)) { operands_[i].is_read_write = true; @@ -1086,7 +1086,7 @@ void TensorIteratorBase::mark_resize_outputs(const TensorIteratorConfig& config) if (config.static_shape_.has_value()) { return; } - for (int i = 0; i < num_outputs_; i++) { + for (const auto i : c10::irange(num_outputs_)) { const auto& output = tensor(i); if (output.defined() && !output.sizes().equals(shape_)) { if (config.resize_outputs_ && !operands_[i].is_read_write) { @@ -1104,11 +1104,11 @@ void TensorIteratorBase::compute_mem_overlaps(const TensorIteratorConfig& config if (!config.check_mem_overlap_) { return; } - for (int i = 0; i < num_outputs_; i++) { + for (const auto i : c10::irange(num_outputs_)) { const auto& output = tensor_base(i); if (!output.defined()) continue; assert_no_internal_overlap(output); - for (int j = num_outputs_; j < ntensors(); j++) { + for (const auto j : c10::irange(num_outputs_, ntensors())) { const auto& input = tensor_base(j); if (!input.is_same(output)) { assert_no_partial_overlap(output, input); @@ -1164,7 +1164,7 @@ void TensorIteratorBase::compute_strides(const TensorIteratorConfig& config) { op.stride_bytes.resize(ndim(), 0); else op.stride_bytes.resize(ndim()); - for (size_t i = 
0; i < original_shape.size(); i++) { + for (const auto i : c10::irange(original_shape.size())) { // see NOTE: [Computing output strides] if (original_shape[i] == 1 && shape_[offset + i] !=1) { op.stride_bytes[offset + i] = 0; @@ -1183,7 +1183,7 @@ bool TensorIteratorBase::can_use_32bit_indexing() const { } for (auto& op : operands_) { int64_t max_offset = 1; - for (int dim = 0; dim < ndim(); dim++) { + for (const auto dim : c10::irange(ndim())) { max_offset += (shape_[dim] - 1) * op.stride_bytes[dim]; } if (max_offset > max_value) { @@ -1245,7 +1245,7 @@ bool TensorIteratorBase::fast_set_up(const TensorIteratorConfig& config) { switch (setup_type) { case FastSetupType::CONTIGUOUS: { - for (int i = 0; i < num_outputs_; i++){ + for (const auto i : c10::irange(num_outputs_)) { auto& op = operands_[i]; if (!op.tensor_base().defined()) { TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i); @@ -1256,7 +1256,7 @@ bool TensorIteratorBase::fast_set_up(const TensorIteratorConfig& config) { } case FastSetupType::CHANNELS_LAST: { - for (int i = 0; i < num_outputs_; i++){ + for (const auto i : c10::irange(num_outputs_)) { auto& op = operands_[i]; if (!op.tensor_base().defined()) { TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i); @@ -1273,7 +1273,7 @@ bool TensorIteratorBase::fast_set_up(const TensorIteratorConfig& config) { if (tensor(i_defined).defined()) break; } TORCH_CHECK(i_defined >= 0, "Can not find a defined tensor when fast allocating memory to outputs"); - for (int i = 0; i < num_outputs_; i++){ + for (const auto i : c10::irange(num_outputs_)) { auto& op = operands_[i]; if (!op.tensor_base().defined()) { TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i); diff --git a/aten/src/ATen/TensorIterator.h b/aten/src/ATen/TensorIterator.h index 329598695dc9cc..6a35650c96d203 100644 --- a/aten/src/ATen/TensorIterator.h +++ b/aten/src/ATen/TensorIterator.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -322,9 +323,9 @@ struct TORCH_API TensorIteratorBase : public impl::MetaBase { char** base, const int64_t* strides, int64_t size0, int64_t size1) { PtrVector data(base, base + ntensor); const int64_t* outer_strides = &strides[ntensor]; - for (int64_t i = 0; i < size1; i++) { + for (const auto i : c10::irange(size1)) { if (i > 0) { - for (int64_t arg = 0; arg < ntensor; arg++) { + for (const auto arg : c10::irange(ntensor)) { data[arg] += outer_strides[arg]; } } @@ -397,7 +398,7 @@ struct TORCH_API TensorIteratorBase : public impl::MetaBase { bool has_contiguous_first_dim() const { int num_tensors = ntensors(); - for (int i = 0; i < num_tensors; i++) { + for (const auto i : c10::irange(num_tensors)) { if (strides(i)[0] != element_size(i)) { return false; } diff --git a/aten/src/ATen/TensorIteratorInternal.h b/aten/src/ATen/TensorIteratorInternal.h index 57477bcb1d4030..72e5939b351798 100644 --- a/aten/src/ATen/TensorIteratorInternal.h +++ b/aten/src/ATen/TensorIteratorInternal.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace at { @@ -24,9 +25,9 @@ inline void get_data_ptrs( const int64_t ntensors = base.size(); const int64_t ndim = counter.size(); std::copy(base.begin(), base.end(), ptrs); - for (int64_t dim = 0; dim < ndim; ++dim) { + for (const auto dim : c10::irange(ndim)) { int64_t value = counter[dim]; - for (int64_t arg = 0; arg < ntensors; ++arg) { + for (const auto arg : c10::irange(ntensors)) { ptrs[arg] += value * strides[dim * ntensors + arg]; } } diff --git 
a/aten/src/ATen/TensorNames.cpp b/aten/src/ATen/TensorNames.cpp index 9c28924dc0aee2..683de258a2ebd8 100644 --- a/aten/src/ATen/TensorNames.cpp +++ b/aten/src/ATen/TensorNames.cpp @@ -56,7 +56,7 @@ TensorNames::TensorNames(ArrayRef names, int64_t start, int64_t end) { start = maybe_wrap_dim(start, names.size()); end = maybe_wrap_dim(end, names.size()); names_.reserve(end - start); - for (int64_t idx = start; idx < end; ++idx) { + for (const auto idx : c10::irange(start, end)) { names_.emplace_back(names, idx); } } diff --git a/aten/src/ATen/TensorUtils.cpp b/aten/src/ATen/TensorUtils.cpp index 1ec9f9c291c0ab..3426bff7b4b8da 100644 --- a/aten/src/ATen/TensorUtils.cpp +++ b/aten/src/ATen/TensorUtils.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -323,7 +324,7 @@ size_t computeStorageNbytes( // size of the underlying storage is 1 bigger than the offset // of the last element according to stride size_t size = 1; - for(size_t i = 0; i < sizes.size(); i++) { + for (const auto i : c10::irange(sizes.size())) { if(sizes[i] == 0) { return 0; } diff --git a/aten/src/ATen/VmapTransforms.cpp b/aten/src/ATen/VmapTransforms.cpp index 07ff77fe2b746f..4bda903545fdf8 100644 --- a/aten/src/ATen/VmapTransforms.cpp +++ b/aten/src/ATen/VmapTransforms.cpp @@ -83,7 +83,7 @@ VmapDimVector VmapPhysicalView::getPhysicalShape(IntArrayRef logical_shape) cons static BatchDims computeFrontBatchDimsFromLevels(std::bitset levels_bitset) { BatchDims bdims; int64_t dim = 0; - for (int64_t level = 0; level < kVmapNumLevels; level++) { + for (const auto level : c10::irange(kVmapNumLevels)) { if (!levels_bitset[level]) { continue; } @@ -208,7 +208,7 @@ MultiBatchVmapTransform::logicalToPhysical(TensorList logical_tensors) { VmapDimVector batch_sizes(num_batch_dims, 1); for (const auto& physical_tensor : physical_tensors) { auto physical_sizes = physical_tensor.sizes(); - for (int64_t dim = 0; dim < num_batch_dims; dim++) { + for (const auto dim : c10::irange(num_batch_dims)) { if (physical_sizes[dim] != 1) { batch_sizes[dim] = physical_sizes[dim]; } diff --git a/aten/src/ATen/WrapDimUtils.h b/aten/src/ATen/WrapDimUtils.h index 13e605c920ec13..24fe684c6dc61c 100644 --- a/aten/src/ATen/WrapDimUtils.h +++ b/aten/src/ATen/WrapDimUtils.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace at { @@ -40,7 +41,7 @@ static inline void maybe_wrap_dims_n(int64_t* dims, int64_t ndims, int64_t dim_p } int64_t min = -dim_post_expr; int64_t max = dim_post_expr - 1; - for (int64_t i = 0; i < ndims; ++i) { + for (const auto i : c10::irange(ndims)) { auto &dim = dims[i]; if (dim < min || dim > max) { TORCH_CHECK_INDEX(false, @@ -85,7 +86,7 @@ static inline int64_t legacy_cat_wrap_dim(int64_t dim, TensorList tensors) { // wrap negative dims in a vector static inline void wrap_all_dims(std::vector& dims_to_wrap, int64_t tensor_total_dims) { - for (size_t i = 0; i < dims_to_wrap.size(); i++) { + for (const auto i : c10::irange(dims_to_wrap.size())) { dims_to_wrap[i] = maybe_wrap_dim(dims_to_wrap[i], tensor_total_dims); } } diff --git a/aten/src/ATen/WrapDimUtilsMulti.h b/aten/src/ATen/WrapDimUtilsMulti.h index a2af1b0dcd7195..e1d2266e24efba 100644 --- a/aten/src/ATen/WrapDimUtilsMulti.h +++ b/aten/src/ATen/WrapDimUtilsMulti.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -15,7 +16,7 @@ constexpr size_t dim_bitset_size = 64; static inline std::bitset dim_list_to_bitset(IntArrayRef dims, int64_t ndims) { TORCH_CHECK(ndims <= (int64_t) dim_bitset_size, "only tensors with up to 
", dim_bitset_size, " dims are supported"); std::bitset seen; - for (size_t i = 0; i < dims.size(); i++) { + for (const auto i : c10::irange(dims.size())) { size_t dim = maybe_wrap_dim(dims[i], ndims); TORCH_CHECK(!seen[dim], "dim ", dim, " appears multiple times in the list of dims"); seen[dim] = true; diff --git a/aten/src/ATen/benchmarks/stateful_conv1d.cpp b/aten/src/ATen/benchmarks/stateful_conv1d.cpp index 60502773ca57a0..527dcc439dcdce 100644 --- a/aten/src/ATen/benchmarks/stateful_conv1d.cpp +++ b/aten/src/ATen/benchmarks/stateful_conv1d.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -33,7 +34,7 @@ static void stateful_conv1d(benchmark::State& state) { )"); std::vector> inputs; - for (int i = 0; i < 10; ++i) { + for (const auto i : c10::irange(10)) { std::vector input; // NOLINTNEXTLINE(modernize-use-emplace) input.push_back(torch::rand({batch_size, input_channels, width})); @@ -70,8 +71,8 @@ static void GenerateSizes(benchmark::internal::Benchmark* b) { for (size_t input_channels = 32; input_channels < 256; input_channels *= 2) { for (size_t output_channels = 32; output_channels < 256; output_channels *= 2) { - for (size_t kernel = 3; kernel < 8; ++kernel) { - for (size_t batch_size = 1; batch_size < 5; ++batch_size) { + for (const auto kernel : c10::irange(3, 8)) { + for (const auto batch_size : c10::irange(1, 5)) { for (size_t width = 32; width < 256; width *= 2) { b->Args({input_channels, output_channels, kernel, batch_size, width, true}); b->Args({input_channels, output_channels, kernel, batch_size, width, false}); diff --git a/aten/src/ATen/core/Array.h b/aten/src/ATen/core/Array.h index 6e0fce606efc80..4754f72cda0f74 100644 --- a/aten/src/ATen/core/Array.h +++ b/aten/src/ATen/core/Array.h @@ -4,6 +4,7 @@ // device code. 
#include +#include namespace at { namespace detail { diff --git a/aten/src/ATen/core/Formatting.cpp b/aten/src/ATen/core/Formatting.cpp index dbbed6e3b07858..68fcd8c29398b1 100644 --- a/aten/src/ATen/core/Formatting.cpp +++ b/aten/src/ATen/core/Formatting.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -44,7 +45,7 @@ static std::tuple __printFormat(std::ostream& stream, const Ten } bool intMode = true; auto self_p = self.data_ptr(); - for(int64_t i = 0; i < size; i++) { + for (const auto i : c10::irange(size)) { auto z = self_p[i]; if(std::isfinite(z)) { if(z != std::ceil(z)) { @@ -70,7 +71,7 @@ static std::tuple __printFormat(std::ostream& stream, const Ten } else { expMin = fabs(self_p[offset]); expMax = fabs(self_p[offset]); - for(int64_t i = offset; i < size; i++) { + for (const auto i : c10::irange(offset, size)) { double z = fabs(self_p[i]); if(std::isfinite(z)) { if(z < expMin) { @@ -130,7 +131,8 @@ static std::tuple __printFormat(std::ostream& stream, const Ten static void __printIndent(std::ostream &stream, int64_t indent) { - for(int64_t i = 0; i < indent; i++) { + for (const auto i : c10::irange(indent)) { + (void)i; //Suppress unused variable warning stream << " "; } } @@ -168,7 +170,7 @@ static void __printMatrix(std::ostream& stream, const Tensor& self, int64_t line printScale(stream,scale); __printIndent(stream, indent); } - for(int64_t l = 0; l < self.size(0); l++) { + for (const auto l : c10::irange(self.size(0))) { Tensor row = self.select(0,l); double *row_ptr = row.data_ptr(); for(int64_t c = firstColumn; c < lastColumn+1; c++) { @@ -198,8 +200,9 @@ void __printTensor(std::ostream& stream, Tensor& self, int64_t linesize) bool start = true; bool finished = false; counter[0] = -1; - for(size_t i = 1; i < counter.size(); i++) + for (const auto i : c10::irange(1, counter.size())) { counter[i] = 0; + } while(true) { for(int64_t i = 0; self.ndimension()-2; i++) { counter[i] = counter[i] + 1; @@ -269,7 +272,7 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi printScale(stream, scale); } double* tensor_p = tensor.data_ptr(); - for (int64_t i = 0; i < tensor.size(0); i++) { + for (const auto i : c10::irange(tensor.size(0))) { stream << std::setw(sz) << tensor_p[i]/scale << std::endl; } } @@ -284,7 +287,7 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi __printTensor(stream, tensor, linesize); } stream << "[ " << tensor_.toString() << "{" << tensor.size(0); - for(int64_t i = 1; i < tensor.ndimension(); i++) { + for (const auto i : c10::irange(1, tensor.ndimension())) { stream << "," << tensor.size(i); } stream << "}"; diff --git a/aten/src/ATen/core/MT19937RNGEngine.h b/aten/src/ATen/core/MT19937RNGEngine.h index 40c1ba5f584ade..68b9c0c7e64c46 100644 --- a/aten/src/ATen/core/MT19937RNGEngine.h +++ b/aten/src/ATen/core/MT19937RNGEngine.h @@ -1,5 +1,7 @@ #pragma once +#include + // define constants like M_PI and C keywords for MSVC #ifdef _MSC_VER #ifndef _USE_MATH_DEFINES @@ -8,9 +10,9 @@ #include #endif -#include -#include #include +#include +#include namespace at { @@ -155,7 +157,7 @@ class mt19937_engine { data_.seed_ = seed; data_.seeded_ = true; data_.state_[0] = seed & 0xffffffff; - for(int j = 1; j < MERSENNE_STATE_N; j++) { + for (const auto j : c10::irange(1, MERSENNE_STATE_N)) { data_.state_[j] = (1812433253 * (data_.state_[j-1] ^ (data_.state_[j-1] >> 30)) + j); } data_.left_ = 1; diff --git a/aten/src/ATen/core/TensorAccessor.h b/aten/src/ATen/core/TensorAccessor.h index 
e18f9d35ca2f04..9d65522b5d96b2 100644 --- a/aten/src/ATen/core/TensorAccessor.h +++ b/aten/src/ATen/core/TensorAccessor.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -134,7 +135,7 @@ class GenericPackedTensorAccessorBase { const source_index_t* sizes_, const source_index_t* strides_) : data_(data_) { - for (int i = 0; i < N; i++) { + for (const auto i : c10::irange(N)) { this->sizes_[i] = sizes_[i]; this->strides_[i] = strides_[i]; } diff --git a/aten/src/ATen/core/boxing/impl/test_helpers.h b/aten/src/ATen/core/boxing/impl/test_helpers.h index 9ca06878f1539f..93b11dc853f00f 100644 --- a/aten/src/ATen/core/boxing/impl/test_helpers.h +++ b/aten/src/ATen/core/boxing/impl/test_helpers.h @@ -7,6 +7,7 @@ #include #include #include +#include template inline std::vector makeStack(Inputs&&... inputs) { @@ -87,7 +88,7 @@ inline void expectThrows(Functor&& functor, const char* expectMessageContains) { template void expectListEquals(c10::ArrayRef expected, std::array actual) { EXPECT_EQ(expected.size(), actual.size()); - for (size_t i = 0; i < expected.size(); ++i) { + for (const auto i : c10::irange(expected.size())) { EXPECT_EQ(expected[i], actual[i]); } } @@ -95,7 +96,7 @@ void expectListEquals(c10::ArrayRef expected, std::array actual) { template void expectListEquals(c10::ArrayRef expected, c10::ArrayRef actual) { EXPECT_EQ(expected.size(), actual.size()); - for (size_t i = 0; i < expected.size(); ++i) { + for (const auto i : c10::irange(expected.size())) { EXPECT_EQ(expected[i], actual[i]); } } @@ -103,7 +104,7 @@ void expectListEquals(c10::ArrayRef expected, c10::ArrayRef actual) { template void expectListEquals(c10::ArrayRef expected, c10::List actual) { EXPECT_EQ(expected.size(), actual.size()); - for (size_t i = 0; i < expected.size(); ++i) { + for (const auto i : c10::irange(expected.size())) { EXPECT_EQ(expected[i], actual.get(i)); } } @@ -111,7 +112,7 @@ void expectListEquals(c10::ArrayRef expected, c10::List actual) { template void expectListEquals(c10::ArrayRef expected, std::vector actual) { EXPECT_EQ(expected.size(), actual.size()); - for (size_t i = 0; i < expected.size(); ++i) { + for (const auto i : c10::irange(expected.size())) { EXPECT_EQ(expected[i], actual[i]); } } diff --git a/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h b/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h index 5289f9fa01142f..31dd09836cbe66 100644 --- a/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h +++ b/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -171,7 +172,7 @@ struct TORCH_API DispatchKeyExtractor final { "The function schema has ", schema.arguments().size(), " arguments but this PyTorch build only supports ", c10::utils::bitset::NUM_BITS()); c10::utils::bitset dispatch_arg_indices_reverse; - for (size_t index = 0; index < schema.arguments().size(); ++index) { + for (const auto index : c10::irange(schema.arguments().size())) { if (schema.arguments()[index].type()->isSubtypeOf(*TensorType::get()) || schema.arguments()[index].type()->isSubtypeOf( *ListType::ofTensors()) || diff --git a/aten/src/ATen/core/dispatch/backend_fallback_test.cpp b/aten/src/ATen/core/dispatch/backend_fallback_test.cpp index 1fb14cf205b94c..19981988962a95 100644 --- a/aten/src/ATen/core/dispatch/backend_fallback_test.cpp +++ b/aten/src/ATen/core/dispatch/backend_fallback_test.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include using namespace at; @@ -51,7 +52,7 @@ void 
generic_wrapper_fallback(const c10::OperatorHandle& op, torch::jit::Stack* // Unwrap all arguments auto args = torch::jit::pop(*stack, num_arguments); - for (size_t i = 0; i < num_arguments; i++) { + for (const auto i : c10::irange(num_arguments)) { // TODO: Handle tensor list if (args[i].isTensor()) { auto* impl = args[i].unsafeToTensorImpl(); @@ -70,7 +71,7 @@ void generic_wrapper_fallback(const c10::OperatorHandle& op, torch::jit::Stack* // Rewrap outputs auto rets = torch::jit::pop(*stack, num_returns); - for (size_t i = 0; i < num_returns; i++) { + for (const auto i : c10::irange(num_returns)) { // TODO: Handle tensor list if (rets[i].isTensor()) { torch::jit::push(*stack, at::detail::make_tensor(std::move(rets[i]).toTensor())); // yes move! diff --git a/aten/src/ATen/core/function_schema.h b/aten/src/ATen/core/function_schema.h index 3da6958eaf3441..211c55662f2b82 100644 --- a/aten/src/ATen/core/function_schema.h +++ b/aten/src/ATen/core/function_schema.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/aten/src/ATen/core/function_schema_inl.h b/aten/src/ATen/core/function_schema_inl.h index 712192b0823062..b7aab0730c7d5c 100644 --- a/aten/src/ATen/core/function_schema_inl.h +++ b/aten/src/ATen/core/function_schema_inl.h @@ -16,7 +16,7 @@ inline std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema) out << "("; bool seen_kwarg_only = false; - for(size_t i = 0; i < schema.arguments().size(); ++i) { + for (const auto i : c10::irange(schema.arguments().size())) { if (i > 0) out << ", "; if (schema.arguments()[i].kwarg_only() && !seen_kwarg_only) { out << "*, "; @@ -35,7 +35,7 @@ inline std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema) const auto& returns = schema.returns(); out << "("; - for(size_t i = 0; i < returns.size(); ++i) { + for (const auto i : c10::irange(returns.size())) { if (i > 0) { out << ", "; } @@ -53,7 +53,7 @@ inline std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema) inline size_t findFirstOutArg(const std::vector& args) { // find the start of out args in the schema - for (size_t out_start_idx = 0; out_start_idx < args.size(); out_start_idx++) { + for (const auto out_start_idx : c10::irange(args.size())) { if (args.at(out_start_idx).is_out()) { return out_start_idx; } @@ -122,7 +122,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith( && arguments().size() >= old.arguments().size())) { return false; } - for (size_t i = 0; i < returns().size(); ++i) { + for (const auto i : c10::irange(returns().size())) { // Backwards compatibility requires covariance on argument types // (i.e. more generic), and contravariance on return types (i.e. // more specific). 
@@ -138,7 +138,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith( size_t new_out_start_idx = findFirstOutArg(arguments()); // make sure among the default args, they are backward compatible - for (size_t i = 0; i < old_out_start_idx; i++) { + for (const auto i : c10::irange(old_out_start_idx)) { if (!arguments().at(i).isBackwardCompatibleWith( old.arguments().at(i), why_not)) { return false; @@ -146,7 +146,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith( } // // Validate that all new arguments provided has a default value - for (size_t i = old_out_start_idx; i < new_out_start_idx; ++i) { + for (const auto i : c10::irange(old_out_start_idx, new_out_start_idx)) { if (!arguments().at(i).default_value()) { if (why_not) { *why_not @@ -160,7 +160,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith( } // now compare the out args - for (size_t i = old_out_start_idx; i < old.arguments().size(); i++) { + for (const auto i : c10::irange(old_out_start_idx, old.arguments().size())) { if (!arguments() .at(i - old_out_start_idx + new_out_start_idx) .isBackwardCompatibleWith(old.arguments().at(i), why_not)) { @@ -238,7 +238,7 @@ inline void FunctionSchema::checkAndNormalizeInputs( *this); size_t consumed_kwargs = 0; - for (size_t pos = 0; pos < arguments().size(); ++pos) { + for (const auto pos : c10::irange(arguments().size())) { const auto& argument = arguments()[pos]; if (pos < inputs.size()) { checkArg(inputs[pos], argument, pos); @@ -298,7 +298,7 @@ inline bool isSubtypeOfList( if (child.size() != parent.size()) { return false; } - for (size_t i = 0; i < child.size(); ++i) { + for (const auto i : c10::irange(child.size())) { const Argument& c = child[i]; const Argument& p = parent[i]; if (c.name() != p.name()) { diff --git a/aten/src/ATen/core/ivalue_inl.h b/aten/src/ATen/core/ivalue_inl.h index 062af97af793b8..7fe5aa48258d28 100644 --- a/aten/src/ATen/core/ivalue_inl.h +++ b/aten/src/ATen/core/ivalue_inl.h @@ -1114,7 +1114,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target { } std::ostringstream oss; oss << devices[0]; - for (size_t idx = 1; idx < devices.size(); idx++) { + for (const auto idx : c10::irange(1, devices.size())) { if (idx == devices.size() - 1) { oss << " and "; } else { @@ -1131,7 +1131,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target { return c10::kCPU; } c10::DeviceType deviceType = devices[0].type(); - for (size_t idx = 1; idx < devices.size(); idx++) { + for (const auto idx : c10::irange(1, devices.size())) { TORCH_CHECK_VALUE( devices[idx].type() == deviceType, "Expected all devices to be of the same type, but got a mismatch between ", @@ -1151,7 +1151,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target { [](const c10::Device& a, const c10::Device& b) { return a.index() < b.index(); }); // Deduplicate by compacting. 
size_t targetIdx = 0; - for (size_t sourceIdx = 0; sourceIdx < devices.size(); sourceIdx++) { + for (const auto sourceIdx : c10::irange(devices.size())) { TORCH_CHECK_VALUE( devices[sourceIdx].has_index(), "Expected devices to have indices, got ", devices[sourceIdx]); diff --git a/aten/src/ATen/core/op_registration/infer_schema.cpp b/aten/src/ATen/core/op_registration/infer_schema.cpp index 3807e420086a7f..df1925aba5ed1a 100644 --- a/aten/src/ATen/core/op_registration/infer_schema.cpp +++ b/aten/src/ATen/core/op_registration/infer_schema.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace c10 { @@ -20,7 +21,7 @@ std::string fastToString(size_t x) { std::vector createArgumentVector(c10::ArrayRef args) { std::vector result; result.reserve(args.size()); - for (size_t i = 0; i < args.size(); ++i) { + for (const auto i : c10::irange(args.size())) { // Arguments are named "_" result.emplace_back(fastToString(i), (*args[i].getTypeFn)()); } @@ -49,7 +50,7 @@ C10_EXPORT c10::optional findSchemaDifferences(const FunctionSchema " vs " + guts::to_string(rhs.returns().size()); } - for (size_t i = 0; i < lhs.arguments().size(); ++i) { + for (const auto i : c10::irange(lhs.arguments().size())) { const TypePtr& leftType = lhs.arguments()[i].type(); const TypePtr& rightType = rhs.arguments()[i].type(); // Type::operator== is virtual. Comparing pointers first is @@ -61,7 +62,7 @@ C10_EXPORT c10::optional findSchemaDifferences(const FunctionSchema } } - for (size_t i = 0; i < lhs.returns().size(); ++i) { + for (const auto i : c10::irange(lhs.returns().size())) { const TypePtr& leftType = lhs.returns()[i].type(); const TypePtr& rightType = rhs.returns()[i].type(); // See above about comparing pointers first. diff --git a/aten/src/ATen/core/qualified_name.h b/aten/src/ATen/core/qualified_name.h index 4770a3cf334080..b8065d9d5085f7 100644 --- a/aten/src/ATen/core/qualified_name.h +++ b/aten/src/ATen/core/qualified_name.h @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace c10 { @@ -69,7 +70,7 @@ struct QualifiedName { // Can't be a prefix if it's bigger return false; } - for (size_t i = 0; i < thisAtoms.size(); i++) { + for (const auto i : c10::irange(thisAtoms.size())) { if (thisAtoms[i] != otherAtoms[i]) { return false; } @@ -116,7 +117,7 @@ struct QualifiedName { reserve += e.size() + 1; } out.reserve(reserve); - for (size_t i = 0; i < v.size(); ++i) { + for (const auto i : c10::irange(v.size())) { if (i != 0) { out.push_back(delimiter); } diff --git a/aten/src/ATen/core/stack.h b/aten/src/ATen/core/stack.h index 021e8a02104f22..35bb9964eb398e 100644 --- a/aten/src/ATen/core/stack.h +++ b/aten/src/ATen/core/stack.h @@ -4,6 +4,7 @@ #include #include +#include // TODO move this to c10 namespace @@ -108,7 +109,7 @@ static inline IValue pop(Stack* stack) { static inline std::vector pop(Stack& stack, size_t n) { std::vector result; result.reserve(n); - for (size_t i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { result.push_back(std::move(peek(stack, i, n))); } drop(stack, n); diff --git a/aten/src/ATen/cpu/vec/functional_base.h b/aten/src/ATen/cpu/vec/functional_base.h index 7bd04e637c7e3c..eb160577e8694e 100644 --- a/aten/src/ATen/cpu/vec/functional_base.h +++ b/aten/src/ATen/cpu/vec/functional_base.h @@ -4,6 +4,7 @@ // See Note [Do not compile initializers with AVX] #include +#include namespace at { namespace vec { @@ -16,7 +17,7 @@ inline scalar_t vec_reduce_all( using Vec = vec::Vectorized; scalar_t acc_arr[Vec::size()]; acc_vec.store(acc_arr); - for (int64_t i = 1; i < 
size; i++) { + for (const auto i : c10::irange(1, size)) { std::array acc_arr_next = {0}; acc_arr_next[0] = acc_arr[i]; Vec acc_vec_next = Vec::loadu(acc_arr_next.data()); diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_complex_double.h b/aten/src/ATen/cpu/vec/vec256/vec256_complex_double.h index 40276ba8365d51..f6db6fdc49a4a9 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_complex_double.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_complex_double.h @@ -4,6 +4,7 @@ // See Note [Do not compile initializers with AVX] #include +#include #include #include @@ -109,7 +110,7 @@ template <> class Vectorized> { Vectorized> map(c10::complex (*const f)(const c10::complex &)) const { __at_align__ c10::complex tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -293,7 +294,7 @@ template <> class Vectorized> { __at_align__ c10::complex y_tmp[size()]; store(x_tmp); exp.store(y_tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]); } return loadu(x_tmp); diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_complex_float.h b/aten/src/ATen/cpu/vec/vec256/vec256_complex_float.h index f40196320022bc..a4181a8abb8b21 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_complex_float.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_complex_float.h @@ -4,6 +4,7 @@ // See Note [Do not compile initializers with AVX] #include +#include #include #include #if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER) @@ -144,7 +145,7 @@ template <> class Vectorized> { Vectorized> map(c10::complex (*const f)(const c10::complex &)) const { __at_align__ c10::complex tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -327,7 +328,7 @@ template <> class Vectorized> { __at_align__ c10::complex y_tmp[size()]; store(x_tmp); exp.store(y_tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]); } return loadu(x_tmp); diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_double.h b/aten/src/ATen/cpu/vec/vec256/vec256_double.h index f92f44e562a9d4..b64f910fbb6d8a 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_double.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_double.h @@ -5,6 +5,7 @@ #include #include +#include #if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER) #include #endif @@ -72,7 +73,7 @@ template <> class Vectorized { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. 
- for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0.0; } std::memcpy( @@ -103,7 +104,7 @@ template <> class Vectorized { Vectorized map(double (*const f)(double)) const { __at_align__ double tmp[size()]; store(tmp); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -180,7 +181,7 @@ template <> class Vectorized { __at_align__ double tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igamma(tmp[i], tmp_x[i]); } return loadu(tmp); @@ -190,7 +191,7 @@ template <> class Vectorized { __at_align__ double tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igammac(tmp[i], tmp_x[i]); } return loadu(tmp); diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_float.h b/aten/src/ATen/cpu/vec/vec256/vec256_float.h index deb95429843738..57a594f6354c49 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_float.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_float.h @@ -5,6 +5,7 @@ #include #include +#include #if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER) #include #endif @@ -80,7 +81,7 @@ template <> class Vectorized { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0.0; } std::memcpy( @@ -109,7 +110,7 @@ template <> class Vectorized { Vectorized map(float (*const f)(float)) const { __at_align__ float tmp[size()]; store(tmp); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -217,7 +218,7 @@ template <> class Vectorized { __at_align__ float tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igamma(tmp[i], tmp_x[i]); } return loadu(tmp); @@ -227,7 +228,7 @@ template <> class Vectorized { __at_align__ float tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igammac(tmp[i], tmp_x[i]); } return loadu(tmp); diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_float_neon.h b/aten/src/ATen/cpu/vec/vec256/vec256_float_neon.h index 2aac442d2123d9..465266b8b55dac 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_float_neon.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_float_neon.h @@ -5,6 +5,7 @@ #include #include +#include // Sleef offers vectorized versions of some transcedentals // such as sin, cos, tan etc.. 
// However for now opting for STL, since we are not building @@ -221,7 +222,7 @@ template <> class Vectorized { } else { __at_align__ float tmp_values[size()]; - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0.0; } std::memcpy( @@ -287,7 +288,7 @@ template <> class Vectorized { __at_align__ float tmp[size()]; __at_align__ float res[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { if (_isnan(tmp[i])) { std::memset(static_cast(&res[i]), 0xFF, sizeof(float)); } else { @@ -299,7 +300,7 @@ template <> class Vectorized { Vectorized map(float (*const f)(float)) const { __at_align__ float tmp[size()]; store(tmp); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -336,7 +337,7 @@ template <> class Vectorized { __at_align__ float tmp_exp[size()]; store(tmp); exp.store(tmp_exp); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = std::atan2(tmp[i], tmp_exp[i]); } return loadu(tmp); @@ -371,7 +372,7 @@ template <> class Vectorized { __at_align__ float tmp_q[size()]; store(tmp); q.store(tmp_q); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = std::fmod(tmp[i], tmp_q[i]); } return loadu(tmp); @@ -381,7 +382,7 @@ template <> class Vectorized { __at_align__ float tmp_b[size()]; store(tmp); b.store(tmp_b); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = std::hypot(tmp[i], tmp_b[i]); } return loadu(tmp); @@ -397,7 +398,7 @@ template <> class Vectorized { __at_align__ float tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igamma(tmp[i], tmp_x[i]); } return loadu(tmp); @@ -407,7 +408,7 @@ template <> class Vectorized { __at_align__ float tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igammac(tmp[i], tmp_x[i]); } return loadu(tmp); @@ -429,7 +430,7 @@ template <> class Vectorized { __at_align__ float tmp_b[size()]; store(tmp); b.store(tmp_b); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = std::nextafter(tmp[i], tmp_b[i]); } return loadu(tmp); @@ -494,7 +495,7 @@ template <> class Vectorized { __at_align__ float tmp_exp[size()]; store(tmp); exp.store(tmp_exp); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = std::pow(tmp[i], tmp_exp[i]); } return loadu(tmp); diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_int.h b/aten/src/ATen/cpu/vec/vec256/vec256_int.h index 5ee9919abca02c..2808c19bb3bb3d 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_int.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_int.h @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace at { @@ -98,7 +99,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. 
- for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int64_t)); @@ -221,7 +222,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int32_t)); @@ -435,7 +436,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int16_t)); @@ -684,7 +685,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int8_t)); diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_qint.h b/aten/src/ATen/cpu/vec/vec256/vec256_qint.h index 65ef6c62683943..504fac94dde44d 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_qint.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_qint.h @@ -6,6 +6,8 @@ #include #include #include + +#include #include #include #include @@ -745,7 +747,7 @@ struct VectorizedQuantizedConverter { std::array vals; VectorizedQuantizedConverter(T val) { - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { vals[i] = val.val_; } } @@ -763,9 +765,9 @@ struct VectorizedQuantizedConverter { Vectorized zero_point, Vectorized scale_zp_premul) const { float_vec_return_type rv; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { float tmp_vals[8]; - for (int j = 0; j < 8; ++j) { + for (const auto j : c10::irange(8)) { tmp_vals[j] = at::native::dequantize_val( scale[j], zero_point[j], T(vals[8 * i + j])); } @@ -822,7 +824,7 @@ struct Vectorized : public VectorizedQuantizedConverter< std::array qvals; std::array float_vals; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { rhs[i].store(&float_vals[i * 8], 8); } @@ -838,7 +840,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized maximum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::max(vals[i], b.vals[i]); } return retval; @@ -846,7 +848,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized minimum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min(vals[i], b.vals[i]); } return retval; @@ -861,7 +863,7 @@ struct Vectorized : public 
VectorizedQuantizedConverter< Vectorized zero_point, Vectorized q_six) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min( std::max(vals[i], zero_point.vals[i]), q_six.vals[i]); } @@ -870,7 +872,7 @@ struct Vectorized : public VectorizedQuantizedConverter< int_vec_return_type widening_subtract(Vectorized b) const { int_vec_return_type retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval[0].vals[i] = vals[i] - b.vals[i]; } return retval; @@ -881,7 +883,7 @@ struct Vectorized : public VectorizedQuantizedConverter< float multiplier, int32_t zero_point) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = nearbyint(static_cast(inp[0].vals[i]) * multiplier) + zero_point; @@ -954,7 +956,7 @@ struct Vectorized : public VectorizedQuantizedConverter< std::array qvals; std::array float_vals; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { rhs[i].store(&float_vals[i * 8], 8); } @@ -970,7 +972,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized maximum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::max(vals[i], b.vals[i]); } return retval; @@ -978,7 +980,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized minimum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min(vals[i], b.vals[i]); } return retval; @@ -992,7 +994,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized zero_point, Vectorized q_six) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min( std::max(vals[i], zero_point.vals[i]), q_six.vals[i]); } @@ -1002,8 +1004,8 @@ struct Vectorized : public VectorizedQuantizedConverter< int_vec_return_type widening_subtract(Vectorized b) const { int_vec_return_type retval; constexpr int elem_per_int_vec = size() / int_num_vecs(); - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { retval[i].vals[j] = static_cast(vals[i * elem_per_int_vec + j]) - static_cast(b.vals[i * elem_per_int_vec + j]); @@ -1019,8 +1021,8 @@ struct Vectorized : public VectorizedQuantizedConverter< constexpr auto min_val = std::numeric_limits::min(); constexpr auto max_val = std::numeric_limits::max(); Vectorized retval; - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { int32_t rounded = nearbyint(static_cast(inp[i].vals[j]) * multiplier) + zero_point; @@ -1074,7 +1076,7 @@ struct Vectorized : public VectorizedQuantizedConverter< std::array qvals; std::array float_vals; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { rhs[i].store(&float_vals[i * 8], 8); } @@ -1090,7 +1092,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized maximum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = 
std::max(vals[i], b.vals[i]); } return retval; @@ -1098,7 +1100,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized minimum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min(vals[i], b.vals[i]); } return retval; @@ -1113,7 +1115,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized zero_point, Vectorized q_six) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min( std::max(vals[i], zero_point.vals[i]), q_six.vals[i]); } @@ -1123,8 +1125,8 @@ struct Vectorized : public VectorizedQuantizedConverter< int_vec_return_type widening_subtract(Vectorized b) const { int_vec_return_type retval; constexpr int elem_per_int_vec = size() / int_num_vecs(); - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { retval[i].vals[j] = static_cast(vals[i * elem_per_int_vec + j]) - static_cast(b.vals[i * elem_per_int_vec + j]); @@ -1140,8 +1142,8 @@ struct Vectorized : public VectorizedQuantizedConverter< constexpr auto min_val = std::numeric_limits::min(); constexpr auto max_val = std::numeric_limits::max(); Vectorized retval; - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { int32_t rounded = nearbyint(static_cast(inp[i].vals[j]) * multiplier) + zero_point; diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h index 3a3e0daade098b..fefe5a0a4c9aba 100644 --- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h +++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace at { namespace vec { @@ -167,7 +168,7 @@ class Vectorized { Vectorized map(ComplexDbl (*const f)(ComplexDbl)) const { __at_align__ ComplexDbl tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -176,7 +177,7 @@ class Vectorized { Vectorized map(ComplexDbl (*const f)(const ComplexDbl&)) const { __at_align__ ComplexDbl tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -454,7 +455,7 @@ class Vectorized { __at_align__ ComplexDbl y_tmp[size()]; store(x_tmp); exp.store(y_tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]); } return loadu(x_tmp); diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h index 712de24597dcfa..92beb6bc227ff2 100644 --- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h +++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace at { namespace vec { @@ -222,7 +223,7 @@ class Vectorized { Vectorized map(ComplexFlt (*const f)(ComplexFlt)) const { __at_align__ ComplexFlt tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ 
-231,7 +232,7 @@ class Vectorized { Vectorized map(ComplexFlt (*const f)(const ComplexFlt&)) const { __at_align__ ComplexFlt tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -430,7 +431,7 @@ class Vectorized { __at_align__ ComplexFlt y_tmp[size()]; store(x_tmp); exp.store(y_tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]); } return loadu(x_tmp); diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h index 2482af6ec2324f..7a80c24e42c6a9 100644 --- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h +++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h @@ -3,6 +3,8 @@ #include #include #include + +#include #include #include diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_complex_double.h b/aten/src/ATen/cpu/vec/vec512/vec512_complex_double.h index 6fc22f0f7d3362..7dc3fdc6eafc38 100644 --- a/aten/src/ATen/cpu/vec/vec512/vec512_complex_double.h +++ b/aten/src/ATen/cpu/vec/vec512/vec512_complex_double.h @@ -4,6 +4,7 @@ // See Note [Do not compile initializers with AVX] #include +#include #include #include #if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER) @@ -149,7 +150,7 @@ template <> class Vectorized> { Vectorized> map(c10::complex (*const f)(const c10::complex &)) const { __at_align__ c10::complex tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -357,7 +358,7 @@ template <> class Vectorized> { __at_align__ c10::complex y_tmp[size()]; store(x_tmp); exp.store(y_tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]); } return loadu(x_tmp); diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_complex_float.h b/aten/src/ATen/cpu/vec/vec512/vec512_complex_float.h index dfd070604c40c5..a9876dd5fcadc5 100644 --- a/aten/src/ATen/cpu/vec/vec512/vec512_complex_float.h +++ b/aten/src/ATen/cpu/vec/vec512/vec512_complex_float.h @@ -4,6 +4,7 @@ // See Note [Do not compile initializers with AVX] #include +#include #include #include #if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER) @@ -667,7 +668,7 @@ template <> class Vectorized> { Vectorized> map(c10::complex (*const f)(const c10::complex &)) const { __at_align__ c10::complex tmp[size()]; store(tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -858,7 +859,7 @@ template <> class Vectorized> { __at_align__ c10::complex y_tmp[size()]; store(x_tmp); exp.store(y_tmp); - for (int i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]); } return loadu(x_tmp); diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_double.h b/aten/src/ATen/cpu/vec/vec512/vec512_double.h index 7128219748a061..7035b3e0f5d4b8 100644 --- a/aten/src/ATen/cpu/vec/vec512/vec512_double.h +++ b/aten/src/ATen/cpu/vec/vec512/vec512_double.h @@ -5,6 +5,7 @@ #include #include +#include #if (defined(CPU_CAPABILITY_AVX512)) && !defined(_MSC_VER) #include #endif @@ -87,7 +88,7 @@ template <> class Vectorized { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. 
We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0.0; } std::memcpy( @@ -120,7 +121,7 @@ template <> class Vectorized { Vectorized map(double (*const f)(double)) const { __at_align__ double tmp[size()]; store(tmp); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -200,7 +201,7 @@ template <> class Vectorized { __at_align__ double tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igamma(tmp[i], tmp_x[i]); } return loadu(tmp); @@ -210,7 +211,7 @@ template <> class Vectorized { __at_align__ double tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igammac(tmp[i], tmp_x[i]); } return loadu(tmp); diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_float.h b/aten/src/ATen/cpu/vec/vec512/vec512_float.h index 1a2b113de9d367..70866b15eb7085 100644 --- a/aten/src/ATen/cpu/vec/vec512/vec512_float.h +++ b/aten/src/ATen/cpu/vec/vec512/vec512_float.h @@ -5,6 +5,7 @@ #include #include +#include #if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER) #include #endif @@ -104,7 +105,7 @@ template <> class Vectorized { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0.0; } std::memcpy( @@ -135,7 +136,7 @@ template <> class Vectorized { Vectorized map(float (*const f)(float)) const { __at_align__ float tmp[size()]; store(tmp); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = f(tmp[i]); } return loadu(tmp); @@ -246,7 +247,7 @@ template <> class Vectorized { __at_align__ float tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igamma(tmp[i], tmp_x[i]); } return loadu(tmp); @@ -256,7 +257,7 @@ template <> class Vectorized { __at_align__ float tmp_x[size()]; store(tmp); x.store(tmp_x); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { tmp[i] = calc_igammac(tmp[i], tmp_x[i]); } return loadu(tmp); diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_int.h b/aten/src/ATen/cpu/vec/vec512/vec512_int.h index f28c14ed3f73f4..92cbe6b6abd6d5 100644 --- a/aten/src/ATen/cpu/vec/vec512/vec512_int.h +++ b/aten/src/ATen/cpu/vec/vec512/vec512_int.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace at { namespace vec { @@ -100,7 +101,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. 
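// Illustrative sketch, not code from the patch: the scalar-fallback pattern rewritten
// in the map/igamma/pow hunks above spills both operands to stack arrays, applies the
// scalar function per lane, and reloads. With c10::irange the index is const and its
// type is deduced from the loop bound. kLanes and apply_lanewise are hypothetical
// names; the real code operates on Vectorized<float>, not a plain float buffer.
#include <c10/util/irange.h>
#include <cmath>

constexpr int kLanes = 16;     // stand-in for Vectorized<float>::size()

inline void apply_lanewise(float (&a)[kLanes], const float (&b)[kLanes]) {
  // Before: for (int64_t i = 0; i < size(); i++) { ... }
  for (const auto i : c10::irange(kLanes)) {
    a[i] = std::pow(a[i], b[i]);
  }
}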
- for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int64_t)); @@ -253,7 +254,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int32_t)); @@ -485,7 +486,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (auto i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int16_t)); @@ -761,7 +762,7 @@ class Vectorized : public Vectorizedi { // Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502 // for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two // instructions while a loop would be compiled to one instruction. - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { tmp_values[i] = 0; } std::memcpy(tmp_values, ptr, count * sizeof(int8_t)); diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_qint.h b/aten/src/ATen/cpu/vec/vec512/vec512_qint.h index 3a1eda8874f1af..3ed7899bb75b60 100644 --- a/aten/src/ATen/cpu/vec/vec512/vec512_qint.h +++ b/aten/src/ATen/cpu/vec/vec512/vec512_qint.h @@ -6,6 +6,8 @@ #include #include #include + +#include #include #include #include @@ -744,7 +746,7 @@ struct VectorizedQuantizedConverter { std::array vals; VectorizedQuantizedConverter(T val) { - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { vals[i] = val.val_; } } @@ -762,9 +764,9 @@ struct VectorizedQuantizedConverter { Vectorized zero_point, Vectorized scale_zp_premul) const { float_vec_return_type rv; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { float tmp_vals[16]; - for (int j = 0; j < 16; ++j) { + for (const auto j : c10::irange(16)) { tmp_vals[j] = at::native::dequantize_val( scale[j], zero_point[j], T(vals[16 * i + j])); } @@ -829,7 +831,7 @@ struct Vectorized : public VectorizedQuantizedConverter< std::array qvals; std::array float_vals; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { rhs[i].store(&float_vals[i * 16], 16); } @@ -845,7 +847,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized maximum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::max(vals[i], b.vals[i]); } return retval; @@ -853,7 +855,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized minimum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min(vals[i], b.vals[i]); } return retval; @@ -868,7 +870,7 @@ struct Vectorized : public 
VectorizedQuantizedConverter< Vectorized zero_point, Vectorized q_six) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min( std::max(vals[i], zero_point.vals[i]), q_six.vals[i]); } @@ -877,7 +879,7 @@ struct Vectorized : public VectorizedQuantizedConverter< int_vec_return_type widening_subtract(Vectorized b) const { int_vec_return_type retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval[0].vals[i] = vals[i] - b.vals[i]; } return retval; @@ -888,7 +890,7 @@ struct Vectorized : public VectorizedQuantizedConverter< float multiplier, int32_t zero_point) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = nearbyint(static_cast(inp[0].vals[i]) * multiplier) + zero_point; @@ -961,7 +963,7 @@ struct Vectorized : public VectorizedQuantizedConverter< std::array qvals; std::array float_vals; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { rhs[i].store(&float_vals[i * 16], 16); } @@ -977,7 +979,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized maximum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::max(vals[i], b.vals[i]); } return retval; @@ -985,7 +987,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized minimum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min(vals[i], b.vals[i]); } return retval; @@ -999,7 +1001,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized zero_point, Vectorized q_six) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min( std::max(vals[i], zero_point.vals[i]), q_six.vals[i]); } @@ -1009,8 +1011,8 @@ struct Vectorized : public VectorizedQuantizedConverter< int_vec_return_type widening_subtract(Vectorized b) const { int_vec_return_type retval; constexpr int elem_per_int_vec = size() / int_num_vecs(); - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { retval[i].vals[j] = static_cast(vals[i * elem_per_int_vec + j]) - static_cast(b.vals[i * elem_per_int_vec + j]); @@ -1026,8 +1028,8 @@ struct Vectorized : public VectorizedQuantizedConverter< constexpr auto min_val = std::numeric_limits::min(); constexpr auto max_val = std::numeric_limits::max(); Vectorized retval; - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { int32_t rounded = nearbyint(static_cast(inp[i].vals[j]) * multiplier) + zero_point; @@ -1081,7 +1083,7 @@ struct Vectorized : public VectorizedQuantizedConverter< std::array qvals; std::array float_vals; - for (int i = 0; i < float_num_vecs(); ++i) { + for (const auto i : c10::irange(float_num_vecs())) { rhs[i].store(&float_vals[i * 16], 16); } @@ -1097,7 +1099,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized maximum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] 
= std::max(vals[i], b.vals[i]); } return retval; @@ -1105,7 +1107,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized minimum(Vectorized b) const { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min(vals[i], b.vals[i]); } return retval; @@ -1120,7 +1122,7 @@ struct Vectorized : public VectorizedQuantizedConverter< Vectorized zero_point, Vectorized q_six) { Vectorized retval; - for (size_t i = 0; i < size(); ++i) { + for (const auto i : c10::irange(size())) { retval.vals[i] = std::min( std::max(vals[i], zero_point.vals[i]), q_six.vals[i]); } @@ -1130,8 +1132,8 @@ struct Vectorized : public VectorizedQuantizedConverter< int_vec_return_type widening_subtract(Vectorized b) const { int_vec_return_type retval; constexpr int elem_per_int_vec = size() / int_num_vecs(); - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { retval[i].vals[j] = static_cast(vals[i * elem_per_int_vec + j]) - static_cast(b.vals[i * elem_per_int_vec + j]); @@ -1147,8 +1149,8 @@ struct Vectorized : public VectorizedQuantizedConverter< constexpr auto min_val = std::numeric_limits::min(); constexpr auto max_val = std::numeric_limits::max(); Vectorized retval; - for (size_t i = 0; i < int_num_vecs(); ++i) { - for (size_t j = 0; j < elem_per_int_vec; ++j) { + for (const auto i : c10::irange(int_num_vecs())) { + for (const auto j : c10::irange(elem_per_int_vec)) { int32_t rounded = nearbyint(static_cast(inp[i].vals[j]) * multiplier) + zero_point; diff --git a/aten/src/ATen/cpu/vec/vec_base.h b/aten/src/ATen/cpu/vec/vec_base.h index da5f318bf530cc..d6c921eddde262 100644 --- a/aten/src/ATen/cpu/vec/vec_base.h +++ b/aten/src/ATen/cpu/vec/vec_base.h @@ -31,6 +31,7 @@ #include #include #include +#include // These macros helped us unify vec_base.h #ifdef CPU_CAPABILITY_AVX512 @@ -150,7 +151,7 @@ struct Vectorized { static Vectorized blend(const Vectorized& a, const Vectorized& b) { int64_t mask = mask_; Vectorized vector; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { if (mask & 0x01) { vector[i] = b[i]; } else { @@ -165,7 +166,7 @@ struct Vectorized { Vectorized vector; int_same_size_t buffer[size()]; mask.store(buffer); - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { if (buffer[i] & 0x01) { vector[i] = b[i]; @@ -178,14 +179,14 @@ struct Vectorized { template // step sometimes requires a higher precision type (e.g., T=int, step_t=double) static Vectorized arange(T base = static_cast(0), step_t step = static_cast(1)) { Vectorized vector; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { vector.values[i] = base + i * step; } return vector; } static Vectorized set(const Vectorized& a, const Vectorized& b, int64_t count = size()) { Vectorized vector; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { if (i < count) { vector[i] = b[i]; } else { @@ -340,7 +341,7 @@ struct Vectorized { } Vectorized atan2(const Vectorized &exp) const { Vectorized ret; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { ret[i] = std::atan2(values[i], exp[i]); } return ret; @@ -380,7 +381,7 @@ struct Vectorized { // U is for SFINAE purposes only. Make sure it is not changed. 
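// Illustrative sketch, not code from the patch: the fallback arange() rewrite in the
// vec_base.h hunks above shows that the loop index remains usable in arithmetic after
// the c10::irange conversion. kLanes and arange_fill are hypothetical names; the real
// code fills Vectorized<T>::values rather than a plain array.
#include <c10/util/irange.h>

constexpr int kLanes = 8;

inline void arange_fill(float (&values)[kLanes], float base, float step) {
  for (const auto i : c10::irange(kLanes)) {
    // i is deduced as int (the type of kLanes) and participates in arithmetic as before.
    values[i] = base + i * step;
  }
}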
static_assert(std::is_same::value, "U must be T"); Vectorized ret; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { ret[i] = std::fmod(values[i], q[i]); } return ret; @@ -423,7 +424,7 @@ struct Vectorized { } Vectorized hypot(const Vectorized &b) const { Vectorized ret; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { ret[i] = std::hypot(values[i], b[i]); } return ret; @@ -436,14 +437,14 @@ struct Vectorized { } Vectorized igamma(const Vectorized &x) const { Vectorized ret; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { ret[i] = calc_igamma(values[i], x[i]); } return ret; } Vectorized igammac(const Vectorized &x) const { Vectorized ret; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { ret[i] = calc_igammac(values[i], x[i]); } return ret; @@ -456,7 +457,7 @@ struct Vectorized { } Vectorized nextafter(const Vectorized &b) const { Vectorized ret; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { ret[i] = std::nextafter(values[i], b[i]); } return ret; @@ -494,7 +495,7 @@ struct Vectorized { } Vectorized pow(const Vectorized &exp) const { Vectorized ret; - for (int64_t i = 0; i < size(); i++) { + for (const auto i : c10::irange(size())) { ret[i] = std::pow(values[i], exp[i]); } return ret; @@ -808,7 +809,7 @@ inline gather(T const* base_addr, const Vectorized>& vindex) int_same_size_t index_arr[size]; vindex.store(static_cast(index_arr)); T buffer[size]; - for (int64_t i = 0; i < size; i++) { + for (const auto i : c10::irange(size)) { buffer[i] = base_addr[index_arr[i] * scale / sizeof(T)]; } return Vectorized::loadu(static_cast(buffer)); @@ -826,7 +827,7 @@ inline mask_gather(const Vectorized& src, T const* base_addr, mask.store(static_cast(mask_arr)); vindex.store(static_cast(index_arr)); T buffer[size]; - for (int64_t i = 0; i < size; i++) { + for (const auto i : c10::irange(size)) { if (mask_arr[i] & 0x01) { // check highest bit buffer[i] = base_addr[index_arr[i] * scale / sizeof(T)]; } else { @@ -872,7 +873,7 @@ inline Vectorized> convert_to_int_of_same_size(const Vectoriz T src_arr[size]; src.store(static_cast(src_arr)); int_same_size_t buffer[size]; - for (int64_t i = 0; i < size; i++) { + for (const auto i : c10::irange(size)) { buffer[i] = static_cast>(src_arr[i]); } return Vectorized>::loadu(static_cast(buffer)); @@ -899,7 +900,7 @@ deinterleave2(const Vectorized& a, const Vectorized& b) { T buffer2[size]; a.store(static_cast(a_arr)); b.store(static_cast(b_arr)); - for (int64_t i = 0; i < half_size; i++) { + for (const auto i : c10::irange(half_size)) { buffer1[i] = a_arr[i * 2]; buffer1[half_size + i] = b_arr[i * 2]; buffer2[i] = a_arr[i * 2 + 1]; @@ -931,7 +932,7 @@ interleave2(const Vectorized& a, const Vectorized& b) { T buffer2[size]; a.store(static_cast(a_arr)); b.store(static_cast(b_arr)); - for (int64_t i = 0; i < half_size; i++) { + for (const auto i : c10::irange(half_size)) { buffer1[i * 2] = a_arr[i]; buffer1[i * 2 + 1] = b_arr[i]; buffer2[i * 2] = a_arr[half_size + i]; @@ -946,7 +947,8 @@ inline void convert(const src_T *src, dst_T *dst, int64_t n) { #ifndef _MSC_VER # pragma unroll #endif - for (int64_t i = 0; i < n; i++) { + for (const auto i : c10::irange(n)) { + (void)i; //Suppress unused variable warning *dst = c10::static_cast_with_inter_type::apply(*src); src++; dst++; diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp index ab542cb3bdab04..d6a6205ab1c249 
100644 --- a/aten/src/ATen/cuda/CUDABlas.cpp +++ b/aten/src/ATen/cuda/CUDABlas.cpp @@ -4,6 +4,7 @@ #include #include +#include #define CUDABLAS_POSINT_CHECK(FD, X) \ TORCH_CHECK( \ @@ -295,7 +296,7 @@ void bgemm(CUDABLAS_BGEMM_ARGTYPES(at::Half)) { c, CUDA_R_16F, ldc, stridec, num_batches, CUDA_R_32F, CUBLAS_GEMM_DEFAULT_TENSOR_OP)); } else { - for (int64_t i = 0; i < num_batches; ++i) { + for (const auto i : c10::irange(num_batches)) { at::cuda::blas::gemm( transa, transb, m, n, k, diff --git a/aten/src/ATen/cudnn/Descriptors.cpp b/aten/src/ATen/cudnn/Descriptors.cpp index f52280e9d2401d..6a617edaf2777f 100644 --- a/aten/src/ATen/cudnn/Descriptors.cpp +++ b/aten/src/ATen/cudnn/Descriptors.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -47,11 +48,11 @@ void TensorDescriptor::set(cudnnDataType_t datatype, IntArrayRef t_sizes, IntArr #undef STR int size[CUDNN_DIM_MAX]; int stride[CUDNN_DIM_MAX]; - for (size_t i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { size[i] = static_cast(t_sizes[i]); stride[i] = static_cast(t_strides[i]); } - for (size_t i = dim; i < pad; ++i) { + for (const auto i : c10::irange(dim, pad)) { size[i] = 1; stride[i] = 1; } @@ -126,10 +127,10 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo "cuDNN filters (a.k.a. weights) must be contiguous in desired memory_format"); int size[CUDNN_DIM_MAX]; - for (int i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { size[i] = (int) t.size(i); } - for (int i = dim; i < pad; ++i) { + for (const auto i : c10::irange(dim, pad)) { size[i] = (int) 1; } dim = std::max(dim, pad); diff --git a/aten/src/ATen/miopen/Descriptors.cpp b/aten/src/ATen/miopen/Descriptors.cpp index 6911b1ad216bd3..ead45a52dad1a1 100644 --- a/aten/src/ATen/miopen/Descriptors.cpp +++ b/aten/src/ATen/miopen/Descriptors.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -39,11 +40,11 @@ void TensorDescriptor::set(miopenDataType_t datatype, IntArrayRef t_sizes, IntAr #undef STR int size[MIOPEN_DIM_MAX]; int stride[MIOPEN_DIM_MAX]; - for (size_t i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { size[i] = static_cast(t_sizes[i]); stride[i] = static_cast(t_strides[i]); } - for (size_t i = dim; i < pad; ++i) { + for (const auto i : c10::irange(dim, pad)) { size[i] = 1; stride[i] = 1; } @@ -103,10 +104,10 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo int size[MIOPEN_DIM_MAX]; int stride[MIOPEN_DIM_MAX]; - for (int i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { size[i] = (int) t.size(i); } - for (int i = dim; i < pad; ++i) { + for (const auto i : c10::irange(dim, pad)) { size[i] = (int) 1; } diff --git a/aten/src/ATen/native/Activation.cpp b/aten/src/ATen/native/Activation.cpp index 37700bb5867939..9db577792942ee 100644 --- a/aten/src/ATen/native/Activation.cpp +++ b/aten/src/ATen/native/Activation.cpp @@ -500,7 +500,7 @@ inline void _rrelu_with_noise_train( scalar_t* noise_data = noise.data_ptr(); auto gen = at::get_generator_or_default(generator, detail::getDefaultCPUGenerator()); std::lock_guard lock(gen->mutex_); - for (int64_t i = 0; i < input.numel(); i++) { + for (const auto i : c10::irange(input.numel())) { if (input_data[i] <= 0) { at::uniform_real_distribution uniform(lower, upper); const scalar_t r = (scalar_t)uniform(gen); @@ -610,7 +610,7 @@ void inline prelu_cpu_kernel_share_weights( auto weight_val = weight.data_ptr()[0]; at::parallel_for(0, input_numel, 1000, [&](int64_t start, int64_t end) { - 
for (auto i = start; i < end; i++) { + for (const auto i : c10::irange(start, end)) { scalar_t input_data_val = input_data[i]; // to allow for compiler optimization, here splitting into two lines: scalar_t r = (input_data_val > 0) ? scalar_t(1) : weight_val; @@ -725,7 +725,7 @@ void inline prelu_cpu_backward_kernel_share_weights( scalar_t sum = at::parallel_reduce(0, input_numel, 1000, scalar_t(0), [&](int64_t start, int64_t end, scalar_t ident) -> scalar_t { scalar_t partial_sum = ident; - for (auto i = start; i < end; i++) { + for (const auto i : c10::irange(start, end)) { scalar_t input_data_val = input_data[i]; scalar_t grad_out_data_val = grad_out_data[i]; // to allow for compiler optimization, here splitting into two lines: @@ -839,7 +839,9 @@ std::tuple prelu_backward_cpu(const Tensor& grad_out_, const Ten std::vector reduce_dims; reduce_dims.push_back(0); if (dims > 2) { - for(int64_t i = 2; i < dims; i++) reduce_dims.push_back(i); + for (const auto i : c10::irange(2, dims)) { + reduce_dims.push_back(i); + } } weight_grad = weight_grad_collector.sum(reduce_dims); } diff --git a/aten/src/ATen/native/AdaptiveAveragePooling.cpp b/aten/src/ATen/native/AdaptiveAveragePooling.cpp index 2324b958b34f51..b0be043e30692b 100644 --- a/aten/src/ATen/native/AdaptiveAveragePooling.cpp +++ b/aten/src/ATen/native/AdaptiveAveragePooling.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace at { @@ -16,7 +17,7 @@ namespace { { TORCH_CHECK(output_size.size() == 2, "adaptive_avg_pool2d: output_size must be 2"); int64_t ndim = input.ndimension(); - for (int64_t i = 1; i < ndim; i++) { + for (const auto i : c10::irange(1, ndim)) { TORCH_CHECK(input.size(i) > 0, "adaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, " "but input has sizes ", input.sizes(), " with dimension ", i, " being " @@ -52,7 +53,7 @@ namespace { const Tensor& input) { int64_t ndim = grad_output.ndimension(); - for (int64_t i = 1; i < ndim; i++) { + for (const auto i : c10::irange(1, ndim)) { TORCH_CHECK(grad_output.size(i) > 0, "adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero size for non-batch dimensions, " "but grad_output has sizes ", grad_output.sizes(), " with dimension ", i, " being " diff --git a/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp b/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp index f7565b554d896e..41515259c33e1a 100644 --- a/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp +++ b/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace at { namespace native { @@ -33,19 +34,19 @@ static void adaptive_avg_pool3d_out_frame( int64_t istrideH, int64_t istrideW) { at::parallel_for(0, sizeD, 1, [&](int64_t start, int64_t end) { - for (int64_t d = start; d < end; d++) { + for (const auto d : c10::irange(start, end)) { /* loop over output */ - for (int64_t ot = 0; ot < osizeT; ot++) { + for (const auto ot : c10::irange(osizeT)) { int istartT = start_index(ot, osizeT, isizeT); int iendT = end_index(ot, osizeT, isizeT); int kT = iendT - istartT; - for (int64_t oh = 0; oh < osizeH; oh++) { + for (const auto oh : c10::irange(osizeH)) { int istartH = start_index(oh, osizeH, isizeH); int iendH = end_index(oh, osizeH, isizeH); int kH = iendH - istartH; - for (int64_t ow = 0; ow < osizeW; ow++) { + for (const auto ow : c10::irange(osizeW)) { int istartW = start_index(ow, osizeW, isizeW); int iendW = end_index(ow, osizeW, isizeW); int kW = iendW - istartW; @@ -58,9 +59,9 @@ static void 
adaptive_avg_pool3d_out_frame( /* compute local average: */ scalar_t sum = 0; - for (int it = 0; it < kT; it++) { - for (int ih = 0; ih < kH; ih++) { - for (int iw = 0; iw < kW; iw++) { + for (const auto it : c10::irange(kT)) { + for (const auto ih : c10::irange(kH)) { + for (const auto iw : c10::irange(kW)) { scalar_t val = *(ip + it * istrideT + ih * istrideH + iw * istrideW); sum += val; @@ -83,7 +84,7 @@ void adaptive_avg_pool3d_out_cpu_template( IntArrayRef output_size) { TORCH_CHECK(output_size.size() == 3, "adaptive_avg_pool3d: output_size must be 3"); - for (int64_t i = 1; i < input.ndimension(); i++) { + for (const auto i : c10::irange(1, input.ndimension())) { TORCH_CHECK( input.size(i) > 0, "adaptive_avg_pool3d(): Expected input to have non-zero size for non-batch dimensions, " @@ -148,7 +149,7 @@ void adaptive_avg_pool3d_out_cpu_template( auto input_data = input.data_ptr(); auto output_data = output.data_ptr(); at::parallel_for(0, n, 1, [&](int64_t start, int64_t end) { - for (int64_t b = start; b < end; ++b) { + for (const auto b : c10::irange(start, end)) { adaptive_avg_pool3d_out_frame( input_data + b * input.stride(0), output_data + b * sizeD * osizeT * osizeH * osizeW, @@ -181,22 +182,22 @@ static void adaptive_avg_pool3d_backward_out_frame( int64_t osizeH, int64_t osizeW) { at::parallel_for(0, sizeD, 1, [&](int64_t start, int64_t end) { - for (int64_t d = start; d < end; d++) { + for (const auto d : c10::irange(start, end)) { scalar_t* gradInput_p_d = gradInput_p + d * isizeT * isizeW * isizeH; scalar_t* gradOutput_p_d = gradOutput_p + d * osizeT * osizeW * osizeH; /* calculate average */ - for (int64_t ot = 0; ot < osizeT; ot++) { + for (const auto ot : c10::irange(osizeT)) { int istartT = start_index(ot, osizeT, isizeT); int iendT = end_index(ot, osizeT, isizeT); int kT = iendT - istartT; - for (int64_t oh = 0; oh < osizeH; oh++) { + for (const auto oh : c10::irange(osizeH)) { int istartH = start_index(oh, osizeH, isizeH); int iendH = end_index(oh, osizeH, isizeH); int kH = iendH - istartH; - for (int64_t ow = 0; ow < osizeW; ow++) { + for (const auto ow : c10::irange(osizeW)) { int istartW = start_index(ow, osizeW, isizeW); int iendW = end_index(ow, osizeW, isizeW); int kW = iendW - istartW; @@ -205,9 +206,9 @@ static void adaptive_avg_pool3d_backward_out_frame( gradOutput_p_d[ot * osizeH * osizeW + oh * osizeW + ow] / kT / kH / kW; - for (int it = istartT; it < iendT; it++) { - for (int ih = istartH; ih < iendH; ih++) { - for (int iw = istartW; iw < iendW; iw++) { + for (const auto it : c10::irange(istartT, iendT)) { + for (const auto ih : c10::irange(istartH, iendH)) { + for (const auto iw : c10::irange(istartW, iendW)) { /* update gradient */ gradInput_p_d[it * isizeH * isizeW + ih * isizeW + iw] += grad_delta; @@ -265,7 +266,7 @@ Tensor& adaptive_avg_pool3d_backward_out_cpu_template( scalar_t* gradInput_data = gradInput.data_ptr(); scalar_t* gradOutput_data = gradOutput.data_ptr(); at::parallel_for(0, n, 1, [&](int64_t start, int64_t end) { - for (int64_t b = start; b < end; b++) { + for (const auto b : c10::irange(start, end)) { adaptive_avg_pool3d_backward_out_frame( gradInput_data + b * sizeD * isizeT * isizeH * isizeW, gradOutput_data + b * sizeD * osizeT * osizeH * osizeW, diff --git a/aten/src/ATen/native/AdaptiveMaxPooling2d.cpp b/aten/src/ATen/native/AdaptiveMaxPooling2d.cpp index bc9bc60b9da957..6634d74a2e3f84 100644 --- a/aten/src/ATen/native/AdaptiveMaxPooling2d.cpp +++ b/aten/src/ATen/native/AdaptiveMaxPooling2d.cpp @@ -1,6 +1,7 @@ #include #include 
#include +#include namespace at { @@ -10,7 +11,7 @@ TORCH_META_FUNC(adaptive_max_pool2d) (const Tensor& input, IntArrayRef output_si TORCH_CHECK(ndim == 3 || ndim == 4, "adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: ", input.sizes()); - for (int64_t i = 1; i < ndim; i++) { + for (const auto i : c10::irange(1, ndim)) { TORCH_CHECK(input.size(i) > 0, "adaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, " "but input has sizes ", input.sizes(), " with dimension ", i, @@ -51,7 +52,7 @@ TORCH_META_FUNC(adaptive_max_pool2d_backward) int64_t ndim = grad_output.ndimension(); TORCH_CHECK(ndim == 3 || ndim == 4, "adaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: ", grad_output.sizes()); - for (int64_t i = 1; i < ndim; i++) { + for (const auto i : c10::irange(1, ndim)) { TORCH_CHECK(grad_output.size(i) > 0, "adaptive_max_pooling2d_backward(): Expected grad_output to have non-zero size for non-batch dimensions, " "but grad_output has sizes ", grad_output.sizes(), " with dimension ", i, diff --git a/aten/src/ATen/native/AdaptiveMaxPooling3d.cpp b/aten/src/ATen/native/AdaptiveMaxPooling3d.cpp index 257670fc7c9d09..3bf1186b3bce82 100644 --- a/aten/src/ATen/native/AdaptiveMaxPooling3d.cpp +++ b/aten/src/ATen/native/AdaptiveMaxPooling3d.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -11,7 +12,7 @@ TORCH_META_FUNC(adaptive_max_pool3d) (const Tensor& input, IntArrayRef output_si TORCH_CHECK( ndim == 4 || ndim == 5, "adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: ", input.sizes()); - for (int64_t i = 1; i < ndim; i++) { + for (const auto i : c10::irange(1, ndim)) { TORCH_CHECK( input.size(i) > 0, "adaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, " @@ -96,8 +97,7 @@ static void adaptive_max_pool3d_single_out_frame( int64_t istrideW) { at::parallel_for(0, sizeD, 0, [&](int64_t start, int64_t end) { - for (auto d = start; d < end; d++) - { + for (const auto d : c10::irange(start, end)) { /* loop over output */ int64_t ot, oh, ow; for(ot = 0; ot < osizeT; ot++) @@ -176,8 +176,7 @@ static void adaptive_max_pool3d_out_frame( int64_t istrideW) { at::parallel_for(0, sizeB, 0, [&](int64_t start, int64_t end) { - for (auto b = start; b < end; b++) - { + for (const auto b : c10::irange(start, end)) { adaptive_max_pool3d_single_out_frame(input_data+b*istrideB, output_data+b*sizeD*osizeT*osizeH*osizeW, indices_data+b*sizeD*osizeT*osizeH*osizeW, sizeD, @@ -203,8 +202,7 @@ static void adaptive_max_pool3d_backward_single_out_frame( int64_t osizeW) { at::parallel_for(0, sizeD, 0, [&](int64_t start, int64_t end) { - for (auto d = start; d < end; d++) - { + for (const auto d : c10::irange(start, end)) { scalar_t *gradInput_p_d = gradInput_p + d*isizeT*isizeH*isizeW; scalar_t *gradOutput_p_d = gradOutput_p + d*osizeT*osizeH*osizeW; int64_t *ind_p_d = ind_p + d*osizeT*osizeH*osizeW; @@ -244,8 +242,7 @@ static void adaptive_max_pool3d_backward_out_frame( int64_t osizeW) { at::parallel_for(0, sizeB, 0, [&](int64_t start, int64_t end) { - for (auto b = start; b < end; b++) - { + for (const auto b : c10::irange(start, end)) { adaptive_max_pool3d_backward_single_out_frame(gradInput_data+b*sizeD*isizeT*isizeH*isizeW, gradOutput_data+b*sizeD*osizeT*osizeH*osizeW, indices_data+b*sizeD*osizeT*osizeH*osizeW, sizeD, diff --git a/aten/src/ATen/native/AveragePool3d.cpp b/aten/src/ATen/native/AveragePool3d.cpp index 658936f329fcd6..7d3febede6f9a5 100644 --- a/aten/src/ATen/native/AveragePool3d.cpp +++ 
b/aten/src/ATen/native/AveragePool3d.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -169,8 +170,7 @@ static void avg_pool3d_out_frame( c10::optional divisor_override) { at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) { - for (auto k = start; k < end; k++) - { + for (const auto k : c10::irange(start, end)) { // NOLINTNEXTLINE(cppcoreguidelines-init-variables) int64_t i, j, ti; @@ -315,7 +315,7 @@ TORCH_IMPL_FUNC(avg_pool3d_out_cpu) ( scalar_t *output_data = output.data_ptr(); at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) { - for (auto p = start; p < end; p++) { + for (const auto p : c10::irange(start, end)) { avg_pool3d_out_frame( input_data + p * istride, output_data + p * ostride, nslices, itime, iwidth, iheight, @@ -358,8 +358,7 @@ static void avg_pool3d_backward_out_frame( c10::optional divisor_override) { at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) { - for (auto k = start; k < end; k++) - { + for (const auto k : c10::irange(start, end)) { // NOLINTNEXTLINE(cppcoreguidelines-init-variables) int64_t i, j, ti; @@ -500,8 +499,7 @@ TORCH_IMPL_FUNC(avg_pool3d_backward_out_cpu) ( scalar_t *gradOutput_data = gradOutput.data_ptr(); at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) { - for (auto p = start; p < end; p++) - { + for (const auto p : c10::irange(start, end)) { avg_pool3d_backward_out_frame( gradInput_data + p * istride, gradOutput_data + p * ostride, nslices, itime, iwidth, iheight, diff --git a/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp b/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp index 16713ab03786eb..a910cf1fd46fc9 100644 --- a/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp +++ b/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp @@ -63,7 +63,7 @@ void apply_reflect_conj_tri_single(scalar_t* self, int64_t n, int64_t stride, bo std::function loop = [](int64_t, int64_t){}; if (upper) { loop = [&](int64_t start, int64_t end) { - for (int64_t i = start; i < end; i++) { + for (const auto i : c10::irange(start, end)) { for (int64_t j = i + 1; j < n; j++) { self[i * stride + j] = conj_impl(self[j * stride + i]); } @@ -71,8 +71,8 @@ void apply_reflect_conj_tri_single(scalar_t* self, int64_t n, int64_t stride, bo }; } else { loop = [&](int64_t start, int64_t end) { - for (int64_t i = start; i < end; i++) { - for (int64_t j = 0; j < i; j++) { + for (const auto i : c10::irange(start, end)) { + for (const auto j : c10::irange(i)) { self[i * stride + j] = conj_impl(self[j * stride + i]); } } @@ -106,7 +106,7 @@ void apply_cholesky_inverse(Tensor& input, Tensor& infos, bool upper) { auto n = input.size(-2); auto lda = std::max(1, n); - for (int64_t i = 0; i < batch_size; i++) { + for (const auto i : c10::irange(batch_size)) { scalar_t* input_working_ptr = &input_data[i * input_matrix_stride]; int* info_working_ptr = &infos_data[i]; lapackCholeskyInverse(uplo, n, input_working_ptr, lda, info_working_ptr); @@ -501,7 +501,7 @@ inline void apply_orgqr(Tensor& self, const Tensor& tau) { lwork = std::max(1, real_impl(wkopt)); Tensor work = at::empty({lwork}, self.options()); - for (int64_t i = 0; i < batch_size; i++) { + for (const auto i : c10::irange(batch_size)) { scalar_t* self_working_ptr = &self_data[i * self_matrix_stride]; scalar_t* tau_working_ptr = &tau_data[i * tau_stride]; diff --git a/aten/src/ATen/native/BlasKernel.cpp b/aten/src/ATen/native/BlasKernel.cpp index b52a0f20a35c9c..9cf1f995f3ca9a 100644 --- a/aten/src/ATen/native/BlasKernel.cpp +++ b/aten/src/ATen/native/BlasKernel.cpp @@ 
-2,6 +2,7 @@ #include #include #include +#include #if AT_BUILD_WITH_BLAS() extern "C" double ddot_(int *n, double *x, int *incx, double *y, int *incy); @@ -151,7 +152,7 @@ inline void scal(int64_t n, scalar_t a, scalar_t *x, int64_t incx) blas_impl::scal_fast_path(&i_n, &a, x, &i_incx); return; } - for (int64_t i = 0; i < n; i++) { + for (const auto i : c10::irange(n)) { if (a == scalar_t(0)) { x[i * incx] = 0; } else { @@ -176,11 +177,10 @@ void gemv(char trans, int64_t m, int64_t n, scalar_t alpha, scalar_t *a, int64_t } if ((trans == 'T') || (trans == 't')) { - for (int64_t i = 0; i < n; i++) - { + for (const auto i : c10::irange(n)) { scalar_t sum = 0; scalar_t *row_ = a + lda * i; - for (int64_t j = 0; j < m; j++) { + for (const auto j : c10::irange(m)) { sum += x[j * incx] * row_[j]; } if (beta == scalar_t(0)) { @@ -192,10 +192,10 @@ void gemv(char trans, int64_t m, int64_t n, scalar_t alpha, scalar_t *a, int64_t } else { if (beta != scalar_t(1) && beta != scalar_t(0)) scal(m, beta, y, incy); - for (int64_t j = 0; j < n; j++) { + for (const auto j : c10::irange(n)) { scalar_t *column_ = a + lda * j; scalar_t z = alpha * x[j * incx]; - for (int64_t i = 0; i < m; i++) { + for (const auto i : c10::irange(m)) { //output values are ignored if beta is 0, and set to 0, nans and infs are not propagated if (j==0 && beta==scalar_t(0)) { y[i * incy] = scalar_t(0); diff --git a/aten/src/ATen/native/Bucketization.cpp b/aten/src/ATen/native/Bucketization.cpp index e4ad35f59fc698..63b88510a6f4e9 100644 --- a/aten/src/ATen/native/Bucketization.cpp +++ b/aten/src/ATen/native/Bucketization.cpp @@ -2,6 +2,7 @@ #include #include #include +#include /* Implement a TF like searchsorted and a bucketize function running on cpu * @@ -58,7 +59,7 @@ void searchsorted_cpu_contiguous(Tensor& result, const Tensor& input, const Tens bool is_1d_boundaries = boundaries.dim() == 1; at::parallel_for(0, numel_in, SEARCHSORTED_GRAIN_SIZE, [&](int64_t start, int64_t end) { - for (int64_t i = start; i < end; ++i) { + for (const auto i : c10::irange(start, end)) { // If boundaries tensor is 1d, we always search the entire boundary tensor int64_t start_bd = is_1d_boundaries ? 
0 : i / idim_in * idim_bd; const input_t *data_bd_start = &data_bd[start_bd]; diff --git a/aten/src/ATen/native/Col2Im.cpp b/aten/src/ATen/native/Col2Im.cpp index efc41bea0c207a..f1e08a887c841e 100644 --- a/aten/src/ATen/native/Col2Im.cpp +++ b/aten/src/ATen/native/Col2Im.cpp @@ -5,6 +5,7 @@ #include #include +#include // Note [im2col/col2im output padding] // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -150,7 +151,7 @@ static void col2im_out_cpu_template( stride_width + 1; - for (int64_t elt = 0; elt < batch_size; elt++) { + for (const auto elt : c10::irange(batch_size)) { input_n = input.select(0, elt); output_n = output.select(0, elt); diff --git a/aten/src/ATen/native/ComplexHelper.h b/aten/src/ATen/native/ComplexHelper.h index 1b9a94e9089068..e9efd4b7c88db2 100644 --- a/aten/src/ATen/native/ComplexHelper.h +++ b/aten/src/ATen/native/ComplexHelper.h @@ -24,7 +24,7 @@ inline Tensor view_tensor( inline DimVector computeStrideForViewAsReal(IntArrayRef oldstride) { DimVector res(oldstride.size() + 1); - for(size_t i = 0; i < oldstride.size(); i++) { + for (const auto i : c10::irange(oldstride.size())) { res[i] = oldstride[i] * 2; } res.back() = 1; diff --git a/aten/src/ATen/native/ConstantPadNd.cpp b/aten/src/ATen/native/ConstantPadNd.cpp index 71bbfde152895a..f7a2d76ed52280 100644 --- a/aten/src/ATen/native/ConstantPadNd.cpp +++ b/aten/src/ATen/native/ConstantPadNd.cpp @@ -47,7 +47,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value) new_shape.emplace_back(input_sizes[i]); } - for (size_t i = 0; i < (size_t)l_pad; i++) { + for (const auto i : c10::irange((size_t)l_pad)) { auto pad_idx = pad.size() - ((i + 1) * 2); auto new_dim = input_sizes[l_diff + i] + pad[pad_idx] + pad[pad_idx + 1]; TORCH_CHECK(new_dim > 0, "The input size ", input_sizes[l_diff + i], ", plus negative padding ", diff --git a/aten/src/ATen/native/ConvUtils.h b/aten/src/ATen/native/ConvUtils.h index 9f0abaf7e61cc3..7a1dcde7b8ce37 100644 --- a/aten/src/ATen/native/ConvUtils.h +++ b/aten/src/ATen/native/ConvUtils.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace at { namespace native { @@ -35,7 +36,7 @@ static inline std::vector conv_output_size( std::vector output_size(dim); output_size[0] = input_size[input_batch_size_dim]; output_size[1] = weight_size[weight_output_channels_dim]; - for (size_t d = 2; d < dim; ++d) { + for (const auto d : c10::irange(2, dim)) { auto dilation_ = has_dilation ? 
dilation[d - 2] : 1; auto kernel = dilation_ * (weight_size[d] - 1) + 1; output_size[d] = (input_size[d] + (2 * padding[d - 2]) - kernel) / stride[d - 2] + 1; @@ -53,7 +54,7 @@ static inline std::vector conv_input_size( std::vector input_size(dim); input_size[0] = output_size[output_batch_size_dim]; input_size[1] = weight_size[weight_input_channels_dim] * groups; - for (size_t d = 2; d < dim; ++d) { + for (const auto d : c10::irange(2, dim)) { int kernel = dilation[d - 2] * (weight_size[d] - 1) + 1; input_size[d] = (output_size[d] - 1) * stride[d - 2] - (2 * padding[d - 2]) + kernel + output_padding[d - 2]; @@ -69,7 +70,7 @@ static inline std::vector conv_weight_size( std::vector weight_size(dim); weight_size[0] = output_size[1]; weight_size[1] = input_size[1] / groups; - for (size_t d = 2; d < dim; ++d) { + for (const auto d : c10::irange(2, dim)) { int kernel = input_size[d] - (output_size[d] - 1) * stride[d - 2] + 2 * padding[d - 2] - output_padding[d - 2]; weight_size[d] = (kernel - 1) / dilation[d - 2] + 1; diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp index 78eb889f8cfa6e..e8baf42b8c9bb1 100644 --- a/aten/src/ATen/native/Convolution.cpp +++ b/aten/src/ATen/native/Convolution.cpp @@ -975,7 +975,7 @@ at::Tensor _convolution( } else { std::vector outputs(params.groups); input = input.contiguous(); - for (int g = 0; g < params.groups; ++g) { + for (const auto g : c10::irange(params.groups)) { auto input_g = subtensor(input, 1, params.groups, g); auto weight_g = subtensor(weight, 0, params.groups, g); auto bias_g = subtensor(bias, 0, params.groups, g); @@ -1212,7 +1212,7 @@ std::tuple _convolution_double_backward( const c10::option } } else { std::vector gWt_list(groups); - for (int g = 0; g < groups; ++g) { + for (const auto g : c10::irange(groups)) { auto ggIt_g = subvariable(ggIt, 0, groups, g); auto gOt_g = subvariable(gOt, 0, groups, g); if (gOt_g.is_cuda()) { @@ -1239,7 +1239,7 @@ std::tuple _convolution_double_backward( const c10::option // the ConvForward kernels don't support asymmetric padding. 
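// Illustrative sketch, not code from the patch: the two-argument form used in the conv
// hunks above, c10::irange(2, dim), walks only the spatial dimensions, exactly like
// `for (size_t d = 2; d < dim; ++d)`. The formula below is a simplified, dilation-free
// variant of the conv_output_size logic; spatial_output_size is a hypothetical name and
// the batch/channel entries (0 and 1) are left for the caller to fill.
#include <c10/util/irange.h>
#include <cstdint>
#include <vector>

inline std::vector<int64_t> spatial_output_size(
    const std::vector<int64_t>& input_size,   // N, C, d1, d2, ...
    const std::vector<int64_t>& kernel_size,  // spatial extents only
    const std::vector<int64_t>& stride,
    const std::vector<int64_t>& padding) {
  const auto dim = input_size.size();
  std::vector<int64_t> output_size(dim);
  for (const auto d : c10::irange(2, dim)) {
    output_size[d] =
        (input_size[d] + 2 * padding[d - 2] - kernel_size[d - 2]) / stride[d - 2] + 1;
  }
  return output_size;
}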
auto gW_size = gW.sizes(); auto w_size = weight.sizes(); - for (size_t i = 2; i < gW_size.size(); ++i) { + for (const auto i : c10::irange(2, gW_size.size())) { if (gW_size[i] > w_size[i]) { gW = gW.narrow(i, 0, w_size[i]); gW_size = gW.sizes(); @@ -1268,7 +1268,7 @@ std::tuple _convolution_double_backward( const c10::option // rather than narrowing the computed gI auto gI_size = gI.sizes(); auto i_size = input.sizes(); - for (size_t i = 2; i < gI_size.size(); ++i) { + for (const auto i : c10::irange(2, gI_size.size())) { if (gI_size[i] > i_size[i]) { gI = gI.narrow(i, 0, i_size[i]); gI_size = gI.sizes(); @@ -1289,7 +1289,7 @@ std::tuple _convolution_double_backward( const c10::option gi_conv_params.output_padding[1] = input_shape[0] - expected_input_shape; } } else { - for(size_t i = 0; i < kernel_size.size(); ++i) { + for (const auto i : c10::irange(kernel_size.size())) { // Check if whole input has been used or not auto expected_input_shape = (kernel_size[i] - 1) * gi_conv_params.dilation[i] - 2 * gi_conv_params.padding[i] diff --git a/aten/src/ATen/native/ConvolutionMM2d.cpp b/aten/src/ATen/native/ConvolutionMM2d.cpp index 67a045ad1a198d..f9f2bb88daf1c2 100644 --- a/aten/src/ATen/native/ConvolutionMM2d.cpp +++ b/aten/src/ATen/native/ConvolutionMM2d.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace at { namespace native { @@ -299,7 +300,7 @@ void slow_conv2d_backward_out_cpu_template( at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { auto fgrad_input = std::make_unique( c10::multiply_integers(finput.sizes().slice(1))); - for (int64_t t = start; t < end; t++) { + for (const auto t : c10::irange(start, end)) { auto grad_input_t = grad_input_a[t]; auto grad_output_t = grad_output_a[t]; slow_conv2d_backward_update_grad_input_frame( @@ -478,7 +479,7 @@ std::tuple slow_conv2d_forward_out_cpu( auto weight_2d_a = weight_2d.accessor(); at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { - for (int64_t t = start; t < end; t++) { + for (const auto t : c10::irange(start, end)) { auto input_t = input_a[t]; auto output_t = output_a[t]; auto finput_t = finput_a[t]; diff --git a/aten/src/ATen/native/ConvolutionMM3d.cpp b/aten/src/ATen/native/ConvolutionMM3d.cpp index cd8ba16903f0b6..88d4245f9d93c8 100644 --- a/aten/src/ATen/native/ConvolutionMM3d.cpp +++ b/aten/src/ATen/native/ConvolutionMM3d.cpp @@ -6,6 +6,7 @@ #include #include #include +#include constexpr int64_t CONV3D_GRAIN_SALT = 20; @@ -358,7 +359,7 @@ void slow_conv3d_backward_out_cpu_template( auto fgrad_input_a = fgrad_input.accessor(); auto weight_2d_a = weight2d.accessor(); - for (int64_t t = start; t < end; t++) { + for (const auto t : c10::irange(start, end)) { auto grad_input_t = grad_input_a[t]; auto grad_output_t = grad_output_a[t]; auto fgrad_input_t = fgrad_input_a[t]; @@ -462,7 +463,7 @@ static void slow_conv3d_backward_parameters_out_cpu_template( auto grad_weight_2d_a = grad_weight_2d.accessor(); auto grad_output_a = grad_output_contiguous.accessor(); auto finput_a = finput.accessor(); - for (int64_t t = 0; t < batch_size; t++) { + for (const auto t : c10::irange(batch_size)) { auto grad_output_t = grad_output_a[t]; auto finput_t = finput_a[t]; slow_conv3d_backward_weight_frame( @@ -564,7 +565,7 @@ std::tuple slow_conv3d_forward_out_cpu(const Tensor& at::parallel_for( 0, batch_size, CONV3D_GRAIN_SALT, [&](int64_t start, int64_t end) { - for (int64_t t = start; t < end; t++) { + for (const auto t : c10::irange(start, end)) { auto input_t = input_a[t]; auto output_t = output_a[t]; 
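// Illustrative standalone sketch, not code from the patch: the chunking pattern rewritten
// throughout the ConvolutionMM and pooling hunks. at::parallel_for hands each worker a
// [start, end) range, and the per-element loop over that range becomes
// c10::irange(start, end). process_batch is a hypothetical name for this sketch.
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
#include <cstdint>
#include <vector>

inline void process_batch(std::vector<float>& items) {
  at::parallel_for(0, static_cast<int64_t>(items.size()), /*grain_size=*/0,
                   [&](int64_t start, int64_t end) {
    // Before: for (auto t = start; t < end; t++) { ... }
    for (const auto t : c10::irange(start, end)) {
      items[t] *= 2.0f;  // stand-in for the real per-sample work
    }
  });
}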
auto finput_t = finput_a[t]; diff --git a/aten/src/ATen/native/ConvolutionTBC.cpp b/aten/src/ATen/native/ConvolutionTBC.cpp index 2bd0f5ae4b9e3b..c90577822218e9 100644 --- a/aten/src/ATen/native/ConvolutionTBC.cpp +++ b/aten/src/ATen/native/ConvolutionTBC.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace at { @@ -39,7 +40,7 @@ Tensor conv_tbc(const Tensor& self, const Tensor& weight, const Tensor& bias, in weight_size[2], }, self.options()); output.copy_(bias.expand(output.sizes())); - for (int k = 0; k < kw; k++) { + for (const auto k : c10::irange(kw)) { int iShift = std::max(0, static_cast(k - real_pad)); int oShift = std::max(0, static_cast(real_pad - k)); // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) diff --git a/aten/src/ATen/native/Copy.cpp b/aten/src/ATen/native/Copy.cpp index e2c2a1150c5729..c28ca2b66ef8f7 100644 --- a/aten/src/ATen/native/Copy.cpp +++ b/aten/src/ATen/native/Copy.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #ifdef USE_FBGEMM @@ -65,16 +66,16 @@ void copy_same_type_transpose_(Tensor& self, const Tensor& src) { int nc = std::min(NC - C, BLOCK_SZ); // 1. copy columns from src to buf - for (int c = 0; c < nc; c++) { + for (const auto c : c10::irange(nc)) { memcpy(bp + c * BLOCK_SZ, spo + c * NR, nr * sizeof(scalar_t)); } // 2. transpose buf in place int rc_max = std::max(nr, nc); int rc_min = std::min(nr, nc); - for (int r = 0; r < rc_max; r++) { + for (const auto r : c10::irange(rc_max)) { int end = std::min(r, rc_min); - for (int c = 0; c < end; c++) { + for (const auto c : c10::irange(end)) { scalar_t tmp = bp[r + BLOCK_SZ * c]; bp[r + BLOCK_SZ * c] = bp[r * BLOCK_SZ + c]; bp[r * BLOCK_SZ + c] = tmp; @@ -82,7 +83,7 @@ void copy_same_type_transpose_(Tensor& self, const Tensor& src) { } // 3. 
copy rows from buf to dst - for (int r = 0; r < nr; r++) { + for (const auto r : c10::irange(nr)) { memcpy(rpo + r * NC, bp + r * BLOCK_SZ, nc * sizeof(scalar_t)); } } diff --git a/aten/src/ATen/native/Cross.cpp b/aten/src/ATen/native/Cross.cpp index 70dba97520c008..49f3c80e27d509 100644 --- a/aten/src/ATen/native/Cross.cpp +++ b/aten/src/ATen/native/Cross.cpp @@ -3,6 +3,7 @@ #include #include +#include namespace at { namespace native { @@ -30,7 +31,7 @@ Tensor & cross_out(const Tensor & input, const Tensor & other, const c10::option int64_t dim = -1; if(!dimension.has_value()) { - for(int64_t i = 0; i < input.dim(); i++) { + for (const auto i : c10::irange(input.dim())) { if(input.size(i) == 3) { dim = i; break; diff --git a/aten/src/ATen/native/DilatedConvolutionUtils.h b/aten/src/ATen/native/DilatedConvolutionUtils.h index 0f9bf90ab5a169..2d4815799b10f2 100644 --- a/aten/src/ATen/native/DilatedConvolutionUtils.h +++ b/aten/src/ATen/native/DilatedConvolutionUtils.h @@ -5,6 +5,7 @@ #include #include +#include #define TORCH_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE) \ TORCH_CHECK( \ @@ -43,7 +44,7 @@ std::vector get_output_size( IntArrayRef pad_size, IntArrayRef dilation_size) { std::vector sizes; - for (int index = 0; index < dim; index++) { + for (const auto index : c10::irange(dim)) { sizes.push_back( div_rtn( input.size(index + input.dim() - dim) + 2 * pad_size[index] - diff --git a/aten/src/ATen/native/DilatedMaxPool3d.cpp b/aten/src/ATen/native/DilatedMaxPool3d.cpp index 21398c09067598..57fa6f9ea691cf 100644 --- a/aten/src/ATen/native/DilatedMaxPool3d.cpp +++ b/aten/src/ATen/native/DilatedMaxPool3d.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -37,8 +38,7 @@ static void max_pool3d_with_indices_single_out_frame( int dilationH) { at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) { - for (auto k = start; k < end; k++) - { + for (const auto k : c10::irange(start, end)) { /* loop over output */ // NOLINTNEXTLINE(cppcoreguidelines-init-variables) int64_t i, j, ti; @@ -120,8 +120,7 @@ static void max_pool3d_with_indices_out_frame( int dilationT, int dilationW, int dilationH) { at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) { - for (auto p = start; p < end; p++) - { + for (const auto p : c10::irange(start, end)) { max_pool3d_with_indices_single_out_frame( input_data + p * istride, output_data + p * ostride, @@ -285,8 +284,7 @@ static void max_pool3d_with_indices_backward_single_out_frame( int dilationH) { at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) { - for (auto k = start; k < end; k++) - { + for (const auto k : c10::irange(start, end)) { scalar_t *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight; scalar_t *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight; int64_t *indz_p_k = indz_p + k * otime * owidth * oheight; @@ -330,8 +328,7 @@ static void max_pool3d_with_indices_backward_out_frame( int dilationT, int dilationW, int dilationH) { at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) { - for (auto p = start; p < end; p++) - { + for (const auto p : c10::irange(start, end)) { max_pool3d_with_indices_backward_single_out_frame( gradInput_data + p * istride, gradOutput_data + p * ostride, diff --git a/aten/src/ATen/native/Dropout.cpp b/aten/src/ATen/native/Dropout.cpp index ac56071edb8054..c4a6ec6cef5561 100644 --- a/aten/src/ATen/native/Dropout.cpp +++ b/aten/src/ATen/native/Dropout.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace at { namespace native { @@ -16,8 +17,10 @@ 
Tensor make_feature_noise(const Tensor& input) { sizes.reserve(input.dim()); sizes.push_back(input_sizes[0]); sizes.push_back(input_sizes[1]); - for (int64_t i = 2; i < input.dim(); ++i) + for (const auto i : c10::irange(2, input.dim())) { + (void)i; //Suppress unused variable warning sizes.push_back(1); + } return at::empty(sizes, input.options()); } diff --git a/aten/src/ATen/native/Embedding.cpp b/aten/src/ATen/native/Embedding.cpp index 959005c52b2fc4..cac0cbe7130f26 100644 --- a/aten/src/ATen/native/Embedding.cpp +++ b/aten/src/ATen/native/Embedding.cpp @@ -123,7 +123,7 @@ Tensor embedding_dense_backward_cpu( auto parallel_section = [&](index_t start, index_t end) { TensorIterator iter(add_iter); - for (int64_t i = 0; i < numel; i++) { + for (const auto i : c10::irange(numel)) { if (indices_data[i] != padding_idx) { index_t k = indices_data[i]; if (k >= start && k < end) { @@ -167,7 +167,7 @@ Tensor & embedding_renorm_cpu_( // Note that we cannot use at::parallel_for here because we perform operations on // Tensor inside the loop. See github.com/pytorch/pytorch/issues/28370 for more details. - for (auto i = 0; i < num_indices; i++) { + for (const auto i : c10::irange(num_indices)) { if (i > 0 && sorted_indices[i] == sorted_indices[i - 1]) { continue; } diff --git a/aten/src/ATen/native/EmbeddingBag.cpp b/aten/src/ATen/native/EmbeddingBag.cpp index 53477e2e20e989..66ae4b4f7956cb 100644 --- a/aten/src/ATen/native/EmbeddingBag.cpp +++ b/aten/src/ATen/native/EmbeddingBag.cpp @@ -107,7 +107,7 @@ index_select_add(const Tensor &select_indices, auto output_stride0 = output.strides()[0]; auto output_stride1 = output.strides()[1]; - for (int64_t i = 0; i < numel; i++) { + for (const auto i : c10::irange(numel)) { // We can skip indices equal to padding_idx so they are not included in // the reduction if (select_indices_data[i] != padding_idx) { @@ -247,7 +247,7 @@ index_select_add(const Tensor &select_indices, auto output_stride0 = output.strides()[0]; auto output_stride1 = output.strides()[1]; auto numel = add_indices.numel(); - for (int64_t i = 0; i < numel; i++) { + for (const auto i : c10::irange(numel)) { // We can skip indices equal to padding_idx so they are not included in // the reduction if (select_indices_data[i] != padding_idx) { @@ -302,14 +302,14 @@ index_select_scale_add(const Tensor &select_indices, auto* scale_data = scale.data_ptr(); auto scale_stride = scale.strides()[0]; - for (int64_t i = 0; i < numel; i++) { + for (const auto i : c10::irange(numel)) { // We can skip indices equal to padding_idx so they are not included in // the reduction if (select_indices_data[i] != padding_idx) { auto* src_base = src_data + src_stride0 * select_indices_data[i]; auto* output_base = output_data + output_stride0 * add_indices_data[i]; auto scale = scale_data[i * scale_stride]; - for (int64_t j = 0; j < ddim; j++) { + for (const auto j : c10::irange(ddim)) { output_base[j * output_stride1] += src_base[j * src_stride1] * scale; } } else if (bag_size.defined()) { @@ -419,14 +419,14 @@ index_select_scale_add(const Tensor &select_indices, auto numel = add_indices.numel(); - for (int64_t i = 0; i < numel; i++) { + for (const auto i : c10::irange(numel)) { // We can skip indices equal to padding_idx so they are not included in // the reduction if (select_indices_data[i] != padding_idx) { auto* src_base = src_data + src_stride0 * select_indices_data[i]; auto* output_base = output_data + output_stride0 * add_indices_data[i]; auto scale = scale_data[i * scale_stride]; - for (int64_t j = 0; j < ddim; 
j++) { + for (const auto j : c10::irange(ddim)) { output_base[j * output_stride1] += src_base[j * src_stride1] * scale; } } else if (bag_size.defined()) { diff --git a/aten/src/ATen/native/Fill.cpp b/aten/src/ATen/native/Fill.cpp index 91f0534d5fa1f6..acaad52a299591 100644 --- a/aten/src/ATen/native/Fill.cpp +++ b/aten/src/ATen/native/Fill.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace at { namespace native { @@ -63,7 +64,7 @@ Tensor& fill_diagonal_(Tensor& self, const Scalar& fill_value, bool wrap) { if (nDims > 2) { int64_t dim1 = height; - for (int64_t i = 1; i < nDims; i++) { + for (const auto i : c10::irange(1, nDims)) { if (self.size(i) != dim1) { AT_ERROR("all dimensions of input must be of equal length"); } @@ -76,7 +77,7 @@ Tensor& fill_diagonal_(Tensor& self, const Scalar& fill_value, bool wrap) { int64_t size = std::min(height, width); int64_t stride = 0; - for (int64_t i = 0; i < nDims; i++) { + for (const auto i : c10::irange(nDims)) { stride += self.stride(i); } strides.push_back(stride); diff --git a/aten/src/ATen/native/FractionalMaxPool2d.cpp b/aten/src/ATen/native/FractionalMaxPool2d.cpp index e7eacd3e76e0f7..bdff052e94b001 100644 --- a/aten/src/ATen/native/FractionalMaxPool2d.cpp +++ b/aten/src/ATen/native/FractionalMaxPool2d.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,7 @@ TORCH_META_FUNC(fractional_max_pool2d) ( int64_t ndims = input.ndimension(); TORCH_CHECK(ndims == 3 || ndims == 4, "fractional_max_pool2d(): Expected 3D or 4D tensor, but got: ", input.sizes()); - for (int64_t i = 1; i < ndims; ++i) { + for (const auto i : c10::irange(1, ndims)) { TORCH_CHECK(input.size(i) > 0, "fractional_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but got", input.sizes(), " with dimension ", i, " being empty."); @@ -106,7 +107,7 @@ static void fractional_max_pool2d_out_single_batch_frame( int outputW, int outputH, int poolSizeW, int poolSizeH) { at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) { - for (auto plane = start; plane < end; ++plane) { + for (const auto plane : c10::irange(start, end)) { /* each plane contains 2 random samples, one for W and one for H */ scalar_t* randomSamplesForPlane = randomSamples + plane * 2; @@ -177,7 +178,7 @@ static void fractional_max_pool2d_out_frame( return; } at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) { - for (auto batch = start; batch < end; ++batch) { + for (const auto batch : c10::irange(start, end)) { fractional_max_pool2d_out_single_batch_frame( input + batch * numPlanes * inputH * inputW, output + batch * numPlanes * outputH * outputW, @@ -254,7 +255,7 @@ static void fractional_max_pool2d_backward_out_single_batch_frame( int inputW, int inputH, int outputW, int outputH) { at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) { - for (auto plane = start; plane < end; plane++) { + for (const auto plane : c10::irange(start, end)) { scalar_t* gradInputForPlane = gradInput + plane * inputW * inputH; scalar_t* gradOutputForPlane = gradOutput + plane * outputW * outputH; int64_t* indicesForPlane = indices + plane * outputW * outputH; @@ -291,7 +292,7 @@ static void fractional_max_pool2d_backward_out_frame( return; } at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) { - for (auto batch = start; batch < end; ++batch) { + for (const auto batch : c10::irange(start, end)) { fractional_max_pool2d_backward_out_single_batch_frame( gradInput + batch * numPlanes * inputH * inputW, gradOutput + 
batch * numPlanes * outputH * outputW, diff --git a/aten/src/ATen/native/FractionalMaxPool3d.cpp b/aten/src/ATen/native/FractionalMaxPool3d.cpp index 279ff92467733d..237f9d4395bcea 100644 --- a/aten/src/ATen/native/FractionalMaxPool3d.cpp +++ b/aten/src/ATen/native/FractionalMaxPool3d.cpp @@ -44,7 +44,7 @@ static void fractional_max_pool3d_out_single_batch_frame( int64_t poolSizeT, int64_t poolSizeH, int64_t poolSizeW) { at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) { - for (auto plane = start; plane < end; ++plane) { + for (const auto plane : c10::irange(start, end)) { /* each plane contains 3 random samples, one for T, one for W, and one for H */ scalar_t* randomSamplesForPlane = randomSamples + plane * 3; @@ -126,7 +126,7 @@ static void fractional_max_pool3d_out_frame( } at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) { - for (auto batch = start; batch < end; ++batch) { + for (const auto batch : c10::irange(start, end)) { fractional_max_pool3d_out_single_batch_frame( input + batch * numPlanes * inputW * inputH * inputT, output + batch * numPlanes * outputW * outputH * outputT, @@ -171,7 +171,7 @@ void fractional_max_pool3d_out_cpu_template( TORCH_CHECK(ndims == 4 || ndims == 5, "fractional_max_pool3d_out(): Expected 4D or 5D tensor, but got: ", input_.sizes()); - for (int64_t i = 1; i < ndims; ++i) { + for (const auto i : c10::irange(1, ndims)) { TORCH_CHECK(input_.size(i) > 0, "fractional_max_pool3d_out(): Expected input to have non-zero size for non-batch dimensions, but got", input_.sizes(), " with dimension ", i, " being empty."); @@ -243,7 +243,7 @@ static void fractional_max_pool3d_backward_out_single_batch_frame( int64_t outputT, int64_t outputH, int64_t outputW) { at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) { - for (auto plane = start; plane < end; plane++) { + for (const auto plane : c10::irange(start, end)) { scalar_t* gradInputForPlane = gradInput + plane * inputT * inputH * inputW; scalar_t* gradOutputForPlane = gradOutput + plane * outputT * outputH * outputW; @@ -284,7 +284,7 @@ static void fractional_max_pool3d_backward_out_frame( } at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) { - for (auto batch = start; batch < end; ++batch) { + for (const auto batch : c10::irange(start, end)) { fractional_max_pool3d_backward_out_single_batch_frame( gradInput + batch * numPlanes * inputW * inputH * inputT, gradOutput + batch * numPlanes * outputW * outputH * outputT, diff --git a/aten/src/ATen/native/GridSampler.cpp b/aten/src/ATen/native/GridSampler.cpp index df3ff5d73f7ebb..740f725167a63a 100644 --- a/aten/src/ATen/native/GridSampler.cpp +++ b/aten/src/ATen/native/GridSampler.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace at { namespace native { @@ -51,12 +52,12 @@ namespace { scalar_t *grid_ptr = grid.data_ptr(); // loop over each output pixel at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) { - for (int64_t n = start; n < end; ++n) { + for (const auto n : c10::irange(start, end)) { scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; - for (int64_t d = 0; d < out_D; ++d) { - for (int64_t h = 0; h < out_H; ++h) { - for (int64_t w = 0; w < out_W; ++w) { + for (const auto d : c10::irange(out_D)) { + for (const auto h : c10::irange(out_H)) { + for (const auto w : c10::irange(out_W)) { // get the corresponding input x, y, z co-ordinates from grid scalar_t *grid_ptr_NDHW = grid_ptr_N + d * grid_sD + h * grid_sH + w * grid_sW; scalar_t ix = 
*grid_ptr_NDHW; @@ -222,12 +223,12 @@ namespace { scalar_t *gGrid_ptr = grad_grid.data_ptr(); // loop over each output pixel at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) { - for (int64_t n = start; n < end; ++n) { + for (const auto n : c10::irange(start, end)) { scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; scalar_t *gGrid_ptr_NDHW = gGrid_ptr + n * gGrid_sN; - for (int64_t d = 0; d < out_D; ++d) { - for (int64_t h = 0; h < out_H; ++h) { + for (const auto d : c10::irange(out_D)) { + for (const auto h : c10::irange(out_H)) { for (int64_t w = 0; w < out_W; ++w, gGrid_ptr_NDHW += gGrid_sW /* grad_grid is contiguous */ ) { // get the corresponding input x, y, z co-ordinates from grid scalar_t *grid_ptr_NDHW = grid_ptr_N + d * grid_sD + h * grid_sH + w * grid_sW; @@ -416,11 +417,11 @@ Tensor _grid_sampler_2d_cpu_fallback(const Tensor& input, const Tensor& grid, scalar_t *grid_ptr = grid.data_ptr(); // loop over each output pixel at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) { - for (int64_t n = start; n < end; ++n) { + for (const auto n : c10::irange(start, end)) { scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; - for (int64_t h = 0; h < out_H; ++h) { - for (int64_t w = 0; w < out_W; ++w) { + for (const auto h : c10::irange(out_H)) { + for (const auto w : c10::irange(out_W)) { // get the corresponding input x, y, z co-ordinates from grid scalar_t *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; scalar_t x = *grid_ptr_NHW; @@ -505,7 +506,7 @@ Tensor _grid_sampler_2d_cpu_fallback(const Tensor& input, const Tensor& grid, scalar_t coefficients[4]; // Interpolate 4 values in the x directon - for (int64_t i = 0; i < 4; ++i) { + for (const auto i : c10::irange(4)) { coefficients[i] = cubic_interp1d( get_value_bounded(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), get_value_bounded(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), @@ -578,11 +579,11 @@ _grid_sampler_2d_cpu_fallback_backward(const Tensor& grad_output, scalar_t *gGrid_ptr = grad_grid.data_ptr(); // loop over each output pixel at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) { - for (int64_t n = start; n < end; ++n) { + for (const auto n : c10::irange(start, end)) { scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; scalar_t *gGrid_ptr_NHW = gGrid_ptr + n * gGrid_sN; - for (int64_t h = 0; h < out_H; ++h) { + for (const auto h : c10::irange(out_H)) { for (int64_t w = 0; w < out_W; ++w, gGrid_ptr_NHW += gGrid_sW /* grad_grid is contiguous */ ) { // get the corresponding input x, y co-ordinates from grid scalar_t *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; @@ -703,8 +704,8 @@ _grid_sampler_2d_cpu_fallback_backward(const Tensor& grad_output, for (int64_t c = 0; c < C; ++c, gOut_ptr_NCHW += gOut_sC, gInp_ptr_NC += gInp_sC, inp_ptr_NC+= inp_sC) { scalar_t gOut = *gOut_ptr_NCHW; - for (int64_t i = 0; i < 4; ++i) { - for (int64_t j = 0; j < 4; ++j) { + for (const auto i : c10::irange(4)) { + for (const auto j : c10::irange(4)) { // set input gradient add_value_bounded(gInp_ptr_NC, ix_nw - 1 + i, iy_nw - 1 + j, @@ -857,7 +858,7 @@ Tensor grid_sampler(const Tensor& input, const Tensor& grid, !(input.dim() == 5 && static_cast(interpolation_mode) == GridSamplerInterpolation::Bicubic), "grid_sampler(): bicubic interpolation only supports 4D input" ); - for (int64_t i = 2; i < 
input.dim(); i++) { + for (const auto i : c10::irange(2, input.dim())) { TORCH_CHECK(input.size(i) > 0, "grid_sampler(): expected input to have non-empty spatial dimensions, " "but input has sizes ", input.sizes(), " with dimension ", i, " being " diff --git a/aten/src/ATen/native/Im2Col.cpp b/aten/src/ATen/native/Im2Col.cpp index f66e4d44544616..c4b05bc18b566f 100644 --- a/aten/src/ATen/native/Im2Col.cpp +++ b/aten/src/ATen/native/Im2Col.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace at { namespace native { @@ -91,7 +92,7 @@ static void im2col_out_cpu_template( Tensor input_n; Tensor output_n; - for (int64_t elt = 0; elt < batch_size; elt++) { + for (const auto elt : c10::irange(batch_size)) { input_n = input.select(0, elt); output_n = output.select(0, elt); diff --git a/aten/src/ATen/native/IndexingUtils.h b/aten/src/ATen/native/IndexingUtils.h index 5b938e9536c486..2dea9a0e94d416 100644 --- a/aten/src/ATen/native/IndexingUtils.h +++ b/aten/src/ATen/native/IndexingUtils.h @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -31,7 +32,7 @@ static C10_UNUSED std::vector expandTensors(const Tensor & self, const t } // The sizes of the ByteTensor mask or bool tensor must match the sizes of the // corresponding dimensions in self - for (int64_t j = 0; j < index.dim(); j++) { + for (const auto j : c10::irange(index.dim())) { int64_t srcIdx = result.size() + j; if (index.size(j) != self.size(srcIdx)) { invalid_mask(self, srcIdx, index, j); @@ -39,7 +40,7 @@ static C10_UNUSED std::vector expandTensors(const Tensor & self, const t } // Replace with nonzeros auto nonzero = index.nonzero(); - for (int64_t j = 0; j < index.dim(); j++) { + for (const auto j : c10::irange(index.dim())) { result.emplace_back(nonzero.select(1, j)); } } else { diff --git a/aten/src/ATen/native/LinearAlgebra.cpp b/aten/src/ATen/native/LinearAlgebra.cpp index 4c9be81055bfd8..d97a143e2fd9c1 100644 --- a/aten/src/ATen/native/LinearAlgebra.cpp +++ b/aten/src/ATen/native/LinearAlgebra.cpp @@ -1264,7 +1264,7 @@ static void addbmm_impl_( } auto adjusted_beta(beta); - for (int64_t batch = 0; batch < num_batches; ++batch) { + for (const auto batch : c10::irange(num_batches)) { result.addmm_(batch1[batch], batch2[batch], adjusted_beta, alpha); adjusted_beta = 1; // accumulate output once } @@ -1321,23 +1321,23 @@ inline void baddbmm_cpu_kernel(const Tensor& result, const Tensor& self, const T int64_t grain_size = std::min(internal::GRAIN_SIZE / (is * js * ks), (int64_t)1); parallel_for(0, bs, grain_size, [&](int64_t b_begin, int64_t b_end) { - for (int64_t b = b_begin; b < b_end; b++) { + for (const auto b : c10::irange(b_begin, b_end)) { auto r1 = r0[b]; auto s1 = s0[b]; auto m1 = m0[b]; - for (int64_t i = 0; i < is; i++) { + for (const auto i : c10::irange(is)) { auto r2 = r1[i]; auto s2 = s1[i]; - for (int64_t j = 0; j < js; j++) { + for (const auto j : c10::irange(js)) { scalar_t &r = r2[j]; if (is_bmm) { r = 0; - for (int64_t k = 0; k < ks; k++) { + for (const auto k : c10::irange(ks)) { r += s2[k] * m1[k][j]; } } else { r *= beta; - for (int64_t k = 0; k < ks; k++) { + for (const auto k : c10::irange(ks)) { r += alpha * s2[k] * m1[k][j]; } } @@ -2100,10 +2100,11 @@ void compute_T18_scale_square( auto mexp_scaled = at::native::compute_T18(a_scaled); auto s_cpu = (s.device().type() == at::kCPU) ? 
s : s.to(at::kCPU); - for (int64_t i = 0; i < mexp_scaled.size(0); ++i) { + for (const auto i : c10::irange(mexp_scaled.size(0))) { auto s_val = s_cpu.select(0, i).template item(); auto mexp = mexp_scaled.select(0, i); - for (int64_t p = 0; p < s_val; ++p) { + for (const auto p : c10::irange(s_val)) { + (void)p; //Suppress unused variable warning mexp = at::matmul(mexp, mexp); } mexp_out.select(0, i).copy_(mexp); @@ -2371,7 +2372,7 @@ Tensor& nuclear_norm_out(const Tensor& self, IntArrayRef dim, bool keepdim, Tens // (e.g. [0, 1, 2, ..., ndim-1]) static std::vector make_dim_list(int64_t ndim) { std::vector dim_list(ndim); - for (int64_t ind = 0; ind < ndim; ind++) { + for (const auto ind : c10::irange(ndim)) { dim_list[ind] = ind; } return dim_list; @@ -2924,7 +2925,7 @@ struct KronImpl final { a_reshape = c10::SmallVector(2 * maxdim); b_reshape = c10::SmallVector(2 * maxdim); result_reshape = c10::SmallVector(maxdim); - for (int64_t i = 0; i < maxdim; i++) { + for (const auto i : c10::irange(maxdim)) { a_reshape[2 * i] = (i >= pad_self ? self.sizes()[i - pad_self] : 1); a_reshape[2 * i + 1] = 1; b_reshape[2 * i] = 1; @@ -2939,7 +2940,7 @@ struct KronImpl final { TORCH_INTERNAL_ASSERT(result.defined(), "Cannot call kron_out with an undefined result tensor as the out argument. Please allocate a Tensor before calling kron_out with it."); c10::SmallVector mul_shape(2 * maxdim); - for (int64_t i = 0; i < maxdim; i++) { + for (const auto i : c10::irange(maxdim)) { mul_shape[2 * i] = a_reshape[2 * i]; mul_shape[2 * i + 1] = b_reshape[2 * i + 1]; } diff --git a/aten/src/ATen/native/LinearAlgebraUtils.h b/aten/src/ATen/native/LinearAlgebraUtils.h index bbabca6bbfb8aa..c495fc83075654 100644 --- a/aten/src/ATen/native/LinearAlgebraUtils.h +++ b/aten/src/ATen/native/LinearAlgebraUtils.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -169,7 +170,8 @@ void batch_iterator_with_broadcasting(const Tensor& a, const Tensor& b, const fu auto* b_batch_idx_ptr = data[0]; auto* a_batch_idx_ptr = data[1]; - for (int64_t elem = 0; elem < nelems; ++elem) { + for (const auto elem : c10::irange(nelems)) { + (void)elem; //Suppress unused variable warning auto b_curr_linear_batch_idx = *reinterpret_cast(b_batch_idx_ptr); auto a_curr_linear_batch_idx = *reinterpret_cast(a_batch_idx_ptr); @@ -332,7 +334,7 @@ static inline Tensor _move_to_end(const Tensor& self, IntArrayRef axes) { const int64_t ndim = self.ndimension(); std::vector perm; - for (int64_t i = 0; i < ndim; i++) { + for (const auto i : c10::irange(ndim)) { auto it = std::find(a.begin(), a.end(), i); if (it == a.end()) { perm.push_back(i); @@ -476,7 +478,7 @@ static inline std::vector create_dim_backshift_permutation(int64_t dim0 "duplicate or invalid dimensions"); std::vector permutation(ndim); int64_t cur_permuted_dim = 0; - for (int64_t dim_ind = 0; dim_ind < ndim; dim_ind++) { + for (const auto dim_ind : c10::irange(ndim)) { if ((dim_ind != dim0) && (dim_ind != dim1)) { permutation[cur_permuted_dim++] = dim_ind; } @@ -493,7 +495,7 @@ static inline std::vector create_dim_backshift_permutation(int64_t dim0 static inline std::vector create_reverse_permutation(std::vector permutation) { int64_t ndim = permutation.size(); std::vector reverse_permutation(ndim); - for (int64_t dim_ind = 0; dim_ind < ndim; dim_ind++) { + for (const auto dim_ind : c10::irange(ndim)) { reverse_permutation[permutation[dim_ind]] = dim_ind; } return reverse_permutation; diff --git a/aten/src/ATen/native/LossCTC.cpp b/aten/src/ATen/native/LossCTC.cpp 
index dbf0e2cc990950..19af04b9731de4 100644 --- a/aten/src/ATen/native/LossCTC.cpp +++ b/aten/src/ATen/native/LossCTC.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,7 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const std::vector tg_batch_offsets(batch_size); if (targets.dim() == 1) { // concatenated targets int64_t pos = 0; - for (int64_t i = 0; i < batch_size; i++) { + for (const auto i : c10::irange(batch_size)) { tg_batch_offsets[i] = pos; pos += target_lengths[i]; if (max_target_length < target_lengths[i]) @@ -72,7 +73,7 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const else { // batch x max_target_length // dim is 2 int64_t tg_batch_stride = targets.stride(0); - for (int64_t i = 0; i < batch_size; i++) { + for (const auto i : c10::irange(batch_size)) { tg_batch_offsets[i] = i * tg_batch_stride; if (max_target_length < target_lengths[i]) max_target_length = target_lengths[i]; @@ -84,7 +85,7 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const " (while checking arguments for ", c, ")"); } int64_t max_input_length = log_probs.size(0); - for (int64_t b = 0; b < batch_size; b++) { + for (const auto b : c10::irange(batch_size)) { TORCH_CHECK(input_lengths[b] <= max_input_length, "Expected input_lengths to have value at most ", max_input_length, ", but got value ", input_lengths[b], " (while checking arguments for ", c, ")"); @@ -103,7 +104,7 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const // first the default log_alpha.narrow(1, 0, 1).fill_(neginf); at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { - for (int64_t b = start; b < end; b++) { + for (const auto b : c10::irange(start, end)) { int64_t input_length = input_lengths[b]; int64_t target_length = target_lengths[b]; auto log_probs_a = log_probs_a_global[b]; @@ -116,7 +117,7 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const log_alpha_a[0][1] = log_probs_a[0][get_target_prime(targets_data, tg_batch_offset, tg_target_stride, 1, BLANK)]; // now the loop over the inputs - for (int64_t t=1; t()[b]; auto grad_a = grad_a_global[b]; if (zero_infinity && nll == std::numeric_limits::infinity()) { @@ -322,8 +323,8 @@ Tensor ctc_loss_backward_cpu_template(const Tensor& grad_out, const Tensor& log_ // this could be a great target for further vectorization. // grad is the output gradient, nll is the loss. Note that the likelihood -nll is the Z of eq (16) scalar_t gr = grad_out.accessor()[b]; - for (int64_t t = 0; t < input_length; t++) { // or go for the full thing? - for (int64_t c = 0; c < num_labels; c++) { + for (const auto t : c10::irange(input_length)) { // or go for the full thing? 
+ for (const auto c : c10::irange(num_labels)) { scalar_t& res = grad_a[t][c]; scalar_t lp = log_probs_a[t][c]; res = (std::exp(lp)-std::exp(res + nll - lp)) * gr; diff --git a/aten/src/ATen/native/LossMultiLabelMargin.cpp b/aten/src/ATen/native/LossMultiLabelMargin.cpp index fa71663716f5d3..f59de5c8817a42 100644 --- a/aten/src/ATen/native/LossMultiLabelMargin.cpp +++ b/aten/src/ATen/native/LossMultiLabelMargin.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace at { namespace native { @@ -17,21 +18,21 @@ inline scalar_t multilabel_margin_loss_forward_inner_sum_cpu( int64_t dim) { using accscalar_t = at::acc_type; accscalar_t sum = 0; - for (int64_t ddt = 0; ddt < dim; ddt++) { + for (const auto ddt : c10::irange(dim)) { int64_t target_idx = target_data[ddt]; if (target_idx < 0) { break; } is_target_data[target_idx] = 1; } - for (int64_t dt = 0; dt < dim; dt++) { + for (const auto dt : c10::irange(dim)) { int64_t target_idx = target_data[dt]; if (target_idx < 0) { break; } scalar_t input_target = input_data[target_idx]; - for (int64_t d = 0; d < dim; d++) { + for (const auto d : c10::irange(dim)) { if (!is_target_data[d]) { scalar_t z = 1 - input_target + input_data[d]; if (z > 0) { @@ -63,7 +64,8 @@ static void multilabel_margin_loss_forward_out_frame( accscalar_t sum = 0; - for (int64_t t = 0; t < nframe; t++) { + for (const auto t : c10::irange(nframe)) { + (void)t; //Suppress unused variable warning sum += multilabel_margin_loss_forward_inner_sum_cpu( input_data, target_data, is_target_data, dim); @@ -81,7 +83,7 @@ static void multilabel_margin_loss_forward_out_frame( } else { auto output_acc = output.accessor(); - for (int64_t t = 0; t < nframe; t++) { + for (const auto t : c10::irange(nframe)) { scalar_t sum = multilabel_margin_loss_forward_inner_sum_cpu( input_data, target_data, is_target_data, dim); @@ -171,15 +173,16 @@ static void multilabel_margin_loss_backward_out_frame( reduction == Reduction::Mean ? 1. / (nframe * dim) : 1. 
/ dim); scalar_t* grad_input_row_data = grad_input.data_ptr(); - for (int64_t t = 0; t < nframe; t++) { - for (int64_t dt = 0; dt < dim; dt++) { + for (const auto t : c10::irange(nframe)) { + (void)t; //Suppress unused variable warning + for (const auto dt : c10::irange(dim)) { int64_t target_idx = target_data[dt]; if (target_idx < 0) { break; } scalar_t input_target = input_data[target_idx]; - for (int64_t d = 0; d < dim; d++) { + for (const auto d : c10::irange(dim)) { if (!is_target_data[d]) { scalar_t z = 1 - input_target + input_data[d]; if (z > 0) { @@ -206,8 +209,8 @@ static void multilabel_margin_loss_backward_out_frame( } else { check_dim_size(grad_output, 1, 0, nframe); auto grad_output_acc = grad_output.accessor(); - for (int64_t t = 0; t < nframe; t++) { - for (int64_t d = 0; d < dim; d++) { + for (const auto t : c10::irange(nframe)) { + for (const auto d : c10::irange(dim)) { grad_input_data[t * dim + d] *= grad_output_acc[t]; } } diff --git a/aten/src/ATen/native/LossMultiMargin.cpp b/aten/src/ATen/native/LossMultiMargin.cpp index b65aaf9b6adce2..c7ab53f1d211b7 100644 --- a/aten/src/ATen/native/LossMultiMargin.cpp +++ b/aten/src/ATen/native/LossMultiMargin.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace at { namespace native { @@ -18,7 +19,7 @@ inline scalar_t multi_margin_inner_sum_cpu( const int64_t target_idx) { const scalar_t input_target = input_data[target_idx]; scalar_t sum = 0; - for (int64_t d = 0; d < dim; d++) { + for (const auto d : c10::irange(dim)) { if (d == target_idx) { continue; } @@ -63,7 +64,7 @@ static inline void multi_margin_loss_cpu_kernel( // cannot be handled by TensorAccessor) if (reduction == Reduction::None && output.dim() > 0) { auto output_acc = output.accessor(); - for (int64_t t = 0; t < nframe; t++) { + for (const auto t : c10::irange(nframe)) { const auto idx = target_index_checked(target_data, t, dim); auto sum = multi_margin_inner_sum_cpu( input_data, weight_data, p, margin, dim, idx); @@ -73,7 +74,7 @@ static inline void multi_margin_loss_cpu_kernel( } else { accscalar_t sum = 0; auto output_acc = output.data_ptr(); - for (int64_t t = 0; t < nframe; t++) { + for (const auto t : c10::irange(nframe)) { const auto idx = target_index_checked(target_data, t, dim); sum += multi_margin_inner_sum_cpu( input_data, weight_data, p, margin, dim, idx); @@ -149,11 +150,11 @@ static void multi_margin_loss_backward_cpu_kernel( int64_t dim, int64_t reduction) { scalar_t* grad_input_row_data = grad_input_data; - for (int64_t t = 0; t < nframe; t++) { + for (const auto t : c10::irange(nframe)) { int64_t target_idx = target_index_checked(target_data, t, dim); scalar_t input_target = input_data[target_idx]; scalar_t grad_input_target = 0; - for (int64_t d = 0; d < dim; d++) { + for (const auto d : c10::irange(dim)) { scalar_t z = margin - input_target + input_data[d]; if (d == target_idx) { continue; @@ -186,8 +187,8 @@ static void multi_margin_loss_backward_cpu_kernel( } } else { auto grad_output_acc = grad_output.accessor(); - for (int64_t t = 0; t < nframe; t++) { - for (int64_t d = 0; d < dim; d++) { + for (const auto t : c10::irange(nframe)) { + for (const auto d : c10::irange(dim)) { grad_input_data[t * dim + d] *= grad_output_acc[t]; } } diff --git a/aten/src/ATen/native/LossNLL.cpp b/aten/src/ATen/native/LossNLL.cpp index 78f982afcd871d..dfb4aced85c41f 100644 --- a/aten/src/ATen/native/LossNLL.cpp +++ b/aten/src/ATen/native/LossNLL.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace at { namespace meta { @@ -155,7 +156,7 @@ 
static void nll_loss_out_frame( auto output_acc = output.accessor(); at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { - for (auto i = start; i < end; i++) { + for (const auto i : c10::irange(start, end)) { const auto cur_target = target_acc[i]; if (cur_target == ignore_index) { @@ -215,7 +216,7 @@ static void nll_loss_out_frame( scalar_t weight_partial_sums[cascade_sum_num_levels] = {0}; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) scalar_t loss_partial_sums[cascade_sum_num_levels] = {0}; - for (int64_t b = 0; b < batch_size; b++) { + for (const auto b : c10::irange(batch_size)) { const int64_t cur_target = target_data[b]; if (cur_target == ignore_index) { ++num_ignored; @@ -330,7 +331,7 @@ static void nll_loss_backward_out_frame( auto grad_input_acc = grad_input.accessor(); auto grad_output_acc = grad_output.accessor(); at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { - for (auto i = start; i < end; i++) { + for (const auto i : c10::irange(start, end)) { auto cur_target = target_acc[i]; if (cur_target == ignore_index) { continue; diff --git a/aten/src/ATen/native/LossNLL2d.cpp b/aten/src/ATen/native/LossNLL2d.cpp index 0e3e9e6fec77ba..d7ebf65231f1ed 100644 --- a/aten/src/ATen/native/LossNLL2d.cpp +++ b/aten/src/ATen/native/LossNLL2d.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace at { namespace native { @@ -109,9 +110,9 @@ static void nll_loss2d_forward_out_frame( auto target_acc = target.accessor(); at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { - for (int64_t b = start; b < end; b++) { - for (int64_t h = 0; h < H; h++) { - for (int64_t w = 0; w < W; w++) { + for (const auto b : c10::irange(start, end)) { + for (const auto h : c10::irange(H)) { + for (const auto w : c10::irange(W)) { const int64_t cur_target = (int64_t)target_acc[b][h][w]; if (cur_target == ignore_index) { @@ -176,8 +177,8 @@ static void nll_loss2d_forward_out_frame( const int64_t level_mask = level_step - 1; int64_t num_ignored = 0; - for (int64_t b = 0; b < batch_size; b++) { - for (int64_t elem = 0; elem < map_size; elem++) { + for (const auto b : c10::irange(batch_size)) { + for (const auto elem : c10::irange(map_size)) { const int64_t cur_target = target_data[b * map_size + elem]; if (cur_target == ignore_index) { ++num_ignored; @@ -286,9 +287,9 @@ static void nll_loss2d_backward_out_frame( auto target_acc = target.accessor(); at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { - for (int64_t b = start; b < end; b++) { - for (int64_t h = 0; h < H; h++) { - for (int64_t w = 0; w < W; w++) { + for (const auto b : c10::irange(start, end)) { + for (const auto h : c10::irange(H)) { + for (const auto w : c10::irange(W)) { const int64_t cur_target = target_acc[b][h][w]; if (cur_target == ignore_index) { continue; @@ -329,8 +330,8 @@ static void nll_loss2d_backward_out_frame( : grad_output_value); at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) { - for (int64_t b = start; b < end; b++) { - for (int64_t elem = 0; elem < map_size; elem++) { + for (const auto b : c10::irange(start, end)) { + for (const auto elem : c10::irange(map_size)) { const int64_t t = target_data[b * map_size + elem]; if (t != ignore_index) { diff --git a/aten/src/ATen/native/NNPACK.cpp b/aten/src/ATen/native/NNPACK.cpp index fa1d1d86c6930d..e83320e09fa6eb 100644 --- a/aten/src/ATen/native/NNPACK.cpp +++ b/aten/src/ATen/native/NNPACK.cpp @@ -60,6 +60,7 @@ bool _nnpack_available() { #include #include #include 
+#include namespace at { namespace native { @@ -238,7 +239,7 @@ Tensor _nnpack_spatial_convolution( const size_t input_size_per_batch = input_channels * input_size.width * input_size.height; const size_t output_size_per_batch = output_channels * output_size.width * output_size.height; - for (size_t batch = 0u; batch < batch_size; ++batch) { + for (const auto batch : c10::irange(0u, batch_size)) { const nnp_status status = nnp_convolution_inference( algorithm, nnp_convolution_transform_strategy_compute, diff --git a/aten/src/ATen/native/NamedTensor.cpp b/aten/src/ATen/native/NamedTensor.cpp index 1d5d8e4a4a6982..c987f72261ab47 100644 --- a/aten/src/ATen/native/NamedTensor.cpp +++ b/aten/src/ATen/native/NamedTensor.cpp @@ -100,7 +100,7 @@ Tensor refine_names(const Tensor& self, DimnameList names) { self_names.size(), " and ", names.size(), " respectively)."); check_names_valid_for(self, names); - for (size_t idx = 0; idx < self_names.size(); idx++) { + for (const auto idx : c10::irange(self_names.size())) { const auto& self_name = self_names[idx]; const auto& out_name = names[idx]; if (self_name == out_name || self_name.isWildcard()) { @@ -221,7 +221,7 @@ Tensor align_to(const Tensor& tensor, DimnameList order, int64_t ellipsis_idx) { }; // Fill in the non-ellipsis dimensions - for (auto order_idx = 0U; order_idx < order.size(); ++order_idx) { + for (const auto order_idx : c10::irange(0U, order.size())) { auto out_idx = order_idx; if (order_idx >= ellipsis_idx) { out_idx = order_idx + num_ellipsis_names; diff --git a/aten/src/ATen/native/Normalization.cpp b/aten/src/ATen/native/Normalization.cpp index 25ae1a765e85ff..fdce903c0806d9 100644 --- a/aten/src/ATen/native/Normalization.cpp +++ b/aten/src/ATen/native/Normalization.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -156,7 +157,7 @@ std::tuple batch_norm_cpu_update_stats_template( // Reduce all dimensions except dim=1 DimVector reduce_dims(ndim - 1); reduce_dims[0] = 0; - for (int64_t i = 2; i < ndim; ++i) { + for (const auto i : c10::irange(2, ndim)) { reduce_dims[i - 1] = i; } @@ -178,7 +179,7 @@ std::tuple batch_norm_cpu_update_stats_template( batch_norm_cpu_collect_stats_stub(kCPU, _mean, _var_sum, input); parallel_for(0, n_input, 1, [&](int64_t b_begin, int64_t b_end) { - for (int64_t f = b_begin; f < b_end; ++f) { + for (const auto f : c10::irange(b_begin, b_end)) { save_mean_a[f] = _mean_a[f]; save_var_transform_a[f] = VarTransform{}(_var_sum_a[f] / n, eps); @@ -206,7 +207,7 @@ std::tuple batch_norm_cpu_update_stats_template( parallel_for(0, n_input, 1, [&](int64_t b_begin, int64_t b_end) { TensorIterator iter(reduce_iter); - for (int64_t f = b_begin; f < b_end; ++f) { + for (const auto f : c10::irange(b_begin, b_end)) { // compute variance per input iter.unsafe_replace_operand(0, in_data + channel_stride * f); accscalar_t var_sum = 0; @@ -283,7 +284,7 @@ std::tuple batch_norm_backward_cpu_template( // Reduce all dimensions except dim=1 DimVector reduce_dims(ndim - 1); reduce_dims[0] = 0; - for (int64_t i = 2; i < ndim; ++i) { + for (const auto i : c10::irange(2, ndim)) { reduce_dims[i - 1] = i; } @@ -330,7 +331,7 @@ std::tuple batch_norm_backward_cpu_template( TensorIterator unary_iter_local(unary_iter); TensorIterator binary_iter_local(binary_iter); - for (int64_t f = b_begin; f < b_end; ++f) { + for (const auto f : c10::irange(b_begin, b_end)) { scalar_t w = weight.defined() ? 
weight_a[f] : 1; scalar_t mean, invstd; diff --git a/aten/src/ATen/native/PackedSequence.cpp b/aten/src/ATen/native/PackedSequence.cpp index 798672ccdeaeff..ec997d86aa1b59 100644 --- a/aten/src/ATen/native/PackedSequence.cpp +++ b/aten/src/ATen/native/PackedSequence.cpp @@ -77,7 +77,7 @@ std::tuple _pack_padded_sequence(const Tensor& _input, const Ten // more elements below in our column, we lower the counter (prev_l), and append the new // block to the output. int64_t prev_l = 0; - for (int64_t i = 0; i < batch_size; ++i) { + for (const auto i : c10::irange(batch_size)) { int64_t l = lengths[batch_size - 1 - i]; if (l > prev_l) { auto current_batch_size = batch_size - i; @@ -109,7 +109,7 @@ Tensor _pack_padded_sequence_backward(const Tensor& grad, at::IntArrayRef input_ int64_t offset = 0; int64_t max_seq_len = batch_sizes_t.size(0); int64_t * batch_sizes = batch_sizes_t.data_ptr(); - for (int64_t i = 0; i < max_seq_len; ++i) { + for (const auto i : c10::irange(max_seq_len)) { grad_input[i].slice(0, 0, batch_sizes[i]).copy_(grad.slice(0, offset, offset + batch_sizes[i])); offset += batch_sizes[i]; } @@ -170,7 +170,8 @@ std::tuple _pad_packed_sequence(const Tensor& data, const Tensor } int64_t dec = prev_batch_size - batch_size; if (dec > 0) { - for (int64_t j = 0; j < dec; ++j) { + for (const auto j : c10::irange(dec)) { + (void)j; //Suppress unused variable warning (*lengths--) = i; } } @@ -206,7 +207,7 @@ Tensor pad_sequence(TensorList sequences, bool batch_first, double padding_value out_dims.insert(out_dims.end(), trailing_dims.begin(), trailing_dims.end()); Tensor out = at::full(out_dims, padding_value, sequences[0].options()); - for (int64_t i = 0; i < sequences_size; i++) { + for (const auto i : c10::irange(sequences_size)) { const Tensor currseq = sequences[i]; const int64_t length_i = currseq.size(0); // use index notation to prevent duplicate references to the tensor diff --git a/aten/src/ATen/native/Pool.h b/aten/src/ATen/native/Pool.h index da774911b5737e..3db102ad855053 100644 --- a/aten/src/ATen/native/Pool.h +++ b/aten/src/ATen/native/Pool.h @@ -2,6 +2,7 @@ #include #include #include +#include #pragma once @@ -212,7 +213,7 @@ pool3d_shape_check( TORCH_CHECK(ndim == 4 || ndim == 5, fn_name, ": Expected 4D or 5D tensor for input, but got: ", input.sizes()); - for (int64_t i = 1; i < ndim; ++i) { + for (const auto i : c10::irange(1, ndim)) { TORCH_CHECK(input.size(i) > 0, fn_name, "Expected input to have non-zero size for non-batch dimensions, but got", input.sizes(), " with dimension ", i, " being empty."); diff --git a/aten/src/ATen/native/QuantizedLinear.cpp b/aten/src/ATen/native/QuantizedLinear.cpp index 0c4256f5272041..e3030f71d16517 100644 --- a/aten/src/ATen/native/QuantizedLinear.cpp +++ b/aten/src/ATen/native/QuantizedLinear.cpp @@ -206,9 +206,9 @@ void CalcColOffsetsTranspose( const int8_t* Bint8, int32_t B_zero_point, int32_t* col_offsets) { - for (int i = 0; i < N; ++i) { + for (const auto i : c10::irange(N)) { int32_t sum = 0; - for (int j = 0; j < K; ++j) { + for (const auto j : c10::irange(K)) { sum += Bint8[i * K + j]; } col_offsets[i] = sum - B_zero_point * K; @@ -353,7 +353,7 @@ bool CheckAndSaturate(T max_val, T* element) { void HandleWeightsSaturation(int64_t N, float* weight) { const float kFp16Max = RawUint16ToFp16(0x7BFF); bool found_out_of_range = false; - for (int64_t i = 0; i < N; ++i) { + for (const auto i : c10::irange(N)) { if (CheckAndSaturate(kFp16Max, weight + i)) { found_out_of_range = true; }
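
For reference, every hunk above applies the same mechanical rewrite. The following standalone sketch (not part of the patch; it only assumes that c10/util/irange.h is on the include path, and the accumulator is a placeholder body) shows the three variants used throughout: a zero-based range, a half-open [start, end) range as used inside at::parallel_for callbacks, and a loop whose index is never read, where a void cast keeps -Wunused-variable quiet.

    #include <c10/util/irange.h>
    #include <cstdint>

    int64_t irange_pattern_sketch(int64_t n, int64_t start, int64_t end) {
      int64_t acc = 0;

      // Before: for (int64_t i = 0; i < n; ++i)
      for (const auto i : c10::irange(n)) {
        acc += i;
      }

      // Before: for (int64_t t = start; t < end; ++t)
      // (the form used inside at::parallel_for callbacks above)
      for (const auto t : c10::irange(start, end)) {
        acc += t;
      }

      // When the body never reads the index, the cast suppresses the
      // unused-variable warning, matching the (void)i / (void)p / (void)t
      // lines added in this patch.
      for (const auto i : c10::irange(n)) {
        (void)i; // Suppress unused variable warning
        ++acc;
      }

      return acc;
    }
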