[infra/onert] Bump up ARM Compute Library v24.07 (Samsung#14009)
This commit bumps up ARM Compute Library to v24.07.
It disables some NEON NNAPI unit tests to skip failures as a workaround.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
hseok-oh authored Sep 23, 2024
1 parent e1fc0d4 commit 355813f
Showing 22 changed files with 223 additions and 423 deletions.
2 changes: 1 addition & 1 deletion Makefile.template
@@ -202,7 +202,7 @@ runtime_tar_internal:
 	tar -zcf $(WORKSPACE)/onert-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)

 acl_tar_internal:
-	tar -zcf $(WORKSPACE)/onert-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
+	tar -zcf $(WORKSPACE)/onert-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_graph.so

 install_acl_internal:
 # Workaround to install acl for test (ignore error when there is no file to copy)

@@ -50,46 +50,16 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"

namespace arm_compute
{
/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls
* the following kernels:
*
* -# @ref CLTransposeKernel
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
class CLFullyConnectedHybridLayerReshapeWeights : public ICLSimpleFunction
{
public:
/** Set the input and output tensors.
*
* @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
* S8.
* @param[out] output Destination tensor which stores the transposed input tensor. Data type
* supported: Same as @p input.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref
* CLFullyConnectedHybridLayerReshapeWeights
*
* @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
* S8.
* @param[in] output Destination tensor which stores the transposed input tensor. Data type
* supported: Same as @p input.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};

/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following
* OpenCL kernels:
*
* -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref CLFullyConnectedHybridLayerReshapeWeights (if @p are_weights_reshaped is set to false
* -# @ref CLTranspose (if @p are_weights_reshaped is set to false
* and transpose_weights is set to true ) (called once)
* -# @ref CLGEMMLowpMatrixMultiplyCore (if quantized symmetric)
* -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
@@ -165,7 +135,7 @@ class CLFullyConnectedHybridLayer : public IFunction
                  bool retain_internal_weights);

   MemoryGroup _memory_group;
-  CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel;
+  CLTranspose _reshape_weights_kernel;
   CLScaleFactorSymm8Kernel _scale_factor_kernel;
   CLQuantizationSymmetricKernel _quant_input_kernel;
   CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
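Note: the removed wrapper above is the template for most of this commit. ACL no longer exports its internal src/core kernel headers, so thin ICLSimpleFunction wrappers around single kernels give way to public runtime functions. A minimal sketch of the replacement call pattern, assuming the ACL v24.07 public API and hypothetical, already-allocated tensors:

#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"

// Sketch only: reshape (2D-transpose) FC weights with the public CLTranspose
// function instead of the removed CLFullyConnectedHybridLayerReshapeWeights.
void reshape_weights(const arm_compute::ICLTensor *weights,
                     arm_compute::ICLTensor *reshaped_weights)
{
  // Same check CLFullyConnectedHybridLayer::validate() now performs directly.
  ARM_COMPUTE_ERROR_THROW_ON(
    arm_compute::CLTranspose::validate(weights->info(), reshaped_weights->info()));

  arm_compute::CLTranspose transpose;
  transpose.configure(weights, reshaped_weights); // a 2D transpose is the reshape
  transpose.run();
}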

@@ -50,45 +50,15 @@
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"

namespace arm_compute
{
/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls
* the following kernels:
*
* -# @ref CLTransposeKernel
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
class CLFullyConnectedLayerReshapeWeightsEx : public ICLSimpleFunction
{
public:
/** Set the input and output tensors.
*
* @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
* QASYMM8/F16/F32.
* @param[out] output Destination tensor which stores the transposed input tensor. Data type
* supported: Same as @p input.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref
* CLFullyConnectedLayerReshapeWeightsEx
*
* @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
* QASYMM8/F16/F32.
* @param[in] output Destination tensor which stores the transposed input tensor. Data type
* supported: Same as @p input.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};

namespace weights_transformations
{
/** Basic function to manage the reshape weights generated from @ref
* CLFullyConnectedLayerReshapeWeightsEx */
* CLTranspose */
class CLFullyConnectedLayerReshapeWeightsExManaged : public ITransformWeights
{
public:
@@ -118,7 +88,7 @@ class CLFullyConnectedLayerReshapeWeightsExManaged : public ITransformWeights
 private:
   static constexpr uint32_t _uid = 0x0;
   CLTensor _output{};
-  CLFullyConnectedLayerReshapeWeightsEx _func{};
+  CLTranspose _func{};
 };
 } // namespace weights_transformations

@@ -209,7 +179,7 @@ class CLFullyConnectedLayerEx : public IFunction
   weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
     _reshape_weights_managed_function;
   CLFlattenLayer _flatten_layer;
-  CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
+  CLTranspose _reshape_weights_function;
   CLGEMM _mm_gemm;
   CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
   CLTensor _flatten_output;
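The weights-manager path keeps its managed-transformation class; only its worker becomes a plain CLTranspose. A condensed sketch of the managed object after this change (members as in the diff; reference counting and error handling omitted):

#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
#include "arm_compute/runtime/ITransformWeights.h"

namespace arm_compute
{
namespace weights_transformations
{
// Condensed sketch of the managed reshape after the change.
class CLFullyConnectedLayerReshapeWeightsExManaged : public ITransformWeights
{
public:
  // Bind the CLTranspose function to the managed output tensor.
  void configure(const ICLTensor *input) { _func.configure(input, &_output); }
  // Allocate the output and run the transpose once; the weights manager caches it.
  void run() override
  {
    _output.allocator()->allocate();
    _func.run();
    _reshape_run = true;
  }
  void release() override { _output.allocator()->free(); }
  ICLTensor *get_weights() override { return &_output; }
  uint32_t uid() override { return _uid; }

private:
  static constexpr uint32_t _uid = 0x0;
  CLTensor _output{};
  CLTranspose _func{}; // was CLFullyConnectedLayerReshapeWeightsEx
};
} // namespace weights_transformations
} // namespace arm_compute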

@@ -43,8 +43,7 @@
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
// #include "arm_compute/runtime/CL/functions/CLCopy.h"
#include "arm_compute/runtime/CL/functions/CLCopy.h"
#include <memory>

namespace arm_compute
@@ -123,7 +122,7 @@ class CLPadLayerEx : public IFunction
   void configure_reflect_mode(ICLTensor *input, ICLTensor *output);

   std::unique_ptr<CLPadLayerKernelEx> _pad_kernel;
-  std::unique_ptr<opencl::kernels::ClCopyKernel> _copy_kernel;
+  std::unique_ptr<CLCopy> _copy_kernel;
   bool _perform_pad;
 };
 } // namespace arm_compute
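Same cleanup, different kernel: the internal opencl::kernels::ClCopyKernel becomes the public CLCopy function, so CLPadLayerEx no longer reaches into ACL's src/ tree. A small sketch of the pass-through path this member enables (helper and tensor names are hypothetical):

#include <memory>
#include "arm_compute/runtime/CL/functions/CLCopy.h"

// Hypothetical excerpt: when the requested padding turns out to be a no-op,
// CLPadLayerEx can simply forward the input to the output with CLCopy.
void configure_passthrough(arm_compute::ICLTensor *input, arm_compute::ICLTensor *output,
                           std::unique_ptr<arm_compute::CLCopy> &copy_func)
{
  copy_func = std::make_unique<arm_compute::CLCopy>();
  copy_func->configure(input, output); // executed later via copy_func->run()
}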

@@ -48,43 +48,15 @@
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"

namespace arm_compute
{
/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls
* the following kernels:
*
* -# @ref NETransposeKernel
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
class NEFullyConnectedHybridLayerReshapeWeights : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
*
* @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
* QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref
* NEFullyConnectedHybridLayerReshapeWeights
*
* @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported:
* QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data type supported: Same as @p input.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};

/** Basic function to compute a Fully Connected layer on NEON. This function calls the following
* NEON kernels:
* -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref NEFullyConnectedHybridLayerReshapeWeights (if @p are_weights_reshaped is set to false
* -# @ref NETranspose (if @p are_weights_reshaped is set to false
* and transpose_weights is set to true ) (called once)
* -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized
* asymmetric)
@@ -162,7 +134,7 @@ class NEFullyConnectedHybridLayer : public IFunction
   void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);

   MemoryGroup _memory_group;
-  NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function;
+  NETranspose _reshape_weights_function;
   NEQuantizationSymmetricKernel _quant_input_kernel;
   NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
   NEMultiplyScaleFactorKernel _multiply_scale_kernel;

@@ -51,21 +51,17 @@
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"

namespace arm_compute
{
/** Basic function to compute a Fully Connected layer on NEON. This function calls the following
* NEON kernels:
* -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and
* -# @ref NETranspose (if @p are_weights_reshaped is set to false and
* transpose_weights is set to true ) (called once)
* -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized
* asymmetric)
* -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref
* NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
* not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
* @note The difference from NEFullyConnectedLayer is that this class supports weights as input
@@ -136,29 +132,28 @@ class NEFullyConnectedLayerEx : public IFunction
   void prepare() override;

 private:
-  void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output);
-  void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
-  void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+  void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *bias,
+                       ITensor *output, const FullyConnectedLayerInfo &fc_info);
+  void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *bias,
+                         ITensor *output, const FullyConnectedLayerInfo &fc_info);
+  void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *bias,
+                    ITensor *output, const FullyConnectedLayerInfo &fc_info);

   MemoryGroup _memory_group;
-  NEFlattenLayer _flatten_kernel;
   NEConvertFullyConnectedWeights _convert_weights;
-  NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
+  NEFlattenLayer _flatten_kernel;
+  NETranspose _reshape_weights_function;
   NEGEMM _mm_gemm;
   NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
-  NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
-  NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
   Tensor _flatten_output;
-  Tensor _gemmlowp_output;
   Tensor _converted_weights_output;
   Tensor _reshape_weights_output;
-  const ITensor *_original_weights;
   bool _are_weights_converted;
   bool _are_weights_reshaped;
   bool _is_fc_after_conv;
-  bool _accumulate_biases;
   bool _is_quantized;
   bool _is_prepared;
+  const ITensor *_original_weights;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__ */
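The new bias and fc_info parameters exist because NEGEMMMatrixAccumulateBiasesKernel is gone from ACL; the bias now has to reach the GEMM configuration itself so the addition can be fused. A hedged sketch of what the float path of configure_mm() can look like under that assumption (not the literal implementation):

#include "arm_compute/runtime/NEON/functions/NEGEMM.h"

// Sketch: the bias rides along as the GEMM "c" operand (alpha = beta = 1.f),
// giving output = input * weights + bias without a separate accumulate kernel.
// `fc_info` would contribute activation information in a fuller version.
void configure_mm_sketch(arm_compute::NEGEMM &gemm, const arm_compute::ITensor *input,
                         const arm_compute::ITensor *weights, const arm_compute::ITensor *bias,
                         arm_compute::ITensor *output)
{
  // Reshape the weights only on the first run, as FC layers typically do.
  arm_compute::GEMMInfo gemm_info(false, false, true);
  gemm.configure(input, weights, bias, output, 1.f, 1.f, gemm_info);
}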

@@ -44,6 +44,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"

#include "src/core/helpers/AutoConfiguration.h"

@@ -164,7 +165,7 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte
   _original_weights = weights;
   _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
   _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
-  _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
+  _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis, false);

   auto out_dims = transposeconv_output_dimensions(
     input->info()->dimension(idx_w), input->info()->dimension(idx_h),
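The trailing false matches the use_inverted_axis flag that newer ACL releases added to CLReverse::configure(); passing false keeps the axis numbering this code already relied on. A minimal sketch of the flip setup under that assumption (helper name hypothetical):

#include "arm_compute/runtime/CL/functions/CLReverse.h"

// Sketch: reverse the weights along width and height (axes stored in `axis`).
void configure_weight_flip(const arm_compute::CLCompileContext &ctx,
                           arm_compute::ICLTensor *weights, arm_compute::ICLTensor *flipped,
                           arm_compute::ICLTensor *axis, arm_compute::CLReverse &flip)
{
  flip.configure(ctx, weights, flipped, axis, /*use_inverted_axis=*/false);
}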

@@ -65,19 +65,6 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
 }
 } // namespace

-void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
-{
-  auto k = std::make_unique<CLTransposeKernel>();
-  k->configure(input, output);
-  _kernel = std::move(k);
-}
-
-Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input,
-                                                           const ITensorInfo *output)
-{
-  return CLTransposeKernel::validate(input, output);
-}
-
 CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer(
   std::shared_ptr<IMemoryManager> memory_manager)
   : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
@@ -245,8 +232,7 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
   if (!weights_reshaped)
   {
     // Validate reshape weights kernel
-    ARM_COMPUTE_RETURN_ON_ERROR(
-      CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(weights_to_use, &reshaped_weights));
     weights_to_use = &reshaped_weights;
   }
