Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FLLM on static llama #7489

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions backends/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ include_directories(

set(_qnn_schema__srcs
backends/qualcomm/serialization/qc_compiler_spec.fbs
backends/qualcomm/serialization/qc_binary_info.fbs
)
set(_qnn_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
# Paths to headers generated from the .fbs files.
Expand Down Expand Up @@ -116,6 +115,7 @@ add_library(qcir_utils STATIC)
add_library(qnn_backend STATIC)
add_library(qnn_backend_cache STATIC)
add_library(qnn_context STATIC)
add_library(qnn_custom_protocol STATIC)
add_library(qnn_device STATIC)
add_library(qnn_executorch_backend SHARED)
add_library(qnn_executorch_header INTERFACE)
Expand Down Expand Up @@ -155,6 +155,7 @@ target_link_libraries(qnn_executorch_logging PRIVATE qnn_schema)
target_link_libraries(qnn_profiler PRIVATE qnn_executorch_logging)
target_link_libraries(qnn_logger PRIVATE qnn_implementation ${android_log})
target_link_libraries(qnn_backend PRIVATE qnn_implementation qnn_logger)
target_link_libraries(qnn_custom_protocol PRIVATE qcir_utils)
target_link_libraries(
qnn_device PRIVATE qnn_executorch_logging qnn_implementation qnn_logger
)
Expand All @@ -177,7 +178,7 @@ target_link_libraries(
qnn_factory
PUBLIC qnn_header
PRIVATE qnn_schema qnn_backend qnn_device qnn_context qnn_graph
qnn_mem_manager
qnn_mem_manager qnn_custom_protocol
)
target_link_libraries(
qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer
Expand Down
9 changes: 5 additions & 4 deletions backends/qualcomm/aot/ir/qcir.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,18 @@ table Tensor {
type: TensorType;
dtype: DataType;
qparam: QuantizeParam;
data: [ubyte];
size: uint;
offset: ulong;
}

table Operator {
name: string;
package_name: string;
type_name: string;
// keep only tensor indexes
inputs: [int];
outputs: [int];
params: [int];
inputs: [uint];
outputs: [uint];
params: [uint];
}

table Graph {
Expand Down
14 changes: 6 additions & 8 deletions backends/qualcomm/aot/ir/qcir_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,8 @@ Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor) {

flatbuffers::Offset<qcir::Tensor> ToTensor(
const Qnn_Tensor_t& tensor,
const uint64_t data_offset,
flatbuffers::FlatBufferBuilder* builder) {
std::vector<uint8_t> buffer(
static_cast<uint8_t*>(QNN_VER_PTR(tensor)->clientBuf.data),
static_cast<uint8_t*>(QNN_VER_PTR(tensor)->clientBuf.data) +
QNN_VER_PTR(tensor)->clientBuf.dataSize);
std::vector<uint32_t> shape(
QNN_VER_PTR(tensor)->dimensions,
QNN_VER_PTR(tensor)->dimensions + QNN_VER_PTR(tensor)->rank);
Expand All @@ -251,10 +248,11 @@ flatbuffers::Offset<qcir::Tensor> ToTensor(
ToTensorType(QNN_VER_PTR(tensor)->type),
ToDataType(QNN_VER_PTR(tensor)->dataType),
ToQuantizeParam(tensor, builder),
&buffer);
QNN_VER_PTR(tensor)->clientBuf.dataSize,
data_offset);
}

Qnn_Tensor_t ToTensor(const tensor_type& tensor) {
Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr) {
auto is_io_tensor = [](Qnn_TensorType_t type) {
return type < QNN_TENSOR_TYPE_STATIC;
};
Expand All @@ -266,10 +264,10 @@ Qnn_Tensor_t ToTensor(const tensor_type& tensor) {
QNN_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
QNN_VER_PTR(t)->rank = tensor->shape()->size();
QNN_VER_PTR(t)->dimensions = const_cast<uint32_t*>(tensor->shape()->data());
QNN_VER_PTR(t)->clientBuf.dataSize = tensor->data()->size();
QNN_VER_PTR(t)->clientBuf.dataSize = tensor->size();
QNN_VER_PTR(t)->clientBuf.data = is_io_tensor(QNN_VER_PTR(t)->type)
? nullptr
: static_cast<void*>(const_cast<uint8_t*>(tensor->data()->Data()));
: static_cast<void*>(const_cast<uint8_t*>(data_ptr));
return t;
}

Expand Down
3 changes: 2 additions & 1 deletion backends/qualcomm/aot/ir/qcir_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor);

flatbuffers::Offset<qcir::Tensor> ToTensor(
const Qnn_Tensor_t& tensor,
const uint64_t data_offset,
flatbuffers::FlatBufferBuilder* builder);
Qnn_Tensor_t ToTensor(const tensor_type& tensor);
Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr);

} // namespace qnn
} // namespace backends
Expand Down
Loading
Loading