Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
throw std::runtime_error("Unsupported VIEW case");
}
op_case = 2;
if (m_model_is_splitted && m_model_inputs.find(std::string(src->name)) != m_model_inputs.end()) {
op_case = 0;
}
}
{
auto * src = node->src[0];
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-openvino/ggml-openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,15 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
}
break;
}
case GGML_OP_VIEW: {
if (ggml_nelements(op) != ggml_nelements(op->src[0])) {
std::cout << __func__ << ": OpenVINO backend does not support VIEW with different number of elements: "
<< op->name << " " << ggml_nelements(op)
<< " vs " << ggml_nelements(op->src[0]) << std::endl;
return true;
}
break;
}
default:
break;
}
Expand Down
39 changes: 36 additions & 3 deletions ggml/src/ggml-openvino/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, std::shared_ptr<
std::lock_guard<std::mutex> lock(*(mutex));
ggml_decoder = it->second->ptr;
old_m_params = ggml_decoder->get_model_params();
cache_hit = old_m_params.can_reuse_dynamically(m_params);
if (!ggml_decoder->is_splited_model()) {
cache_hit = old_m_params.can_reuse_dynamically(m_params);
}
} else {
mutex = std::make_shared<std::mutex>();
r_ctx->decoder_cache[key] = std::make_shared<decoder_runtime_ctx>(mutex);
Expand Down Expand Up @@ -598,7 +600,7 @@ namespace {
ov::Tensor convert_ggml_input_to_ov(std::shared_ptr<GgmlOvDecoder> ggml_decoder, const std::string & name) {
const auto * ggml_tensor = ggml_decoder->get_input_ggml_tensor(name);

if (ggml_tensor->extra != nullptr) {
if (ggml_tensor->extra != nullptr && !ggml_decoder->is_splited_model()) {
// GGML_LOG_DEBUG("Using ggml_tensor->extra as ov::Tensor for input: %s\n", name.c_str());
auto * extra_base = static_cast<ggml_openvino_extra_base *>(ggml_tensor->extra);
if (extra_base->type != ggml_openvino_extra_base::Type::TENSOR) {
Expand All @@ -611,12 +613,43 @@ ov::Tensor convert_ggml_input_to_ov(std::shared_ptr<GgmlOvDecoder> ggml_decoder,
// GGML_LOG_DEBUG("Converting ggml tensor to ov::Tensor for input: %s\n", name.c_str());
auto * input_data = ggml_tensor->data;
ov::Shape input_shape;
if (ggml_tensor->op == GGML_OP_VIEW) {
if (ggml_tensor->op == GGML_OP_VIEW && !ggml_decoder->is_splited_model()) {
// This case is added to make test-backend-ops work
input_shape = ggml_decoder->get_shape(ggml_tensor->view_src);
} else {
input_shape = ggml_decoder->get_shape(ggml_tensor);
}

// Add explicit strided-copy reconstruction for PERMUTE and VIEW tensors in split
// models: iterate over all 4 dimensions using `nb[]` strides and `view_offs` to
// copy non-contiguous source data into a contiguous `ov::Tensor` buffer
if ((ggml_tensor->op == GGML_OP_PERMUTE || ggml_tensor->op == GGML_OP_VIEW) && ggml_decoder->is_splited_model()) {
        // Create the OpenVINO input tensor; the source data needs to be reconstructed
        // into a contiguous buffer based on the view tensor's shape and strides.
        // NOTE(review): the copy loop below transfers nb[0] bytes per innermost element
        // and advances the destination by nb[0]. That is correct only when nb[0] equals
        // the element size (ggml_type_size); a PERMUTE that moves axis 0 gives nb[0] a
        // larger stride, which would over-run the destination tensor — confirm which
        // permutations can reach this path in split models.
ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
const auto * src_tensor = ggml_tensor->view_src;
std::vector<uint8_t> data;
auto n_bytes = ggml_nbytes(src_tensor);
data.resize(n_bytes);
ggml_backend_tensor_get(src_tensor, data.data(), 0, n_bytes);

size_t des_index = 0;
for (size_t i0 = 0; i0 < static_cast<size_t>(ggml_tensor->ne[3]); i0++) {
for (size_t i1 = 0; i1 < static_cast<size_t>(ggml_tensor->ne[2]); i1++) {
for (size_t i2 = 0; i2 < static_cast<size_t>(ggml_tensor->ne[1]); i2++) {
for (size_t i3 = 0; i3 < static_cast<size_t>(ggml_tensor->ne[0]); i3++) {
size_t src_index = ggml_tensor->view_offs + i0 * ggml_tensor->nb[3] + i1 * ggml_tensor->nb[2] +
i2 * ggml_tensor->nb[1] + i3 * ggml_tensor->nb[0];

memcpy(static_cast<char *>(input_tensor.data()) + des_index,
reinterpret_cast<const char *>(data.data()) + src_index, ggml_tensor->nb[0]);
des_index += ggml_tensor->nb[0];
}
}
}
}
return input_tensor;
}

auto input_tensor = ov::Tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape, input_data);
return input_tensor;
}
Expand Down
Loading