Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
throw std::runtime_error("Unsupported VIEW case");
}
op_case = 2;
if (m_model_is_splitted && m_model_inputs.find(std::string(src->name)) != m_model_inputs.end()) {
op_case = 0;
}
}
{
auto * src = node->src[0];
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-openvino/ggml-openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,15 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
}
break;
}
case GGML_OP_VIEW: {
if (ggml_nelements(op) != ggml_nelements(op->src[0])) {
std::cout << __func__ << ": OpenVINO backend does not support VIEW with different number of elements: "
<< op->name << " " << ggml_nelements(op)
<< " vs " << ggml_nelements(op->src[0]) << std::endl;
return true;
}
break;
}
default:
break;
}
Expand Down
39 changes: 36 additions & 3 deletions ggml/src/ggml-openvino/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, std::shared_ptr<
std::lock_guard<std::mutex> lock(*(mutex));
ggml_decoder = it->second->ptr;
old_m_params = ggml_decoder->get_model_params();
cache_hit = old_m_params.can_reuse_dynamically(m_params);
if (!ggml_decoder->is_splited_model()) {
cache_hit = old_m_params.can_reuse_dynamically(m_params);
}
} else {
mutex = std::make_shared<std::mutex>();
r_ctx->decoder_cache[key] = std::make_shared<decoder_runtime_ctx>(mutex);
Expand Down Expand Up @@ -598,7 +600,7 @@ namespace {
ov::Tensor convert_ggml_input_to_ov(std::shared_ptr<GgmlOvDecoder> ggml_decoder, const std::string & name) {
const auto * ggml_tensor = ggml_decoder->get_input_ggml_tensor(name);

if (ggml_tensor->extra != nullptr) {
if (ggml_tensor->extra != nullptr && !ggml_decoder->is_splited_model()) {
// GGML_LOG_DEBUG("Using ggml_tensor->extra as ov::Tensor for input: %s\n", name.c_str());
auto * extra_base = static_cast<ggml_openvino_extra_base *>(ggml_tensor->extra);
if (extra_base->type != ggml_openvino_extra_base::Type::TENSOR) {
Expand All @@ -611,12 +613,43 @@ ov::Tensor convert_ggml_input_to_ov(std::shared_ptr<GgmlOvDecoder> ggml_decoder,
// GGML_LOG_DEBUG("Converting ggml tensor to ov::Tensor for input: %s\n", name.c_str());
auto * input_data = ggml_tensor->data;
ov::Shape input_shape;
if (ggml_tensor->op == GGML_OP_VIEW) {
if (ggml_tensor->op == GGML_OP_VIEW && !ggml_decoder->is_splited_model()) {
// This case is added to make test-backend-ops work
input_shape = ggml_decoder->get_shape(ggml_tensor->view_src);
} else {
input_shape = ggml_decoder->get_shape(ggml_tensor);
}

// Add explicit strided-copy reconstruction for PERMUTE and VIEW tensors in split
// models: iterate over all 4 dimensions using `nb[]` strides and `view_offs` to
// copy non-contiguous source data into a contiguous `ov::Tensor` buffer
if ((ggml_tensor->op == GGML_OP_PERMUTE || ggml_tensor->op == GGML_OP_VIEW) && ggml_decoder->is_splited_model()) {
        // Create the OpenVINO input tensor; the source data needs to be reconstructed
        // into a contiguous buffer based on the view tensor's shape and strides.
        // NOTE(review): the copy loop below transfers nb[0] bytes per innermost element
        // and advances the destination by nb[0]. That is correct only when nb[0] equals
        // the element size (ggml_type_size); a PERMUTE that moves axis 0 gives nb[0] a
        // larger stride, which would over-run the destination tensor — confirm which
        // permutations can reach this path in split models.
ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
const auto * src_tensor = ggml_tensor->view_src;
std::vector<uint8_t> data;
auto n_bytes = ggml_nbytes(src_tensor);
data.resize(n_bytes);
ggml_backend_tensor_get(src_tensor, data.data(), 0, n_bytes);

size_t des_index = 0;
for (size_t i0 = 0; i0 < static_cast<size_t>(ggml_tensor->ne[3]); i0++) {
for (size_t i1 = 0; i1 < static_cast<size_t>(ggml_tensor->ne[2]); i1++) {
for (size_t i2 = 0; i2 < static_cast<size_t>(ggml_tensor->ne[1]); i2++) {
for (size_t i3 = 0; i3 < static_cast<size_t>(ggml_tensor->ne[0]); i3++) {
size_t src_index = ggml_tensor->view_offs + i0 * ggml_tensor->nb[3] + i1 * ggml_tensor->nb[2] +
i2 * ggml_tensor->nb[1] + i3 * ggml_tensor->nb[0];

memcpy(static_cast<char *>(input_tensor.data()) + des_index,
reinterpret_cast<const char *>(data.data()) + src_index, ggml_tensor->nb[0]);
des_index += ggml_tensor->nb[0];
}
}
}
}
return input_tensor;
}

auto input_tensor = ov::Tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape, input_data);
return input_tensor;
}
Expand Down
Loading