Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ggml/src/ggml-openvino/ggml-openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,18 @@ static void * ggml_backend_openvino_buffer_get_base(ggml_backend_buffer_t buffer
return ctx->data;
}

// Check whether stateful execution was requested via the environment.
// Returns true only when GGML_OPENVINO_STATEFUL_EXECUTION is set to
// exactly "1". The environment lookup happens once; the cached pointer
// is reused on every subsequent call.
static bool is_stateful_enabled() {
    static const char * env_value = getenv("GGML_OPENVINO_STATEFUL_EXECUTION");
    if (env_value == nullptr) {
        return false;
    }
    return strcmp(env_value, "1") == 0;
}

static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
// GGML_LOG_DEBUG("%s: buffer usage=%d, tensor name=%s\n", __func__, buffer->usage, tensor->name);
ggml_backend_openvino_buffer_context * ctx = (ggml_backend_openvino_buffer_context *) buffer->context;

// Put kvcache on device memory for GPU (NPU memory is too small even for kvcache)
if (strncmp(tensor->name, "cache_", 6) == 0 && !ctx->is_remote && ggml_openvino_get_device_name() == "GPU" &&
!getenv("GGML_OPENVINO_STATEFUL_EXECUTION")) {
!is_stateful_enabled()) {
GGML_ASSERT(ctx->tensor_extras.empty());
auto device = ctx->device;
auto size = ctx->size;
Expand Down Expand Up @@ -664,7 +669,7 @@ GGML_BACKEND_API ggml_backend_t ggml_backend_openvino_init(int device) {

std::shared_ptr<ov_runtime_context> r_ctx = std::static_pointer_cast<ov_runtime_context>(ctx->runtime_context);
r_ctx->device = ggml_openvino_get_device_name();
r_ctx->stateful = getenv("GGML_OPENVINO_STATEFUL_EXECUTION") && !ggml_openvino_is_npu();
r_ctx->stateful = is_stateful_enabled() && !ggml_openvino_is_npu();

ggml_backend_t openvino_backend = new ggml_backend{
/* .guid = */ ggml_backend_openvino_guid(),
Expand Down
Loading