mirror

programistro · programistro · commit 113ac80dcfe3 · 2025-04-26T11:02:46.000+03:00
diff --git a/LLama.WebAPI/Hubs/ChatHub.cs b/LLama.WebAPI/Hubs/ChatHub.cs
@@ -1,79 +1,95 @@
-﻿using System.Text;
+﻿using System.Collections.Concurrent;
+using System.Text;
 using LLama.Abstractions;
 using LLama.Common;
 using LLama.Sampling;
+using LLama.WebAPI.Services;
 using Microsoft.AspNetCore.SignalR;
 
 namespace LLama.WebApi.Hubs;
 
 public class LlamaService : IDisposable
 {
-    private LLamaWeights _model;
-    private LLamaContext _context;
-    private ChatSession _session;
-    private ChatHistory _history;
+    private readonly LLamaWeights _model; // Model weights, loaded once in the constructor; every per-connection context is created from them.
+    private readonly ISessionRepository _sessionRepository; // Source of the per-connection UserSession (history, last activity).
+    private readonly ConcurrentDictionary<string, LLamaContext> _contexts = new(); // One LLamaContext per connection id. NOTE(review): nothing in this class removes an entry before Dispose().
 
-    public LlamaService()
+    public LlamaService(ISessionRepository sessionRepository) // Stores the repository and loads the model weights from a hard-coded local path.
     {
-        var parameters = new ModelParams("C:/Users/katana/Downloads/llama-2-7b-guanaco-qlora.Q3_K_S.gguf")
+        _sessionRepository = sessionRepository;
+        
+        var parameters = new ModelParams("C:/Users/katana/Downloads/Llama-2-7b-chat-hf-finetune-q5_k_m-v1.0.gguf") // NOTE(review): machine-specific path; the same literal is repeated in GenerateResponse.
         {
             ContextSize = 2048,
             GpuLayerCount = 5
         };
-        
         _model = LLamaWeights.LoadFromFile(parameters);
-        _context = _model.CreateContext(parameters);
-        var executor = new InteractiveExecutor(_context);
-        _session = new ChatSession(executor);
-        _history = new ChatHistory();
     }
 
-    public async IAsyncEnumerable<string> GenerateResponse(string prompt)
+    public async IAsyncEnumerable<string> GenerateResponse(string connectionId, string prompt) // Streams the reply to prompt token by token and records both sides in the connection's session history.
     {
-        var inferenceParams = new InferenceParams
-        {
-            AntiPrompts = new List<string> { "User:" },
-            TokensKeep = 20
-        };
+        var session = _sessionRepository.GetOrCreateSession(connectionId);
+        
+        // Create a context for each connection (later calls with the same id reuse it via GetOrAdd)
+        var context = _contexts.GetOrAdd(connectionId,
+            id => _model.CreateContext(new ModelParams("C:/Users/katana/Downloads/Llama-2-7b-chat-hf-finetune-q5_k_m-v1.0.gguf")
+                { ContextSize = 2048, GpuLayerCount = 5 }));
+        
+        var executor = new InteractiveExecutor(context); // The executor and ChatSession are rebuilt on every call over the cached context.
+        var chatSession = new ChatSession(executor);
 
-        _history.AddMessage(AuthorRole.User, prompt);
+        session.History.AddMessage(AuthorRole.User, prompt); // The user message is recorded before generation starts.
         
-        var response = _session.ChatAsync(
-            _history, 
-            inferenceParams, 
+        var response = chatSession.ChatAsync(
+            session.History, 
+            new InferenceParams { AntiPrompts = ["User:"], TokensKeep = 20 }, 
             CancellationToken.None
         );
 
+        var fullResponse = new StringBuilder(); // Collects the streamed tokens so the whole reply can be added to the history afterwards.
         await foreach(var token in response)
         {
+            fullResponse.Append(token);
             yield return token;
         }
         
-        _history.AddMessage(AuthorRole.Assistant, string.Join("", response));
+        session.History.AddMessage(AuthorRole.Assistant, fullResponse.ToString()); // Reached only once the stream above has been enumerated to the end.
+        session.LastActivity = DateTime.UtcNow;
+        _sessionRepository.UpdateSession(connectionId, session);
     }
 
     public void Dispose()
     {
-        _session?.SaveSession("session.json");
-        _context?.Dispose();
-        _model?.Dispose();
+        foreach (var context in _contexts.Values) // Release every cached per-connection context first, then the model weights.
+            context.Dispose();
+        _model.Dispose();
     }
 }
 
-
 public class AiHub : Hub
 {
     private readonly LlamaService _llama;
+    private readonly ISessionRepository _sessionRepository; // Used here only to drop the session when the connection goes away.
 
-    public AiHub(LlamaService llama) => _llama = llama;
+    public AiHub(LlamaService llama, ISessionRepository sessionRepository) // Both dependencies are constructor-injected.
+    {
+        _llama = llama;
+        _sessionRepository = sessionRepository;
+    }
+
+    public override async Task OnDisconnectedAsync(Exception? exception) // Removes the stored session for this connection, then defers to the base hub.
+    {
+        _sessionRepository.RemoveSession(Context.ConnectionId); // NOTE(review): only the repository entry is removed; the LLamaContext cached by LlamaService for this id is not released here.
+        await base.OnDisconnectedAsync(exception);
+    }
 
     public async Task SendPrompt(string prompt)
     {
-        await foreach(var token in _llama.GenerateResponse(prompt))
+        await foreach(var token in _llama.GenerateResponse(Context.ConnectionId, prompt)) // The connection id selects the session and context; each token is pushed to the caller as "ReceiveToken".
         {
             await Clients.Caller.SendAsync("ReceiveToken", token);
         }
         
         await Clients.Caller.SendAsync("StreamComplete");
     }
-}
+}
diff --git a/LLama.WebAPI/LLama.WebAPI.csproj b/LLama.WebAPI/LLama.WebAPI.csproj
@@ -8,6 +8,7 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <PackageReference Include="LLamaSharp.Backend.Cpu" Version="0.23.0" />
     <PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.8.8" />
     <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.14" />
     <PackageReference Include="Swashbuckle.AspNetCore" Version="7.3.1" />
diff --git a/LLama.WebAPI/Program.cs b/LLama.WebAPI/Program.cs
@@ -10,8 +10,10 @@
 builder.Services.AddSingleton<StatefulChatService>();
 builder.Services.AddScoped<StatelessChatService>();
 builder.Services.AddSignalR();
+builder.Services.AddSingleton<ISessionRepository, MemorySessionRepository>(); // One shared in-memory session store; LlamaService and AiHub both take it through their constructors.
 builder.Services.AddSingleton<LlamaService>();
 
+
 var app = builder.Build();
 app.UseRouting();
 
diff --git a/LLama.WebAPI/Services/MemorySessionRepository.cs b/LLama.WebAPI/Services/MemorySessionRepository.cs
@@ -0,0 +1,42 @@
+﻿using System.Collections.Concurrent;
+using System.Text;
+using LLama.Common;
+
+namespace LLama.WebAPI.Services;
+
+// User session model: the state kept for a single connection id
+public class UserSession
+{
+    public string ConnectionId { get; set; } // Id of the connection this session belongs to; assigned by MemorySessionRepository when the session is created.
+    public ChatHistory History { get; set; } = new(); // Conversation so far; starts out as a new, empty ChatHistory.
+    public DateTime LastActivity { get; set; } = DateTime.UtcNow; // UTC timestamp; defaults to construction time and is refreshed by LlamaService.GenerateResponse.
+}
+
+// Repository for working with user sessions, keyed by connection id
+public interface ISessionRepository
+{
+    UserSession GetOrCreateSession(string connectionId); // Returns the session for connectionId; the in-memory implementation creates one when none is stored.
+    void UpdateSession(string connectionId, UserSession session); // Stores session under connectionId.
+    void RemoveSession(string connectionId); // Drops the session stored for connectionId.
+}
+
+// In-memory implementation (for demonstration purposes)
+public class MemorySessionRepository : ISessionRepository
+{
+    private readonly ConcurrentDictionary<string, UserSession> _sessions = new(); // Sessions keyed by connection id.
+
+    public UserSession GetOrCreateSession(string connectionId) // Returns the stored session, or adds and returns a new one whose ConnectionId is the key.
+    {
+        return _sessions.GetOrAdd(connectionId, id => new UserSession { ConnectionId = id });
+    }
+
+    public void UpdateSession(string connectionId, UserSession session) // Inserts session, or replaces whatever was stored under connectionId.
+    {
+        _sessions.AddOrUpdate(connectionId, session, (id, old) => session); // The update delegate ignores the old value, so the new session always wins.
+    }
+
+    public void RemoveSession(string connectionId) // Removes the entry if present.
+    {
+        _sessions.TryRemove(connectionId, out _); // The result is discarded, so a missing key is silently ignored.
+    }
+}
diff --git a/LLama.WebAPI/wwwroot/index.html b/LLama.WebAPI/wwwroot/index.html