1- using System . Text ;
1+ using System . Collections . Concurrent ;
2+ using System . Text ;
23using LLama . Abstractions ;
34using LLama . Common ;
45using LLama . Sampling ;
6+ using LLama . WebAPI . Services ;
57using Microsoft . AspNetCore . SignalR ;
68
79namespace LLama . WebApi . Hubs ;
810
/// <summary>
/// Loads the model weights once and streams chat responses token by token,
/// keeping one <see cref="LLamaContext"/> per connection id.
/// </summary>
public class LlamaService : IDisposable
{
    // Single source of truth for the model location; previously this literal was
    // repeated in the constructor and in the per-connection context factory.
    private const string ModelPath = "C:/Users/katana/Downloads/Llama-2-7b-chat-hf-finetune-q5_k_m-v1.0.gguf";

    // Shared by weight loading and every context creation so the two can never diverge.
    private readonly ModelParams _parameters = new(ModelPath)
    {
        ContextSize = 2048,
        GpuLayerCount = 5
    };

    private readonly LLamaWeights _model;
    private readonly ISessionRepository _sessionRepository;
    private readonly ConcurrentDictionary<string, LLamaContext> _contexts = new();

    /// <summary>
    /// Creates the service and loads the model weights from <see cref="ModelPath"/>.
    /// </summary>
    /// <param name="sessionRepository">Store used to look up and persist per-connection sessions.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sessionRepository"/> is null.</exception>
    public LlamaService(ISessionRepository sessionRepository)
    {
        ArgumentNullException.ThrowIfNull(sessionRepository);

        _sessionRepository = sessionRepository;
        _model = LLamaWeights.LoadFromFile(_parameters);
    }

    /// <summary>
    /// Appends <paramref name="prompt"/> to the connection's history as a user message and
    /// streams the model's reply. After the stream has been fully consumed, the concatenated
    /// reply is appended to the history as an assistant message, the session's
    /// <c>LastActivity</c> is set to the current UTC time, and the session is passed to
    /// <c>UpdateSession</c>.
    /// </summary>
    /// <param name="connectionId">Key used for both the session lookup and the context cache.</param>
    /// <param name="prompt">User message to send to the model.</param>
    /// <param name="cancellationToken">
    /// Optional token forwarded to the chat call; defaults to <see cref="CancellationToken.None"/>,
    /// which matches the previous behaviour.
    /// </param>
    /// <returns>The response pieces in the order the chat session produces them.</returns>
    /// <remarks>
    /// If the consumer stops enumerating early (or cancellation fires), the statements after
    /// the loop do not run, so the assistant message is not recorded for that turn.
    /// </remarks>
    public async IAsyncEnumerable<string> GenerateResponse(
        string connectionId,
        string prompt,
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var session = _sessionRepository.GetOrCreateSession(connectionId);

        // Create a context for each connection (lazily, on its first prompt) and reuse it afterwards.
        var context = _contexts.GetOrAdd(connectionId, _ => _model.CreateContext(_parameters));

        // A fresh executor and chat session are built on top of the cached context for every call.
        var executor = new InteractiveExecutor(context);
        var chatSession = new ChatSession(executor);

        session.History.AddMessage(AuthorRole.User, prompt);

        var inferenceParams = new InferenceParams { AntiPrompts = ["User:"], TokensKeep = 20 };
        var response = chatSession.ChatAsync(session.History, inferenceParams, cancellationToken);

        // Accumulate the pieces while streaming so the complete reply can be stored afterwards.
        var fullResponse = new StringBuilder();
        await foreach (var token in response)
        {
            fullResponse.Append(token);
            yield return token;
        }

        session.History.AddMessage(AuthorRole.Assistant, fullResponse.ToString());
        session.LastActivity = DateTime.UtcNow;
        _sessionRepository.UpdateSession(connectionId, session);
    }

    /// <summary>
    /// Removes and disposes the context cached for <paramref name="connectionId"/>, if any.
    /// Without this, contexts are only released when the whole service is disposed.
    /// </summary>
    /// <param name="connectionId">Connection whose context should be released.</param>
    /// <returns><c>true</c> if a context was found and disposed; otherwise <c>false</c>.</returns>
    /// <remarks>
    /// NOTE(review): no synchronisation with <see cref="GenerateResponse"/> is performed here;
    /// call this only when no response is being generated for the connection.
    /// </remarks>
    public bool ReleaseContext(string connectionId)
    {
        if (!_contexts.TryRemove(connectionId, out var context))
        {
            return false;
        }

        context.Dispose();
        return true;
    }

    /// <summary>
    /// Disposes every cached context and then the model weights.
    /// </summary>
    public void Dispose()
    {
        foreach (var context in _contexts.Values)
        {
            context.Dispose();
        }

        // Drop the references so disposed contexts cannot be handed out again.
        _contexts.Clear();
        _model.Dispose();
    }
}
6268
63-
/// <summary>
/// SignalR hub that forwards a caller's prompt to <see cref="LlamaService"/> and streams
/// the generated pieces back to that caller.
/// </summary>
public class AiHub : Hub
{
    private readonly LlamaService _llama;
    private readonly ISessionRepository _sessionRepository;

    /// <summary>
    /// Creates the hub with its two collaborators.
    /// </summary>
    /// <param name="llama">Service that produces the streamed response.</param>
    /// <param name="sessionRepository">Store from which the session is removed on disconnect.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public AiHub(LlamaService llama, ISessionRepository sessionRepository)
    {
        ArgumentNullException.ThrowIfNull(llama);
        ArgumentNullException.ThrowIfNull(sessionRepository);

        _llama = llama;
        _sessionRepository = sessionRepository;
    }

    /// <summary>
    /// Removes the session stored for the disconnecting connection, then runs the base handler.
    /// </summary>
    /// <param name="exception">The error that caused the disconnect, if any; passed through to the base class.</param>
    public override async Task OnDisconnectedAsync(Exception? exception)
    {
        _sessionRepository.RemoveSession(Context.ConnectionId);
        await base.OnDisconnectedAsync(exception);
    }

    /// <summary>
    /// Streams the response for <paramref name="prompt"/> to the caller as a series of
    /// <c>"ReceiveToken"</c> messages followed by a single <c>"StreamComplete"</c> message.
    /// </summary>
    /// <param name="prompt">Text sent by the client.</param>
    /// <remarks>
    /// If the caller's connection is aborted while streaming, enumeration stops and
    /// <c>"StreamComplete"</c> is not sent.
    /// </remarks>
    public async Task SendPrompt(string prompt)
    {
        var connectionId = Context.ConnectionId;
        var connectionAborted = Context.ConnectionAborted;

        await foreach (var token in _llama.GenerateResponse(connectionId, prompt))
        {
            // Stop pulling tokens once the client is gone; leaving the loop disposes the
            // enumerator instead of draining a response nobody will receive.
            if (connectionAborted.IsCancellationRequested)
            {
                return;
            }

            await Clients.Caller.SendAsync("ReceiveToken", token);
        }

        await Clients.Caller.SendAsync("StreamComplete");
    }
}
0 commit comments