diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f949c4..1b53b09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **Service statistics endpoint**: Implemented `GET /servicestats` returning resource counters (documents, indexes, indexers, data sources, storage, synonym maps, skillsets, vector index size) and service limits. Usage values are computed from actual simulator state; quotas and limits use Azure AI Search Standard (S1) tier defaults. - **Expanded language analyzer support**: AnalyzerFactory now supports all 27 Lucene-backed language analyzers (Arabic, Armenian, Basque, Brazilian Portuguese, Bulgarian, Catalan, CJK, Czech, Danish, Dutch, English, Finnish, French, Galician, German, Greek, Hindi, Hungarian, Indonesian, Irish, Italian, Latvian, Norwegian, Persian, Portuguese, Romanian, Russian, Spanish, Swedish, Turkish) plus 22 Microsoft-only languages that fall back to StandardAnalyzer. Both `.lucene` and `.microsoft` name variants are accepted. - **Complete normalizer token filter support**: NormalizerFactory now implements all 14 Azure AI Search token filters for custom normalizers: `arabic_normalization`, `asciifolding`, `cjk_width`, `elision`, `german_normalization`, `hindi_normalization`, `indic_normalization`, `lowercase`, `persian_normalization`, `scandinavian_folding`, `scandinavian_normalization`, `sorani_normalization`, `trim`, `uppercase`.
diff --git a/docs/API-REFERENCE.md b/docs/API-REFERENCE.md index 1ca9169..b8eff16 100644 --- a/docs/API-REFERENCE.md +++ b/docs/API-REFERENCE.md @@ -14,6 +14,7 @@ This document provides a detailed reference for all REST API endpoints supported | Indexers | ✅ Implemented | Full CRUD, run, reset, status, scheduled execution | | Document Cracking | ✅ Implemented | PDF, Word, Excel, HTML, JSON, CSV, TXT | | Skillsets | ✅ Implemented | Text skills, embedding skill, custom Web API skill | +| Service Statistics | ✅ Implemented | Counters and limits (quotas use S1 defaults) | ## Base URL @@ -1562,6 +1563,59 @@ api-key: --- +## Service Statistics + +Returns service-level resource counters and limits. + +### [Get Service Statistics](https://learn.microsoft.com/en-us/rest/api/searchservice/get-service-statistics) + +```http +GET /servicestats?api-version=2024-07-01 +api-key: +``` + +**Response:** + +```json +{ + "@odata.context": "https://localhost:7250/$metadata#Microsoft.Azure.Search.V2024_07_01.ServiceStatistics", + "counters": { + "documentCount": { "usage": 153956, "quota": null }, + "indexesCount": { "usage": 2, "quota": 15 }, + "indexersCount": { "usage": 1, "quota": 15 }, + "dataSourcesCount": { "usage": 1, "quota": 15 }, + "storageSize": { "usage": 274215358, "quota": 16106127360 }, + "synonymMaps": { "usage": 0, "quota": 3 }, + "skillsetCount": { "usage": 0, "quota": 15 }, + "vectorIndexSize": { "usage": 0, "quota": 5368709120 } + }, + "limits": { + "maxStoragePerIndex": 16106127360, + "maxFieldsPerIndex": 1000, + "maxFieldNestingDepthPerIndex": 10, + "maxComplexCollectionFieldsPerIndex": 40, + "maxComplexObjectsInCollectionsPerDocument": 3000 + } +} +``` + +**Counter Details:** + +| Counter | Usage | Quota | +| ------- | ----- | ----- | +| `documentCount` | Actual total across all indexes | `null` (unlimited, same as Azure) | +| `indexesCount` | Actual count | Hardcoded S1 default (15) | +| `indexersCount` | Actual count | Hardcoded S1 default (15) | +| 
`dataSourcesCount` | Actual count | Hardcoded S1 default (15) | +| `storageSize` | Actual Lucene index storage in bytes | Hardcoded S1 default (~15 GB) | +| `synonymMaps` | Always 0 (not yet implemented) | Hardcoded S1 default (3) | +| `skillsetCount` | Actual count | Hardcoded S1 default (15) | +| `vectorIndexSize` | Actual HNSW index size in bytes | Hardcoded S1 default (5 GB) | + +> **Note**: The simulator does not enforce quotas. All `quota` values and `limits` are hardcoded to Azure AI Search **Standard (S1) tier** defaults. The `usage` values for `documentCount`, `indexesCount`, `indexersCount`, `dataSourcesCount`, `storageSize`, `skillsetCount`, and `vectorIndexSize` reflect actual simulator state. `synonymMaps` usage is always 0 because synonym map management is not yet implemented. + +--- + ## Admin Endpoints Administrative endpoints for token management and diagnostics. diff --git a/docs/LIMITATIONS.md b/docs/LIMITATIONS.md index c098d75..ca33032 100644 --- a/docs/LIMITATIONS.md +++ b/docs/LIMITATIONS.md @@ -63,7 +63,7 @@ The simulator is designed for **development, learning, and testing purposes only | Synonym maps | Not yet implemented | | Custom analyzers | Basic tokenizers and filters only | | CORS | Simplified implementation | -| Service statistics | Basic stats only | +| Service statistics | Quotas and limits use hardcoded S1 tier defaults; not enforced | ### ❌ Not Supported @@ -322,6 +322,15 @@ Not supported: | Private endpoints | ❌ Not supported | | Document-level security | ❌ Not supported | +## Service Statistics Limitations + +The `GET /servicestats` endpoint returns resource counters and service limits. Because the simulator has no real quota system: + +- **Usage values** (`documentCount`, `indexesCount`, `indexersCount`, `dataSourcesCount`, `storageSize`, `skillsetCount`, `vectorIndexSize`) are computed from actual simulator state. 
+- **Quota values** and **limits** are hardcoded to Azure AI Search **Standard (S1) tier** defaults and are **not enforced**.
+- `synonymMaps` usage is always `0` because synonym map management is not yet implemented.
+- `documentCount.quota` is `null` (unlimited), matching Azure's Standard tier behavior.
+
 ## Recommendations
 
 ### When to Use the Simulator
diff --git a/src/AzureAISearchSimulator.Api/Controllers/ServiceStatsController.cs b/src/AzureAISearchSimulator.Api/Controllers/ServiceStatsController.cs
new file mode 100644
index 0000000..0f0c7c6
--- /dev/null
+++ b/src/AzureAISearchSimulator.Api/Controllers/ServiceStatsController.cs
@@ -0,0 +1,191 @@
+using AzureAISearchSimulator.Core.Models;
+using AzureAISearchSimulator.Core.Services;
+using AzureAISearchSimulator.Search;
+using AzureAISearchSimulator.Search.Hnsw;
+using AzureAISearchSimulator.Api.Services;
+using AzureAISearchSimulator.Api.Services.Authorization;
+using Microsoft.AspNetCore.Mvc;
+
+namespace AzureAISearchSimulator.Api.Controllers;
+
+/// <summary>
+/// Controller for service-level statistics.
+/// </summary>
+/// <remarks>
+/// Usage figures are gathered from the injected services on each request. Quota and limit
+/// figures come from the constants declared in this class; this controller only reports them.
+/// </remarks>
+[ApiController]
+[Route("servicestats")]
+[Produces("application/json")]
+public class ServiceStatsController : ControllerBase
+{
+    // Default quotas, labelled as Azure AI Search Standard (S1) tier values.
+    // NOTE(review): 15 indexes/indexers/data sources/skillsets, 3 synonym maps, 15 GiB storage and
+    // 5 GiB vector quota look like the published Basic-tier limits; the S1 figures are believed to
+    // be higher (e.g. 50 indexes, 5 synonym maps). Confirm against the Azure service-limits page.
+    private const long DefaultIndexesQuota = 15;
+    private const long DefaultIndexersQuota = 15;
+    private const long DefaultDataSourcesQuota = 15;
+    private const long DefaultSynonymMapsQuota = 3;
+    private const long DefaultSkillsetQuota = 15;
+    private const long DefaultStorageSizeQuota = 16_106_127_360; // 15 GiB (15 * 1024^3 bytes)
+    private const long DefaultVectorIndexSizeQuota = 5_368_709_120; // 5 GiB (5 * 1024^3 bytes)
+
+    // Per-index limits reported in the "limits" section of the response.
+    private const long DefaultMaxStoragePerIndex = 16_106_127_360; // 15 GiB (15 * 1024^3 bytes)
+    private const int DefaultMaxFieldsPerIndex = 1000;
+    private const int DefaultMaxFieldNestingDepthPerIndex = 10;
+    private const int DefaultMaxComplexCollectionFieldsPerIndex = 40;
+    private const int DefaultMaxComplexObjectsInCollectionsPerDocument = 3000;
+
+    private readonly IIndexService _indexService;
+    private readonly IDocumentService _documentService;
+    private readonly IIndexerService _indexerService;
+    private readonly IDataSourceService _dataSourceService;
+    private readonly ISkillsetService _skillsetService;
+    private readonly LuceneIndexManager _luceneManager;
+    private readonly IHnswIndexManager _hnswManager;
+    private readonly IAuthorizationService _authorizationService;
+    private readonly ILogger<ServiceStatsController> _logger;
+
+    /// <summary>
+    /// Creates the controller with the services it reads usage figures from.
+    /// </summary>
+    public ServiceStatsController(
+        IIndexService indexService,
+        IDocumentService documentService,
+        IIndexerService indexerService,
+        IDataSourceService dataSourceService,
+        ISkillsetService skillsetService,
+        LuceneIndexManager luceneManager,
+        IHnswIndexManager hnswManager,
+        IAuthorizationService authorizationService,
+        ILogger<ServiceStatsController> logger)
+    {
+        _indexService = indexService;
+        _documentService = documentService;
+        _indexerService = indexerService;
+        _dataSourceService = dataSourceService;
+        _skillsetService = skillsetService;
+        _luceneManager = luceneManager;
+        _hnswManager = hnswManager;
+        _authorizationService = authorizationService;
+        _logger = logger;
+    }
+
+    /// <summary>
+    /// Gets service-level statistics including resource counters and limits.
+    /// </summary>
+    /// <param name="apiVersion">Value of the <c>api-version</c> query parameter; bound but not otherwise used here.</param>
+    /// <param name="cancellationToken">Cancellation token for the request.</param>
+    /// <returns>
+    /// 200 OK with a <see cref="ServiceStatistics"/> body, or the result returned by the
+    /// authorization check when that check yields one.
+    /// </returns>
+    [HttpGet]
+    [ProducesResponseType(typeof(ServiceStatistics), StatusCodes.Status200OK)]
+    [ProducesResponseType(typeof(ODataError), StatusCodes.Status403Forbidden)]
+    public async Task<IActionResult> GetServiceStatistics(
+        [FromQuery(Name = "api-version")] string apiVersion,
+        CancellationToken cancellationToken)
+    {
+        var authResult = this.CheckAuthorization(_authorizationService, SearchOperation.GetServiceStatistics);
+        if (authResult is not null)
+        {
+            return authResult;
+        }
+
+        // Gather usage counts
+        var indexes = await _indexService.ListIndexesAsync(cancellationToken);
+        var indexList = indexes.ToList();
+        var indexers = await _indexerService.ListAsync();
+        var dataSources = await _dataSourceService.ListAsync();
+        var skillsets = await _skillsetService.ListAsync(cancellationToken);
+
+        // Sum document counts and storage sizes across all indexes
+        long totalDocumentCount = 0;
+        long totalStorageSize = 0;
+        long totalVectorIndexSize = 0;
+
+        foreach (var index in indexList)
+        {
+            // The per-index calls are made without a token, so observe cancellation between indexes.
+            cancellationToken.ThrowIfCancellationRequested();
+
+            // Skip a nameless index instead of handing null to the storage managers.
+            if (string.IsNullOrEmpty(index.Name))
+            {
+                _logger.LogWarning("Skipping an index without a name while computing service statistics");
+                continue;
+            }
+
+            totalDocumentCount += await _documentService.GetDocumentCountAsync(index.Name);
+            totalStorageSize += _luceneManager.GetStorageSize(index.Name);
+            totalVectorIndexSize += _hnswManager.GetVectorIndexSize(index.Name);
+        }
+
+        var stats = new ServiceStatistics
+        {
+            ODataContext = $"{Request.Scheme}://{Request.Host}/$metadata#Microsoft.Azure.Search.V2024_07_01.ServiceStatistics",
+            Counters = new ServiceCounters
+            {
+                DocumentCount = new ResourceCounter
+                {
+                    Usage = totalDocumentCount,
+                    Quota = null // No quota for document count (same as Azure Standard tier)
+                },
+                IndexesCount = new ResourceCounter
+                {
+                    Usage = indexList.Count,
+                    Quota = DefaultIndexesQuota
+                },
+                IndexersCount = new ResourceCounter
+                {
+                    Usage = indexers.Count(),
+                    Quota = DefaultIndexersQuota
+                },
+                DataSourcesCount = new ResourceCounter
+                {
+                    Usage = dataSources.Count(),
+                    Quota = DefaultDataSourcesQuota
+                },
+                StorageSize = new ResourceCounter
+                {
+                    Usage = totalStorageSize,
+                    Quota = DefaultStorageSizeQuota
+                },
+                SynonymMaps = new ResourceCounter
+                {
+                    Usage = 0, // Synonym maps not yet implemented
+                    Quota = DefaultSynonymMapsQuota
+                },
+                SkillsetCount = new ResourceCounter
+                {
+                    Usage = skillsets.Count(),
+                    Quota = DefaultSkillsetQuota
+                },
+                VectorIndexSize = new ResourceCounter
+                {
+                    Usage = totalVectorIndexSize,
+                    Quota = DefaultVectorIndexSizeQuota
+                }
+            },
+            Limits = new ServiceLimits
+            {
+                MaxStoragePerIndex = DefaultMaxStoragePerIndex,
+                MaxFieldsPerIndex = DefaultMaxFieldsPerIndex,
+                MaxFieldNestingDepthPerIndex = DefaultMaxFieldNestingDepthPerIndex,
+                MaxComplexCollectionFieldsPerIndex = DefaultMaxComplexCollectionFieldsPerIndex,
+                MaxComplexObjectsInCollectionsPerDocument = DefaultMaxComplexObjectsInCollectionsPerDocument
+            }
+        };
+
+        _logger.LogDebug(
+            "Computed service statistics for {IndexCount} indexes ({DocumentCount} documents)",
+            indexList.Count,
+            totalDocumentCount);
+
+        return Ok(stats);
+    }
+}
diff --git a/src/AzureAISearchSimulator.Core/Models/ServiceStatistics.cs b/src/AzureAISearchSimulator.Core/Models/ServiceStatistics.cs
new file mode 100644
index 0000000..eafbe19
--- /dev/null
+++ b/src/AzureAISearchSimulator.Core/Models/ServiceStatistics.cs
@@ -0,0 +1,101 @@
+using System.Text.Json.Serialization;
+
+namespace AzureAISearchSimulator.Core.Models;
+
+/// <summary>
+/// Service-level statistics for Azure AI Search, returned by GET /servicestats.
+/// </summary>
+public class ServiceStatistics
+{
+    /// <summary>OData context URL, serialized as <c>@odata.context</c>.</summary>
+    [JsonPropertyName("@odata.context")]
+    public string? ODataContext { get; set; }
+
+    /// <summary>Usage and quota figures for each tracked resource.</summary>
+    [JsonPropertyName("counters")]
+    public ServiceCounters Counters { get; set; } = new();
+
+    /// <summary>Service-level limits.</summary>
+    [JsonPropertyName("limits")]
+    public ServiceLimits Limits { get; set; } = new();
+}
+
+/// <summary>
+/// Resource counters with usage and quota information.
+/// </summary>
+public class ServiceCounters
+{
+    /// <summary>Document counter, serialized as <c>documentCount</c>.</summary>
+    [JsonPropertyName("documentCount")]
+    public ResourceCounter DocumentCount { get; set; } = new();
+
+    /// <summary>Index counter, serialized as <c>indexesCount</c>.</summary>
+    [JsonPropertyName("indexesCount")]
+    public ResourceCounter IndexesCount { get; set; } = new();
+
+    /// <summary>Indexer counter, serialized as <c>indexersCount</c>.</summary>
+    [JsonPropertyName("indexersCount")]
+    public ResourceCounter IndexersCount { get; set; } = new();
+
+    /// <summary>Data source counter, serialized as <c>dataSourcesCount</c>.</summary>
+    [JsonPropertyName("dataSourcesCount")]
+    public ResourceCounter DataSourcesCount { get; set; } = new();
+
+    /// <summary>Storage size counter in bytes, serialized as <c>storageSize</c>.</summary>
+    [JsonPropertyName("storageSize")]
+    public ResourceCounter StorageSize { get; set; } = new();
+
+    /// <summary>Synonym map counter, serialized as <c>synonymMaps</c>.</summary>
+    [JsonPropertyName("synonymMaps")]
+    public ResourceCounter SynonymMaps { get; set; } = new();
+
+    /// <summary>Skillset counter, serialized as <c>skillsetCount</c>.</summary>
+    [JsonPropertyName("skillsetCount")]
+    public ResourceCounter SkillsetCount { get; set; } = new();
+
+    /// <summary>Vector index size counter in bytes, serialized as <c>vectorIndexSize</c>.</summary>
+    [JsonPropertyName("vectorIndexSize")]
+    public ResourceCounter VectorIndexSize { get; set; } = new();
+}
+
+/// <summary>
+/// A single resource counter with current usage and optional quota.
+/// </summary>
+public class ResourceCounter
+{
+    /// <summary>Current usage of the resource.</summary>
+    [JsonPropertyName("usage")]
+    public long Usage { get; set; }
+
+    /// <summary>
+    /// The quota limit. Null means unlimited (e.g., document count on Standard tier).
+    /// </summary>
+    [JsonPropertyName("quota")]
+    public long? Quota { get; set; }
+}
+
+/// <summary>
+/// Service-level limits.
+/// </summary>
+public class ServiceLimits
+{
+    /// <summary>Maximum storage per index, in bytes.</summary>
+    [JsonPropertyName("maxStoragePerIndex")]
+    public long MaxStoragePerIndex { get; set; }
+
+    /// <summary>Maximum number of fields per index.</summary>
+    [JsonPropertyName("maxFieldsPerIndex")]
+    public int MaxFieldsPerIndex { get; set; }
+
+    /// <summary>Maximum field nesting depth per index.</summary>
+    [JsonPropertyName("maxFieldNestingDepthPerIndex")]
+    public int MaxFieldNestingDepthPerIndex { get; set; }
+
+    /// <summary>Maximum number of complex collection fields per index.</summary>
+    [JsonPropertyName("maxComplexCollectionFieldsPerIndex")]
+    public int MaxComplexCollectionFieldsPerIndex { get; set; }
+
+    /// <summary>Maximum number of complex objects in collections per document.</summary>
+    [JsonPropertyName("maxComplexObjectsInCollectionsPerDocument")]
+    public int MaxComplexObjectsInCollectionsPerDocument { get; set; }
+}