Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions src/core/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,9 @@ async def create_email(self, email_data: Dict[str, Any]) -> Optional[Dict[str, A
# Invalidate sorted cache
self._sorted_emails_cache = None

# Invalidate query cache as new email might match existing queries
self.caching_manager.clear_query_cache()

return self._add_category_details(light_email_record)

async def get_email_by_id(
Expand Down Expand Up @@ -683,6 +686,9 @@ async def update_email_by_message_id(
# Invalidate sorted cache
self._sorted_emails_cache = None

# Invalidate query cache
self.caching_manager.clear_query_cache()

return self._add_category_details(email_to_update)

async def get_email_by_message_id(
Expand Down Expand Up @@ -725,12 +731,24 @@ async def search_emails(self, query: str) -> List[Dict[str, Any]]:
"""Searches for emails matching a query."""
return await self.search_emails_with_limit(query, limit=50)

def _get_search_cache_key(self, search_term: str, limit: int) -> str:
"""Generates a canonical cache key for search queries."""
return f"search:{search_term.lower()}:{limit}"

async def search_emails_with_limit(self, search_term: str, limit: int = 50) -> List[Dict[str, Any]]:
"""Search emails with limit parameter. Searches subject/sender in-memory, and content on-disk."""
if not search_term:
return await self.get_emails(limit=limit, offset=0)

search_term_lower = search_term.lower()

# Check query cache
cache_key = self._get_search_cache_key(search_term_lower, limit)
cached_results = self.caching_manager.get_query_result(cache_key)
if cached_results is not None:
# Return a copy to avoid mutation of cached data
return list(cached_results)

filtered_emails = []

# Optimization: Iterate over sorted emails and stop once we reach the limit.
Expand Down Expand Up @@ -782,11 +800,17 @@ async def search_emails_with_limit(self, search_term: str, limit: int = 50) -> L
logger.error(f"Could not search content for email {email_id}: {e}")

# Results are already sorted because we iterated source_emails (which is sorted)
return [self._add_category_details(email) for email in filtered_emails]
results = [self._add_category_details(email) for email in filtered_emails]

# Cache the results as a tuple to ensure immutability
results_tuple = tuple(results)
self.caching_manager.put_query_result(cache_key, results_tuple)

# Return a fresh list from the cached tuple to mirror the cache-hit path
return list(results_tuple)

# TODO(P1, 6h): Optimize search performance to avoid disk I/O per STATIC_ANALYSIS_REPORT.md
# TODO(P2, 4h): Implement search indexing to improve query performance
# TODO(P3, 3h): Add support for search result caching

async def _update_email_fields(
self, email: Dict[str, Any], update_data: Dict[str, Any]
Expand Down Expand Up @@ -865,6 +889,9 @@ async def update_email(
# Invalidate sorted cache
self._sorted_emails_cache = None

# Invalidate query cache
self.caching_manager.clear_query_cache()

return self._add_category_details(email_to_update)

async def add_tags(self, email_id: Any, tags: List[str]) -> bool:
Expand Down
4 changes: 4 additions & 0 deletions src/core/enhanced_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ def invalidate_query_result(self, query_key: str) -> None:
"""Invalidate query result cache."""
self.query_cache.invalidate(query_key)

def clear_query_cache(self) -> None:
    """Drop every entry from the query-result cache."""
    query_cache = self.query_cache
    query_cache.clear()

def clear_all_caches(self) -> None:
"""Clear all caches."""
self.email_record_cache.clear()
Expand Down
5 changes: 5 additions & 0 deletions src/core/notmuch_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,11 @@ async def delete_email(self, email_id: int) -> bool:

# For now, return a mock implementation
# In a full implementation, this would actually delete the email

# Invalidate query cache if database manager is available
if self.db and hasattr(self.db, 'caching_manager'):
self.db.caching_manager.clear_query_cache()

return True

@log_performance(operation="get_dashboard_aggregates")
Expand Down
46 changes: 46 additions & 0 deletions src/core/performance_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,52 @@ def __init__(

logger.info("OptimizedPerformanceMonitor initialized")

# Operation names the legacy log_performance adapter recognizes; anything
# outside this vocabulary is bucketed under "other" so the metric tag
# cardinality stays bounded. frozenset: this is a class-level constant and
# must never be mutated at runtime.
_ALLOWED_OPERATIONS = frozenset({
    "db",
    "http",
    "cache",
    "job",
    "unknown",
    "search_emails",
    "get_email_by_message_id",
    "get_emails",
    "get_all_categories",
    "create_email",
    "update_email_by_message_id",
    "update_email",
    "get_dashboard_aggregates",
    "get_category_breakdown",
    "update_tags_for_message",
    "build_indexes",
    "load_data",
    "save_data_to_file",
})

def log_performance(self, log_entry: Dict[str, Any]) -> None:
    """
    Adapter for legacy log_performance calls to record metrics.

    Reads ``operation`` and ``duration_seconds`` from *log_entry*,
    normalizes the operation name against ``_ALLOWED_OPERATIONS`` (anything
    unknown becomes ``"other"``), and records the duration in milliseconds
    as an ``operation_duration`` metric.

    Args:
        log_entry: Legacy log dict. A missing/``None`` operation is treated
            as ``""``; a missing/``None`` duration is treated as ``0.0``.
    """
    try:
        raw_operation = str(log_entry.get("operation") or "").lower()

        # Normalize to allowed operations so metric tag cardinality stays bounded.
        if raw_operation in self._ALLOWED_OPERATIONS:
            operation = raw_operation
        else:
            operation = "other"

        # A present-but-None duration previously raised TypeError (None * 1000)
        # and silently dropped the metric; coerce explicitly instead.
        raw_duration = log_entry.get("duration_seconds")
        duration = float(raw_duration) if raw_duration is not None else 0.0

        # Record as metric, converting seconds to ms.
        self.record_metric(
            name="operation_duration",
            value=duration * 1000,
            unit="ms",
            tags={"operation": operation}
        )
    # ValueError added: float() on a malformed duration string must not escape.
    except (TypeError, ValueError, KeyError, AttributeError) as e:
        logger.warning(f"Failed to record legacy performance metric: {e}")

def record_metric(
self,
name: str,
Expand Down
2 changes: 2 additions & 0 deletions tests/core/test_database_search_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def db_manager(db_config):
# Mock caching manager to isolate behavior
manager.caching_manager = MagicMock()
manager.caching_manager.get_email_content.return_value = None
# Ensure query cache miss so we test the search logic
manager.caching_manager.get_query_result.return_value = None
manager.emails_data = []
manager.emails_by_id = {}
# Ensure initialized to avoid side effects
Expand Down
143 changes: 143 additions & 0 deletions tests/core/test_search_query_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@

import pytest
import pytest_asyncio
import asyncio
from unittest.mock import MagicMock, AsyncMock
from src.core.database import DatabaseManager, DatabaseConfig, FIELD_ID, FIELD_CONTENT, FIELD_SUBJECT
from src.core.enhanced_caching import EnhancedCachingManager

@pytest.fixture
def db_config(tmp_path):
    """Build a DatabaseConfig rooted at a throwaway data directory."""
    storage_dir = tmp_path / "data"
    storage_dir.mkdir()
    return DatabaseConfig(data_dir=str(storage_dir))

@pytest_asyncio.fixture
async def db_manager(db_config):
    """Provide a fully initialized DatabaseManager instance."""
    instance = DatabaseManager(config=db_config)
    await instance._ensure_initialized()
    return instance

@pytest.mark.asyncio
async def test_search_emails_caches_results(db_manager):
Comment on lines +21 to +22
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Add coverage for cache keys differing by limit to ensure results are not incorrectly reused across limits.

Currently we only test cache reuse for the same search_term/limit pair, so we don’t verify that limit actually affects the cache key. Please add a test that:

  • Calls search_emails_with_limit("keyword", limit=5) to warm the cache
  • Then calls search_emails_with_limit("keyword", limit=10) and asserts you get more than 5 results and/or a distinct cache entry

This will catch regressions where limit is accidentally omitted from the cache key and smaller-result sets are incorrectly reused for larger limits.

Suggested implementation:

    # First search (should miss cache)
    results1 = await db_manager.search_emails_with_limit("keyword", limit=10)
    assert len(results1) == 1


@pytest.mark.asyncio
async def test_search_emails_cache_respects_limit(db_manager):
    """Test that cache keys differ when only the limit changes."""
    # Setup - create more than 10 emails that match the same keyword
    for i in range(12):
        await db_manager.create_email({
            "subject": f"Keyword Email {i}",
            "sender": f"sender{i}@example.com",
            FIELD_CONTENT: "This content includes the keyword term"
        })

    # Warm cache with a smaller limit
    results_limit_5 = await db_manager.search_emails_with_limit("keyword", limit=5)
    assert len(results_limit_5) == 5

    # Call again with a larger limit; results should not be truncated to 5
    results_limit_10 = await db_manager.search_emails_with_limit("keyword", limit=10)
    assert len(results_limit_10) == 10

    # Ensure the result sets differ, confirming they are not the same cached entry
    ids_limit_5 = {email[FIELD_ID] for email in results_limit_5}
    ids_limit_10 = {email[FIELD_ID] for email in results_limit_10}
    assert ids_limit_5.issubset(ids_limit_10)
    assert ids_limit_5 != ids_limit_10

This change assumes:

  1. search_emails_with_limit returns dictionaries keyed by FIELD_ID, as the other tests in this module demonstrate. If the identifier field is named differently in your schema, adjust the set comprehensions accordingly.
  2. The fixture db_manager and constant FIELD_CONTENT are available in this test module as shown in your snippet.

If your database setup enforces additional required fields for create_email, add those fields to the email dictionaries in the new test.

"""Test that search results are cached and reused."""
# Setup - Create an email that matches
await db_manager.create_email({
"subject": "Test Search Subject",
"sender": "sender@example.com",
FIELD_CONTENT: "Content with keyword"
})

# First search (should miss cache)
results1 = await db_manager.search_emails_with_limit("keyword", limit=10)
assert len(results1) == 1

# Verify it's in cache
cache_key = db_manager._get_search_cache_key("keyword", 10)
assert db_manager.caching_manager.get_query_result(cache_key) is not None

# Verify returned result is a copy (list) not the internal cache object (if implementation supports it)
cached_obj = db_manager.caching_manager.get_query_result(cache_key)
# Compare content, allowing type difference (list vs tuple)
assert tuple(results1) == tuple(cached_obj)
assert results1 is not cached_obj # Should be a copy/different object

# Modify cache and verify subsequent search gets modified result
fake_result = [{"fake": "data"}]
db_manager.caching_manager.put_query_result(cache_key, tuple(fake_result))

results2 = await db_manager.search_emails_with_limit("keyword", limit=10)
assert results2 == fake_result
assert results2 is not fake_result # Should be a copy

@pytest.mark.asyncio
async def test_search_cache_invalidation_on_create(db_manager):
    """Creating a new email must clear previously cached search results."""
    # Seed one matching email and warm the query cache.
    await db_manager.create_email({
        "subject": "First email",
        FIELD_CONTENT: "keyword",
    })
    await db_manager.search_emails_with_limit("keyword", limit=10)

    key = db_manager._get_search_cache_key("keyword", 10)
    assert db_manager.caching_manager.get_query_result(key) is not None

    # A second create should wipe the query cache...
    await db_manager.create_email({
        "subject": "Second email",
        FIELD_CONTENT: "keyword",
    })
    assert db_manager.caching_manager.get_query_result(key) is None

    # ...and a fresh search now sees both emails.
    found = await db_manager.search_emails_with_limit("keyword", limit=10)
    assert len(found) == 2

@pytest.mark.asyncio
async def test_search_cache_invalidation_on_update(db_manager):
    """Updating an email must clear previously cached search results."""
    created = await db_manager.create_email({
        "subject": "First email",
        FIELD_CONTENT: "keyword",
    })
    target_id = created[FIELD_ID]

    # Warm the query cache and confirm the entry exists.
    await db_manager.search_emails_with_limit("keyword", limit=10)
    key = db_manager._get_search_cache_key("keyword", 10)
    assert db_manager.caching_manager.get_query_result(key) is not None

    # Any field update should invalidate the cached entry.
    await db_manager.update_email(target_id, {"subject": "Updated Subject"})
    assert db_manager.caching_manager.get_query_result(key) is None

@pytest.mark.asyncio
async def test_search_cache_case_insensitive_key(db_manager):
    """A mixed-case query must be cached under its lowercased key."""
    await db_manager.create_email({
        "subject": "Subject",
        FIELD_CONTENT: "Keyword",
    })

    # Search with mixed case; the cache entry should use the lowercase term.
    await db_manager.search_emails_with_limit("Keyword", limit=10)
    lowered_key = db_manager._get_search_cache_key("keyword", 10)
    assert db_manager.caching_manager.get_query_result(lowered_key) is not None

@pytest.mark.asyncio
async def test_search_emails_cache_respects_limit(db_manager):
    """Test that cache keys differ when only the limit changes."""
    # Create a dozen emails that all match the same keyword.
    for index in range(12):
        await db_manager.create_email({
            "subject": f"Keyword Email {index}",
            "sender": f"sender{index}@example.com",
            FIELD_CONTENT: "This content includes the keyword term",
        })

    # Warm the cache with the smaller limit first.
    small = await db_manager.search_emails_with_limit("keyword", limit=5)
    assert len(small) == 5

    # The larger limit must not be truncated by the limit-5 cache entry.
    large = await db_manager.search_emails_with_limit("keyword", limit=10)
    assert len(large) == 10

    # Both limits should have produced their own cache entries.
    key_small = db_manager._get_search_cache_key("keyword", 5)
    key_large = db_manager._get_search_cache_key("keyword", 10)
    assert db_manager.caching_manager.get_query_result(key_small) is not None
    assert db_manager.caching_manager.get_query_result(key_large) is not None

    # The limit-5 ids are a strict subset of the limit-10 ids.
    small_ids = {email[FIELD_ID] for email in small}
    large_ids = {email[FIELD_ID] for email in large}
    assert small_ids.issubset(large_ids)
    assert small_ids != large_ids
Loading