Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/backend/python_backend/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,15 +243,15 @@ def _parse_json_fields(self, row: Dict[str, Any], fields: List[str]) -> Dict[str
"""
if not row:
return row
for field in row:
for field in fields:
if field in row and isinstance(row[field], str):
try:
row[field] = json.loads(row[field])
except json.JSONDecodeError:
logger.warning(
f"Failed to parse JSON for field {field} in row {row.get(FIELD_ID)}"
)
if field in (FIELD_ANALYSIS_METADATA, "metadata"):
if field in (FIELD_ANALYSIS_METADATA, "metadata", "filterResults"):
row[field] = {}
else:
row[field] = []
Expand Down Expand Up @@ -692,6 +692,9 @@ async def get_weekly_growth(self) -> Dict[str, Any]:
# This is initialized via FastAPI startup event
_db_manager_instance = None

# A singleton for imports assuming a single database manager exists initially (used by main)
db_manager = DatabaseManager()
_db_manager_instance = db_manager
Comment on lines +695 to +697
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Instantiating DatabaseManager at the module level introduces side effects on import (e.g., file system operations) and makes the code harder to test. It's better to manage singletons through a factory function or a dependency injection framework. This allows for lazy initialization and easier mocking in tests.

Comment on lines +695 to +697
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

This eager singleton now skips disk initialization.

By setting _db_manager_instance up front, both initialize_db() and get_db() bypass the branch that calls _ensure_initialized(). Startup now keeps empty in-memory caches unless some later code initializes manually, so persisted emails/categories/users are never loaded and new IDs can be generated against an empty store.

Suggested fix
 # A singleton for imports assuming a single database manager exists initially (used by main)
 db_manager = DatabaseManager()
 _db_manager_instance = db_manager

 async def get_db() -> DatabaseManager:
@@
     global _db_manager_instance
     if _db_manager_instance is None:
         # This should ideally not be reached if startup event is properly set
-        _db_manager_instance = DatabaseManager()
-        await _db_manager_instance._ensure_initialized()
+        _db_manager_instance = db_manager
+    await _db_manager_instance._ensure_initialized()
     return _db_manager_instance

 async def initialize_db():
@@
     global _db_manager_instance
     if _db_manager_instance is None:
-        _db_manager_instance = DatabaseManager()
-        await _db_manager_instance._ensure_initialized()
+        _db_manager_instance = db_manager
+    await _db_manager_instance._ensure_initialized()
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/backend/python_backend/database.py` around lines 695 - 697, The singleton
is being eagerly instantiated which bypasses initialization; change the
module-level code to avoid creating DatabaseManager() at import time by removing
the eager creation of db_manager and instead set _db_manager_instance = None,
then ensure initialize_db() or get_db() lazily constructs DatabaseManager
(calling _ensure_initialized() as needed) so that DatabaseManager,
initialize_db(), get_db(), and _ensure_initialized() run normal disk
initialization and populate caches before use.


async def get_db() -> DatabaseManager:
"""
Expand Down
15 changes: 6 additions & 9 deletions src/backend/python_backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

from ..plugins.plugin_manager import plugin_manager
from . import (
action_routes,
ai_routes,
category_routes,
dashboard_routes,
Expand Down Expand Up @@ -268,7 +267,6 @@ async def validation_exception_handler(request: Request, exc: ValidationError):
app.include_router(workflow_routes.router)
app.include_router(model_routes.router)
app.include_router(performance_routes.router)
app.include_router(action_routes.router)
app.include_router(dashboard_routes.router)
app.include_router(ai_routes.router)

Expand Down Expand Up @@ -377,10 +375,9 @@ async def get_error_stats():

if __name__ == "__main__":
import uvicorn

port = int(os.getenv("PORT", 8000))
env = os.getenv("NODE_ENV", "development")
host = os.getenv("HOST", "127.0.0.1" if env == "development" else "0.0.0.0")
reload = env == "development"
# Use string app path to support reload
uvicorn.run("main:app", host=host, port=port, reload=reload, log_level="info")
port = int(os.getenv("PORT", 8000))
env = os.getenv("NODE_ENV", "development")
host = os.getenv("HOST", "127.0.0.1" if env == "development" else "0.0.0.0")
reload = env == "development"
# Use string app path to support reload
uvicorn.run("main:app", host=host, port=port, reload=reload, log_level="info")
11 changes: 11 additions & 0 deletions src/backend/python_backend/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,14 @@ def get_active_models(self) -> List[Dict[str, Any]]:
def check_health(self) -> bool:
"""Check model manager health. Stub implementation."""
return True

def discover_models(self) -> None:
"""Discover models. Stub implementation."""
pass

def list_models(self) -> List[Dict[str, Any]]:
"""List models. Stub implementation."""
return []


model_manager = ModelManager()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Instantiating ModelManager at the module level creates a global state that can be problematic for testing and can lead to unexpected behavior if the module is imported in different contexts. A better practice is to use a factory function or dependency injection to control the lifecycle of the manager instance, which improves modularity and testability.

2 changes: 1 addition & 1 deletion src/backend/python_backend/routes/v1/email_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from src.core.models import EmailResponse, EmailCreate, EmailUpdate
from backend.python_backend.services.email_service import EmailService
from backend.python_backend.dependencies import get_email_service
from src.core.exceptions import EmailNotFoundException
from backend.python_backend.exceptions import EmailNotFoundException
from src.core.performance_monitor import log_performance

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion src/backend/python_backend/training_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import logging
from typing import Any, Dict

from fastapi import APIRouter, BackgroundTasks, HTTPException
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException

from src.core.auth import get_current_active_user

Expand All @@ -29,7 +29,7 @@
async def start_training(
model_config: ModelConfig,
background_tasks: BackgroundTasks,
current_user: str = Depends(get_current_active_user),

Check failure on line 32 in src/backend/python_backend/training_routes.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use "Annotated" type hints for FastAPI dependency injection

See more on https://sonarcloud.io/project/issues?id=MasumRab_EmailIntelligence&issues=AZ0tMy-SlhVC2U7ds-x3&open=AZ0tMy-SlhVC2U7ds-x3&pullRequest=565
):
"""
Start training a model with the given configuration.
Expand Down Expand Up @@ -63,7 +63,7 @@

@router.get("/api/training/status/{job_id}")
@log_performance(operation="get_training_status")
async def get_training_status(job_id: str, current_user: str = Depends(get_current_active_user)):

Check failure on line 66 in src/backend/python_backend/training_routes.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use "Annotated" type hints for FastAPI dependency injection

See more on https://sonarcloud.io/project/issues?id=MasumRab_EmailIntelligence&issues=AZ0tMy-SlhVC2U7ds-x4&open=AZ0tMy-SlhVC2U7ds-x4&pullRequest=565
"""
Get the status of a training job.

Expand Down
11 changes: 8 additions & 3 deletions src/backend/python_nlp/gmail_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,14 @@ class EmailCache:
def __init__(self, cache_path: str = str(DEFAULT_CACHE_PATH)):
"""Initializes the EmailCache."""
# Secure path validation
self.cache_path = str(
PathValidator.validate_database_path(cache_path, Path(cache_path).parent)
)
try:
self.cache_path = str(
PathValidator.validate_database_path(cache_path, Path(cache_path).parent)
)
except AttributeError:
self.cache_path = str(
PathValidator.validate_and_resolve_db_path(cache_path, Path(cache_path).parent)
)
Comment on lines +149 to +156
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using a try...except AttributeError block to handle what appears to be an API change in PathValidator is brittle. This can mask other AttributeError exceptions that might occur for different reasons. A more robust approach is to explicitly check for the method's existence using hasattr. This makes the code's intent clearer and prevents unintended error suppression.

Suggested change
try:
self.cache_path = str(
PathValidator.validate_database_path(cache_path, Path(cache_path).parent)
)
except AttributeError:
self.cache_path = str(
PathValidator.validate_and_resolve_db_path(cache_path, Path(cache_path).parent)
)
# Secure path validation
if hasattr(PathValidator, 'validate_database_path'):
self.cache_path = str(
PathValidator.validate_database_path(cache_path, Path(cache_path).parent)
)
else:
self.cache_path = str(
PathValidator.validate_and_resolve_db_path(cache_path, Path(cache_path).parent)
)

Comment on lines +149 to +156
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use a fixed trusted base directory for cache-path validation.

On Line 151 and Line 155, allowed_dir is derived from cache_path itself (Path(cache_path).parent), which makes the boundary check effectively self-referential. This weakens traversal protection for caller-provided paths.

🔧 Proposed fix
     def __init__(self, cache_path: str = str(DEFAULT_CACHE_PATH)):
         """Initializes the EmailCache."""
-        # Secure path validation
-        try:
-            self.cache_path = str(
-                PathValidator.validate_database_path(cache_path, Path(cache_path).parent)
-            )
-        except AttributeError:
-            self.cache_path = str(
-                PathValidator.validate_and_resolve_db_path(cache_path, Path(cache_path).parent)
-            )
+        candidate_path = Path(cache_path)
+        if not candidate_path.is_absolute():
+            candidate_path = PROJECT_ROOT / PathValidator.sanitize_filename(candidate_path.name)
+
+        validator = getattr(PathValidator, "validate_database_path", None)
+        if callable(validator):
+            self.cache_path = str(validator(candidate_path, PROJECT_ROOT))
+        else:
+            self.cache_path = str(
+                PathValidator.validate_and_resolve_db_path(candidate_path, PROJECT_ROOT)
+            )
+
+        Path(self.cache_path).parent.mkdir(parents=True, exist_ok=True)
         self.conn = sqlite3.connect(self.cache_path, check_same_thread=False)
         self._init_cache()
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/backend/python_nlp/gmail_integration.py` around lines 149 - 156, The
cache-path validation currently uses the caller-provided path's parent
(Path(cache_path).parent) as the allowed_dir, which is self-referential and
defeats traversal protection; update both calls to
PathValidator.validate_database_path and
PathValidator.validate_and_resolve_db_path to pass a fixed trusted base
directory (e.g., a class/config constant like self.trusted_cache_base or an
app-configured TRUSTED_CACHE_DIR Path) instead of Path(cache_path).parent so
validation enforces boundaries against a known safe root.

self.conn = sqlite3.connect(self.cache_path, check_same_thread=False)
self._init_cache()

Expand Down
9 changes: 8 additions & 1 deletion src/backend/python_nlp/smart_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ def __init__(self, db_path: str = None):
db_path = os.path.join(DATA_DIR, filename)

# Validate the final path
self.db_path = str(PathValidator.validate_database_path(db_path, DATA_DIR))
try:
self.db_path = str(PathValidator.validate_database_path(db_path, DATA_DIR))
except AttributeError:
Comment on lines +118 to +120
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): The AttributeError-based fallback between validator methods can obscure genuine errors.

Catching all AttributeErrors here means any AttributeError raised inside validate_database_path will be treated as if the method is missing and we’ll fall back to validate_and_resolve_db_path. Please either check capability explicitly (e.g. hasattr(PathValidator, "validate_database_path")) or catch only the specific AttributeError that indicates the method is absent, so real validator bugs aren’t silently redirected.

self.db_path = str(PathValidator.validate_and_resolve_db_path(db_path, DATA_DIR))
Comment on lines +118 to +121
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using try...except AttributeError to handle different method names in PathValidator is not a robust pattern. It can hide other AttributeError bugs and makes the code's intent less clear. For better maintainability, it's recommended to use hasattr to check for the method's existence before calling it.

Suggested change
try:
self.db_path = str(PathValidator.validate_database_path(db_path, DATA_DIR))
except AttributeError:
self.db_path = str(PathValidator.validate_and_resolve_db_path(db_path, DATA_DIR))
# Validate the final path
if hasattr(PathValidator, 'validate_database_path'):
self.db_path = str(PathValidator.validate_database_path(db_path, DATA_DIR))
else:
self.db_path = str(PathValidator.validate_and_resolve_db_path(db_path, DATA_DIR))

self.logger = logging.getLogger(__name__)
self.conn = None
if self.db_path == ":memory:":
Expand All @@ -129,6 +132,10 @@ def _get_db_connection(self) -> sqlite3.Connection:
"""Establishes and returns a database connection."""
if self.conn:
return self.conn
# Ensure parent directory exists before connecting
db_dir = os.path.dirname(self.db_path)
if db_dir and not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
return conn
Expand Down
Loading