Skip to content
Merged

PR #111

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions backend/python_backend/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Constants for the Email Intelligence backend.
"""

DEFAULT_CATEGORY_COLOR = "#6366f1"

DEFAULT_CATEGORIES = [
{"name": "Primary", "description": "Default primary category", "color": "#4CAF50", "count": 0},
{"name": "Promotions", "description": "Promotional emails", "color": "#2196F3", "count": 0},
{"name": "Social", "description": "Social media notifications", "color": "#FFC107", "count": 0},
{"name": "Updates", "description": "Updates and notifications", "color": "#9C27B0", "count": 0},
{"name": "Forums", "description": "Forum discussions", "color": "#795548", "count": 0},
]
15 changes: 3 additions & 12 deletions backend/python_backend/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Literal
from .constants import DEFAULT_CATEGORY_COLOR, DEFAULT_CATEGORIES

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -44,9 +45,6 @@
FIELD_CATEGORY_NAME = 'categoryName'
FIELD_CATEGORY_COLOR = 'categoryColor'

# Default values
DEFAULT_COLOR = "#6366f1"

class DatabaseManager:
"""Async database manager for email data using JSON file storage."""

Expand Down Expand Up @@ -138,14 +136,7 @@ async def initialize(self) -> None:

# Seed default categories if categories.json is empty
if not self.categories_data:
default_categories = [
{FIELD_NAME: "Primary", "description": "Default primary category", FIELD_COLOR: "#4CAF50", FIELD_COUNT: 0},
{FIELD_NAME: "Promotions", "description": "Promotional emails", FIELD_COLOR: "#2196F3", FIELD_COUNT: 0},
{FIELD_NAME: "Social", "description": "Social media notifications", FIELD_COLOR: "#FFC107", FIELD_COUNT: 0},
{FIELD_NAME: "Updates", "description": "Updates and notifications", FIELD_COLOR: "#9C27B0", FIELD_COUNT: 0},
{FIELD_NAME: "Forums", "description": "Forum discussions", FIELD_COLOR: "#795548", FIELD_COUNT: 0},
]
for cat_data in default_categories:
for cat_data in DEFAULT_CATEGORIES:
await self.create_category(cat_data)
logger.info("Seeded default categories.")

Expand Down Expand Up @@ -293,7 +284,7 @@ async def create_category(self, category_data: Dict[str, Any]) -> Optional[Dict[
FIELD_ID: new_id,
FIELD_NAME: category_data[FIELD_NAME],
"description": category_data.get("description"),
FIELD_COLOR: category_data.get(FIELD_COLOR, DEFAULT_COLOR),
FIELD_COLOR: category_data.get(FIELD_COLOR, DEFAULT_CATEGORY_COLOR),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): Field name mismatch risk in category color assignment.

Ensure that the key used for color in category_data matches FIELD_COLOR, or add a mapping to handle differences.

FIELD_COUNT: category_data.get(FIELD_COUNT, 0),
}
self.categories_data.append(category_record)
Expand Down
47 changes: 47 additions & 0 deletions backend/python_backend/gradio_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import gradio as gr
from backend.python_nlp.nlp_engine import NLPEngine

# Initialize the NLP Engine
nlp_engine = NLPEngine()

def analyze_email_interface(subject, content):
    """
    Analyze an email's subject and content with the shared NLPEngine.

    Parameters:
        subject: Email subject line entered in the UI.
        content: Email body text entered in the UI.

    Returns:
        dict: The NLP analysis for the email, or an ``{"error": ...}`` dict
        when both fields are empty or the engine returns no result.
    """
    if not subject and not content:
        return {"error": "Subject and content cannot both be empty."}

    email_data = {"subject": subject, "content": content}
    # NLPEngine.analyze_emails expects a batch (list) of emails; submit a
    # single-element batch and unwrap the first (and only) result.
    if analysis_result := nlp_engine.analyze_emails([email_data]):
        return analysis_result[0]
    return {"error": "Failed to analyze email."}

# Create the Gradio interface
with gr.Blocks(title="Email Intelligence Analysis", theme=gr.themes.Soft()) as iface:
gr.Markdown("## Email Intelligence Analysis UI")
gr.Markdown("Enter the subject and content of an email to analyze its sentiment, topic, intent, and urgency.")

with gr.Row():
with gr.Column(scale=2):
email_subject = gr.Textbox(label="Email Subject", placeholder="Enter email subject...")
email_content = gr.Textbox(label="Email Content", lines=10, placeholder="Enter email content...")
analyze_button = gr.Button("Analyze Email", variant="primary")
with gr.Column(scale=1):
gr.Markdown("### Analysis Results")
analysis_output = gr.JSON(label="AI Analysis")

analyze_button.click(
fn=analyze_email_interface,
inputs=[email_subject, email_content],
outputs=analysis_output
)

# Script entry point: running this file directly starts the Gradio server
# (importing the module only builds the interface without launching it).
if __name__ == "__main__":
    print("Launching Gradio UI for Email Intelligence Analysis...")
    iface.launch()
9 changes: 0 additions & 9 deletions backend/python_backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,6 @@
# app.include_router(action_routes.router) # Removed
# app.include_router(dashboard_routes.router) # Removed

# Mount the static files directories
app.mount("/src", StaticFiles(directory="client/src"), name="src")
app.mount("/", StaticFiles(directory="client", html=True), name="client")

@app.get("/{full_path:path}")
async def catch_all(full_path: str):
# This is a fallback to serve index.html for any path that is not an API route or a file.
# This is necessary for single-page applications.
return FileResponse('client/index.html')

# Request/Response Models previously defined here are now in .models
# Ensure route files import them from .models
Expand Down
3 changes: 2 additions & 1 deletion backend/python_backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from .constants import DEFAULT_CATEGORY_COLOR

from pydantic import BaseModel, Field, field_validator, ConfigDict

Expand Down Expand Up @@ -101,7 +102,7 @@ class EmailResponse(EmailBase):
class CategoryBase(BaseModel):
name: str = Field(..., min_length=1, max_length=255)
description: Optional[str] = None
color: str = Field(default="#6366f1", pattern=r"^#[0-9A-Fa-f]{6}$")
color: str = Field(default=DEFAULT_CATEGORY_COLOR, pattern=r"^#[0-9A-Fa-f]{6}$")


class CategoryCreate(CategoryBase):
Expand Down
91 changes: 52 additions & 39 deletions backend/python_nlp/gmail_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ def cache_email(self, email_data: Dict[str, Any]) -> None:

self.conn.execute(
"""
INSERT OR REPLACE INTO emails
(message_id, thread_id, subject, sender, sender_email, content,
INSERT OR REPLACE INTO emails
(message_id, thread_id, subject, sender, sender_email, content,
labels, timestamp, retrieved_at, content_hash)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
Expand Down Expand Up @@ -222,7 +222,7 @@ def update_sync_state(self, sync_data: Dict[str, Any]) -> None:
"""Update synchronization state"""
self.conn.execute(
"""
INSERT OR REPLACE INTO sync_metadata
INSERT OR REPLACE INTO sync_metadata
(sync_id, query_filter, last_sync, total_messages, processed_messages, next_page_token)
VALUES (?, ?, ?, ?, ?, ?)
""",
Expand Down Expand Up @@ -265,23 +265,23 @@ def _load_credentials(self):
try:
creds = Credentials.from_authorized_user_file(token_path, SCOPES)
except Exception as e:
self.logger.error(f"Error loading credentials from {token_path}: {e}")
self.logger.error("Error loading credentials from %s: %s", token_path, e)
creds = None # Ensure creds is None if loading fails

if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
self.logger.info(f"Refreshing expired credentials from {token_path}...")
self.logger.info("Refreshing expired credentials from %s...", token_path)
try:
creds.refresh(Request())
except Exception as e:
self.logger.error(f"Error refreshing credentials: {e}")
self.logger.error("Error refreshing credentials: %s", e)
# Potentially delete token.json and force re-authentication
if os.path.exists(token_path):
try:
os.remove(token_path)
self.logger.info(f"Removed invalid token file: {token_path}")
self.logger.info("Removed invalid token file: %s", token_path)
except OSError as oe:
self.logger.error(f"Error removing token file {token_path}: {oe}")
self.logger.error("Error removing token file %s: %s", token_path, oe)
creds = None # Force re-authentication
Comment on lines 271 to 285
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Merge nested if conditions (merge-nested-ifs)

Suggested change
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
self.logger.info(f"Refreshing expired credentials from {token_path}...")
self.logger.info("Refreshing expired credentials from %s...", token_path)
try:
creds.refresh(Request())
except Exception as e:
self.logger.error(f"Error refreshing credentials: {e}")
self.logger.error("Error refreshing credentials: %s", e)
# Potentially delete token.json and force re-authentication
if os.path.exists(token_path):
try:
os.remove(token_path)
self.logger.info(f"Removed invalid token file: {token_path}")
self.logger.info("Removed invalid token file: %s", token_path)
except OSError as oe:
self.logger.error(f"Error removing token file {token_path}: {oe}")
self.logger.error("Error removing token file %s: %s", token_path, oe)
creds = None # Force re-authentication
if (not creds or not creds.valid) and (creds and creds.expired and creds.refresh_token):
self.logger.info("Refreshing expired credentials from %s...", token_path)
try:
creds.refresh(Request())
except Exception as e:
self.logger.error("Error refreshing credentials: %s", e)
# Potentially delete token.json and force re-authentication
if os.path.exists(token_path):
try:
os.remove(token_path)
self.logger.info("Removed invalid token file: %s", token_path)
except OSError as oe:
self.logger.error("Error removing token file %s: %s", token_path, oe)
creds = None # Force re-authentication


ExplanationToo much nesting can make code difficult to understand, and this is especially
true in Python, where there are no brackets to help out with the delineation of
different nesting levels.

Reading deeply nested code is confusing, since you have to keep track of which
conditions relate to which levels. We therefore strive to reduce nesting where
possible, and the situation where two if conditions can be combined using
and is an easy win.

# If creds are still None (not loaded or refresh failed), _authenticate will be called

Expand All @@ -299,9 +299,9 @@ def _store_credentials(self, creds):
try:
with open(token_path, "w") as token_file:
token_file.write(creds.to_json())
self.logger.info(f"Credentials stored in {token_path}")
self.logger.info("Credentials stored in %s", token_path)
except OSError as e:
self.logger.error(f"Error storing credentials to {token_path}: {e}")
self.logger.error("Error storing credentials to %s: %s", token_path, e)

def _authenticate(self):
"""Authenticates the user and obtains credentials using GMAIL_CREDENTIALS_JSON env var."""
Expand All @@ -310,27 +310,31 @@ def _authenticate(self):

if not credentials_json_str:
self.logger.error(
f"Environment variable {GMAIL_CREDENTIALS_ENV_VAR} is not set. "
"This variable should contain the JSON content of your Google Cloud credentials file."
"Environment variable %s is not set. "
"This variable should contain the JSON content of your Google Cloud credentials file.",
GMAIL_CREDENTIALS_ENV_VAR,
)
# Attempt to fall back to local credentials.json if GMAIL_CREDENTIALS_JSON is not set
# This maintains previous behavior if the env var is not set, but logs a warning.
self.logger.warning(
f"Attempting to use local '{CREDENTIALS_PATH}' as fallback for OAuth. "
f"It is recommended to set the {GMAIL_CREDENTIALS_ENV_VAR} environment variable."
"Attempting to use local '%s' as fallback for OAuth. "
"It is recommended to set the %s environment variable.",
CREDENTIALS_PATH,
GMAIL_CREDENTIALS_ENV_VAR,
)
if os.path.exists(CREDENTIALS_PATH):
try:
flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_PATH, SCOPES)
except Exception as e:
self.logger.error(
f"Error loading fallback credentials from {CREDENTIALS_PATH}: {e}"
"Error loading fallback credentials from %s: %s", CREDENTIALS_PATH, e
)
return # Exit if fallback also fails
else:
self.logger.error(
f"Fallback credentials file '{CREDENTIALS_PATH}' not found. "
"Please set the GMAIL_CREDENTIALS_JSON environment variable or provide the file."
"Fallback credentials file '%s' not found. "
"Please set the GMAIL_CREDENTIALS_JSON environment variable or provide the file.",
CREDENTIALS_PATH,
)
return # Exit if no credentials source is found
else:
Expand All @@ -339,13 +343,14 @@ def _authenticate(self):
flow = InstalledAppFlow.from_client_config(credentials_info, SCOPES)
except json.JSONDecodeError:
self.logger.error(
f"Invalid JSON content in {GMAIL_CREDENTIALS_ENV_VAR}. "
"Please ensure it's a valid JSON string."
"Invalid JSON content in %s. "
"Please ensure it's a valid JSON string.",
GMAIL_CREDENTIALS_ENV_VAR,
)
return
except Exception as e: # Catch other potential errors from from_client_config
self.logger.error(
f"Error loading credentials from {GMAIL_CREDENTIALS_ENV_VAR}: {e}"
"Error loading credentials from %s: %s", GMAIL_CREDENTIALS_ENV_VAR, e
)
return

Expand All @@ -356,7 +361,7 @@ def _authenticate(self):
try:
creds = flow.run_local_server(port=0)
except Exception as e: # Catch generic exception from run_local_server
self.logger.error(f"OAuth flow failed: {e}")
self.logger.error("OAuth flow failed: %s", e)
return

if creds:
Expand Down Expand Up @@ -446,10 +451,10 @@ async def collect_emails_incremental(
page_token = message_list["nextPageToken"]

# Log progress
self.logger.info(f"Collected {len(collected_messages)} emails so far...")
self.logger.info("Collected %s emails so far...", len(collected_messages))

except Exception as e:
self.logger.error(f"Error collecting emails: {e}")
self.logger.error("Error collecting emails: %s", e)
# Save current state for resumption
self.cache.update_sync_state(sync_state)
raise
Expand Down Expand Up @@ -485,14 +490,14 @@ async def _get_message_list(
.execute()
)
except HttpError as error:
self.logger.error(f"An API error occurred: {error}")
self.logger.error("An API error occurred: %s", error)
# Implement more sophisticated error handling and retry logic if needed
return {
"messages": [],
"resultSizeEstimate": 0,
} # Return empty on error
except Exception as e:
self.logger.error(f"Unexpected error in _get_message_list: {e}")
self.logger.error("Unexpected error in _get_message_list: %s", e)
return {"messages": [], "resultSizeEstimate": 0}

# Fallback to simulated response if gmail_service is not available
Expand Down Expand Up @@ -557,58 +562,64 @@ async def _get_message_content(self, message_id: str) -> Optional[Dict[str, Any]
# Check cache first
cached_email = self.cache.get_cached_email(message_id)
if cached_email:
self.logger.debug(f"Cache hit for message {message_id}")
self.logger.debug("Cache hit for message %s", message_id)
return cached_email
self.logger.debug(f"Cache miss for message {message_id}")
self.logger.debug("Cache miss for message %s", message_id)

if self.gmail_service:
try:
self.logger.debug(f"Attempting to fetch message {message_id} from Gmail API.")
self.logger.debug("Attempting to fetch message %s from Gmail API.", message_id)
message = (
self.gmail_service.users()
.messages()
.get(userId="me", id=message_id, format="full")
.execute()
)
self.logger.debug(f"Successfully fetched message {message_id} from API.")
self.logger.debug("Successfully fetched message %s from API.", message_id)

email_data = self._parse_message_payload(message)

if email_data:
self.cache.cache_email(email_data)
self.logger.debug(f"Successfully parsed and cached message {message_id}.")
self.logger.debug("Successfully parsed and cached message %s.", message_id)
return email_data
else:
self.logger.warning(
f"Could not parse email data for message {message_id}. This message will not be processed further."
"Could not parse email data for message %s. This message will not be processed further.",
message_id,
)
return None # Parsing failure, do not simulate for this specific case
except HttpError as error:
self.logger.error(
f"API error fetching message {message_id}: {error}. Falling back to simulation."
"API error fetching message %s: %s. Falling back to simulation.",
message_id,
error,
)
# Fall through to simulation block below
except Exception as e:
self.logger.error(
f"Unexpected error retrieving message {message_id}: {e}. Falling back to simulation."
"Unexpected error retrieving message %s: %s. Falling back to simulation.",
message_id,
e,
)
# Fall through to simulation block below
else:
self.logger.warning(
f"Gmail service not available. Falling back to simulation for message {message_id}."
"Gmail service not available. Falling back to simulation for message %s.",
message_id,
)
# Fall through to simulation block below

# Fallback to simulated response if API call failed or service was unavailable
self.logger.info(f"Using simulated content for message {message_id}.")
self.logger.info("Using simulated content for message %s.", message_id)
email_data = await self._simulate_email_content(message_id)

# Ensure message_id is present in simulated data for caching
if "message_id" not in email_data: # _simulate_email_content should guarantee this
email_data["message_id"] = message_id

self.cache.cache_email(email_data)
self.logger.debug(f"Cached simulated content for message {message_id}.")
self.logger.debug("Cached simulated content for message %s.", message_id)
return email_data

def _parse_message_payload(self, message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
Expand Down Expand Up @@ -647,7 +658,9 @@ def _parse_message_payload(self, message: Dict[str, Any]) -> Optional[Dict[str,
).isoformat(),
}
except Exception as e:
self.logger.error(f"Error parsing message payload for {message.get('id')}: {e}")
self.logger.error(
"Error parsing message payload for %s: %s", message.get("id"), e
)
return None

def _extract_email_address(self, sender_header: str) -> str:
Expand Down Expand Up @@ -746,8 +759,8 @@ async def execute_collection_strategy(self, strategy_name: str) -> EmailBatch:

strategy = strategies[strategy_name]

self.logger.info(f"Executing collection strategy: {strategy_name}")
self.logger.info(f"Description: {strategy['description']}")
self.logger.info("Executing collection strategy: %s", strategy_name)
self.logger.info("Description: %s", strategy['description'])

return await self.collect_emails_incremental(
query_filter=strategy["query"], max_emails=strategy["max_emails"]
Expand Down
Loading
Loading