diff --git a/deployment/deploy.py b/deployment/deploy.py index 4ec69e9f0..66c8a43ab 100644 --- a/deployment/deploy.py +++ b/deployment/deploy.py @@ -72,9 +72,7 @@ def run_command(command, cwd=None): PROD_COMPOSE_FILE = "docker-compose.prod.yml" -def docker_environment( - command, base_compose_file, env_compose_file, remaining_args=None -): +def docker_environment(command, base_compose_file, env_compose_file, remaining_args=None): """Manage a Docker-based environment.""" if remaining_args is None: remaining_args = [] @@ -125,9 +123,7 @@ def docker_environment( def main(): """Main entry point for the deployment script.""" - parser = argparse.ArgumentParser( - description="Deployment Script for EmailIntelligence" - ) + parser = argparse.ArgumentParser(description="Deployment Script for EmailIntelligence") parser.add_argument( "environment", choices=["dev", "staging", "prod"], help="Deployment environment" ) @@ -173,9 +169,7 @@ def main(): logger.error(f"Base Docker Compose file not found: {base_file}") sys.exit(1) if not env_specific_file or not env_specific_file.exists(): - logger.error( - f"Environment-specific Docker Compose file not found: {env_specific_file}" - ) + logger.error(f"Environment-specific Docker Compose file not found: {env_specific_file}") sys.exit(1) else: # This case should ideally not be reached if choices are correctly defined in argparser diff --git a/deployment/extensions.py b/deployment/extensions.py index f1ac48f93..1a235716d 100644 --- a/deployment/extensions.py +++ b/deployment/extensions.py @@ -99,16 +99,12 @@ def get_info(self) -> Dict[str, Any]: class ExtensionsManager: """Manages extensions for the EmailIntelligence application.""" - def __init__( - self, root_dir: Path = ROOT_DIR, python_executable: Optional[str] = None - ): + def __init__(self, root_dir: Path = ROOT_DIR, python_executable: Optional[str] = None): """Initialize the extensions manager.""" self.root_dir = root_dir self.extensions_dir = root_dir / "extensions" self.extensions: Dict[str, Extension] = {} - self.python_executable = ( - python_executable if python_executable else sys.executable - ) + self.python_executable = python_executable if python_executable else sys.executable def set_python_executable(self, python_executable: str): """Set the Python executable path.""" @@ -130,9 +126,7 @@ def discover_extensions(self) -> List[Extension]: # Check if the extension has a metadata file metadata_file = ext_dir / "metadata.json" if not metadata_file.exists(): - logger.warning( - f"Extension {ext_dir.name} does not have a metadata.json file" - ) + logger.warning(f"Extension {ext_dir.name} does not have a metadata.json file") continue # Load the metadata @@ -140,9 +134,7 @@ def discover_extensions(self) -> List[Extension]: with open(metadata_file, "r") as f: metadata = json.load(f) except json.JSONDecodeError as e: - logger.error( - f"Failed to parse metadata for extension {ext_dir.name}: {e}" - ) + logger.error(f"Failed to parse metadata for extension {ext_dir.name}: {e}") continue # Check if the extension has a main module @@ -198,9 +190,7 @@ def initialize_extensions(self) -> bool: all_initialized = False failed_extensions.append(name) if failed_extensions: - logger.error( - f"Extensions failed to initialize: {', '.join(failed_extensions)}" - ) + logger.error(f"Extensions failed to initialize: {', '.join(failed_extensions)}") return all_initialized def shutdown_extensions(self) -> bool: @@ -283,9 +273,7 @@ def install_extension(self, url: str) -> bool: # Clone the repository try: - subprocess.check_call( - ["git", "clone", url, str(self.extensions_dir / name)] - ) + subprocess.check_call(["git", "clone", url, str(self.extensions_dir / name)]) # Check if the extension has a requirements.txt file requirements_file = self.extensions_dir / name / "requirements.txt" @@ -457,12 +445,8 @@ def shutdown(): print(f"Found {len(extensions)} extensions:") for extension in extensions: - print( - f" {extension['name']} - {'Enabled' if extension['enabled'] else 'Disabled'}" - ) + print(f" {extension['name']} - {'Enabled' if extension['enabled'] else 'Disabled'}") print(f" Path: {extension['path']}") print(f" Loaded: {extension['loaded']}") - print( - f" Description: {extension['metadata'].get('description', 'No description')}" - ) + print(f" Description: {extension['metadata'].get('description', 'No description')}") print() diff --git a/deployment/migrate.py b/deployment/migrate.py index 38472e23e..9fb3bad5d 100644 --- a/deployment/migrate.py +++ b/deployment/migrate.py @@ -74,9 +74,7 @@ def rollback_migration(): def main(): """Main entry point for the migration script.""" - parser = argparse.ArgumentParser( - description="Database Migration Script for EmailIntelligence" - ) + parser = argparse.ArgumentParser(description="Database Migration Script for EmailIntelligence") parser.add_argument( "command", choices=["generate", "apply", "status", "rollback"], diff --git a/deployment/models.py b/deployment/models.py index 4482aced2..78f5aad15 100644 --- a/deployment/models.py +++ b/deployment/models.py @@ -188,15 +188,11 @@ def create_placeholder_nlp_models(self) -> bool: all_created_or_exist = True if not placeholder_dir.exists(): - logger.info( - f"Placeholder directory {placeholder_dir} does not exist. Creating it." - ) + logger.info(f"Placeholder directory {placeholder_dir} does not exist. Creating it.") try: placeholder_dir.mkdir(parents=True, exist_ok=True) except Exception as e: - logger.error( - f"Failed to create placeholder directory {placeholder_dir}: {e}" - ) + logger.error(f"Failed to create placeholder directory {placeholder_dir}: {e}") return False # Cannot proceed if directory cannot be created logger.info(f"Checking for placeholder NLP models in {placeholder_dir}...") diff --git a/deployment/run_tests.py b/deployment/run_tests.py index 9f14d7d8a..ec619723e 100644 --- a/deployment/run_tests.py +++ b/deployment/run_tests.py @@ -44,15 +44,11 @@ def run_command( ) if result.stdout: logger.info(f"Command STDOUT:\n{result.stdout}") - if ( - result.stderr - ): # Should be empty if check=True and no error, but log if present + if result.stderr: # Should be empty if check=True and no error, but log if present logger.warning(f"Command STDERR:\n{result.stderr}") return True except subprocess.CalledProcessError as e: - logger.error( - f"Command '{' '.join(e.cmd)}' failed with exit code {e.returncode}" - ) + logger.error(f"Command '{' '.join(e.cmd)}' failed with exit code {e.returncode}") if e.stdout: logger.error(f"STDOUT:\n{e.stdout}") if e.stderr: diff --git a/deployment/setup_env.py b/deployment/setup_env.py index 888199efe..ab9e65df6 100644 --- a/deployment/setup_env.py +++ b/deployment/setup_env.py @@ -55,9 +55,7 @@ def setup_python_environment(dev_mode=False): if dev_mode: return run_command(f"{sys.executable} -m pip install -r requirements.txt") else: - return run_command( - f"{sys.executable} -m pip install -r requirements.txt --no-dev" - ) + return run_command(f"{sys.executable} -m pip install -r requirements.txt --no-dev") def setup_node_environment(dev_mode=False): @@ -82,9 +80,7 @@ def setup_database(): # Check if PostgreSQL is installed if not run_command("psql --version"): - logger.error( - "PostgreSQL is not installed. Please install PostgreSQL and try again." - ) + logger.error("PostgreSQL is not installed. Please install PostgreSQL and try again.") return False # Create the database if it doesn't exist @@ -130,18 +126,8 @@ def setup_directories(): directories = [ PROJECT_ROOT / "deployment" / "nginx" / "ssl", PROJECT_ROOT / "deployment" / "nginx" / "letsencrypt", - PROJECT_ROOT - / "deployment" - / "monitoring" - / "grafana" - / "provisioning" - / "dashboards", - PROJECT_ROOT - / "deployment" - / "monitoring" - / "grafana" - / "provisioning" - / "datasources", + PROJECT_ROOT / "deployment" / "monitoring" / "grafana" / "provisioning" / "dashboards", + PROJECT_ROOT / "deployment" / "monitoring" / "grafana" / "provisioning" / "datasources", ] for directory in directories: @@ -157,15 +143,9 @@ def setup_directories(): def main(): """Main entry point for the environment setup script.""" - parser = argparse.ArgumentParser( - description="Environment Setup Script for EmailIntelligence" - ) - parser.add_argument( - "--dev", action="store_true", help="Set up development environment" - ) - parser.add_argument( - "--force", action="store_true", help="Force overwrite of existing files" - ) + parser = argparse.ArgumentParser(description="Environment Setup Script for EmailIntelligence") + parser.add_argument("--dev", action="store_true", help="Set up development environment") + parser.add_argument("--force", action="store_true", help="Force overwrite of existing files") args = parser.parse_args() logger.info("Setting up EmailIntelligence environment...") diff --git a/deployment/test_stages.py b/deployment/test_stages.py index 09bbd268c..06b8fed95 100644 --- a/deployment/test_stages.py +++ b/deployment/test_stages.py @@ -64,9 +64,7 @@ def run_unit_tests(self, coverage: bool = False, verbose: bool = False) -> bool: logger.error(f"Unit tests failed with exit code {e.returncode}") return False - def run_integration_tests( - self, coverage: bool = False, verbose: bool = False - ) -> bool: + def run_integration_tests(self, coverage: bool = False, verbose: bool = False) -> bool: """Run integration tests.""" logger.info("Running integration tests...") @@ -160,9 +158,7 @@ def run_performance_tests( self, duration: int = 60, users: int = 10, verbose: bool = False ) -> bool: """Run performance tests.""" - logger.info( - f"Running performance tests with {users} users for {duration} seconds..." - ) + logger.info(f"Running performance tests with {users} users for {duration} seconds...") # Ensure test dependencies are installed if not self.env_manager.setup_environment_for_stage("test"): @@ -227,9 +223,7 @@ def run_security_tests( except subprocess.CalledProcessError: logger.info("OWASP ZAP Python API not found, installing...") try: - subprocess.check_call( - [python, "-m", "pip", "install", "python-owasp-zap-v2.4"] - ) + subprocess.check_call([python, "-m", "pip", "install", "python-owasp-zap-v2.4"]) except subprocess.CalledProcessError as e: logger.error(f"Failed to install OWASP ZAP Python API: {e}") return False @@ -286,9 +280,7 @@ def run_tests_for_stage( elif stage == "test": return self.run_all_tests(coverage, verbose) elif stage == "staging": - return self.run_integration_tests(coverage, verbose) and self.run_api_tests( - verbose - ) + return self.run_integration_tests(coverage, verbose) and self.run_api_tests(verbose) elif stage == "prod": return self.run_e2e_tests(True, verbose) else: @@ -306,21 +298,15 @@ def parse_arguments() -> argparse.Namespace: # Test type arguments parser.add_argument("--unit", action="store_true", help="Run unit tests") - parser.add_argument( - "--integration", action="store_true", help="Run integration tests" - ) + parser.add_argument("--integration", action="store_true", help="Run integration tests") parser.add_argument("--api", action="store_true", help="Run API tests") parser.add_argument("--e2e", action="store_true", help="Run end-to-end tests") - parser.add_argument( - "--performance", action="store_true", help="Run performance tests" - ) + parser.add_argument("--performance", action="store_true", help="Run performance tests") parser.add_argument("--security", action="store_true", help="Run security tests") parser.add_argument("--all", action="store_true", help="Run all tests") # Test configuration - parser.add_argument( - "--coverage", action="store_true", help="Generate coverage report" - ) + parser.add_argument("--coverage", action="store_true", help="Generate coverage report") parser.add_argument("--verbose", action="store_true", help="Enable verbose output") parser.add_argument( "--stage", @@ -357,11 +343,7 @@ def main() -> int: # Determine which tests to run if args.stage: - return ( - 0 - if test_stages.run_tests_for_stage(args.stage, args.coverage, args.verbose) - else 1 - ) + return 0 if test_stages.run_tests_for_stage(args.stage, args.coverage, args.verbose) else 1 if args.all: return 0 if test_stages.run_all_tests(args.coverage, args.verbose) else 1 @@ -373,9 +355,7 @@ def main() -> int: success = test_stages.run_unit_tests(args.coverage, args.verbose) and success if args.integration: - success = ( - test_stages.run_integration_tests(args.coverage, args.verbose) and success - ) + success = test_stages.run_integration_tests(args.coverage, args.verbose) and success if args.api: success = test_stages.run_api_tests(args.verbose) and success @@ -385,8 +365,7 @@ def main() -> int: if args.performance: success = ( - test_stages.run_performance_tests(args.duration, args.users, args.verbose) - and success + test_stages.run_performance_tests(args.duration, args.users, args.verbose) and success ) if args.security: diff --git a/launch.py b/launch.py index 663046800..ea7a14c5a 100644 --- a/launch.py +++ b/launch.py @@ -72,9 +72,7 @@ def _handle_sigint(signum, frame): try: p.wait(timeout=5) except subprocess.TimeoutExpired: - logger.warning( - f"Process {p.pid} did not terminate gracefully, killing." - ) + logger.warning(f"Process {p.pid} did not terminate gracefully, killing.") p.kill() global ngrok_tunnel @@ -88,9 +86,7 @@ def _handle_sigint(signum, frame): logger.info("Ngrok tunnel closed.") ngrok_tunnel = None except ImportError: - logger.warning( - "pyngrok is not installed, cannot manage ngrok tunnel shutdown." - ) + logger.warning("pyngrok is not installed, cannot manage ngrok tunnel shutdown.") except Exception as e: logger.error(f"Error shutting down ngrok: {e}") @@ -125,9 +121,7 @@ def check_python_version() -> bool: """Check if the Python version is supported.""" current_version = sys.version_info[:2] if current_version < PYTHON_MIN_VERSION: - logger.error( - f"Python {'.'.join(map(str, PYTHON_MIN_VERSION))} or higher is required" - ) + logger.error(f"Python {'.'.join(map(str, PYTHON_MIN_VERSION))} or higher is required") return False if current_version > PYTHON_MAX_VERSION: logger.warning( @@ -172,9 +166,7 @@ def get_python_executable() -> str: return sys.executable -def install_requirements_from_file( - requirements_file_path_str: str, update: bool = False -) -> bool: +def install_requirements_from_file(requirements_file_path_str: str, update: bool = False) -> bool: """Install or update requirements from a file. requirements_file_path_str is relative to ROOT_DIR. """ @@ -225,9 +217,7 @@ def check_torch_cuda() -> bool: check=True, ) is_available = result.stdout.strip() == "True" - logger.info( - f"PyTorch CUDA is {'available' if is_available else 'not available'}" - ) + logger.info(f"PyTorch CUDA is {'available' if is_available else 'not available'}") return is_available except subprocess.CalledProcessError: logger.warning("Failed to check PyTorch CUDA availability") @@ -240,9 +230,7 @@ def reinstall_torch() -> bool: # Uninstall existing PyTorch logger.info("Uninstalling existing PyTorch...") - subprocess.run( - [python, "-m", "pip", "uninstall", "-y", "torch", "torchvision", "torchaudio"] - ) + subprocess.run([python, "-m", "pip", "uninstall", "-y", "torch", "torchvision", "torchaudio"]) # Install PyTorch with CUDA support logger.info("Installing PyTorch with CUDA support...") @@ -274,9 +262,7 @@ def download_nltk_data() -> bool: logger.info("NLTK data download process completed.") if result.stdout: logger.debug(f"NLTK download stdout:\n{result.stdout}") - if ( - result.stderr - ): # NLTK often prints to stderr even on success for some messages + if result.stderr: # NLTK often prints to stderr even on success for some messages logger.debug( f"NLTK download stderr:\n{result.stderr}" ) # Use debug for potentially noisy stderr @@ -332,9 +318,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: venv_python_exe_path = "" if os.name == "nt": - venv_python_exe_path = str( - ROOT_DIR / VENV_DIR / "Scripts" / "python.exe" - ) + venv_python_exe_path = str(ROOT_DIR / VENV_DIR / "Scripts" / "python.exe") else: venv_python_exe_path = str(ROOT_DIR / VENV_DIR / "bin" / "python") @@ -406,10 +390,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: venv_minor = int(parts[1]) target_major, target_minor = PYTHON_MIN_VERSION - if not ( - venv_major == target_major - and venv_minor == target_minor - ): + if not (venv_major == target_major and venv_minor == target_minor): logger.warning( f"WARNING: The existing virtual environment at './{VENV_DIR}' was created with Python {venv_major}.{venv_minor}. " f"This project requires Python {target_major}.{target_minor}." @@ -444,9 +425,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: ) return False - if ( - not create_venv() - ): # create_venv logs success/failure + if not create_venv(): # create_venv logs success/failure logger.error( "Failed to recreate virtual environment. Exiting." ) @@ -512,9 +491,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: logger.info( f"Updating base dependencies from {Path(primary_req_file).name} in existing venv as per --update-deps..." ) - if not install_dependencies( - primary_req_file, update=True - ): # Force update True + if not install_dependencies(primary_req_file, update=True): # Force update True logger.error( f"Failed to update base dependencies from {Path(primary_req_file).name}. Exiting." ) @@ -529,9 +506,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: # Handle stage-specific requirements # This logic should run if venv was newly set up, or if args.update_deps is true for existing venv - stage_requirements_file_path_str = ( - None # Use full path string for install_dependencies - ) + stage_requirements_file_path_str = None # Use full path string for install_dependencies if args.stage == "dev": dev_req_path_obj = ROOT_DIR / "requirements-dev.txt" if dev_req_path_obj.exists(): @@ -597,9 +572,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: "PyTorch CUDA not found. Reinstalling PyTorch with CUDA support as requested." ) if not reinstall_torch(): - logger.error( - "Failed to reinstall PyTorch with CUDA. Please check manually." - ) + logger.error("Failed to reinstall PyTorch with CUDA. Please check manually.") else: logger.warning( "PyTorch CUDA is not available. Use --reinstall-torch to attempt reinstallation, or --skip-torch-cuda-test to ignore." @@ -616,9 +589,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: if not args.skip_extensions: from deployment.extensions import extensions_manager - extensions_manager.set_python_executable( - python_executable - ) # Set python executable + extensions_manager.set_python_executable(python_executable) # Set python executable if not extensions_manager.load_extensions(): logger.error("Failed to load one or more extensions.") return False @@ -639,9 +610,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: logger.info( "Development stage: Skipping download of default models. Placeholders will be used/created." ) - elif ( - not models_manager.download_default_models() - ): # Original logic for non-dev stages + elif not models_manager.download_default_models(): # Original logic for non-dev stages logger.error("Failed to download default models.") # Logged error, but will proceed to create_placeholder_nlp_models anyway @@ -655,9 +624,7 @@ def prepare_environment(args: argparse.Namespace) -> bool: return True -def start_backend( - args: argparse.Namespace, python_executable: str -) -> Optional[subprocess.Popen]: +def start_backend(args: argparse.Namespace, python_executable: str) -> Optional[subprocess.Popen]: """Starts the backend server.""" actual_host = "0.0.0.0" if args.listen else args.host logger.info(f"Starting backend server on {actual_host}:{args.port}...") @@ -687,15 +654,11 @@ def start_backend( # Log the command with the actual host log_cmd = cmd[:] if args.listen: # For logging, show the original intention if --listen was used - log_cmd[log_cmd.index(actual_host)] = ( - f"{args.host} (via --listen on 0.0.0.0)" - ) + log_cmd[log_cmd.index(actual_host)] = f"{args.host} (via --listen on 0.0.0.0)" logger.info(f"Running backend command: {' '.join(log_cmd)}") process = subprocess.Popen(cmd, env=env) processes.append(process) # Add to global list - logger.info( - f"Backend server started with PID {process.pid} on {actual_host}:{args.port}." - ) + logger.info(f"Backend server started with PID {process.pid} on {actual_host}:{args.port}.") return process except FileNotFoundError: logger.error( @@ -720,9 +683,7 @@ def start_frontend(args: argparse.Namespace) -> Optional[subprocess.Popen]: ["node", "--version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL ) except (subprocess.CalledProcessError, FileNotFoundError): - logger.error( - "Node.js is not installed or not found in PATH. Cannot start frontend." - ) + logger.error("Node.js is not installed or not found in PATH. Cannot start frontend.") return None client_dir = ROOT_DIR / "client" @@ -755,24 +716,18 @@ def start_frontend(args: argparse.Namespace) -> Optional[subprocess.Popen]: check=False, # check=False to handle errors manually ) if install_result.returncode != 0: - logger.error( - f"Failed to install frontend dependencies in {client_dir}." - ) + logger.error(f"Failed to install frontend dependencies in {client_dir}.") logger.error(f"npm stdout:\n{install_result.stdout}") logger.error(f"npm stderr:\n{install_result.stderr}") # Optionally, decide if this is a fatal error for frontend launch # For now, log and attempt to continue, Vite might still run if some deps are missing but core is there. else: - logger.info( - f"Frontend dependencies installed successfully in {client_dir}." - ) + logger.info(f"Frontend dependencies installed successfully in {client_dir}.") except Exception as e: logger.error(f"Error running npm install in {client_dir}: {e}") # Decide how to handle this, for now, attempt to continue else: - logger.warning( - f"No package.json found in {client_dir}. Skipping npm install for frontend." - ) + logger.warning(f"No package.json found in {client_dir}. Skipping npm install for frontend.") # This would likely lead to `npm run dev` failing, but the check is here. cmd = [ @@ -791,9 +746,7 @@ def start_frontend(args: argparse.Namespace) -> Optional[subprocess.Popen]: env["NODE_ENV"] = "development" # Or args.stage if relevant for frontend build try: - logger.info( - f"Running frontend command: {' '.join(cmd)} in {str(ROOT_DIR / 'client')}" - ) + logger.info(f"Running frontend command: {' '.join(cmd)} in {str(ROOT_DIR / 'client')}") process = subprocess.Popen(cmd, cwd=str(ROOT_DIR / "client"), env=env) processes.append(process) # Add to global list logger.info(f"Frontend server started with PID {process.pid}.") @@ -830,9 +783,7 @@ def run_application(args: argparse.Namespace) -> int: # Assuming backend port (args.port) is the one to share ngrok_tunnel = ngrok.connect(args.port) - logger.info( - f"Ngrok tunnel established. Public URL: {ngrok_tunnel.public_url}" - ) + logger.info(f"Ngrok tunnel established. Public URL: {ngrok_tunnel.public_url}") logger.info( "Note: If you have a free ngrok account, you might be limited to one tunnel at a time." ) @@ -863,14 +814,10 @@ def run_application(args: argparse.Namespace) -> int: if args.env_file: env_file_path = ROOT_DIR / args.env_file if env_file_path.exists(): - logger.info( - f"Loading environment variables from custom .env file: {env_file_path}" - ) + logger.info(f"Loading environment variables from custom .env file: {env_file_path}") load_dotenv(dotenv_path=env_file_path, override=True) else: - logger.warning( - f"Specified env file {args.env_file} not found at {env_file_path}" - ) + logger.warning(f"Specified env file {args.env_file} not found at {env_file_path}") if args.api_only: logger.info("Running in API only mode.") @@ -908,9 +855,7 @@ def run_application(args: argparse.Namespace) -> int: return 1 # Critical failure if frontend_process: - logger.info( - f"Frontend accessible at http://{args.host}:{args.frontend_port}" - ) + logger.info(f"Frontend accessible at http://{args.host}:{args.frontend_port}") else: logger.error("Frontend server failed to start.") if backend_process and backend_process.poll() is None: @@ -924,14 +869,10 @@ def run_application(args: argparse.Namespace) -> int: while True: # Check if either process has exited unexpectedly if backend_process.poll() is not None: - logger.error( - f"Backend process {backend_process.pid} exited unexpectedly." - ) + logger.error(f"Backend process {backend_process.pid} exited unexpectedly.") unexpected_exit = True if frontend_process.poll() is None: - logger.info( - f"Terminating frontend process {frontend_process.pid}..." - ) + logger.info(f"Terminating frontend process {frontend_process.pid}...") frontend_process.terminate() break if frontend_process.poll() is not None: @@ -940,23 +881,17 @@ def run_application(args: argparse.Namespace) -> int: ) unexpected_exit = True if backend_process.poll() is None: - logger.info( - f"Terminating backend process {backend_process.pid}..." - ) + logger.info(f"Terminating backend process {backend_process.pid}...") backend_process.terminate() break time.sleep(1) - except ( - KeyboardInterrupt - ): # This should ideally be caught by the SIGINT handler + except KeyboardInterrupt: # This should ideally be caught by the SIGINT handler logger.info( "KeyboardInterrupt in run_application. Signal handler should take over." ) pass # Signal handler will manage shutdown except Exception as e: - logger.error( - f"An unexpected error occurred in run_application main loop: {e}" - ) + logger.error(f"An unexpected error occurred in run_application main loop: {e}") finally: # Ensure processes are terminated if loop exits for other reasons # _handle_sigint should manage this, but as a fallback: @@ -976,9 +911,7 @@ def run_application(args: argparse.Namespace) -> int: backend_process.wait() # If backend exits with an error code, it might be an unexpected exit if backend_process.returncode != 0: - logger.error( - f"Backend process exited with code: {backend_process.returncode}" - ) + logger.error(f"Backend process exited with code: {backend_process.returncode}") return 1 # (No case for only frontend, as backend failure would terminate it) @@ -987,8 +920,7 @@ def run_application(args: argparse.Namespace) -> int: logger.info( f"Executing default test suite for '--stage {args.stage}'. Specific test flags (e.g., --unit, --integration) were not provided." ) - from deployment.test_stages import \ - test_stages # Moved import here for locality + from deployment.test_stages import test_stages # Moved import here for locality test_run_success = True # Assume success initially @@ -999,9 +931,7 @@ def run_application(args: argparse.Namespace) -> int: test_run_success = False logger.error("Unit tests failed.") else: - logger.warning( - "test_stages.run_unit_tests not found, cannot run unit tests." - ) + logger.warning("test_stages.run_unit_tests not found, cannot run unit tests.") # Consider if this should be a failure for the 'test' stage # test_run_success = False @@ -1018,9 +948,7 @@ def run_application(args: argparse.Namespace) -> int: # Consider if this should be a failure for the 'test' stage # test_run_success = False - logger.info( - f"Default test suite execution finished. Success: {test_run_success}" - ) + logger.info(f"Default test suite execution finished. Success: {test_run_success}") return 0 if test_run_success else 1 return 0 # Assuming success if processes managed by signal handler or exited cleanly for other stages @@ -1029,9 +957,7 @@ def run_application(args: argparse.Namespace) -> int: def _print_system_info(): """Prints detailed system information.""" print("\n--- System Information ---") - print( - f"Operating System: {platform.system()} {platform.release()} ({platform.version()})" - ) + print(f"Operating System: {platform.system()} {platform.release()} ({platform.version()})") print(f"Processor: {platform.processor()}") try: print(f"CPU Cores: {os.cpu_count()}") @@ -1076,9 +1002,7 @@ def _print_system_info(): print(f"PyTorch Version: {torch_version_proc.stdout.strip()}") check_torch_cuda() # This will print CUDA availability else: - print( - "PyTorch Version: Not installed or importable with current Python executable." - ) + print("PyTorch Version: Not installed or importable with current Python executable.") logger.debug(f"Failed to get PyTorch version: {torch_version_proc.stderr}") except Exception as e: print(f"PyTorch Information: Error checking PyTorch - {e}") @@ -1091,9 +1015,7 @@ def _print_system_info(): swap_mem = psutil.swap_memory() print(f"Total RAM: {virtual_mem.total / (1024**3):.2f} GB") print(f"Available RAM: {virtual_mem.available / (1024**3):.2f} GB") - print( - f"Used RAM: {virtual_mem.used / (1024**3):.2f} GB ({virtual_mem.percent}%)" - ) + print(f"Used RAM: {virtual_mem.used / (1024**3):.2f} GB ({virtual_mem.percent}%)") print(f"Total Swap: {swap_mem.total / (1024**3):.2f} GB") print(f"Used Swap: {swap_mem.used / (1024**3):.2f} GB ({swap_mem.percent}%)") except ImportError: @@ -1139,9 +1061,7 @@ def parse_arguments() -> argparse.Namespace: parser.add_argument( "--no-download-nltk", action="store_true", help="Skip downloading NLTK data" ) - parser.add_argument( - "--skip-prepare", action="store_true", help="Skip preparation steps" - ) + parser.add_argument("--skip-prepare", action="store_true", help="Skip preparation steps") # Application stage parser.add_argument( @@ -1170,9 +1090,7 @@ def parse_arguments() -> argparse.Namespace: default=5173, help="Specify the frontend port to run on (default: 5173)", ) - parser.add_argument( - "--api-url", type=str, help="Specify the API URL for the frontend" - ) + parser.add_argument("--api-url", type=str, help="Specify the API URL for the frontend") parser.add_argument( "--api-only", action="store_true", @@ -1192,68 +1110,42 @@ def parse_arguments() -> argparse.Namespace: help="Generate coverage report when running tests", ) parser.add_argument("--unit", action="store_true", help="Run unit tests") - parser.add_argument( - "--integration", action="store_true", help="Run integration tests" - ) + parser.add_argument("--integration", action="store_true", help="Run integration tests") parser.add_argument("--e2e", action="store_true", help="Run end-to-end tests") - parser.add_argument( - "--performance", action="store_true", help="Run performance tests" - ) + parser.add_argument("--performance", action="store_true", help="Run performance tests") parser.add_argument("--security", action="store_true", help="Run security tests") # Extensions and models - parser.add_argument( - "--skip-extensions", action="store_true", help="Skip loading extensions" - ) - parser.add_argument( - "--skip-models", action="store_true", help="Skip downloading models" - ) + parser.add_argument("--skip-extensions", action="store_true", help="Skip loading extensions") + parser.add_argument("--skip-models", action="store_true", help="Skip downloading models") parser.add_argument( "--install-extension", type=str, help="Install an extension from a Git repository", ) - parser.add_argument( - "--uninstall-extension", type=str, help="Uninstall an extension" - ) + parser.add_argument("--uninstall-extension", type=str, help="Uninstall an extension") parser.add_argument("--update-extension", type=str, help="Update an extension") - parser.add_argument( - "--list-extensions", action="store_true", help="List all extensions" - ) - parser.add_argument( - "--create-extension", type=str, help="Create a new extension template" - ) + parser.add_argument("--list-extensions", action="store_true", help="List all extensions") + parser.add_argument("--create-extension", type=str, help="Create a new extension template") # Model options - parser.add_argument( - "--download-model", type=str, help="Download a model from a URL" - ) - parser.add_argument( - "--model-name", type=str, help="Specify the model name for download" - ) + parser.add_argument("--download-model", type=str, help="Download a model from a URL") + parser.add_argument("--model-name", type=str, help="Specify the model name for download") parser.add_argument("--list-models", action="store_true", help="List all models") parser.add_argument("--delete-model", type=str, help="Delete a model") # Advanced options - parser.add_argument( - "--no-half", action="store_true", help="Disable half-precision for models" - ) + parser.add_argument("--no-half", action="store_true", help="Disable half-precision for models") parser.add_argument( "--force-cpu", action="store_true", help="Force CPU mode even if GPU is available", ) - parser.add_argument( - "--low-memory", action="store_true", help="Enable low memory mode" - ) - parser.add_argument( - "--system-info", action="store_true", help="Print system information" - ) + parser.add_argument("--low-memory", action="store_true", help="Enable low memory mode") + parser.add_argument("--system-info", action="store_true", help="Print system information") # Networking options - parser.add_argument( - "--share", action="store_true", help="Create a public URL using ngrok" - ) + parser.add_argument("--share", action="store_true", help="Create a public URL using ngrok") parser.add_argument( "--listen", action="store_true", @@ -1298,9 +1190,7 @@ def main() -> int: # Goal: Ensure launch.py runs with Python 3.11.x if os.environ.get("LAUNCHER_REEXEC_GUARD") != "1": current_major, current_minor = sys.version_info[:2] - target_major, target_minor = ( - PYTHON_MIN_VERSION # Assuming PYTHON_MIN_VERSION is (3, 11) - ) + target_major, target_minor = PYTHON_MIN_VERSION # Assuming PYTHON_MIN_VERSION is (3, 11) if not (current_major == target_major and current_minor == target_minor): logger.info( @@ -1365,9 +1255,7 @@ def main() -> int: ) if found_interpreter_path: - logger.info( - f"Re-executing launcher with interpreter: {found_interpreter_path}" - ) + logger.info(f"Re-executing launcher with interpreter: {found_interpreter_path}") new_env = os.environ.copy() new_env["LAUNCHER_REEXEC_GUARD"] = "1" @@ -1388,9 +1276,7 @@ def main() -> int: os.execve(found_interpreter_path, args_for_exec, new_env) # os.execve does not return if successful except Exception as e: - logger.error( - f"Failed to re-execute with {found_interpreter_path}: {e}" - ) + logger.error(f"Failed to re-execute with {found_interpreter_path}: {e}") # Fall through to the error below if execve fails critically # If loop completes or execve fails before replacing the process @@ -1440,9 +1326,7 @@ def main() -> int: # Load default .env file if it exists default_env_file = ROOT_DIR / ".env" if default_env_file.exists(): - logger.info( - f"Loading environment variables from default .env file: {default_env_file}" - ) + logger.info(f"Loading environment variables from default .env file: {default_env_file}") load_dotenv(dotenv_path=default_env_file, override=True) # Handle special commands @@ -1490,14 +1374,10 @@ def main() -> int: print(f"Found {len(extensions)} extensions:") for extension in extensions: - print( - f" {extension['name']} - {'Enabled' if extension['enabled'] else 'Disabled'}" - ) + print(f" {extension['name']} - {'Enabled' if extension['enabled'] else 'Disabled'}") print(f" Path: {extension['path']}") print(f" Loaded: {extension['loaded']}") - print( - f" Description: {extension['metadata'].get('description', 'No description')}" - ) + print(f" Description: {extension['metadata'].get('description', 'No description')}") print() return 0 @@ -1578,44 +1458,36 @@ def main() -> int: # If --stage test is specified WITHOUT these specific flags, it will be handled in run_application. if args.unit or args.integration or args.e2e or args.performance or args.security: logger.info("Specific test flags detected. Running requested tests...") - from deployment.test_stages import \ - test_stages # Import here as it's specific to this block + from deployment.test_stages import test_stages # Import here as it's specific to this block test_run_success = True if args.unit: logger.info("Running unit tests...") test_run_success = ( - test_stages.run_unit_tests(args.coverage, args.debug) - and test_run_success + test_stages.run_unit_tests(args.coverage, args.debug) and test_run_success ) if args.integration: logger.info("Running integration tests...") test_run_success = ( - test_stages.run_integration_tests(args.coverage, args.debug) - and test_run_success + test_stages.run_integration_tests(args.coverage, args.debug) and test_run_success ) if args.e2e: logger.info("Running e2e tests...") - test_run_success = ( - test_stages.run_e2e_tests(True, args.debug) and test_run_success - ) + test_run_success = test_stages.run_e2e_tests(True, args.debug) and test_run_success if args.performance: logger.info("Running performance tests...") test_run_success = ( - test_stages.run_performance_tests(60, 10, args.debug) - and test_run_success + test_stages.run_performance_tests(60, 10, args.debug) and test_run_success ) if args.security: logger.info("Running security tests...") test_run_success = ( - test_stages.run_security_tests( - f"http://{args.host}:{args.port}", args.debug - ) + test_stages.run_security_tests(f"http://{args.host}:{args.port}", args.debug) and test_run_success ) diff --git a/server/python_backend/__init__.py b/server/python_backend/__init__.py index 0b11f4070..8c9a77ce1 100644 --- a/server/python_backend/__init__.py +++ b/server/python_backend/__init__.py @@ -9,11 +9,21 @@ from .ai_engine import AdvancedAIEngine, AIAnalysisResult from .database import DatabaseManager, get_db from .main import app -from .models import (ActivityCreate, ActivityResponse, AIAnalysisResponse, - CategoryCreate, CategoryResponse, DashboardStats, - EmailCreate, EmailResponse, EmailUpdate, FilterRequest, - GmailSyncRequest, GmailSyncResponse, - SmartRetrievalRequest) +from .models import ( + ActivityCreate, + ActivityResponse, + AIAnalysisResponse, + CategoryCreate, + CategoryResponse, + DashboardStats, + EmailCreate, + EmailResponse, + EmailUpdate, + FilterRequest, + GmailSyncRequest, + GmailSyncResponse, + SmartRetrievalRequest, +) from .performance_monitor import PerformanceMonitor __version__ = "2.0.0" diff --git a/server/python_backend/action_routes.py b/server/python_backend/action_routes.py index d33f9874c..57682b9a1 100644 --- a/server/python_backend/action_routes.py +++ b/server/python_backend/action_routes.py @@ -5,8 +5,7 @@ from fastapi import APIRouter, HTTPException, Request from .ai_engine import AdvancedAIEngine -from .models import (ActionExtractionRequest, # Changed from .main to .models - ActionItem) +from .models import ActionExtractionRequest, ActionItem # Changed from .main to .models from .performance_monitor import PerformanceMonitor logger = logging.getLogger(__name__) diff --git a/server/python_backend/ai_engine.py b/server/python_backend/ai_engine.py index 349279cda..c08e3516d 100644 --- a/server/python_backend/ai_engine.py +++ b/server/python_backend/ai_engine.py @@ -5,6 +5,7 @@ import json import logging + # import sys # No longer needed for subprocess import os from datetime import datetime @@ -101,8 +102,7 @@ async def _match_category_id( logger.info(log_msg) return db_cat["id"] log_msg = ( - f"No direct match for AI categories: {ai_categories} " - f"against DB categories." + f"No direct match for AI categories: {ai_categories} " f"against DB categories." ) logger.info(log_msg) except Exception as e: @@ -114,9 +114,7 @@ async def analyze_email( ) -> AIAnalysisResult: """Analyze email content with AI and optional DB category matching.""" log_subject = subject[:50] + "..." if len(subject) > 50 else subject - logger.info( - f"Initiating AI analysis for email subject: '{log_subject}'" - ) + logger.info(f"Initiating AI analysis for email subject: '{log_subject}'") try: analysis_data = self.nlp_engine.analyze_email(subject, content) @@ -124,9 +122,7 @@ async def analyze_email( analysis_data["action_items"] = [] if db and analysis_data.get("categories"): - matched_category_id = await self._match_category_id( - analysis_data["categories"], db - ) + matched_category_id = await self._match_category_id(analysis_data["categories"], db) if matched_category_id: analysis_data["category_id"] = matched_category_id else: @@ -139,13 +135,8 @@ async def analyze_email( logger.info(log_msg) return AIAnalysisResult(analysis_data) except Exception as e: - logger.error( - f"An unexpected error occurred during AI analysis: {e}", - exc_info=True - ) - return self._get_fallback_analysis( - subject, content, f"AI analysis error: {str(e)}" - ) + logger.error(f"An unexpected error occurred during AI analysis: {e}", exc_info=True) + return self._get_fallback_analysis(subject, content, f"AI analysis error: {str(e)}") def train_models( self, training_emails: Optional[List[Dict[str, Any]]] = None @@ -168,26 +159,26 @@ def train_models( with open(training_file_path, "w") as f: json.dump(training_emails, f) except IOError as e: - logger.error( - f"Error creating temp training file {training_file_path}: {e}" - ) + logger.error(f"Error creating temp training file {training_file_path}: {e}") if os.path.exists(training_file_path): try: os.remove(training_file_path) logger.info(f"Removed temp training file: {training_file_path}") except OSError as e: - logger.error( - f"Error removing temp training file {training_file_path}: {e}" - ) + logger.error(f"Error removing temp training file {training_file_path}: {e}") error_msg = ( "Model training via direct NLPEngine call is not implemented. " "Requires ai_training.py logic integration." ) return { - "success": False, "error": error_msg, "modelsTrained": [], - "trainingAccuracy": {}, "validationAccuracy": {}, "trainingTime": 0, + "success": False, + "error": error_msg, + "modelsTrained": [], + "trainingAccuracy": {}, + "validationAccuracy": {}, + "trainingTime": 0, "emailsProcessed": len(training_emails) if training_emails else 0, } @@ -233,9 +224,7 @@ def health_check(self) -> Dict[str, Any]: "timestamp": datetime.now().isoformat(), } except Exception as e: - logger.error( - f"AI health check failed during direct inspection: {e}", exc_info=True - ) + logger.error(f"AI health check failed during direct inspection: {e}", exc_info=True) return { "status": "unhealthy", "error": str(e), @@ -258,14 +247,9 @@ def cleanup(self): # Changed to synchronous (was async but did sync operations) if os.path.exists(temp_file): try: os.remove(temp_file) - logger.info( - f"Removed temp file during cleanup: {temp_file}" - ) + logger.info(f"Removed temp file during cleanup: {temp_file}") except OSError as e: - err_msg = ( - f"Error removing temp file {temp_file} " - f"during cleanup: {e}" - ) + err_msg = f"Error removing temp file {temp_file} " f"during cleanup: {e}" logger.error(err_msg) logger.info("AI Engine cleanup completed") @@ -308,29 +292,28 @@ def _get_fallback_analysis( "confidence": fallback_data.get("confidence", 0.3), "categories": fallback_data.get("categories", ["general"]), "keywords": fallback_data.get("keywords", []), - "reasoning": fallback_data.get( - "reasoning", "Fallback: AI service unavailable" - ), - "suggested_labels": fallback_data.get( - "suggested_labels", ["general"] - ), - "risk_flags": fallback_data.get( - "risk_flags", ["ai_analysis_failed"] - ), + "reasoning": fallback_data.get("reasoning", "Fallback: AI service unavailable"), + "suggested_labels": fallback_data.get("suggested_labels", ["general"]), + "risk_flags": fallback_data.get("risk_flags", ["ai_analysis_failed"]), "category_id": None, "action_items": [], } ) except Exception as e: - logger.error( - f"Error generating fallback analysis itself: {e}", exc_info=True + logger.error(f"Error generating fallback analysis itself: {e}", exc_info=True) + return AIAnalysisResult( + { + "topic": "unknown", + "sentiment": "neutral", + "intent": "unknown", + "urgency": "low", + "confidence": 0.1, + "categories": ["general"], + "keywords": [], + "reasoning": f"Critical failure in AI and fallback: {e}", + "suggested_labels": ["general"], + "risk_flags": ["ai_analysis_critically_failed"], + "category_id": None, + "action_items": [], + } ) - return AIAnalysisResult({ - "topic": "unknown", "sentiment": "neutral", "intent": "unknown", - "urgency": "low", "confidence": 0.1, "categories": ["general"], - "keywords": [], - "reasoning": f"Critical failure in AI and fallback: {e}", - "suggested_labels": ["general"], - "risk_flags": ["ai_analysis_critically_failed"], - "category_id": None, "action_items": [], - }) diff --git a/server/python_backend/category_routes.py b/server/python_backend/category_routes.py index 305b2e566..a7ca5557e 100644 --- a/server/python_backend/category_routes.py +++ b/server/python_backend/category_routes.py @@ -6,8 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request from .database import DatabaseManager, get_db -from .models import ( # Added CategoryResponse, changed from .main - CategoryCreate, CategoryResponse) +from .models import CategoryCreate, CategoryResponse # Added CategoryResponse, changed from .main from .performance_monitor import PerformanceMonitor logger = logging.getLogger(__name__) @@ -43,9 +42,7 @@ async def get_categories(request: Request, db: DatabaseManager = Depends(get_db) raise HTTPException(status_code=500, detail="Failed to fetch categories") -@router.post( - "/api/categories", response_model=CategoryResponse -) # Changed to CategoryResponse +@router.post("/api/categories", response_model=CategoryResponse) # Changed to CategoryResponse @performance_monitor.track async def create_category( request: Request, category: CategoryCreate, db: DatabaseManager = Depends(get_db) @@ -55,9 +52,7 @@ async def create_category( created_category_dict = await db.create_category( category.dict() ) # db.create_category returns a dict - return CategoryResponse( - **created_category_dict - ) # Ensure it returns CategoryResponse + return CategoryResponse(**created_category_dict) # Ensure it returns CategoryResponse except psycopg2.Error as db_err: log_data = { "message": "Database operation failed while creating category", diff --git a/server/python_backend/dashboard_routes.py b/server/python_backend/dashboard_routes.py index a7e66b309..8a3f33f97 100644 --- a/server/python_backend/dashboard_routes.py +++ b/server/python_backend/dashboard_routes.py @@ -5,8 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request from .database import DatabaseManager, get_db -from .models import \ - DashboardStats +from .models import DashboardStats from .performance_monitor import PerformanceMonitor logger = logging.getLogger(__name__) @@ -14,16 +13,12 @@ performance_monitor = PerformanceMonitor() # Initialize performance monitor -@router.get( - "/api/dashboard/stats", response_model=DashboardStats -) # Changed to DashboardStats +@router.get("/api/dashboard/stats", response_model=DashboardStats) # Changed to DashboardStats @performance_monitor.track async def get_dashboard_stats(request: Request, db: DatabaseManager = Depends(get_db)): """Get comprehensive dashboard statistics""" try: - stats_dict = ( - await db.get_dashboard_stats() - ) # db.get_dashboard_stats returns a dict + stats_dict = await db.get_dashboard_stats() # db.get_dashboard_stats returns a dict # Ensure that the keys in stats_dict match the fields (or aliases) in models.DashboardStats # Ensure that the keys in stats_dict match the fields (or aliases) # in models.DashboardStats. Pydantic's `validate_by_name = True` (formerly diff --git a/server/python_backend/database.py b/server/python_backend/database.py index 2453fbc36..215198fb2 100644 --- a/server/python_backend/database.py +++ b/server/python_backend/database.py @@ -103,14 +103,10 @@ async def get_connection(self): async def initialize(self): """Initialize database asynchronously (e.g., seed data)""" - logger.info( - "DatabaseManager initialized. Default categories seeding attempted." - ) + logger.info("DatabaseManager initialized. Default categories seeding attempted.") pass - def _parse_json_fields( - self, row: Dict[str, Any], fields: List[str] - ) -> Dict[str, Any]: + def _parse_json_fields(self, row: Dict[str, Any], fields: List[str]) -> Dict[str, Any]: """Helper to parse stringified JSON fields in a row.""" if not row: return row @@ -120,8 +116,7 @@ def _parse_json_fields( row[field] = json.loads(row[field]) except json.JSONDecodeError: logger.warning( - f"Failed to parse JSON for field {field} " - f"in row {row.get('id')}" + f"Failed to parse JSON for field {field} " f"in row {row.get('id')}" ) if field in ("analysisMetadata", "metadata"): row[field] = {} @@ -129,9 +124,7 @@ def _parse_json_fields( row[field] = [] return row - async def create_email( - self, email_data: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: + async def create_email(self, email_data: Dict[str, Any]) -> Optional[Dict[str, Any]]: """Create a new email record.""" is_unread = not email_data.get("is_read", False) @@ -165,8 +158,7 @@ async def create_email( is_unread, email_data.get("category_id", email_data.get("categoryId")), email_data.get("confidence", 0), - json.dumps(email_data.get("analysis_metadata", - email_data.get("analysisMetadata", {}))), + json.dumps(email_data.get("analysis_metadata", email_data.get("analysisMetadata", {}))), email_data.get("history_id", email_data.get("historyId")), email_data.get("content_html", email_data.get("contentHtml")), email_data.get("preview", email_data.get("snippet")), @@ -184,10 +176,8 @@ async def create_email( email_data.get("is_spam", email_data.get("isSpam", False)), email_data.get("is_trash", email_data.get("isTrash", False)), email_data.get("is_chat", email_data.get("isChat", False)), - email_data.get("has_attachments", - email_data.get("hasAttachments", False)), - email_data.get("attachment_count", - email_data.get("attachmentCount", 0)), + email_data.get("has_attachments", email_data.get("hasAttachments", False)), + email_data.get("attachment_count", email_data.get("attachmentCount", 0)), email_data.get("size_estimate", email_data.get("sizeEstimate")), email_data.get("spf_status", email_data.get("spfStatus")), email_data.get("dkim_status", email_data.get("dkimStatus")), @@ -199,30 +189,22 @@ async def create_email( email_data.get("mailing_list", email_data.get("mailingList")), email_data.get("in_reply_to", email_data.get("inReplyTo")), email_data.get("references", []), - email_data.get("is_first_in_thread", - email_data.get("isFirstInThread", True)), + email_data.get("is_first_in_thread", email_data.get("isFirstInThread", True)), ) try: - result = await self._execute_query( - query, params, fetch_one=True, commit=True - ) + result = await self._execute_query(query, params, fetch_one=True, commit=True) if result and result.get("id"): email_id = result["id"] - category_id = email_data.get( - "categoryId", email_data.get("category_id") - ) + category_id = email_data.get("categoryId", email_data.get("category_id")) if category_id: await self._update_category_count(category_id) return await self.get_email_by_id(email_id) return None except psycopg2.IntegrityError as e: logger.warning( - f"Email with messageId {email_data.get('messageId')} " - f"likely already exists: {e}" - ) - return await self.update_email_by_message_id( - email_data["messageId"], email_data + f"Email with messageId {email_data.get('messageId')} " f"likely already exists: {e}" ) + return await self.update_email_by_message_id(email_data["messageId"], email_data) async def get_email_by_id(self, email_id: int) -> Optional[Dict[str, Any]]: """Get email by ID""" @@ -235,7 +217,7 @@ async def get_email_by_id(self, email_id: int) -> Optional[Dict[str, Any]]: row = await self._execute_query(query, (email_id,), fetch_one=True) return self._parse_json_fields(row, ["analysisMetadata"]) if row else None - async def get_all_categories(self) -> List[Dict[str, Any]]: # Renamed for clarity + async def get_all_categories(self) -> List[Dict[str, Any]]: # Renamed for clarity """Get all categories with their counts.""" query = """ SELECT id, name, description, color, count @@ -245,9 +227,7 @@ async def get_all_categories(self) -> List[Dict[str, Any]]: # Renamed for clarit categories = await self._execute_query(query, fetch_all=True) return categories if categories else [] - async def create_category( - self, category_data: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: + async def create_category(self, category_data: Dict[str, Any]) -> Optional[Dict[str, Any]]: """Create a new category.""" query = """ INSERT INTO categories (name, description, color, count) @@ -261,9 +241,7 @@ async def create_category( category_data.get("count", 0), ) try: - new_category = await self._execute_query( - query, params, fetch_one=True, commit=True - ) + new_category = await self._execute_query(query, params, fetch_one=True, commit=True) return new_category except psycopg2.Error as e: logger.error(f"Failed to create category {category_data.get('name')}: {e}") @@ -308,10 +286,7 @@ async def get_emails( emails = await self._execute_query(base_query, tuple(params), fetch_all=True) if emails: - return [ - self._parse_json_fields(email, ["analysisMetadata"]) - for email in emails - ] + return [self._parse_json_fields(email, ["analysisMetadata"]) for email in emails] return [] async def update_email_by_message_id( @@ -341,41 +316,74 @@ async def update_email_by_message_id( column_name = "labels" if not isinstance(value, list): logger.warning( - f"Labels value for update is not a list: {value}, " - "attempting to wrap." + f"Labels value for update is not a list: {value}, " "attempting to wrap." ) value = [str(value)] elif key == "timestamp": column_name = '"time"' - elif key == "historyId": column_name = "history_id" - elif key == "contentHtml": column_name = "content_html" - elif key == "toAddresses": column_name = "to_addresses" - elif key == "ccAddresses": column_name = "cc_addresses" - elif key == "bccAddresses": column_name = "bcc_addresses" - elif key == "replyTo": column_name = "reply_to" - elif key == "internalDate": column_name = "internal_date" - elif key == "labelIds": column_name = "label_ids" - elif key == "isStarred": column_name = "is_starred" - elif key == "isImportant": column_name = "is_important" - elif key == "isDraft": column_name = "is_draft" - elif key == "isSent": column_name = "is_sent" - elif key == "isSpam": column_name = "is_spam" - elif key == "isTrash": column_name = "is_trash" - elif key == "isChat": column_name = "is_chat" - elif key == "hasAttachments": column_name = "has_attachments" - elif key == "attachmentCount": column_name = "attachment_count" - elif key == "sizeEstimate": column_name = "size_estimate" - elif key == "spfStatus": column_name = "spf_status" - elif key == "dkimStatus": column_name = "dkim_status" - elif key == "dmarcStatus": column_name = "dmarc_status" - elif key == "isEncrypted": column_name = "is_encrypted" - elif key == "isSigned": column_name = "is_signed" - elif key == "isAutoReply": column_name = "is_auto_reply" - elif key == "mailingList": column_name = "mailing_list" - elif key == "inReplyTo": column_name = "in_reply_to" - elif key == "isFirstInThread": column_name = "is_first_in_thread" - elif key in ["subject", "sender", "content", "snippet", - "category", "priority", "references"]: + elif key == "historyId": + column_name = "history_id" + elif key == "contentHtml": + column_name = "content_html" + elif key == "toAddresses": + column_name = "to_addresses" + elif key == "ccAddresses": + column_name = "cc_addresses" + elif key == "bccAddresses": + column_name = "bcc_addresses" + elif key == "replyTo": + column_name = "reply_to" + elif key == "internalDate": + column_name = "internal_date" + elif key == "labelIds": + column_name = "label_ids" + elif key == "isStarred": + column_name = "is_starred" + elif key == "isImportant": + column_name = "is_important" + elif key == "isDraft": + column_name = "is_draft" + elif key == "isSent": + column_name = "is_sent" + elif key == "isSpam": + column_name = "is_spam" + elif key == "isTrash": + column_name = "is_trash" + elif key == "isChat": + column_name = "is_chat" + elif key == "hasAttachments": + column_name = "has_attachments" + elif key == "attachmentCount": + column_name = "attachment_count" + elif key == "sizeEstimate": + column_name = "size_estimate" + elif key == "spfStatus": + column_name = "spf_status" + elif key == "dkimStatus": + column_name = "dkim_status" + elif key == "dmarcStatus": + column_name = "dmarc_status" + elif key == "isEncrypted": + column_name = "is_encrypted" + elif key == "isSigned": + column_name = "is_signed" + elif key == "isAutoReply": + column_name = "is_auto_reply" + elif key == "mailingList": + column_name = "mailing_list" + elif key == "inReplyTo": + column_name = "in_reply_to" + elif key == "isFirstInThread": + column_name = "is_first_in_thread" + elif key in [ + "subject", + "sender", + "content", + "snippet", + "category", + "priority", + "references", + ]: if key == "references" and not isinstance(value, list): value = [str(value)] column_name = key @@ -393,17 +401,12 @@ async def update_email_by_message_id( return await self.get_email_by_message_id(message_id) set_clauses.append("updated_at = NOW()") - query = ( - f"UPDATE emails SET {', '.join(set_clauses)} " - f"WHERE message_id = %s" - ) + query = f"UPDATE emails SET {', '.join(set_clauses)} " f"WHERE message_id = %s" params.append(message_id) await self._execute_query(query, tuple(params), commit=True) return await self.get_email_by_message_id(message_id) - async def get_email_by_message_id( - self, message_id: str - ) -> Optional[Dict[str, Any]]: + async def get_email_by_message_id(self, message_id: str) -> Optional[Dict[str, Any]]: """Get email by messageId""" query = ( "SELECT e.*, c.name as categoryName, c.color as categoryColor " @@ -413,9 +416,7 @@ async def get_email_by_message_id( row = await self._execute_query(query, (message_id,), fetch_one=True) return self._parse_json_fields(row, ["analysisMetadata"]) if row else None - async def create_activity( - self, activity_data: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: + async def create_activity(self, activity_data: Dict[str, Any]) -> Optional[Dict[str, Any]]: """Create a new activity record.""" query = """ INSERT INTO activities @@ -437,12 +438,8 @@ async def create_activity( activity_data.get("icon", "default_icon"), activity_data.get("icon_bg", activity_data.get("iconBg", "#ffffff")), ) - new_activity = await self._execute_query( - query, params, fetch_one=True, commit=True - ) - return ( - self._parse_json_fields(new_activity, ["details"]) if new_activity else None - ) + new_activity = await self._execute_query(query, params, fetch_one=True, commit=True) + return self._parse_json_fields(new_activity, ["details"]) if new_activity else None async def get_recent_activities(self, limit: int = 10) -> List[Dict[str, Any]]: """Get recent activities""" @@ -452,27 +449,29 @@ async def get_recent_activities(self, limit: int = 10) -> List[Dict[str, Any]]: ) activities = await self._execute_query(query, (limit,), fetch_all=True) if activities: - return [ - self._parse_json_fields(activity, ["details"]) - for activity in activities - ] + return [self._parse_json_fields(activity, ["details"]) for activity in activities] return [] async def get_dashboard_stats(self) -> Dict[str, Any]: logger.warning("get_dashboard_stats not fully migrated to PostgreSQL yet.") results = await asyncio.gather( - self._execute_query("SELECT COUNT(*) AS count FROM emails", - fetch_one=True), - self._execute_query("SELECT COUNT(*) AS count FROM emails " - "WHERE is_unread = TRUE", fetch_one=True), - self._execute_query("SELECT COUNT(*) AS count FROM emails " - "WHERE priority = %s", ("high",), fetch_one=True), - self._execute_query("SELECT COUNT(*) AS count FROM emails " - "WHERE is_spam = TRUE", fetch_one=True), - self._execute_query("SELECT COUNT(*) AS count FROM categories", - fetch_one=True), - self._execute_query("SELECT name, color, count FROM categories " - "ORDER BY count DESC LIMIT 5", fetch_all=True), + self._execute_query("SELECT COUNT(*) AS count FROM emails", fetch_one=True), + self._execute_query( + "SELECT COUNT(*) AS count FROM emails " "WHERE is_unread = TRUE", fetch_one=True + ), + self._execute_query( + "SELECT COUNT(*) AS count FROM emails " "WHERE priority = %s", + ("high",), + fetch_one=True, + ), + self._execute_query( + "SELECT COUNT(*) AS count FROM emails " "WHERE is_spam = TRUE", fetch_one=True + ), + self._execute_query("SELECT COUNT(*) AS count FROM categories", fetch_one=True), + self._execute_query( + "SELECT name, color, count FROM categories " "ORDER BY count DESC LIMIT 5", + fetch_all=True, + ), ) total_emails = results[0]["count"] if results[0] else 0 @@ -483,21 +482,23 @@ async def get_dashboard_stats(self) -> Dict[str, Any]: top_categories_list = results[5] if results[5] else [] return { - "totalEmails": total_emails, "unreadEmails": unread_emails, - "importantEmails": important_emails, "spamEmails": spam_emails, + "totalEmails": total_emails, + "unreadEmails": unread_emails, + "importantEmails": important_emails, + "spamEmails": spam_emails, "totalCategoryTypes": total_category_types, "topCategories": top_categories_list, "autoLabeled": total_emails, # Placeholder "timeSaved": "2.5 hours", # Placeholder - "weeklyGrowth": { # Placeholder for complex calculation - "totalEmails": total_emails, "autoLabeled": total_emails, - "categories": total_category_types, "timeSaved": 0, + "weeklyGrowth": { # Placeholder for complex calculation + "totalEmails": total_emails, + "autoLabeled": total_emails, + "categories": total_category_types, + "timeSaved": 0, }, } - async def get_all_emails( - self, limit: int = 50, offset: int = 0 - ) -> List[Dict[str, Any]]: + async def get_all_emails(self, limit: int = 50, offset: int = 0) -> List[Dict[str, Any]]: """Get all emails with pagination""" return await self.get_emails(limit=limit, offset=offset) @@ -505,13 +506,9 @@ async def get_emails_by_category( self, category_id: int, limit: int = 50, offset: int = 0 ) -> List[Dict[str, Any]]: """Get emails by category""" - return await self.get_emails( - limit=limit, offset=offset, category_id=category_id - ) + return await self.get_emails(limit=limit, offset=offset, category_id=category_id) - async def search_emails( - self, search_term: str, limit: int = 50 - ) -> List[Dict[str, Any]]: + async def search_emails(self, search_term: str, limit: int = 50) -> List[Dict[str, Any]]: """Search emails by content or subject.""" query = ( "SELECT e.*, c.name as categoryName, c.color as categoryColor " @@ -522,9 +519,7 @@ async def search_emails( params = (f"%{search_term}%", f"%{search_term}%", limit) emails = await self._execute_query(query, params, fetch_all=True) if emails: - return [ - self._parse_json_fields(email, ["analysisMetadata"]) for email in emails - ] + return [self._parse_json_fields(email, ["analysisMetadata"]) for email in emails] return [] async def get_recent_emails(self, limit: int = 100) -> List[Dict[str, Any]]: @@ -564,18 +559,25 @@ async def update_email( elif key == "time": column_name = '"time"' elif key in [ - "subject", "content", "sender", "confidence", - "snippet", "category", "priority", + "subject", + "content", + "sender", + "confidence", + "snippet", + "category", + "priority", ]: column_name = key else: temp_col_name = key.replace("Id", "_id").replace("Html", "_html") import re + temp_col_name = re.sub(r"(? Response: start_time = time.time() # Increment in-progress requests - REQUESTS_IN_PROGRESS.labels( - method=request.method, endpoint=request.url.path - ).inc() + REQUESTS_IN_PROGRESS.labels(method=request.method, endpoint=request.url.path).inc() # Process the request try: @@ -76,9 +73,9 @@ async def metrics_middleware(request: Request, call_next: Callable) -> Response: finally: # Record request duration duration = time.time() - start_time - REQUEST_LATENCY.labels( - method=request.method, endpoint=request.url.path - ).observe(duration) + REQUEST_LATENCY.labels(method=request.method, endpoint=request.url.path).observe( + duration + ) # Increment request count REQUEST_COUNT.labels( @@ -88,9 +85,7 @@ async def metrics_middleware(request: Request, call_next: Callable) -> Response: ).inc() # Decrement in-progress requests - REQUESTS_IN_PROGRESS.labels( - method=request.method, endpoint=request.url.path - ).dec() + REQUESTS_IN_PROGRESS.labels(method=request.method, endpoint=request.url.path).dec() return response diff --git a/server/python_backend/models.py b/server/python_backend/models.py index 9704dcd2c..dbc7eff79 100644 --- a/server/python_backend/models.py +++ b/server/python_backend/models.py @@ -322,16 +322,12 @@ class Config: # Training Models class TrainingRequest(BaseModel): trainingQuery: str = Field(default="newer_than:30d", alias="training_query") - maxTrainingEmails: int = Field( - default=5000, ge=100, le=10000, alias="max_training_emails" - ) + maxTrainingEmails: int = Field(default=5000, ge=100, le=10000, alias="max_training_emails") modelTypes: List[str] = Field( default_factory=lambda: ["sentiment", "topic", "intent", "urgency"], alias="model_types", ) - validationSplit: float = Field( - default=0.2, ge=0.1, le=0.5, alias="validation_split" - ) + validationSplit: float = Field(default=0.2, ge=0.1, le=0.5, alias="validation_split") class Config: validate_by_name = True diff --git a/server/python_backend/performance_monitor.py b/server/python_backend/performance_monitor.py index 7129d1cd1..1d247a4ef 100644 --- a/server/python_backend/performance_monitor.py +++ b/server/python_backend/performance_monitor.py @@ -9,6 +9,7 @@ import logging import time from collections import defaultdict, deque + # import sqlite3 # Removed SQLite from dataclasses import asdict, dataclass # Added dataclass and field from datetime import datetime # Ensure datetime is directly available @@ -78,9 +79,7 @@ def __init__(self): # Removed db_path self.system_health_history = deque(maxlen=100) # In-memory for system health # self.service_metrics = defaultdict(list) # This was not used, can be removed # self.init_database() # Removed SQLite database initialization - logger.info( - "PerformanceMonitor initialized (in-memory mode with file logging)." - ) + logger.info("PerformanceMonitor initialized (in-memory mode with file logging).") self.LOG_INTERVAL_SECONDS = ( LOG_INTERVAL_SECONDS # Make it instance variable for potential override ) @@ -101,9 +100,7 @@ async def _log_metrics_to_file(self): log_entry = asdict(metric) log_entry["type"] = "performance_metric" log_entry["timestamp_logged"] = datetime.now().isoformat() - f.write( - json.dumps(log_entry, default=json_default_converter) + "\n" - ) + f.write(json.dumps(log_entry, default=json_default_converter) + "\n") self.metrics_buffer.clear() # Clear after successful write logged_anything = True @@ -114,9 +111,7 @@ async def _log_metrics_to_file(self): log_entry = alert.copy() # Make a copy before modifying log_entry["type"] = "alert" log_entry["timestamp_logged"] = datetime.now().isoformat() - f.write( - json.dumps(log_entry, default=json_default_converter) + "\n" - ) + f.write(json.dumps(log_entry, default=json_default_converter) + "\n") self.alerts_buffer.clear() # Clear after successful write logged_anything = True @@ -127,21 +122,15 @@ async def _log_metrics_to_file(self): log_entry = asdict(health_record) log_entry["type"] = "system_health" log_entry["timestamp_logged"] = datetime.now().isoformat() - f.write( - json.dumps(log_entry, default=json_default_converter) + "\n" - ) + f.write(json.dumps(log_entry, default=json_default_converter) + "\n") self.system_health_history.clear() # Clear after successful write logged_anything = True if logged_anything: - logger.info( - f"Successfully logged performance data to {self.PERFORMANCE_LOG_FILE}" - ) + logger.info(f"Successfully logged performance data to {self.PERFORMANCE_LOG_FILE}") except IOError as e: - logger.error( - f"IOError writing performance metrics to {self.PERFORMANCE_LOG_FILE}: {e}" - ) + logger.error(f"IOError writing performance metrics to {self.PERFORMANCE_LOG_FILE}: {e}") except Exception as e: logger.error(f"Unexpected error logging performance metrics to file: {e}") @@ -232,9 +221,9 @@ async def get_real_time_dashboard(self) -> Dict[str, Any]: success_rate = 1.0 if recent_emails: - success_rate = sum( - 1 for m in recent_emails if m["processing_success"] - ) / len(recent_emails) + success_rate = sum(1 for m in recent_emails if m["processing_success"]) / len( + recent_emails + ) return { "timestamp": datetime.now().isoformat(), @@ -263,9 +252,7 @@ async def get_real_time_dashboard(self) -> Dict[str, Any]: } ], "alerts": self._generate_alerts(recent_emails, recent_syncs), - "recommendations": self._generate_recommendations( - recent_emails, recent_syncs - ), + "recommendations": self._generate_recommendations(recent_emails, recent_syncs), } except Exception as e: @@ -287,8 +274,7 @@ def _generate_alerts( if recent_emails: error_rate = 1 - ( - sum(1 for m in recent_emails if m["processing_success"]) - / len(recent_emails) + sum(1 for m in recent_emails if m["processing_success"]) / len(recent_emails) ) if error_rate > self.alert_thresholds["error_rate"]: alerts.append( @@ -343,9 +329,7 @@ async def record_metric( # Store in database (Removed) # await self._store_metric(metric) - logger.debug( - f"Metric recorded (in-memory): {metric.metric_name} = {metric.value}" - ) + logger.debug(f"Metric recorded (in-memory): {metric.metric_name} = {metric.value}") # Check for alerts (in-memory) await self._check_alerts(metric) @@ -356,7 +340,9 @@ async def _check_alerts(self, metric: PerformanceMetric): """Check if metric triggers any alerts and store them in-memory.""" threshold = self.alert_thresholds.get(metric.metric_name) if threshold and metric.value > threshold: - alert_message = f"{metric.metric_name} exceeded threshold: {metric.value:.2f} > {threshold}" + alert_message = ( + f"{metric.metric_name} exceeded threshold: {metric.value:.2f} > {threshold}" + ) severity = "warning" if metric.value < threshold * 1.2 else "critical" alert_data = { @@ -475,35 +461,22 @@ async def get_metrics_summary(self, hours: int = 24) -> Dict[str, Any]: "total_metrics_recorded_in_buffer_for_period": len(relevant_metrics), } - async def get_service_performance( - self, service_name: str, hours: int = 24 - ) -> Dict[str, Any]: + async def get_service_performance(self, service_name: str, hours: int = 24) -> Dict[str, Any]: """Get performance data for a specific service from in-memory buffer.""" since_time = datetime.now() - timedelta(hours=hours) service_data_points = [] for metric in self.metrics_buffer: - if ( - metric.timestamp > since_time - and metric.tags.get("service") == service_name - ): - service_data_points.append( - metric - ) # Store the whole PerformanceMetric object + if metric.timestamp > since_time and metric.tags.get("service") == service_name: + service_data_points.append(metric) # Store the whole PerformanceMetric object - response_times = [ - m.value for m in service_data_points if m.metric_name == "response_time" - ] + response_times = [m.value for m in service_data_points if m.metric_name == "response_time"] error_counts = [ m.value for m in service_data_points if m.metric_name == "error_count" ] # Assuming error_count is 1 per error - avg_response_time = ( - sum(response_times) / len(response_times) if response_times else 0 - ) - total_errors = sum( - error_counts - ) # Sum of values (e.g., if value is 1 per error) + avg_response_time = sum(response_times) / len(response_times) if response_times else 0 + total_errors = sum(error_counts) # Sum of values (e.g., if value is 1 per error) return { "service_name": service_name, @@ -542,9 +515,7 @@ async def wrapper(*args, **kwargs): raise finally: end_time = time.time() - response_time = ( - end_time - start_time - ) * 1000 # Convert to milliseconds + response_time = (end_time - start_time) * 1000 # Convert to milliseconds await self.record_metric( "response_time", @@ -646,9 +617,7 @@ async def cleanup_old_data(self, days: int = 30): """Clean up old performance data (No longer needed for in-memory)""" # This method is no longer needed as deques handle fixed-size history. # If specific cleanup of in-memory buffers were needed, it would go here. - logger.info( - "cleanup_old_data called, but not applicable for in-memory PerformanceMonitor." - ) + logger.info("cleanup_old_data called, but not applicable for in-memory PerformanceMonitor.") return { "metrics_deleted": 0, # No direct deletion like from DB "health_records_deleted": 0, diff --git a/server/python_backend/run_server.py b/server/python_backend/run_server.py index dffa321d5..8014ca286 100644 --- a/server/python_backend/run_server.py +++ b/server/python_backend/run_server.py @@ -10,7 +10,6 @@ from pathlib import Path import uvicorn - from python_backend.database import DatabaseManager from python_backend.main import app diff --git a/server/python_backend/tests/test_action_routes.py b/server/python_backend/tests/test_action_routes.py index 430bdf9f8..7120094df 100644 --- a/server/python_backend/tests/test_action_routes.py +++ b/server/python_backend/tests/test_action_routes.py @@ -14,11 +14,15 @@ # Mock PerformanceMonitor mock_performance_monitor_action_instance = MagicMock() + @pytest.fixture(scope="module", autouse=True) def mock_action_dependencies(): patches = [ - patch('server.python_backend.action_routes.ai_engine', mock_ai_engine_action), - patch('server.python_backend.action_routes.performance_monitor', mock_performance_monitor_action_instance) + patch("server.python_backend.action_routes.ai_engine", mock_ai_engine_action), + patch( + "server.python_backend.action_routes.performance_monitor", + mock_performance_monitor_action_instance, + ), ] for p in patches: p.start() @@ -26,11 +30,13 @@ def mock_action_dependencies(): for p in patches: p.stop() + @pytest.fixture def client_action(): # No db override needed as action_routes.ai_engine.analyze_email is called with db=None return TestClient(app) + def test_extract_actions_from_text(client_action): request_data = {"content": "Please follow up on this task by Friday."} @@ -38,7 +44,10 @@ def test_extract_actions_from_text(client_action): mock_ai_result = MagicMock() # This is a list of dicts, which will be converted to List[ActionItem] by Pydantic mock_ai_result.action_items = [ - {"action_phrase": "follow up on this task", "context": "Please follow up on this task by Friday."} + { + "action_phrase": "follow up on this task", + "context": "Please follow up on this task by Friday.", + } ] mock_ai_engine_action.analyze_email.return_value = mock_ai_result @@ -50,11 +59,12 @@ def test_extract_actions_from_text(client_action): assert response_data[0]["action_phrase"] == "follow up on this task" mock_ai_engine_action.analyze_email.assert_called_once_with( - subject="", # subject defaults to "" if None in request_model and passed as "" + subject="", # subject defaults to "" if None in request_model and passed as "" content=request_data["content"], - db=None # action_routes passes db=None + db=None, # action_routes passes db=None ) + def test_extract_actions_from_text_with_subject(client_action): request_data = {"subject": "Meeting Follow-up", "content": "Action: review the report."} mock_ai_result = MagicMock() @@ -68,11 +78,10 @@ def test_extract_actions_from_text_with_subject(client_action): assert response.status_code == 200 assert response.json()[0]["action_phrase"] == "review the report" mock_ai_engine_action.analyze_email.assert_called_once_with( - subject=request_data["subject"], - content=request_data["content"], - db=None + subject=request_data["subject"], content=request_data["content"], db=None ) + def test_extract_actions_from_text_ai_error(client_action): request_data = {"content": "Some text"} mock_ai_engine_action.analyze_email.side_effect = Exception("AI processing error") diff --git a/server/python_backend/tests/test_ai_engine.py b/server/python_backend/tests/test_ai_engine.py index 1e4f528cb..24f494597 100644 --- a/server/python_backend/tests/test_ai_engine.py +++ b/server/python_backend/tests/test_ai_engine.py @@ -3,8 +3,7 @@ import pytest from server.python_backend.ai_engine import AdvancedAIEngine, AIAnalysisResult -from server.python_nlp.nlp_engine import \ - NLPEngine # NLPEngine is used by AdvancedAIEngine +from server.python_nlp.nlp_engine import NLPEngine # NLPEngine is used by AdvancedAIEngine # Mock for DatabaseManager mock_db_manager_for_ai_engine = MagicMock() @@ -15,21 +14,21 @@ def ai_engine_instance(): # We need to mock NLPEngine that AdvancedAIEngine instantiates, # or mock its analyze_email method. - with patch.object(NLPEngine, 'analyze_email') as mock_nlp_analyze: + with patch.object(NLPEngine, "analyze_email") as mock_nlp_analyze: # Configure the mock for NLPEngine().analyze_email mock_nlp_analyze.return_value = { - 'topic': 'some_topic', # Raw topic from NLPEngine - 'sentiment': 'neutral', - 'intent': 'informational', - 'urgency': 'low', - 'confidence': 0.8, - 'categories': ['AI Category Suggestion 1', 'Work Related'], # Text suggestions - 'keywords': ['test', 'ai'], - 'reasoning': 'mocked nlp reasoning', - 'suggested_labels': ['label1'], - 'risk_flags': [], - 'action_items': [], - 'category_id': None # NLPEngine doesn't set this ID + "topic": "some_topic", # Raw topic from NLPEngine + "sentiment": "neutral", + "intent": "informational", + "urgency": "low", + "confidence": 0.8, + "categories": ["AI Category Suggestion 1", "Work Related"], # Text suggestions + "keywords": ["test", "ai"], + "reasoning": "mocked nlp reasoning", + "suggested_labels": ["label1"], + "risk_flags": [], + "action_items": [], + "category_id": None, # NLPEngine doesn't set this ID } engine = AdvancedAIEngine() # Store the mock for assertions if needed directly on nlp_engine's mock @@ -43,14 +42,16 @@ async def test_analyze_email_no_db_provided(ai_engine_instance: AdvancedAIEngine content = "Test Content" # NLPEngine().analyze_email is already mocked in the fixture - ai_engine_instance.nlp_engine.analyze_email.return_value['categories'] = ['AI Category Suggestion 1'] + ai_engine_instance.nlp_engine.analyze_email.return_value["categories"] = [ + "AI Category Suggestion 1" + ] result = await ai_engine_instance.analyze_email(subject, content, db=None) assert isinstance(result, AIAnalysisResult) - assert result.topic == 'some_topic' - assert result.category_id is None # No DB, so no ID matching - assert result.categories == ['AI Category Suggestion 1'] # Should still have text categories + assert result.topic == "some_topic" + assert result.category_id is None # No DB, so no ID matching + assert result.categories == ["AI Category Suggestion 1"] # Should still have text categories ai_engine_instance.nlp_engine.analyze_email.assert_called_once_with(subject, content) @@ -59,21 +60,32 @@ async def test_analyze_email_with_db_category_match(ai_engine_instance: Advanced subject = "Work Email" content = "Project discussion about work." - ai_engine_instance.nlp_engine.analyze_email.return_value['categories'] = ['Work Related', 'Important Stuff'] + ai_engine_instance.nlp_engine.analyze_email.return_value["categories"] = [ + "Work Related", + "Important Stuff", + ] # Mock database categories mock_db_categories = [ - {'id': 1, 'name': 'Personal', 'description': '', 'color': '', 'count': 0}, - {'id': 5, 'name': 'Work Related', 'description': '', 'color': '', 'count': 0}, # This should match - {'id': 10, 'name': 'Finance', 'description': '', 'color': '', 'count': 0}, + {"id": 1, "name": "Personal", "description": "", "color": "", "count": 0}, + { + "id": 5, + "name": "Work Related", + "description": "", + "color": "", + "count": 0, + }, # This should match + {"id": 10, "name": "Finance", "description": "", "color": "", "count": 0}, ] mock_db_manager_for_ai_engine.get_all_categories.return_value = mock_db_categories - result = await ai_engine_instance.analyze_email(subject, content, db=mock_db_manager_for_ai_engine) + result = await ai_engine_instance.analyze_email( + subject, content, db=mock_db_manager_for_ai_engine + ) assert isinstance(result, AIAnalysisResult) - assert result.category_id == 5 # Matched 'Work Related' - assert result.categories == ['Work Related', 'Important Stuff'] # Original AI suggestions + assert result.category_id == 5 # Matched 'Work Related' + assert result.categories == ["Work Related", "Important Stuff"] # Original AI suggestions mock_db_manager_for_ai_engine.get_all_categories.assert_called_once() @@ -82,19 +94,23 @@ async def test_analyze_email_with_db_no_category_match(ai_engine_instance: Advan subject = "Unique Topic" content = "Content about something new." - ai_engine_instance.nlp_engine.analyze_email.return_value['categories'] = ['Very New AI Category'] + ai_engine_instance.nlp_engine.analyze_email.return_value["categories"] = [ + "Very New AI Category" + ] mock_db_categories = [ - {'id': 1, 'name': 'Personal'}, - {'id': 5, 'name': 'Work'}, + {"id": 1, "name": "Personal"}, + {"id": 5, "name": "Work"}, ] mock_db_manager_for_ai_engine.get_all_categories.return_value = mock_db_categories - result = await ai_engine_instance.analyze_email(subject, content, db=mock_db_manager_for_ai_engine) + result = await ai_engine_instance.analyze_email( + subject, content, db=mock_db_manager_for_ai_engine + ) assert isinstance(result, AIAnalysisResult) - assert result.category_id is None # No match - assert result.categories == ['Very New AI Category'] + assert result.category_id is None # No match + assert result.categories == ["Very New AI Category"] @pytest.mark.asyncio @@ -102,16 +118,18 @@ async def test_analyze_email_db_error_during_category_match(ai_engine_instance: subject = "Test Subject" content = "Test Content" - ai_engine_instance.nlp_engine.analyze_email.return_value['categories'] = ['Some AI Category'] + ai_engine_instance.nlp_engine.analyze_email.return_value["categories"] = ["Some AI Category"] mock_db_manager_for_ai_engine.get_all_categories.side_effect = Exception("DB connection error") - result = await ai_engine_instance.analyze_email(subject, content, db=mock_db_manager_for_ai_engine) + result = await ai_engine_instance.analyze_email( + subject, content, db=mock_db_manager_for_ai_engine + ) assert isinstance(result, AIAnalysisResult) - assert result.category_id is None # Error during matching, so no ID + assert result.category_id is None # Error during matching, so no ID # Should still return the rest of the analysis from NLPEngine - assert result.topic == 'some_topic' - assert result.categories == ['Some AI Category'] + assert result.topic == "some_topic" + assert result.categories == ["Some AI Category"] @pytest.mark.asyncio @@ -119,12 +137,16 @@ async def test_analyze_email_no_ai_categories_to_match(ai_engine_instance: Advan subject = "Test Subject" content = "Test Content" - ai_engine_instance.nlp_engine.analyze_email.return_value['categories'] = [] # NLP engine returns no categories + ai_engine_instance.nlp_engine.analyze_email.return_value["categories"] = ( + [] + ) # NLP engine returns no categories - mock_db_categories = [{'id': 1, 'name': 'Personal'}] + mock_db_categories = [{"id": 1, "name": "Personal"}] mock_db_manager_for_ai_engine.get_all_categories.return_value = mock_db_categories - result = await ai_engine_instance.analyze_email(subject, content, db=mock_db_manager_for_ai_engine) + result = await ai_engine_instance.analyze_email( + subject, content, db=mock_db_manager_for_ai_engine + ) assert isinstance(result, AIAnalysisResult) assert result.category_id is None diff --git a/server/python_backend/tests/test_category_routes.py b/server/python_backend/tests/test_category_routes.py index 6fa61b941..1f4c344a6 100644 --- a/server/python_backend/tests/test_category_routes.py +++ b/server/python_backend/tests/test_category_routes.py @@ -6,7 +6,9 @@ from server.python_backend.main import app # Main FastAPI app # Mock DatabaseManager methods used by category_routes -mock_db_manager_cat = MagicMock() # Use a different name to avoid conflict if tests run in same session +mock_db_manager_cat = ( + MagicMock() +) # Use a different name to avoid conflict if tests run in same session mock_db_manager_cat.get_all_categories = AsyncMock() mock_db_manager_cat.create_category = AsyncMock() # update_category is not used by the current category_routes.py, but good to have if it were @@ -15,10 +17,14 @@ # Mock PerformanceMonitor mock_performance_monitor_cat_instance = MagicMock() + @pytest.fixture(scope="module", autouse=True) def mock_cat_dependencies(): patches = [ - patch('server.python_backend.category_routes.performance_monitor', mock_performance_monitor_cat_instance) + patch( + "server.python_backend.category_routes.performance_monitor", + mock_performance_monitor_cat_instance, + ) # No module-level instances of other services like AIEngine in category_routes ] for p in patches: @@ -27,13 +33,16 @@ def mock_cat_dependencies(): for p in patches: p.stop() + @pytest.fixture def client_cat(): from server.python_backend.database import get_db + app.dependency_overrides[get_db] = lambda: mock_db_manager_cat client = TestClient(app) yield client - del app.dependency_overrides[get_db] # Clean up override + del app.dependency_overrides[get_db] # Clean up override + def test_get_all_categories(client_cat): mock_categories_data = [ @@ -46,12 +55,18 @@ def test_get_all_categories(client_cat): assert response.json() == mock_categories_data mock_db_manager_cat.get_all_categories.assert_called_once() + def test_get_all_categories_db_error(client_cat): - mock_db_manager_cat.get_all_categories.side_effect = Exception("DB Error") # Simulate generic exception + mock_db_manager_cat.get_all_categories.side_effect = Exception( + "DB Error" + ) # Simulate generic exception response = client_cat.get("/api/categories") assert response.status_code == 500 - assert response.json() == {"detail": "Failed to fetch categories"} # Match error detail in route + assert response.json() == { + "detail": "Failed to fetch categories" + } # Match error detail in route + def test_create_category(client_cat): new_category_data = {"name": "Personal", "description": "Personal stuff", "color": "#00ff00"} @@ -61,11 +76,12 @@ def test_create_category(client_cat): response = client_cat.post("/api/categories", json=new_category_data) - assert response.status_code == 200 # Route returns CategoryResponse, not 201 + assert response.status_code == 200 # Route returns CategoryResponse, not 201 assert response.json()["name"] == "Personal" assert response.json()["id"] == 2 mock_db_manager_cat.create_category.assert_called_once_with(new_category_data) + def test_create_category_db_error(client_cat): new_category_data = {"name": "ErrorCategory", "description": "Test error", "color": "#0000ff"} mock_db_manager_cat.create_category.side_effect = Exception("DB Create Error") @@ -74,5 +90,6 @@ def test_create_category_db_error(client_cat): assert response.status_code == 500 assert response.json() == {"detail": "Failed to create category"} + # Note: category_routes.py does not currently have PUT /api/categories/{id} # If it were added, tests would be similar to update_email in test_email_routes.py diff --git a/server/python_backend/tests/test_dashboard_routes.py b/server/python_backend/tests/test_dashboard_routes.py index fb70af841..2b1bb87dd 100644 --- a/server/python_backend/tests/test_dashboard_routes.py +++ b/server/python_backend/tests/test_dashboard_routes.py @@ -15,11 +15,15 @@ mock_performance_monitor_dashboard_instance = MagicMock() mock_performance_monitor_dashboard_instance.get_real_time_dashboard = AsyncMock() + @pytest.fixture(scope="module", autouse=True) def mock_dashboard_dependencies(): patches = [ # Patch the instance of PerformanceMonitor used in dashboard_routes - patch('server.python_backend.dashboard_routes.performance_monitor', mock_performance_monitor_dashboard_instance) + patch( + "server.python_backend.dashboard_routes.performance_monitor", + mock_performance_monitor_dashboard_instance, + ) ] for p in patches: p.start() @@ -27,13 +31,16 @@ def mock_dashboard_dependencies(): for p in patches: p.stop() + @pytest.fixture def client_dashboard(): from server.python_backend.database import get_db + app.dependency_overrides[get_db] = lambda: mock_db_manager_dashboard client = TestClient(app) yield client - del app.dependency_overrides[get_db] # Clean up + del app.dependency_overrides[get_db] # Clean up + def test_get_dashboard_stats(client_dashboard): # This is the dict that db.get_dashboard_stats() is expected to return @@ -44,11 +51,14 @@ def test_get_dashboard_stats(client_dashboard): "spamEmails": 20, "totalCategoryTypes": 10, "topCategories": [{"name": "Work", "color": "#ff0000", "count": 300}], - "autoLabeled": 800, # Placeholder from DB logic - "timeSaved": "10 hours", # Placeholder from DB logic - "weeklyGrowth": { # Placeholder from DB logic - "totalEmails": 100, "autoLabeled": 80, "categories": 2, "timeSaved": 1 - } + "autoLabeled": 800, # Placeholder from DB logic + "timeSaved": "10 hours", # Placeholder from DB logic + "weeklyGrowth": { # Placeholder from DB logic + "totalEmails": 100, + "autoLabeled": 80, + "categories": 2, + "timeSaved": 1, + }, } mock_db_manager_dashboard.get_dashboard_stats.return_value = mock_stats_data_from_db @@ -75,17 +85,23 @@ def test_get_dashboard_stats(client_dashboard): # This implies the db_dict for weeklyGrowth should align with the Pydantic WeeklyGrowth model. # Let's adjust the mock_stats_data_from_db to reflect what DashboardStats model expects for weeklyGrowth. mock_stats_data_from_db_for_model = { - "total_emails": 1000, "auto_labeled": 800, "categories": 10, "time_saved": "10 hours", - "weekly_growth": {"emails": 100, "percentage": 0.1} # This matches models.WeeklyGrowth + "total_emails": 1000, + "auto_labeled": 800, + "categories": 10, + "time_saved": "10 hours", + "weekly_growth": {"emails": 100, "percentage": 0.1}, # This matches models.WeeklyGrowth # Other fields like unreadEmails, importantEmails from db.get_dashboard_stats are not in DashboardStats model. } mock_db_manager_dashboard.get_dashboard_stats.return_value = mock_stats_data_from_db_for_model - response = client_dashboard.get("/api/dashboard/stats") # Re-run with corrected mock + response = client_dashboard.get("/api/dashboard/stats") # Re-run with corrected mock assert response.status_code == 200 response_json_rerun = response.json() assert response_json_rerun["total_emails"] == mock_stats_data_from_db_for_model["total_emails"] - assert response_json_rerun["weekly_growth"]["emails"] == mock_stats_data_from_db_for_model["weekly_growth"]["emails"] + assert ( + response_json_rerun["weekly_growth"]["emails"] + == mock_stats_data_from_db_for_model["weekly_growth"]["emails"] + ) mock_db_manager_dashboard.get_dashboard_stats.assert_called_once() @@ -97,17 +113,23 @@ def test_get_dashboard_stats_db_error(client_dashboard): assert response.status_code == 500 assert response.json()["detail"] == "Failed to fetch dashboard stats" + def test_get_performance_overview(client_dashboard): mock_overview_data = {"status": "healthy", "efficiency": 0.9} - mock_performance_monitor_dashboard_instance.get_real_time_dashboard.return_value = mock_overview_data + mock_performance_monitor_dashboard_instance.get_real_time_dashboard.return_value = ( + mock_overview_data + ) - response = client_dashboard.get("/api/performance/overview") # Path defined in dashboard_routes + response = client_dashboard.get("/api/performance/overview") # Path defined in dashboard_routes assert response.status_code == 200 assert response.json() == mock_overview_data mock_performance_monitor_dashboard_instance.get_real_time_dashboard.assert_called_once() + def test_get_performance_overview_error(client_dashboard): - mock_performance_monitor_dashboard_instance.get_real_time_dashboard.side_effect = Exception("Perf Error") + mock_performance_monitor_dashboard_instance.get_real_time_dashboard.side_effect = Exception( + "Perf Error" + ) response = client_dashboard.get("/api/performance/overview") assert response.status_code == 500 diff --git a/server/python_backend/tests/test_email_routes.py b/server/python_backend/tests/test_email_routes.py index bdee5d09a..8b3ac2d1a 100644 --- a/server/python_backend/tests/test_email_routes.py +++ b/server/python_backend/tests/test_email_routes.py @@ -4,8 +4,7 @@ from fastapi.testclient import TestClient # Adjust import path to go up one level then into python_backend package -from server.python_backend.main import \ - app # Assuming 'app' is your FastAPI instance in main.py +from server.python_backend.main import app # Assuming 'app' is your FastAPI instance in main.py # We need to ensure that dependencies in email_routes are mocked *before* TestClient(app) is called # or that the TestClient uses dependency overrides. @@ -50,9 +49,12 @@ def mock_dependencies(): # performance_monitor = PerformanceMonitor() (module-level instance) patches = [ - patch('server.python_backend.email_routes.ai_engine', mock_ai_engine), - patch('server.python_backend.email_routes.filter_manager', mock_filter_manager), - patch('server.python_backend.email_routes.performance_monitor', mock_performance_monitor_instance) + patch("server.python_backend.email_routes.ai_engine", mock_ai_engine), + patch("server.python_backend.email_routes.filter_manager", mock_filter_manager), + patch( + "server.python_backend.email_routes.performance_monitor", + mock_performance_monitor_instance, + ), ] for p in patches: p.start() @@ -60,18 +62,34 @@ def mock_dependencies(): for p in patches: p.stop() + # Fixture for TestClient with dependency overrides @pytest.fixture def client(): - from server.python_backend.database import \ - get_db # Import here to ensure it's the one FastAPI uses + from server.python_backend.database import ( # Import here to ensure it's the one FastAPI uses + get_db, + ) app.dependency_overrides[get_db] = lambda: mock_db_manager return TestClient(app) + def test_get_all_emails(client): mock_emails_data = [ - {"id": 1, "subject": "Test 1", "sender": "a@a.com", "senderEmail": "a@a.com", "content": "c", "preview":"p", "time":"t", "labels":[], "isImportant":False, "isStarred":False, "isUnread":True, "confidence":90} + { + "id": 1, + "subject": "Test 1", + "sender": "a@a.com", + "senderEmail": "a@a.com", + "content": "c", + "preview": "p", + "time": "t", + "labels": [], + "isImportant": False, + "isStarred": False, + "isUnread": True, + "confidence": 90, + } ] mock_db_manager.get_all_emails.return_value = mock_emails_data @@ -80,6 +98,7 @@ def test_get_all_emails(client): assert response.json() == mock_emails_data mock_db_manager.get_all_emails.assert_called_once() + def test_search_emails(client): mock_emails_data = [{"id": 2, "subject": "Search Result"}] mock_db_manager.search_emails.return_value = mock_emails_data @@ -90,6 +109,7 @@ def test_search_emails(client): assert response.json()[0]["subject"] == "Search Result" mock_db_manager.search_emails.assert_called_once_with("testquery") + def test_get_emails_by_category(client): mock_emails_data = [{"id": 3, "subject": "Category Email"}] mock_db_manager.get_emails_by_category.return_value = mock_emails_data @@ -101,7 +121,20 @@ def test_get_emails_by_category(client): def test_get_email_by_id_found(client): - mock_email_data = {"id": 1, "subject": "Test Email Detail", "sender": "a@a.com", "senderEmail": "a@a.com", "content": "c", "preview":"p", "time":"t", "labels":[], "isImportant":False, "isStarred":False, "isUnread":True, "confidence":90} + mock_email_data = { + "id": 1, + "subject": "Test Email Detail", + "sender": "a@a.com", + "senderEmail": "a@a.com", + "content": "c", + "preview": "p", + "time": "t", + "labels": [], + "isImportant": False, + "isStarred": False, + "isUnread": True, + "confidence": 90, + } mock_db_manager.get_email_by_id.return_value = mock_email_data response = client.get("/api/emails/1") @@ -109,6 +142,7 @@ def test_get_email_by_id_found(client): assert response.json()["subject"] == "Test Email Detail" mock_db_manager.get_email_by_id.assert_called_once_with(1) + def test_get_email_by_id_not_found(client): mock_db_manager.get_email_by_id.return_value = None @@ -116,20 +150,23 @@ def test_get_email_by_id_not_found(client): assert response.status_code == 404 assert response.json() == {"detail": "Email not found"} + def test_create_email(client): new_email_data = { "sender": "new@example.com", "senderEmail": "new@example.com", "subject": "New Email", "content": "Email content", - "time": "2024-01-01T12:00:00Z" # Example datetime string + "time": "2024-01-01T12:00:00Z", # Example datetime string } # Mock the return value of ai_engine.analyze_email mock_ai_analysis_result = MagicMock() mock_ai_analysis_result.confidence = 0.95 mock_ai_analysis_result.category_id = 1 mock_ai_analysis_result.suggested_labels = ["test"] - mock_ai_analysis_result.to_dict.return_value = {"some_ai_field": "value"} # For analysisMetadata + mock_ai_analysis_result.to_dict.return_value = { + "some_ai_field": "value" + } # For analysisMetadata mock_ai_engine.analyze_email.return_value = mock_ai_analysis_result # Mock the return value of filter_manager.apply_filters_to_email_data @@ -137,13 +174,22 @@ def test_create_email(client): # Mock the return value of db.create_email (which returns a dict) # and db.get_email_by_id (which is called after creation) - created_email_db_dict = {**new_email_data, "id": 1, "confidence": 95, "categoryId": 1, "labels": ["test"], "analysisMetadata": {"some_ai_field": "value"}} - mock_db_manager.create_email.return_value = created_email_db_dict # Simulates what db.create_email returns - mock_db_manager.get_email_by_id.return_value = created_email_db_dict # Simulates the re-fetch + created_email_db_dict = { + **new_email_data, + "id": 1, + "confidence": 95, + "categoryId": 1, + "labels": ["test"], + "analysisMetadata": {"some_ai_field": "value"}, + } + mock_db_manager.create_email.return_value = ( + created_email_db_dict # Simulates what db.create_email returns + ) + mock_db_manager.get_email_by_id.return_value = created_email_db_dict # Simulates the re-fetch response = client.post("/api/emails", json=new_email_data) - assert response.status_code == 200 # Route returns EmailResponse, not 201 directly + assert response.status_code == 200 # Route returns EmailResponse, not 201 directly assert response.json()["subject"] == "New Email" mock_ai_engine.analyze_email.assert_called_once() mock_filter_manager.apply_filters_to_email_data.assert_called_once() @@ -153,9 +199,18 @@ def test_create_email(client): def test_update_email(client): update_data = {"subject": "Updated Subject"} updated_email_db_dict = { - "id": 1, "subject": "Updated Subject", "sender": "a@a.com", "senderEmail": "a@a.com", - "content": "c", "preview":"p", "time":"t", "labels":[], "isImportant":False, - "isStarred":False, "isUnread":True, "confidence":90 + "id": 1, + "subject": "Updated Subject", + "sender": "a@a.com", + "senderEmail": "a@a.com", + "content": "c", + "preview": "p", + "time": "t", + "labels": [], + "isImportant": False, + "isStarred": False, + "isUnread": True, + "confidence": 90, } mock_db_manager.update_email.return_value = updated_email_db_dict @@ -164,6 +219,7 @@ def test_update_email(client): assert response.json()["subject"] == "Updated Subject" mock_db_manager.update_email.assert_called_once_with(1, update_data) + def test_update_email_not_found(client): mock_db_manager.update_email.return_value = None response = client.put("/api/emails/99", json={"subject": "test"}) diff --git a/server/python_backend/tests/test_filter_routes.py b/server/python_backend/tests/test_filter_routes.py index 77ce6a3c8..86128a170 100644 --- a/server/python_backend/tests/test_filter_routes.py +++ b/server/python_backend/tests/test_filter_routes.py @@ -5,13 +5,12 @@ from fastapi.testclient import TestClient from server.python_backend.main import app -from server.python_nlp.smart_filters import \ - EmailFilter # For response model type hinting +from server.python_nlp.smart_filters import EmailFilter # For response model type hinting # Mock SmartFilterManager methods mock_filter_manager_instance = MagicMock() mock_filter_manager_instance.get_all_filters = AsyncMock() -mock_filter_manager_instance.add_custom_filter = MagicMock() # This one is synchronous in the class +mock_filter_manager_instance.add_custom_filter = MagicMock() # This one is synchronous in the class mock_filter_manager_instance.create_intelligent_filters = AsyncMock() mock_filter_manager_instance.prune_ineffective_filters = AsyncMock() @@ -22,11 +21,15 @@ # Mock PerformanceMonitor mock_performance_monitor_filter_instance = MagicMock() + @pytest.fixture(scope="module", autouse=True) def mock_filter_dependencies(): patches = [ - patch('server.python_backend.filter_routes.filter_manager', mock_filter_manager_instance), - patch('server.python_backend.filter_routes.performance_monitor', mock_performance_monitor_filter_instance) + patch("server.python_backend.filter_routes.filter_manager", mock_filter_manager_instance), + patch( + "server.python_backend.filter_routes.performance_monitor", + mock_performance_monitor_filter_instance, + ), ] for p in patches: p.start() @@ -34,9 +37,11 @@ def mock_filter_dependencies(): for p in patches: p.stop() + @pytest.fixture def client_filter(): from server.python_backend.database import get_db + app.dependency_overrides[get_db] = lambda: mock_db_manager_filter client = TestClient(app) yield client @@ -44,7 +49,7 @@ def client_filter(): def test_get_filters(client_filter): - mock_filters_data = [{"filter_id": "filter1", "name": "Test Filter"}] # Simplified + mock_filters_data = [{"filter_id": "filter1", "name": "Test Filter"}] # Simplified mock_filter_manager_instance.get_all_filters.return_value = mock_filters_data response = client_filter.get("/api/filters") @@ -52,6 +57,7 @@ def test_get_filters(client_filter): assert response.json() == {"filters": mock_filters_data} mock_filter_manager_instance.get_all_filters.assert_called_once() + def test_create_filter(client_filter): # Note: EmailFilter dataclass is complex. We'll mock the return of add_custom_filter. # The route uses FilterRequest Pydantic model for input. @@ -59,13 +65,13 @@ def test_create_filter(client_filter): "name": "My Custom Filter", "criteria": {"subject_keywords": ["test"]}, "actions": {"add_label": "Tested"}, - "priority": 7 + "priority": 7, } # Mock the EmailFilter object that add_custom_filter would return mock_created_filter = EmailFilter( filter_id="custom_filter_id_123", name=filter_request_data["name"], - description=None, # Or add if part of request + description=None, # Or add if part of request criteria=filter_request_data["criteria"], actions=filter_request_data["actions"], priority=filter_request_data["priority"], @@ -74,7 +80,7 @@ def test_create_filter(client_filter): last_used=datetime.now(), usage_count=0, false_positive_rate=0.0, - performance_metrics={} + performance_metrics={}, ) mock_filter_manager_instance.add_custom_filter.return_value = mock_created_filter @@ -88,26 +94,32 @@ def test_create_filter(client_filter): assert response_json["name"] == mock_created_filter.name mock_filter_manager_instance.add_custom_filter.assert_called_once_with( name=filter_request_data["name"], - description=None, # Assuming description is optional and defaults to None if not in request + description=None, # Assuming description is optional and defaults to None if not in request criteria=filter_request_data["criteria"], actions=filter_request_data["actions"], - priority=filter_request_data["priority"] + priority=filter_request_data["priority"], ) + def test_generate_intelligent_filters(client_filter): - mock_db_manager_filter.get_recent_emails.return_value = [{"id": 1, "subject": "Sample"}] # Sample email data - mock_generated_filters_data = [{"filter_id": "gen1", "name": "Generated Filter"}] # Simplified - mock_filter_manager_instance.create_intelligent_filters.return_value = mock_generated_filters_data + mock_db_manager_filter.get_recent_emails.return_value = [ + {"id": 1, "subject": "Sample"} + ] # Sample email data + mock_generated_filters_data = [{"filter_id": "gen1", "name": "Generated Filter"}] # Simplified + mock_filter_manager_instance.create_intelligent_filters.return_value = ( + mock_generated_filters_data + ) response = client_filter.post("/api/filters/generate-intelligent") assert response.status_code == 200 assert response.json() == { "created_filters": len(mock_generated_filters_data), - "filters": mock_generated_filters_data + "filters": mock_generated_filters_data, } mock_db_manager_filter.get_recent_emails.assert_called_once_with(limit=1000) mock_filter_manager_instance.create_intelligent_filters.assert_called_once() + def test_prune_filters(client_filter): mock_prune_results = {"pruned_count": 5} mock_filter_manager_instance.prune_ineffective_filters.return_value = mock_prune_results diff --git a/server/python_backend/tests/test_gmail_routes.py b/server/python_backend/tests/test_gmail_routes.py index 4b74bb1e6..a08e55dd6 100644 --- a/server/python_backend/tests/test_gmail_routes.py +++ b/server/python_backend/tests/test_gmail_routes.py @@ -21,14 +21,27 @@ # Mock PerformanceMonitor mock_performance_monitor_gmail_instance = MagicMock() + @pytest.fixture(scope="module", autouse=True) def mock_gmail_dependencies(): patches = [ - patch('server.python_backend.gmail_routes.GmailAIService', return_value=mock_gmail_service_instance), + patch( + "server.python_backend.gmail_routes.GmailAIService", + return_value=mock_gmail_service_instance, + ), # Patch the constructors if they are called directly in gmail_routes for instantiation - patch('server.python_backend.gmail_routes.DatabaseManager', return_value=mock_db_manager_for_gmail), - patch('server.python_backend.gmail_routes.AdvancedAIEngine', return_value=mock_ai_engine_for_gmail), - patch('server.python_backend.gmail_routes.performance_monitor', mock_performance_monitor_gmail_instance) + patch( + "server.python_backend.gmail_routes.DatabaseManager", + return_value=mock_db_manager_for_gmail, + ), + patch( + "server.python_backend.gmail_routes.AdvancedAIEngine", + return_value=mock_ai_engine_for_gmail, + ), + patch( + "server.python_backend.gmail_routes.performance_monitor", + mock_performance_monitor_gmail_instance, + ), ] for p in patches: p.start() @@ -36,13 +49,22 @@ def mock_gmail_dependencies(): for p in patches: p.stop() + @pytest.fixture def client_gmail(): # No db override needed here as gmail_routes doesn't use get_db directly for its main functions return TestClient(app) + def test_sync_gmail(client_gmail): - mock_sync_result = {"success": True, "processedCount": 100, "emailsCreated": 5, "errorsCount": 0, "batchInfo": {}, "statistics": {}} + mock_sync_result = { + "success": True, + "processedCount": 100, + "emailsCreated": 5, + "errorsCount": 0, + "batchInfo": {}, + "statistics": {}, + } mock_gmail_service_instance.sync_gmail_emails.return_value = mock_sync_result request_payload = { @@ -50,7 +72,7 @@ def test_sync_gmail(client_gmail): "queryFilter": "test", "includeAIAnalysis": True, "strategies": [], - "timeBudgetMinutes": 10 + "timeBudgetMinutes": 10, } response = client_gmail.post("/api/gmail/sync", json=request_payload) @@ -69,15 +91,20 @@ def test_sync_gmail(client_gmail): # For now, this test is fine, but the mock call check might need to be more specific. ) + def test_sync_gmail_api_error(client_gmail): # Simulate GoogleApiHttpError (which should be caught and result in HTTPException) from googleapiclient.errors import HttpError - mock_gmail_service_instance.sync_gmail_emails.side_effect = HttpError(MagicMock(status=401, reason="Auth error"), b'{"error": "Auth error"}') - response = client_gmail.post("/api/gmail/sync", json={}) # Basic payload - assert response.status_code == 401 # Or whatever status code the route maps it to + mock_gmail_service_instance.sync_gmail_emails.side_effect = HttpError( + MagicMock(status=401, reason="Auth error"), b'{"error": "Auth error"}' + ) + + response = client_gmail.post("/api/gmail/sync", json={}) # Basic payload + assert response.status_code == 401 # Or whatever status code the route maps it to assert "Gmail API authentication failed" in response.json()["detail"] + def test_smart_retrieval(client_gmail): mock_retrieval_result = {"success": True, "totalEmails": 50} mock_gmail_service_instance.execute_smart_retrieval.return_value = mock_retrieval_result @@ -91,6 +118,7 @@ def test_smart_retrieval(client_gmail): strategies=["strat1"], max_api_calls=50, time_budget_minutes=20 ) + def test_get_retrieval_strategies(client_gmail): mock_strategies_data = [{"name": "strategy1", "details": "..."}] mock_gmail_service_instance.get_retrieval_strategies.return_value = mock_strategies_data @@ -100,6 +128,7 @@ def test_get_retrieval_strategies(client_gmail): assert response.json() == {"strategies": mock_strategies_data} mock_gmail_service_instance.get_retrieval_strategies.assert_called_once() + def test_get_gmail_performance(client_gmail): mock_performance_data = {"status": "healthy", "metrics": {}} mock_gmail_service_instance.get_performance_metrics.return_value = mock_performance_data diff --git a/server/python_nlp/action_item_extractor.py b/server/python_nlp/action_item_extractor.py index c241258c1..cf30579f1 100644 --- a/server/python_nlp/action_item_extractor.py +++ b/server/python_nlp/action_item_extractor.py @@ -66,22 +66,16 @@ def __init__(self): r"next (week|month|year))\b", re.IGNORECASE, ) - self.sentence_splitter_regex = re.compile( - r"(? tuple[Optional[str], Optional[str]]: + def _extract_verb_object_with_nltk(self, text: str) -> tuple[Optional[str], Optional[str]]: """ Extracts verb and object from a phrase using NLTK POS tagging. This is a simplified approach. @@ -107,21 +101,15 @@ def _extract_verb_object_with_nltk( verb_index = tokens.index(verb) for i in range(verb_index + 1, len(tagged_tokens)): token, tag = tagged_tokens[i] - if tag.startswith("NN") or tag.startswith( - "PRP" - ): # Noun or Pronoun + if tag.startswith("NN") or tag.startswith("PRP"): # Noun or Pronoun obj = token break except ValueError: # Verb not found in tokens (shouldn't happen but being safe) - logger.debug( - f"Verb '{verb}' not found in tokens during object extraction" - ) + logger.debug(f"Verb '{verb}' not found in tokens during object extraction") return verb, obj except Exception as e: - logger.error( - f"Error during NLTK POS tagging or verb/object extraction: {e}" - ) + logger.error(f"Error during NLTK POS tagging or verb/object extraction: {e}") return None, None def extract_actions(self, text: str) -> List[Dict[str, Any]]: @@ -156,9 +144,7 @@ def extract_actions(self, text: str) -> List[Dict[str, Any]]: # Try to get a more specific part of the sentence for verb/object extraction # This could be the text following the keyword. potential_action_segment = sentence[match.end() :].strip() - verb, obj = self._extract_verb_object_with_nltk( - potential_action_segment - ) + verb, obj = self._extract_verb_object_with_nltk(potential_action_segment) due_date_match = self.due_date_regex.search(action_phrase) raw_due_date_text = None @@ -189,7 +175,9 @@ def extract_actions(self, text: str) -> List[Dict[str, Any]]: test_text_1 = "Please submit the report by Friday. We also need to review the budget. Can you schedule a meeting?" test_text_2 = "Action: John to complete the slides. Task: Maria to send out invites by tomorrow. Required to update the JIRA ticket." test_text_3 = "No actions here, just a general update." - test_text_4 = "Could you please finalize the presentation by next Monday? Also, will you call the vendor?" + test_text_4 = ( + "Could you please finalize the presentation by next Monday? Also, will you call the vendor?" + ) print("\n--- Test Text 1 ---") actions1 = extractor.extract_actions(test_text_1) diff --git a/server/python_nlp/ai_training.py b/server/python_nlp/ai_training.py index 48c17ca8d..9194f87d3 100644 --- a/server/python_nlp/ai_training.py +++ b/server/python_nlp/ai_training.py @@ -186,9 +186,7 @@ def _load_urgency_indicators(self) -> Dict[str, float]: "hurry": 0.7, } - def extract_features( - self, text: str, include_advanced: bool = True - ) -> Dict[str, Any]: + def extract_features(self, text: str, include_advanced: bool = True) -> Dict[str, Any]: """Extract comprehensive features from text""" text_lower = text.lower() words = re.findall(r"\b\w+\b", text_lower) @@ -202,9 +200,7 @@ def extract_features( # Punctuation features "exclamation_count": text.count("!"), "question_count": text.count("?"), - "capital_ratio": ( - sum(1 for c in text if c.isupper()) / len(text) if text else 0 - ), + "capital_ratio": (sum(1 for c in text if c.isupper()) / len(text) if text else 0), # Sentiment features "sentiment_score": self._calculate_sentiment_score(words), "positive_word_count": sum( @@ -215,25 +211,17 @@ def extract_features( ), # Urgency features "urgency_score": self._calculate_urgency_score(words), - "urgency_word_count": sum( - 1 for word in words if word in self.urgency_indicators - ), + "urgency_word_count": sum(1 for word in words if word in self.urgency_indicators), # Communication patterns - "has_greeting": any( - word in text_lower for word in ["hello", "hi", "hey", "dear"] - ), + "has_greeting": any(word in text_lower for word in ["hello", "hi", "hey", "dear"]), "has_closing": any( - word in text_lower - for word in ["regards", "sincerely", "thanks", "best"] + word in text_lower for word in ["regards", "sincerely", "thanks", "best"] ), "has_request": any( - phrase in text_lower - for phrase in ["please", "could you", "would you", "can you"] + phrase in text_lower for phrase in ["please", "could you", "would you", "can you"] ), "has_question": "?" in text, - "has_apology": any( - word in text_lower for word in ["sorry", "apologize", "apology"] - ), + "has_apology": any(word in text_lower for word in ["sorry", "apologize", "apology"]), } if include_advanced: @@ -257,9 +245,7 @@ def _extract_advanced_features(self, text: str, words: List[str]) -> Dict[str, A # Lexical diversity "unique_word_ratio": len(set(words)) / len(words) if words else 0, "stopword_ratio": ( - sum(1 for word in words if word in self.stopwords) / len(words) - if words - else 0 + sum(1 for word in words if word in self.stopwords) / len(words) if words else 0 ), # N-gram features (top bigrams) "top_bigrams": self._extract_top_ngrams(words, n=2, top_k=5), @@ -432,9 +418,7 @@ def train_naive_bayes( for i, x in enumerate(X_val): class_scores = {} for label in class_counts: - score = math.log( - class_counts[label] / len(y_train) - ) # Prior probability + score = math.log(class_counts[label] / len(y_train)) # Prior probability for feature_idx, value in enumerate(x): mean = feature_means[label][feature_idx] std = feature_stds[label][feature_idx] @@ -478,9 +462,7 @@ def train_naive_bayes( recall=recall, f1_score=f1, confusion_matrix=confusion_matrix, - feature_importance=self._calculate_feature_importance( - feature_names, feature_means - ), + feature_importance=self._calculate_feature_importance(feature_names, feature_means), training_time=training_time, model_size=len(pickle.dumps(model_data)), ) @@ -494,10 +476,7 @@ def train_logistic_regression( # Simplified logistic regression implementation feature_names = list(set().union(*(f.keys() for f in features))) X = np.array( - [ - [feature_dict.get(name, 0) for name in feature_names] - for feature_dict in features - ] + [[feature_dict.get(name, 0) for name in feature_names] for feature_dict in features] ) # Encode labels @@ -538,9 +517,7 @@ def train_logistic_regression( val_probabilities = self._softmax(val_scores) predictions = [unique_labels[np.argmax(prob)] for prob in val_probabilities] - accuracy = sum( - 1 for i, pred in enumerate(predictions) if pred == y_val[i] - ) / len(y_val) + accuracy = sum(1 for i, pred in enumerate(predictions) if pred == y_val[i]) / len(y_val) precision, recall, f1 = self._calculate_metrics(y_val, predictions) confusion_matrix = self._calculate_confusion_matrix(y_val, predictions) @@ -590,11 +567,7 @@ def _calculate_metrics( recalls = [] for label in labels: - tp = sum( - 1 - for i, pred in enumerate(y_pred) - if pred == label and y_true[i] == label - ) + tp = sum(1 for i, pred in enumerate(y_pred) if pred == label and y_true[i] == label) fp = sum(1 for pred in y_pred if pred == label) - tp fn = sum(1 for true in y_true if true == label) - tp @@ -614,9 +587,7 @@ def _calculate_metrics( return avg_precision, avg_recall, f1 - def _calculate_confusion_matrix( - self, y_true: List[str], y_pred: List[str] - ) -> List[List[int]]: + def _calculate_confusion_matrix(self, y_true: List[str], y_pred: List[str]) -> List[List[int]]: """Calculate confusion matrix""" labels = sorted(list(set(y_true + y_pred))) matrix = [[0 for _ in labels] for _ in labels] @@ -639,9 +610,7 @@ def _calculate_feature_importance( for i, name in enumerate(feature_names): # Calculate variance across classes - class_means = [ - class_data.get(i, 0) for class_data in feature_means.values() - ] + class_means = [class_data.get(i, 0) for class_data in feature_means.values()] importance[name] = np.var(class_means) if class_means else 0 return importance @@ -649,9 +618,7 @@ def _calculate_feature_importance( def _generate_model_id(self, config: ModelConfig) -> str: """Generate unique model ID""" config_str = json.dumps(asdict(config), sort_keys=True) - return hashlib.md5( - f"{config_str}_{datetime.now().isoformat()}".encode() - ).hexdigest()[:12] + return hashlib.md5(f"{config_str}_{datetime.now().isoformat()}".encode()).hexdigest()[:12] def save_model(self, model_id: str, filepath: str) -> None: """Save trained model to file""" @@ -888,13 +855,13 @@ def _optimize_for_performance( # Add confidence requirements if "Confidence:" not in optimized: - optimized += "\nConfidence: [Provide confidence score 0-100 based on clarity of indicators]" + optimized += ( + "\nConfidence: [Provide confidence score 0-100 based on clarity of indicators]" + ) # Add reasoning requirement if "Reasoning:" not in optimized and "reasoning" not in optimized.lower(): - optimized += ( - "\nReasoning: [Explain the key factors that led to this classification]" - ) + optimized += "\nReasoning: [Explain the key factors that led to this classification]" return optimized @@ -906,9 +873,7 @@ def _optimize_for_clarity(self, template: PromptTemplate) -> str: optimized = "Please follow these instructions carefully:\n\n" + optimized # Add format requirements - optimized += ( - "\n\nIMPORTANT: Provide your response in the exact format specified above." - ) + optimized += "\n\nIMPORTANT: Provide your response in the exact format specified above." return optimized @@ -946,15 +911,11 @@ def evaluate_prompt_performance( return metrics # Calculate accuracy - correct_responses = sum( - 1 for result in test_results if result.get("correct", False) - ) + correct_responses = sum(1 for result in test_results if result.get("correct", False)) metrics["accuracy"] = correct_responses / len(test_results) # Calculate consistency (how often same input produces same output) - consistency_scores = [ - result.get("consistency_score", 0) for result in test_results - ] + consistency_scores = [result.get("consistency_score", 0) for result in test_results] metrics["consistency"] = np.mean(consistency_scores) # Calculate clarity score (based on response format adherence) diff --git a/server/python_nlp/analysis_components/urgency_model.py b/server/python_nlp/analysis_components/urgency_model.py index d5b453ea4..8743e2e06 100644 --- a/server/python_nlp/analysis_components/urgency_model.py +++ b/server/python_nlp/analysis_components/urgency_model.py @@ -48,9 +48,7 @@ def _analyze_regex(self, text: str) -> Dict[str, Any]: ): urgency_label = "high" confidence = 0.8 - elif re.search( - r"\b(when you can|next week|upcoming|planned|scheduled)\b", text_lower - ): + elif re.search(r"\b(when you can|next week|upcoming|planned|scheduled)\b", text_lower): urgency_label = "medium" confidence = 0.6 else: diff --git a/server/python_nlp/data_strategy.py b/server/python_nlp/data_strategy.py index 78e40ea90..fc5e335f5 100644 --- a/server/python_nlp/data_strategy.py +++ b/server/python_nlp/data_strategy.py @@ -182,9 +182,7 @@ def _load_preprocessing_rules(self) -> Dict[str, Any]: }, } - def collect_email_samples( - self, source: str, limit: Optional[int] = None - ) -> List[EmailSample]: + def collect_email_samples(self, source: str, limit: Optional[int] = None) -> List[EmailSample]: """ Collect email samples from various sources In production, this would connect to real email APIs @@ -266,9 +264,7 @@ def collect_email_samples( break sample = EmailSample( - id=self._generate_sample_id( - email_data["subject"], email_data["sender"] - ), + id=self._generate_sample_id(email_data["subject"], email_data["sender"]), subject=email_data["subject"], content=email_data["content"], sender=email_data["sender"], @@ -369,26 +365,18 @@ def _extract_basic_features(self, content: str, subject: str) -> Dict[str, Any]: ) ), "has_url": bool( - re.search( - self.preprocessing_rules["email_patterns"]["urls"], combined_text - ) - ), - "urgency_keywords": self._count_pattern_matches( - combined_text, "urgency_signals" + re.search(self.preprocessing_rules["email_patterns"]["urls"], combined_text) ), + "urgency_keywords": self._count_pattern_matches(combined_text, "urgency_signals"), "sentiment_keywords": self._count_pattern_matches( combined_text, "sentiment_indicators" ), - "intent_keywords": self._count_pattern_matches( - combined_text, "intent_patterns" - ), + "intent_keywords": self._count_pattern_matches(combined_text, "intent_patterns"), } return features - def _count_pattern_matches( - self, text: str, pattern_category: str - ) -> Dict[str, int]: + def _count_pattern_matches(self, text: str, pattern_category: str) -> Dict[str, int]: """Count matches for specific pattern categories""" counts = {} patterns = self.annotation_guidelines.get(pattern_category, {}) @@ -412,34 +400,24 @@ def annotate_email( Internal predictions serve as a basic fallback for data generation scenarios. """ if external_analysis_results: - self.logger.info( - f"Using external analysis results for email ID: {email.id}" - ) + self.logger.info(f"Using external analysis results for email ID: {email.id}") # Ensure keys from external_analysis_results match AnnotationSchema fields # and provide defaults if some keys are missing. topic = external_analysis_results.get( "topic", - self._predict_topic( - email.content + " " + email.subject, is_fallback=True - ), + self._predict_topic(email.content + " " + email.subject, is_fallback=True), ) sentiment = external_analysis_results.get( "sentiment", - self._predict_sentiment( - email.content + " " + email.subject, is_fallback=True - ), + self._predict_sentiment(email.content + " " + email.subject, is_fallback=True), ) intent = external_analysis_results.get( "intent", - self._predict_intent( - email.content + " " + email.subject, is_fallback=True - ), + self._predict_intent(email.content + " " + email.subject, is_fallback=True), ) urgency = external_analysis_results.get( "urgency", - self._predict_urgency( - email.content + " " + email.subject, is_fallback=True - ), + self._predict_urgency(email.content + " " + email.subject, is_fallback=True), ) # Keywords and entities might also come from external_analysis_results keywords = external_analysis_results.get( @@ -453,9 +431,7 @@ def annotate_email( ) # Higher confidence if from advanced engine annotator_id_suffix = "_external" else: - self.logger.info( - f"Using internal basic prediction for email ID: {email.id}" - ) + self.logger.info(f"Using internal basic prediction for email ID: {email.id}") topic = self._predict_topic(email.content + " " + email.subject) sentiment = self._predict_sentiment(email.content + " " + email.subject) intent = self._predict_intent(email.content + " " + email.subject) @@ -504,9 +480,7 @@ def _predict_sentiment(self, text: str, is_fallback: bool = False) -> str: text_lower = text.lower() sentiment_scores = {} - for sentiment, keywords in self.annotation_guidelines[ - "sentiment_indicators" - ].items(): + for sentiment, keywords in self.annotation_guidelines["sentiment_indicators"].items(): score = sum(1 for keyword in keywords if keyword in text_lower) sentiment_scores[sentiment] = score @@ -572,15 +546,11 @@ def _extract_entities(self, text: str) -> List[str]: entities = [] # Extract email addresses - emails = re.findall( - self.preprocessing_rules["email_patterns"]["email_addresses"], text - ) + emails = re.findall(self.preprocessing_rules["email_patterns"]["email_addresses"], text) entities.extend([f"EMAIL:{email}" for email in emails]) # Extract phone numbers - phones = re.findall( - self.preprocessing_rules["email_patterns"]["phone_numbers"], text - ) + phones = re.findall(self.preprocessing_rules["email_patterns"]["phone_numbers"], text) entities.extend([f"PHONE:{phone}" for phone in phones]) # Extract URLs @@ -699,9 +669,7 @@ def create_training_dataset(self, samples: List[EmailSample]) -> Dict[str, Any]: return dataset - def _calculate_dataset_statistics( - self, samples: List[EmailSample] - ) -> Dict[str, Any]: + def _calculate_dataset_statistics(self, samples: List[EmailSample]) -> Dict[str, Any]: """Calculate statistics for the dataset""" if not samples: return {} diff --git a/server/python_nlp/gmail_integration.py b/server/python_nlp/gmail_integration.py index 4749fc51a..5a4e5231a 100644 --- a/server/python_nlp/gmail_integration.py +++ b/server/python_nlp/gmail_integration.py @@ -34,7 +34,9 @@ # Credentials content will be loaded from GMAIL_CREDENTIALS_JSON environment variable # CREDENTIALS_PATH is now a placeholder for where it *would* be if it were a file. # Users should set GMAIL_CREDENTIALS_JSON instead of creating credentials.json -CREDENTIALS_PATH = "credentials.json" # Placeholder, not directly used if GMAIL_CREDENTIALS_JSON is set. +CREDENTIALS_PATH = ( + "credentials.json" # Placeholder, not directly used if GMAIL_CREDENTIALS_JSON is set. +) GMAIL_CREDENTIALS_ENV_VAR = "GMAIL_CREDENTIALS_JSON" @@ -154,9 +156,7 @@ def _init_cache(self): def get_cached_email(self, message_id: str) -> Optional[Dict[str, Any]]: """Retrieve cached email by message ID""" - cursor = self.conn.execute( - "SELECT * FROM emails WHERE message_id = ?", (message_id,) - ) + cursor = self.conn.execute("SELECT * FROM emails WHERE message_id = ?", (message_id,)) row = cursor.fetchone() if row: @@ -279,13 +279,9 @@ def _load_credentials(self): if os.path.exists(token_path): try: os.remove(token_path) - self.logger.info( - f"Removed invalid token file: {token_path}" - ) + self.logger.info(f"Removed invalid token file: {token_path}") except OSError as oe: - self.logger.error( - f"Error removing token file {token_path}: {oe}" - ) + self.logger.error(f"Error removing token file {token_path}: {oe}") creds = None # Force re-authentication # If creds are still None (not loaded or refresh failed), _authenticate will be called @@ -325,9 +321,7 @@ def _authenticate(self): ) if os.path.exists(CREDENTIALS_PATH): try: - flow = InstalledAppFlow.from_client_secrets_file( - CREDENTIALS_PATH, SCOPES - ) + flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_PATH, SCOPES) except Exception as e: self.logger.error( f"Error loading fallback credentials from {CREDENTIALS_PATH}: {e}" @@ -349,9 +343,7 @@ def _authenticate(self): "Please ensure it's a valid JSON string." ) return - except ( - Exception - ) as e: # Catch other potential errors from from_client_config + except Exception as e: # Catch other potential errors from from_client_config self.logger.error( f"Error loading credentials from {GMAIL_CREDENTIALS_ENV_VAR}: {e}" ) @@ -378,9 +370,7 @@ def _authenticate(self): def set_gmail_service(self, service): """Set the Gmail API service instance (Deprecated if using OAuth within class)""" # This method might be deprecated or changed if OAuth is handled internally - self.logger.warning( - "set_gmail_service is called, but OAuth is now handled internally." - ) + self.logger.warning("set_gmail_service is called, but OAuth is now handled internally.") self.gmail_service = service async def collect_emails_incremental( @@ -397,9 +387,7 @@ async def collect_emails_incremental( max_emails: Maximum number of emails to collect since_date: Only collect emails after this date """ - sync_id = hashlib.md5( - f"{query_filter}_{datetime.now().date()}".encode() - ).hexdigest() + sync_id = hashlib.md5(f"{query_filter}_{datetime.now().date()}".encode()).hexdigest() # Check cache for existing sync state sync_state = self.cache.get_sync_state(query_filter) @@ -440,9 +428,7 @@ async def collect_emails_incremental( break # Process messages in parallel with rate limiting - batch_messages = await self._process_message_batch( - message_list["messages"] - ) + batch_messages = await self._process_message_batch(message_list["messages"]) collected_messages.extend(batch_messages) @@ -460,9 +446,7 @@ async def collect_emails_incremental( page_token = message_list["nextPageToken"] # Log progress - self.logger.info( - f"Collected {len(collected_messages)} emails so far..." - ) + self.logger.info(f"Collected {len(collected_messages)} emails so far...") except Exception as e: self.logger.error(f"Error collecting emails: {e}") @@ -527,9 +511,7 @@ async def _simulate_gmail_response( for i in range(min(max_results, 10)): # Limit simulation to 10 messages message_id = f"msg_{base_time}_{i:03d}" - messages.append( - {"id": message_id, "threadId": f"thread_{base_time}_{i//3:03d}"} - ) + messages.append({"id": message_id, "threadId": f"thread_{base_time}_{i//3:03d}"}) response = {"messages": messages, "resultSizeEstimate": len(messages)} @@ -554,9 +536,7 @@ async def _process_message_batch( # Filter out exceptions and None results valid_messages = [ - result - for result in results - if not isinstance(result, Exception) and result is not None + result for result in results if not isinstance(result, Exception) and result is not None ] return valid_messages @@ -583,34 +563,26 @@ async def _get_message_content(self, message_id: str) -> Optional[Dict[str, Any] if self.gmail_service: try: - self.logger.debug( - f"Attempting to fetch message {message_id} from Gmail API." - ) + self.logger.debug(f"Attempting to fetch message {message_id} from Gmail API.") message = ( self.gmail_service.users() .messages() .get(userId="me", id=message_id, format="full") .execute() ) - self.logger.debug( - f"Successfully fetched message {message_id} from API." - ) + self.logger.debug(f"Successfully fetched message {message_id} from API.") email_data = self._parse_message_payload(message) if email_data: self.cache.cache_email(email_data) - self.logger.debug( - f"Successfully parsed and cached message {message_id}." - ) + self.logger.debug(f"Successfully parsed and cached message {message_id}.") return email_data else: self.logger.warning( f"Could not parse email data for message {message_id}. This message will not be processed further." ) - return ( - None # Parsing failure, do not simulate for this specific case - ) + return None # Parsing failure, do not simulate for this specific case except HttpError as error: self.logger.error( f"API error fetching message {message_id}: {error}. Falling back to simulation." @@ -632,24 +604,17 @@ async def _get_message_content(self, message_id: str) -> Optional[Dict[str, Any] email_data = await self._simulate_email_content(message_id) # Ensure message_id is present in simulated data for caching - if ( - "message_id" not in email_data - ): # _simulate_email_content should guarantee this + if "message_id" not in email_data: # _simulate_email_content should guarantee this email_data["message_id"] = message_id self.cache.cache_email(email_data) self.logger.debug(f"Cached simulated content for message {message_id}.") return email_data - def _parse_message_payload( - self, message: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: + def _parse_message_payload(self, message: Dict[str, Any]) -> Optional[Dict[str, Any]]: """Parses the raw message payload from Gmail API.""" try: - headers = { - h["name"]: h["value"] - for h in message.get("payload", {}).get("headers", []) - } + headers = {h["name"]: h["value"] for h in message.get("payload", {}).get("headers", [])} # Simplified content extraction (plaintext preferred) # Real implementation would need to handle multipart messages, base64 decoding, etc. @@ -659,17 +624,15 @@ def _parse_message_payload( if part["mimeType"] == "text/plain" and "data" in part["body"]: import base64 - content = base64.urlsafe_b64decode(part["body"]["data"]).decode( - "utf-8" - ) + content = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8") break # Could add more part types (e.g., text/html) elif "body" in message["payload"] and "data" in message["payload"]["body"]: import base64 - content = base64.urlsafe_b64decode( - message["payload"]["body"]["data"] - ).decode("utf-8") + content = base64.urlsafe_b64decode(message["payload"]["body"]["data"]).decode( + "utf-8" + ) return { "message_id": message["id"], @@ -684,9 +647,7 @@ def _parse_message_payload( ).isoformat(), } except Exception as e: - self.logger.error( - f"Error parsing message payload for {message.get('id')}: {e}" - ) + self.logger.error(f"Error parsing message payload for {message.get('id')}: {e}") return None def _extract_email_address(self, sender_header: str) -> str: @@ -810,9 +771,7 @@ async def main(): collector = GmailDataCollector() # This will trigger authentication if needed if not collector.gmail_service: - logger.error( - "Gmail service initialization failed. Please check logs for details." - ) + logger.error("Gmail service initialization failed. Please check logs for details.") print( "--------------------------------------------------------------------------------------" ) @@ -824,24 +783,16 @@ async def main(): print( f"1. {GMAIL_CREDENTIALS_ENV_VAR}: Should contain the JSON string of your Google Cloud credentials." ) - print( - f" Example: export {GMAIL_CREDENTIALS_ENV_VAR}='{{ \"installed\": {{ ... }} }}'" - ) + print(f" Example: export {GMAIL_CREDENTIALS_ENV_VAR}='{{ \"installed\": {{ ... }} }}'") print( f" (Alternatively, as a fallback, place a 'credentials.json' file in the script's directory: {os.getcwd()})" ) - print( - f"2. GMAIL_TOKEN_PATH (Optional): Specify a custom path for 'token.json'." - ) - print( - f" Defaults to '{TOKEN_JSON_PATH}' in the script's directory: {os.getcwd()}" - ) + print(f"2. GMAIL_TOKEN_PATH (Optional): Specify a custom path for 'token.json'.") + print(f" Defaults to '{TOKEN_JSON_PATH}' in the script's directory: {os.getcwd()}") print( "If running for the first time, you might need to go through the OAuth2 authentication flow in your browser." ) - print( - "Check the console logs for more detailed error messages from the application." - ) + print("Check the console logs for more detailed error messages from the application.") print( "--------------------------------------------------------------------------------------" ) diff --git a/server/python_nlp/gmail_metadata.py b/server/python_nlp/gmail_metadata.py index fab014cd1..1a1481e19 100644 --- a/server/python_nlp/gmail_metadata.py +++ b/server/python_nlp/gmail_metadata.py @@ -137,9 +137,7 @@ def extract_complete_metadata(self, gmail_message: Dict[str, Any]) -> GmailMessa # Extract labels and determine message state label_ids = gmail_message.get("labelIds", []) - labels = [ - self.system_labels.get(label_id, label_id) for label_id in label_ids - ] + labels = [self.system_labels.get(label_id, label_id) for label_id in label_ids] # Determine message state from labels is_unread = "UNREAD" in label_ids @@ -172,9 +170,7 @@ def extract_complete_metadata(self, gmail_message: Dict[str, Any]) -> GmailMessa thread_info = self._extract_thread_info(headers, gmail_message) # Extract security information - spf_status, dkim_status, dmarc_status = self._extract_security_status( - headers - ) + spf_status, dkim_status, dmarc_status = self._extract_security_status(headers) encryption_info = self._extract_encryption_info(headers) # Extract priority information @@ -592,9 +588,7 @@ def _extract_custom_headers(self, headers: Dict[str, str]) -> Dict[str, str]: custom_headers = {} for name, value in headers.items(): - if name not in standard_headers and ( - name.startswith("X-") or name.startswith("x-") - ): + if name not in standard_headers and (name.startswith("X-") or name.startswith("x-")): custom_headers[name] = value return custom_headers @@ -646,9 +640,7 @@ def _create_minimal_metadata(self, gmail_message: Dict[str, Any]) -> GmailMessag raw_email=None, ) - def extract_batch_metadata( - self, gmail_messages: List[Dict[str, Any]] - ) -> List[GmailMessage]: + def extract_batch_metadata(self, gmail_messages: List[Dict[str, Any]]) -> List[GmailMessage]: """Extract metadata from multiple messages efficiently""" extracted_messages = [] @@ -721,9 +713,7 @@ def main(): {"name": "Message-ID", "value": ""}, {"name": "X-Priority", "value": "High"}, ], - "body": { - "data": base64.urlsafe_b64encode(b"This is the email content.").decode() - }, + "body": {"data": base64.urlsafe_b64encode(b"This is the email content.").decode()}, "mimeType": "text/plain", }, } diff --git a/server/python_nlp/gmail_service.py b/server/python_nlp/gmail_service.py index b7c37c7bd..bf3872a06 100644 --- a/server/python_nlp/gmail_service.py +++ b/server/python_nlp/gmail_service.py @@ -16,19 +16,15 @@ # This assumes AdvancedAIEngine is in a module that can be imported. # If it's in python_backend, the path needs to be correct. # For now, let's assume a placeholder for where AdvancedAIEngine would be imported from. -from server.python_backend.ai_engine import \ - AdvancedAIEngine # Assuming this import works -from server.python_backend.database import \ - DatabaseManager # Assuming this import works - -from .data_strategy import DataCollectionStrategy -from .gmail_integration import EmailBatch, GmailDataCollector, RateLimitConfig -from .gmail_metadata import GmailMessage, GmailMetadataExtractor +from server.python_backend.ai_engine import AdvancedAIEngine # Assuming this import works +from server.python_backend.database import DatabaseManager # Assuming this import works # AI Training and PromptEngineer might not be directly used by GmailAIService after refactoring # if all AI analysis is delegated to AdvancedAIEngine. from .ai_training import ModelConfig - +from .data_strategy import DataCollectionStrategy +from .gmail_integration import EmailBatch, GmailDataCollector, RateLimitConfig +from .gmail_metadata import GmailMessage, GmailMetadataExtractor class GmailAIService: @@ -85,9 +81,7 @@ async def _execute_async_command( Returns a dictionary with 'success': True/False and other command output. """ try: - self.logger.debug( - f"Executing async command: {' '.join(cmd)} in {cwd or '.'}" - ) + self.logger.debug(f"Executing async command: {' '.join(cmd)} in {cwd or '.'}") process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, @@ -101,9 +95,7 @@ async def _execute_async_command( if process.returncode != 0: error_msg = ( - stderr_decoded - or stdout_decoded - or "Unknown error during command execution." + stderr_decoded or stdout_decoded or "Unknown error during command execution." ) self.logger.error( f"Async command failed (return code {process.returncode}): {cmd}. Error: {error_msg}" @@ -117,10 +109,7 @@ async def _execute_async_command( if stdout_decoded: try: parsed_output = json.loads(stdout_decoded) - if ( - isinstance(parsed_output, dict) - and "success" not in parsed_output - ): + if isinstance(parsed_output, dict) and "success" not in parsed_output: # If script provides JSON dict but no 'success' field, assume true as command didn't fail parsed_output["success"] = True elif not isinstance( @@ -142,17 +131,13 @@ async def _execute_async_command( return {"success": True, "output": ""} # Script success, no output except FileNotFoundError as e: - self.logger.error( - f"Async command failed: Executable not found for {cmd}. Error: {e}" - ) + self.logger.error(f"Async command failed: Executable not found for {cmd}. Error: {e}") return { "success": False, "error": f"Executable not found: {cmd[0]}. {str(e)}", } except PermissionError as e: - self.logger.error( - f"Async command failed: Permission denied for {cmd}. Error: {e}" - ) + self.logger.error(f"Async command failed: Permission denied for {cmd}. Error: {e}") return { "success": False, "error": f"Permission denied for {cmd[0]}. {str(e)}", @@ -225,9 +210,7 @@ async def _fetch_emails_from_gmail( self, query_filter: str, max_emails: int ) -> Optional[EmailBatch]: """Helper to fetch emails using GmailDataCollector.""" - self.logger.info( - f"Fetching emails with query: {query_filter}, max: {max_emails}" - ) + self.logger.info(f"Fetching emails with query: {query_filter}, max: {max_emails}") try: email_batch = await self.collector.collect_emails_incremental( query_filter=query_filter, max_emails=max_emails @@ -250,9 +233,7 @@ async def _process_and_analyze_batch( ) for gmail_msg in email_batch.messages: try: - gmail_metadata = self.metadata_extractor.extract_complete_metadata( - gmail_msg - ) + gmail_metadata = self.metadata_extractor.extract_complete_metadata(gmail_msg) # The 'training_sample' here is a bit of a misnomer if it's just for AI analysis input # It's essentially the data structure expected by _perform_ai_analysis or _convert_to_db_format @@ -268,9 +249,7 @@ async def _process_and_analyze_batch( ai_analysis_result = None if include_ai_analysis: - ai_analysis_result = await self._perform_ai_analysis( - email_data_for_analysis - ) + ai_analysis_result = await self._perform_ai_analysis(email_data_for_analysis) db_email = self._convert_to_db_format( gmail_metadata, ai_analysis_result @@ -287,18 +266,14 @@ async def _process_and_analyze_batch( continue return processed_db_emails - async def _perform_ai_analysis( - self, email_data: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: + async def _perform_ai_analysis(self, email_data: Dict[str, Any]) -> Optional[Dict[str, Any]]: """ Perform AI analysis on email data using AdvancedAIEngine. `email_data` should contain 'subject' and 'content'. """ if not self.advanced_ai_engine: self.logger.error("AdvancedAIEngine not available for AI analysis.") - return self._get_basic_fallback_analysis_structure( - "AdvancedAIEngine not configured" - ) + return self._get_basic_fallback_analysis_structure("AdvancedAIEngine not configured") if not self.db_manager: self.logger.warning( @@ -309,9 +284,7 @@ async def _perform_ai_analysis( else: db_for_analysis = self.db_manager - self.logger.debug( - f"Performing AI analysis for email ID: {email_data.get('id', 'unknown')}" - ) + self.logger.debug(f"Performing AI analysis for email ID: {email_data.get('id', 'unknown')}") try: # AdvancedAIEngine is expected to have an `analyze_email` method # that takes subject and content, and returns an object or dict with analysis. @@ -330,9 +303,7 @@ async def _perform_ai_analysis( self.logger.error( f"Unexpected AI analysis result type for email {email_data.get('id', 'unknown')}" ) - return self._get_basic_fallback_analysis_structure( - "Unexpected AI result type" - ) + return self._get_basic_fallback_analysis_structure("Unexpected AI result type") self.stats["ai_analyses_completed"] += 1 self.logger.info( @@ -347,9 +318,7 @@ async def _perform_ai_analysis( ) return self._get_basic_fallback_analysis_structure(str(e)) - def _get_basic_fallback_analysis_structure( - self, error_message: str - ) -> Dict[str, Any]: + def _get_basic_fallback_analysis_structure(self, error_message: str) -> Dict[str, Any]: """Returns a minimal AI analysis structure in case of errors during _perform_ai_analysis.""" return { "error": error_message, @@ -399,14 +368,10 @@ def _convert_to_db_format( } else: # Handle case where AI analysis was skipped or failed - analysis_metadata_payload["ai_analysis"] = ( - self._get_basic_fallback_analysis_structure( - "AI analysis not performed or failed" - ) + analysis_metadata_payload["ai_analysis"] = self._get_basic_fallback_analysis_structure( + "AI analysis not performed or failed" ) - ai_topic = analysis_metadata_payload["ai_analysis"][ - "topic" - ] # Fallback topic + ai_topic = analysis_metadata_payload["ai_analysis"]["topic"] # Fallback topic ai_confidence = analysis_metadata_payload["ai_analysis"][ "confidence" ] # Fallback confidence @@ -507,12 +472,8 @@ async def train_models_from_gmail_data( training_samples = [] for gmail_msg in training_batch.messages: try: - metadata = self.metadata_extractor.extract_complete_metadata( - gmail_msg - ) - training_format = self.metadata_extractor.to_training_format( - metadata - ) + metadata = self.metadata_extractor.extract_complete_metadata(gmail_msg) + training_format = self.metadata_extractor.to_training_format(metadata) sample_dict = { "subject": training_format["subject"], @@ -534,9 +495,7 @@ async def train_models_from_gmail_data( continue if not training_samples: - self.logger.warning( - "No training samples collected, aborting model training." - ) + self.logger.warning("No training samples collected, aborting model training.") return { "success": False, "error": "No training samples collected.", @@ -557,13 +516,9 @@ async def train_models_from_gmail_data( training_data_version="gmail_v1.0", ) - features, labels = self.model_trainer.prepare_training_data( - training_samples, "topic" - ) + features, labels = self.model_trainer.prepare_training_data(training_samples, "topic") if features and labels: - topic_result = self.model_trainer.train_naive_bayes( - features, labels, topic_config - ) + topic_result = self.model_trainer.train_naive_bayes(features, labels, topic_config) training_results["topic_model"] = { "model_id": topic_result.model_id, "accuracy": topic_result.accuracy, @@ -595,9 +550,7 @@ async def train_models_from_gmail_data( "f1_score": sentiment_result.f1_score, } - prompt_templates = ( - self.prompt_engineer.generate_email_classification_prompts() - ) + prompt_templates = self.prompt_engineer.generate_email_classification_prompts() training_results["prompt_templates"] = list(prompt_templates.keys()) return { @@ -618,8 +571,7 @@ def _infer_topic_from_metadata(self, metadata: GmailMessage) -> str: if any(label in ["CATEGORY_PERSONAL"] for label in metadata.label_ids): return "personal_family" elif metadata.mailing_list or any( - word in metadata.subject.lower() - for word in ["newsletter", "promotion", "offer"] + word in metadata.subject.lower() for word in ["newsletter", "promotion", "offer"] ): return "promotions" else: @@ -657,8 +609,7 @@ def _infer_intent_from_metadata(self, metadata: GmailMessage) -> str: if any(word in subject_lower for word in ["?", "question", "help", "how"]): return "question" elif any( - word in subject_lower - for word in ["confirmation", "confirm", "booking", "receipt"] + word in subject_lower for word in ["confirmation", "confirm", "booking", "receipt"] ): return "confirmation" elif any(word in subject_lower for word in ["request", "please", "need"]): @@ -673,8 +624,7 @@ def _infer_urgency_from_metadata(self, metadata: GmailMessage) -> str: subject_lower = metadata.subject.lower() if metadata.is_important or any( - word in subject_lower - for word in ["urgent", "asap", "emergency", "critical"] + word in subject_lower for word in ["urgent", "asap", "emergency", "critical"] ): return "high" elif any(word in subject_lower for word in ["today", "tomorrow", "deadline"]): @@ -721,9 +671,7 @@ async def execute_smart_retrieval( result = await self._execute_async_command(cmd, cwd=self.nlp_path) - if not result.get( - "success" - ): # Check success from _execute_async_command's perspective + if not result.get("success"): # Check success from _execute_async_command's perspective self.logger.error( f"Smart retrieval script execution failed or reported an error. Result: {result}" ) @@ -732,9 +680,7 @@ async def execute_smart_retrieval( "strategiesExecuted": result.get("strategies_executed", []), "totalEmails": result.get("total_emails", 0), "performance": result.get("performance", {}), - "error": result.get( - "error", "Smart retrieval script execution failed." - ), + "error": result.get("error", "Smart retrieval script execution failed."), } # If _execute_async_command was successful, 'result' contains the script's output. @@ -746,15 +692,11 @@ async def execute_smart_retrieval( "strategiesExecuted": result.get("strategies_executed", []), "totalEmails": result.get("total_emails", 0), "performance": result.get("performance", {}), - "error": result.get( - "error" - ), # Pass along any error reported by the script + "error": result.get("error"), # Pass along any error reported by the script "data": result, # Include full script result for more details } except Exception as e: - self.logger.error( - f"Smart retrieval task failed unexpectedly: {e}", exc_info=True - ) + self.logger.error(f"Smart retrieval task failed unexpectedly: {e}", exc_info=True) return { "success": False, "strategiesExecuted": [], @@ -810,9 +752,7 @@ async def get_performance_metrics(self) -> Optional[Dict[str, Any]]: "activeStrategies": result.get("active_strategies", 0), }, "quotaStatus": { - "dailyUsage": { - "percentage": result.get("quota_used_percent", 0) - } + "dailyUsage": {"percentage": result.get("quota_used_percent", 0)} }, "alerts": result.get("alerts", []), "recommendations": result.get("recommendations", []), @@ -826,9 +766,7 @@ async def get_performance_metrics(self) -> Optional[Dict[str, Any]]: self.logger.error(error_msg) return None except Exception as e: - self.logger.error( - f"Failed to get performance metrics unexpectedly: {e}", exc_info=True - ) + self.logger.error(f"Failed to get performance metrics unexpectedly: {e}", exc_info=True) return None diff --git a/server/python_nlp/nlp_engine.py b/server/python_nlp/nlp_engine.py index aff8d3629..54e23e8b0 100644 --- a/server/python_nlp/nlp_engine.py +++ b/server/python_nlp/nlp_engine.py @@ -15,8 +15,9 @@ from datetime import datetime from typing import Any, Dict, List, Optional -from server.python_nlp.action_item_extractor import \ - ActionItemExtractor # Import ActionItemExtractor +from server.python_nlp.action_item_extractor import ( # Import ActionItemExtractor + ActionItemExtractor, +) from server.python_nlp.text_utils import clean_text from .analysis_components.intent_model import IntentModel @@ -143,9 +144,7 @@ def _load_model(self, model_path: str) -> Optional[Any]: logger.info(f"Successfully loaded model from {model_path}") return model - logger.warning( - f"Model file not found at {model_path}. This model will be unavailable." - ) + logger.warning(f"Model file not found at {model_path}. This model will be unavailable.") return None except Exception as e: logger.error(f"Error loading model from {model_path}: {e}") @@ -397,9 +396,7 @@ def _extract_keywords(self, text: str) -> List[str]: # Extract important single words (not in stopwords and longer than 3 chars) words = blob.words important_words = [ - word - for word in words - if len(word) > 3 and word.lower() not in self.stop_words + word for word in words if len(word) > 3 and word.lower() not in self.stop_words ] # Add top 10 important words @@ -549,21 +546,16 @@ def get_method_suffix(analysis_result: Optional[Dict[str, Any]]) -> str: # Add topic reasoning if significant if topic and topic.get("topic") != "General": - parts.append( - f"Identified topic: {topic['topic']}" f"{get_method_suffix(topic)}" - ) + parts.append(f"Identified topic: {topic['topic']}" f"{get_method_suffix(topic)}") # Add intent reasoning if significant if intent and intent.get("intent") != "informational": - parts.append( - f"Detected intent: {intent['intent']}" f"{get_method_suffix(intent)}" - ) + parts.append(f"Detected intent: {intent['intent']}" f"{get_method_suffix(intent)}") # Add urgency reasoning if significant if urgency and urgency.get("urgency") != "low": parts.append( - f"Assessed urgency level: {urgency['urgency']}" - f"{get_method_suffix(urgency)}" + f"Assessed urgency level: {urgency['urgency']}" f"{get_method_suffix(urgency)}" ) # Return default message if no significant insights @@ -592,16 +584,12 @@ def _detect_risk_factors(self, text: str) -> List[str]: r"\b(free|winner|congratulations|claim|prize|lottery)\b", r"\b(click here|act now|limited time|exclusive offer)\b", ] - spam_score = sum( - len(re.findall(pattern, text_lower)) for pattern in spam_patterns - ) + spam_score = sum(len(re.findall(pattern, text_lower)) for pattern in spam_patterns) if spam_score > 2: risk_flags.append("potential_spam") # Suspicious patterns - suspicious_patterns = [ - r"\b(confidential|private|secret|password|ssn|social security)\b" - ] + suspicious_patterns = [r"\b(confidential|private|secret|password|ssn|social security)\b"] suspicious_score = sum( len(re.findall(pattern, text_lower)) for pattern in suspicious_patterns ) @@ -613,9 +601,7 @@ def _detect_risk_factors(self, text: str) -> List[str]: def _validate_analysis(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]: """Validate the analysis results""" # Simple validation based on confidence levels - confidence = self._calculate_confidence( - [analysis_results[key] for key in analysis_results] - ) + confidence = self._calculate_confidence([analysis_results[key] for key in analysis_results]) is_reliable = confidence > 0.7 if is_reliable: @@ -652,9 +638,7 @@ def _get_fallback_analysis(self, error_msg: str) -> Dict[str, Any]: "action_items": [], # Include empty list for action items } - def _get_simple_fallback_analysis( - self, subject: str, content: str - ) -> Dict[str, Any]: + def _get_simple_fallback_analysis(self, subject: str, content: str) -> Dict[str, Any]: """Simple fallback analysis when NLTK is not available""" text = f"{subject} {content}".lower() @@ -724,7 +708,7 @@ def _get_simple_fallback_analysis( "sentiment": sentiment, "intent": "informational", "urgency": urgency, - "confidence": confidence_value, # Use calculated confidence + "confidence": confidence_value, # Use calculated confidence "categories": categories, "keywords": [], "reasoning": "Basic analysis using keyword matching (NLTK not available)", @@ -746,9 +730,7 @@ def _analyze_action_items(self, text: str) -> List[Dict[str, Any]]: logger.info("Analyzing for action items...") try: actions = self.action_item_extractor.extract_actions(text) - logger.info( - f"Action item analysis completed. Found {len(actions)} potential actions." - ) + logger.info(f"Action item analysis completed. Found {len(actions)} potential actions.") return actions except Exception as e: logger.error(f"Error during action item analysis: {e}", exc_info=True) @@ -804,15 +786,11 @@ def analyze_email(self, subject: str, content: str) -> Dict[str, Any]: # This method is regex-based, no model to load for it currently per its implementation logger.info("Detecting risk factors...") risk_analysis_flags = self._detect_risk_factors(cleaned_text) - logger.info( - f"Risk factor detection completed. Flags: {risk_analysis_flags}" - ) + logger.info(f"Risk factor detection completed. Flags: {risk_analysis_flags}") # Extract keywords and entities logger.info("Extracting keywords...") - keywords = self._extract_keywords( - cleaned_text - ) # Uses TextBlob if available + keywords = self._extract_keywords(cleaned_text) # Uses TextBlob if available logger.info(f"Keyword extraction completed. Keywords: {keywords}") logger.info("Categorizing content...") @@ -885,28 +863,18 @@ def _build_final_analysis_response( "intent": intent_analysis, "urgency": urgency_analysis, } - validation_input_filtered = { - k: v for k, v in validation_input.items() if v is not None - } + validation_input_filtered = {k: v for k, v in validation_input.items() if v is not None} validation = self._validate_analysis(validation_input_filtered) # Determine default values for primary analysis fields - final_topic = ( - topic_analysis.get("topic", "General") if topic_analysis else "General" - ) + final_topic = topic_analysis.get("topic", "General") if topic_analysis else "General" final_sentiment = ( - sentiment_analysis.get("sentiment", "neutral") - if sentiment_analysis - else "neutral" + sentiment_analysis.get("sentiment", "neutral") if sentiment_analysis else "neutral" ) final_intent = ( - intent_analysis.get("intent", "informational") - if intent_analysis - else "informational" - ) - final_urgency = ( - urgency_analysis.get("urgency", "low") if urgency_analysis else "low" + intent_analysis.get("intent", "informational") if intent_analysis else "informational" ) + final_urgency = urgency_analysis.get("urgency", "low") if urgency_analysis else "low" suggested_labels = self._suggest_labels(categories, final_urgency) @@ -944,14 +912,10 @@ def main(): parser = argparse.ArgumentParser( description="Enhanced NLP Engine for Gmail AI Email Management" ) - parser.add_argument( - "--analyze-email", action="store_true", help="Perform email analysis." - ) + parser.add_argument("--analyze-email", action="store_true", help="Perform email analysis.") parser.add_argument("--subject", type=str, default="", help="Subject of the email.") parser.add_argument("--content", type=str, default="", help="Content of the email.") - parser.add_argument( - "--health-check", action="store_true", help="Perform a health check." - ) + parser.add_argument("--health-check", action="store_true", help="Perform a health check.") parser.add_argument( "--output-format", type=str, @@ -968,9 +932,7 @@ def main(): sys.exit(0) if args.analyze_email: - _perform_email_analysis_cli( - engine, args.subject, args.content, args.output_format - ) + _perform_email_analysis_cli(engine, args.subject, args.content, args.output_format) sys.exit(0) # Backward compatibility / Default behavior @@ -1037,9 +999,7 @@ def _perform_health_check(engine: NLPEngine, output_format: str): print(json.dumps(health_status, indent=2)) -def _perform_email_analysis_cli( - engine: NLPEngine, subject: str, content: str, output_format: str -): +def _perform_email_analysis_cli(engine: NLPEngine, subject: str, content: str, output_format: str): """Performs email analysis based on CLI arguments and prints the result.""" if not subject and not content: # If called with --analyze-email but no subject/content, could be an error or expect empty analysis @@ -1087,13 +1047,9 @@ def _handle_backward_compatible_cli_invocation( subject_old = argv[1] content_old = argv[2] if len(argv) > 2 else "" - logger.info( - "Processing with backward compatibility mode (positional arguments)." - ) + logger.info("Processing with backward compatibility mode (positional arguments).") # Use the already defined _perform_email_analysis_cli for consistency in output - _perform_email_analysis_cli( - engine, subject_old, content_old, args.output_format - ) + _perform_email_analysis_cli(engine, subject_old, content_old, args.output_format) return True return False diff --git a/server/python_nlp/retrieval_monitor.py b/server/python_nlp/retrieval_monitor.py index 0093b3349..d24376fd1 100644 --- a/server/python_nlp/retrieval_monitor.py +++ b/server/python_nlp/retrieval_monitor.py @@ -9,6 +9,7 @@ import statistics from collections import defaultdict, deque from dataclasses import asdict, dataclass + # Added imports: from datetime import datetime # Ensure datetime is directly available from datetime import timedelta @@ -83,9 +84,7 @@ async def _log_metrics_to_file(self): metrics_buffer_copy = self.metrics_buffer.copy() # Shallow copy of dict for strategy_name, metrics_deque in metrics_buffer_copy.items(): - current_deque_copy = list( - metrics_deque - ) # Copy of the deque for this strategy + current_deque_copy = list(metrics_deque) # Copy of the deque for this strategy if not current_deque_copy: continue @@ -95,9 +94,7 @@ async def _log_metrics_to_file(self): # strategy_name is already in RetrievalMetrics dataclass, but good to have consistently log_entry["strategy_name_key"] = strategy_name log_entry["timestamp_logged"] = datetime.now().isoformat() - f.write( - json.dumps(log_entry, default=json_default_converter) + "\n" - ) + f.write(json.dumps(log_entry, default=json_default_converter) + "\n") # Clear the original deque for this strategy after its contents are written self.metrics_buffer[strategy_name].clear() @@ -113,9 +110,7 @@ async def _log_metrics_to_file(self): f"IOError writing retrieval metrics to {self.RETRIEVAL_LOG_FILE}: {e}" ) except Exception as e: - self.logger.error( - f"Unexpected error logging retrieval metrics to file: {e}" - ) + self.logger.error(f"Unexpected error logging retrieval metrics to file: {e}") async def _periodic_logger_task(self): """Periodically logs metrics to a file.""" @@ -129,9 +124,7 @@ async def _periodic_logger_task(self): break except Exception as e: self.logger.error(f"Error in periodic retrieval logger task: {e}") - await asyncio.sleep( - self.LOG_INTERVAL_SECONDS / 2 - ) # Shorter sleep on error + await asyncio.sleep(self.LOG_INTERVAL_SECONDS / 2) # Shorter sleep on error def record_retrieval_metrics(self, metrics: RetrievalMetrics): """Record real-time retrieval metrics""" @@ -199,9 +192,9 @@ def _check_performance_alerts(self, metrics: RetrievalMetrics): # Error rate check (calculated over recent window) recent_metrics = list(self.metrics_buffer[metrics.strategy_name]) if len(recent_metrics) >= 5: - error_rate = sum( - 1 for m in recent_metrics[-10:] if m.error_count > 0 - ) / min(10, len(recent_metrics)) + error_rate = sum(1 for m in recent_metrics[-10:] if m.error_count > 0) / min( + 10, len(recent_metrics) + ) if error_rate > self.alert_thresholds.max_error_rate: alerts.append( { @@ -349,9 +342,7 @@ def _get_strategy_performance_summary(self) -> List[Dict[str, Any]]: "total_errors": total_errors, "performance_score": round(overall_score, 1), "last_execution": ( - recent_metrics[-1].timestamp.isoformat() - if recent_metrics - else None + recent_metrics[-1].timestamp.isoformat() if recent_metrics else None ), "trend": self._calculate_performance_trend(strategy_name), } @@ -428,22 +419,16 @@ def _get_performance_trends_summary(self) -> Dict[str, Any]: return { "last_hour": { "avg_efficiency": ( - statistics.mean([t["efficiency"] for t in last_hour]) - if last_hour - else 0 + statistics.mean([t["efficiency"] for t in last_hour]) if last_hour else 0 ), "avg_latency": ( - statistics.mean([t["latency"] for t in last_hour]) - if last_hour - else 0 + statistics.mean([t["latency"] for t in last_hour]) if last_hour else 0 ), "total_errors": sum([t["error_count"] for t in last_hour]), }, "last_24_hours": { "avg_efficiency": ( - statistics.mean([t["efficiency"] for t in last_day]) - if last_day - else 0 + statistics.mean([t["efficiency"] for t in last_day]) if last_day else 0 ), "avg_latency": ( statistics.mean([t["latency"] for t in last_day]) if last_day else 0 @@ -545,9 +530,7 @@ def _generate_optimization_recommendations(self) -> List[Dict[str, Any]]: return sorted( recommendations, - key=lambda x: {"critical": 3, "high": 2, "medium": 1, "low": 0}[ - x["priority"] - ], + key=lambda x: {"critical": 3, "high": 2, "medium": 1, "low": 0}[x["priority"]], reverse=True, ) diff --git a/server/python_nlp/smart_filters.py b/server/python_nlp/smart_filters.py index 582e2624e..f9c679c8d 100644 --- a/server/python_nlp/smart_filters.py +++ b/server/python_nlp/smart_filters.py @@ -321,9 +321,7 @@ def _load_filter_templates(self) -> Dict[str, Dict[str, Any]]: }, "personal_communications": { "criteria": { - "from_patterns": [ - r".*@(gmail\.com|yahoo\.com|hotmail\.com|outlook\.com)" - ], + "from_patterns": [r".*@(gmail\.com|yahoo\.com|hotmail\.com|outlook\.com)"], "exclude_patterns": [ r".*noreply.*", r".*automated.*", @@ -397,9 +395,7 @@ def _load_pruning_criteria(self) -> Dict[str, Any]: "redundancy_threshold": 0.8, # Similarity threshold for redundant filters } - def create_intelligent_filters( - self, email_samples: List[Dict[str, Any]] - ) -> List[EmailFilter]: + def create_intelligent_filters(self, email_samples: List[Dict[str, Any]]) -> List[EmailFilter]: """Create intelligent filters based on email patterns""" created_filters = [] @@ -419,23 +415,17 @@ def create_intelligent_filters( self.logger.info(f"Created {len(created_filters)} intelligent filters") return created_filters - def _create_filters_from_templates( - self, patterns: Dict[str, Any] - ) -> List[EmailFilter]: + def _create_filters_from_templates(self, patterns: Dict[str, Any]) -> List[EmailFilter]: """Creates filters based on predefined templates and analyzed patterns.""" template_filters = [] for template_name, template_data in self.filter_templates.items(): if self._should_create_filter(template_data, patterns): - filter_obj = self._create_filter_from_template( - template_name, template_data - ) + filter_obj = self._create_filter_from_template(template_name, template_data) self._save_filter(filter_obj) # Save the filter template_filters.append(filter_obj) return template_filters - def _extract_patterns_from_single_email( - self, email: Dict[str, Any] - ) -> Dict[str, Any]: + def _extract_patterns_from_single_email(self, email: Dict[str, Any]) -> Dict[str, Any]: """Extracts various pattern elements from a single email.""" email_patterns = {} @@ -451,17 +441,13 @@ def _extract_patterns_from_single_email( if content_words: email_patterns["content_keywords"] = content_words # Store list of words - category = email.get("category") or email.get("ai_analysis", {}).get( - "topic", "unknown" - ) + category = email.get("category") or email.get("ai_analysis", {}).get("topic", "unknown") email_patterns["category"] = category # Store single category if email.get("isImportant") or email.get("isStarred"): # Combine subject and content words for importance, ensure no duplicates if that's desired # For now, simple concatenation is fine as Counter will handle frequency. - email_patterns["importance_keywords"] = list( - set(subject_words + content_words) - ) + email_patterns["importance_keywords"] = list(set(subject_words + content_words)) # For automation indicators, we want to associate them with the email's characteristics if self._is_automated_email(email): @@ -473,9 +459,7 @@ def _extract_patterns_from_single_email( return email_patterns - def _analyze_email_patterns( - self, email_samples: List[Dict[str, Any]] - ) -> Dict[str, Any]: + def _analyze_email_patterns(self, email_samples: List[Dict[str, Any]]) -> Dict[str, Any]: """Analyze email samples to discover patterns by aggregating findings from each email.""" # Initialize aggregated patterns structure aggregated_patterns = { @@ -599,13 +583,10 @@ def _is_automated_email(self, email: Dict[str, Any]) -> bool: subject = email.get("subject", "").lower() return any( - indicator in sender or indicator in subject - for indicator in automated_indicators + indicator in sender or indicator in subject for indicator in automated_indicators ) - def _should_create_filter( - self, template: Dict[str, Any], patterns: Dict[str, Any] - ) -> bool: + def _should_create_filter(self, template: Dict[str, Any], patterns: Dict[str, Any]) -> bool: """Determine if a filter should be created based on patterns""" criteria = template["criteria"] @@ -616,9 +597,7 @@ def _should_create_filter( if "from_patterns" in criteria: for pattern in criteria["from_patterns"]: matching_domains = [ - domain - for domain in patterns["sender_domains"] - if re.match(pattern, domain) + domain for domain in patterns["sender_domains"] if re.match(pattern, domain) ] if matching_domains: relevance_score += 1 @@ -639,9 +618,7 @@ def _create_filter_from_template( self, template_name: str, template: Dict[str, Any] ) -> EmailFilter: """Create filter object from template""" - filter_id = ( - f"template_{template_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" - ) + filter_id = f"template_{template_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" return EmailFilter( filter_id=filter_id, @@ -670,9 +647,7 @@ def _create_custom_filters(self, patterns: Dict[str, Any]) -> List[EmailFilter]: self._save_filter(filter_obj) # Create filters for frequent keyword combinations - keyword_combinations = self._find_keyword_combinations( - patterns["subject_keywords"] - ) + keyword_combinations = self._find_keyword_combinations(patterns["subject_keywords"]) for combo, score in keyword_combinations: if score >= 3: filter_obj = self._create_keyword_filter(combo, score) @@ -727,14 +702,10 @@ def _create_keyword_filter(self, keywords: List[str], score: float) -> EmailFilt performance_metrics={"keyword_score": score}, ) - def _find_keyword_combinations( - self, keyword_counter: Counter - ) -> List[tuple[List[str], float]]: + def _find_keyword_combinations(self, keyword_counter: Counter) -> List[tuple[List[str], float]]: """Find meaningful keyword combinations""" combinations = [] - frequent_keywords = [ - word for word, count in keyword_counter.most_common(50) if count >= 3 - ] + frequent_keywords = [word for word, count in keyword_counter.most_common(50) if count >= 3] # Generate 2-word combinations for i, word1 in enumerate(frequent_keywords): @@ -793,9 +764,7 @@ def prune_ineffective_filters(self) -> Dict[str, Any]: pruning_results["total_analyzed"] = len(all_filters) active_filters_map = { - f.filter_id: f - for f in all_filters - if self._is_filter_active_in_db(f.filter_id) + f.filter_id: f for f in all_filters if self._is_filter_active_in_db(f.filter_id) } # Check DB for active status actions_to_take = defaultdict(list) @@ -818,9 +787,7 @@ def prune_ineffective_filters(self) -> Dict[str, Any]: if filter_obj.filter_id in active_filters_map: actions_to_take["disable"].append(filter_obj) else: # If already inactive, it might be kept or pruned based on other rules (e.g. age) - filters_to_keep_for_redundancy_check.append( - filter_obj - ) # Or it's just ignored + filters_to_keep_for_redundancy_check.append(filter_obj) # Or it's just ignored elif evaluation_action == "optimize": # Only optimize if it was active if filter_obj.filter_id in active_filters_map: @@ -884,15 +851,11 @@ def prune_ineffective_filters(self) -> Dict[str, Any]: final_filters_for_redundancy_check = [] for f_obj in filters_to_keep_for_redundancy_check: if f_obj.filter_id in active_filters_map: # If it was kept and active - final_filters_for_redundancy_check.append( - active_filters_map[f_obj.filter_id] - ) + final_filters_for_redundancy_check.append(active_filters_map[f_obj.filter_id]) # Remove redundant filters from the set of currently active and kept filters # This should operate on filters that are currently considered active after the above pruning/disabling. - self._prune_redundant_filters( - final_filters_for_redundancy_check, pruning_results - ) + self._prune_redundant_filters(final_filters_for_redundancy_check, pruning_results) self.logger.info( f"Pruning completed: {len(pruning_results['pruned_filters'])} pruned, " @@ -990,8 +953,7 @@ def _evaluate_filter_for_pruning(self, filter_obj: EmailFilter) -> str: ): # Check if effectiveness is not too low for optimization if ( - filter_obj.effectiveness_score - > criteria["effectiveness_threshold"] * 0.7 + filter_obj.effectiveness_score > criteria["effectiveness_threshold"] * 0.7 ): # Avoid optimizing hopeless filters return "optimize" else: # Low effectiveness and poor perf might mean prune or disable @@ -1016,37 +978,27 @@ def _optimize_filter(self, filter_obj: EmailFilter) -> EmailFilter: # If high false positive rate, make criteria more restrictive if filter_obj.false_positive_rate > 0.1: - optimized_criteria = self._make_criteria_more_restrictive( - optimized_criteria - ) + optimized_criteria = self._make_criteria_more_restrictive(optimized_criteria) # If low recall, make criteria more inclusive - avg_recall = sum(p.recall for p in performance_history) / len( - performance_history - ) + avg_recall = sum(p.recall for p in performance_history) / len(performance_history) if avg_recall < 0.6: optimized_criteria = self._make_criteria_more_inclusive(optimized_criteria) filter_obj.criteria = optimized_criteria return filter_obj - def _make_criteria_more_restrictive( - self, criteria: Dict[str, Any] - ) -> Dict[str, Any]: + def _make_criteria_more_restrictive(self, criteria: Dict[str, Any]) -> Dict[str, Any]: """Make filter criteria more restrictive to reduce false positives""" # Add more specific keywords or patterns if "subject_keywords" in criteria: # Require multiple keyword matches criteria["keyword_operator"] = "AND" - criteria["min_keyword_matches"] = max( - 2, len(criteria["subject_keywords"]) // 2 - ) + criteria["min_keyword_matches"] = max(2, len(criteria["subject_keywords"]) // 2) if "from_patterns" in criteria: # Make email patterns more specific - criteria["from_patterns"] = [ - pattern + "$" for pattern in criteria["from_patterns"] - ] + criteria["from_patterns"] = [pattern + "$" for pattern in criteria["from_patterns"]] return criteria @@ -1059,9 +1011,7 @@ def _make_criteria_more_inclusive(self, criteria: Dict[str, Any]) -> Dict[str, A return criteria - def _find_redundant_filters( - self, filters: List[EmailFilter] - ) -> List[tuple[str, str]]: + def _find_redundant_filters(self, filters: List[EmailFilter]) -> List[tuple[str, str]]: """Find pairs of redundant filters""" redundant_pairs = [] @@ -1073,9 +1023,7 @@ def _find_redundant_filters( return redundant_pairs - def _calculate_filter_similarity( - self, filter1: EmailFilter, filter2: EmailFilter - ) -> float: + def _calculate_filter_similarity(self, filter1: EmailFilter, filter2: EmailFilter) -> float: """Calculate similarity between two filters""" # Compare criteria criteria_similarity = self._compare_criteria(filter1.criteria, filter2.criteria) @@ -1086,9 +1034,7 @@ def _calculate_filter_similarity( # Overall similarity return (criteria_similarity + action_similarity) / 2 - def _compare_criteria( - self, criteria1: Dict[str, Any], criteria2: Dict[str, Any] - ) -> float: + def _compare_criteria(self, criteria1: Dict[str, Any], criteria2: Dict[str, Any]) -> float: """Compare filter criteria similarity""" common_keys = set(criteria1.keys()) & set(criteria2.keys()) if not common_keys: @@ -1111,15 +1057,9 @@ def _compare_criteria( similarity_scores.append(similarity) - return ( - sum(similarity_scores) / len(similarity_scores) - if similarity_scores - else 0.0 - ) + return sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0.0 - def _compare_actions( - self, actions1: Dict[str, Any], actions2: Dict[str, Any] - ) -> float: + def _compare_actions(self, actions1: Dict[str, Any], actions2: Dict[str, Any]) -> float: """Compare filter actions similarity""" common_keys = set(actions1.keys()) & set(actions2.keys()) if not common_keys: @@ -1210,19 +1150,11 @@ def _calculate_performance_metrics_from_results( if (true_positives + false_negatives) > 0 else 0.0 ) - accuracy = ( - (true_positives + true_negatives) / total_emails - if total_emails > 0 - else 0.0 - ) + accuracy = (true_positives + true_negatives) / total_emails if total_emails > 0 else 0.0 f1_score = ( - 2 * (precision * recall) / (precision + recall) - if (precision + recall) > 0 - else 0.0 - ) - false_positive_rate = ( - false_positives / total_emails if total_emails > 0 else 0.0 + 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0 ) + false_positive_rate = false_positives / total_emails if total_emails > 0 else 0.0 return { "true_positives": true_positives, @@ -1236,15 +1168,11 @@ def _calculate_performance_metrics_from_results( "false_positive_rate": false_positive_rate, } - def _check_sender_patterns( - self, criteria_block: Dict[str, Any], email: Dict[str, Any] - ) -> bool: + def _check_sender_patterns(self, criteria_block: Dict[str, Any], email: Dict[str, Any]) -> bool: """Check sender patterns against email.""" sender = email.get("senderEmail", "") # Uses re.search for regex matching, re.IGNORECASE for case-insensitivity. - return any( - re.search(pattern, sender, re.IGNORECASE) for pattern in criteria_block - ) + return any(re.search(pattern, sender, re.IGNORECASE) for pattern in criteria_block) def _check_subject_keywords( self, @@ -1255,9 +1183,7 @@ def _check_subject_keywords( ) -> bool: """Check subject keywords against email subject.""" subject = email.get("subject", "").lower() - keyword_matches = sum( - 1 for keyword in criteria_block if keyword.lower() in subject - ) + keyword_matches = sum(1 for keyword in criteria_block if keyword.lower() in subject) if operator == "AND": return keyword_matches == len(criteria_block) # All keywords must match @@ -1265,31 +1191,22 @@ def _check_subject_keywords( return keyword_matches >= min_matches return False # Should not happen if operator is valid - def _check_content_keywords( - self, criteria_block: List[str], email: Dict[str, Any] - ) -> bool: + def _check_content_keywords(self, criteria_block: List[str], email: Dict[str, Any]) -> bool: """Check content keywords against email content.""" content = email.get("content", "").lower() # Assumes OR logic for content keywords: any keyword match is sufficient. return any(keyword.lower() in content for keyword in criteria_block) - def _check_exclusion_patterns( - self, criteria_block: List[str], email: Dict[str, Any] - ) -> bool: + def _check_exclusion_patterns(self, criteria_block: List[str], email: Dict[str, Any]) -> bool: """Check exclusion patterns (regex) against email subject and content. Returns True if no exclusions match.""" text_to_check = f"{email.get('subject', '')} {email.get('content', '')}" # No .lower() here, regex handles case if needed via IGNORECASE # If any exclusion pattern matches, the check fails (returns False). # Assuming criteria_block contains regex patterns. - if any( - re.search(pattern, text_to_check, re.IGNORECASE) - for pattern in criteria_block - ): + if any(re.search(pattern, text_to_check, re.IGNORECASE) for pattern in criteria_block): return False # Exclusion found (pattern matched) return True # No exclusions matched - def _apply_filter_to_email( - self, filter_obj: EmailFilter, email: Dict[str, Any] - ) -> bool: + def _apply_filter_to_email(self, filter_obj: EmailFilter, email: Dict[str, Any]) -> bool: """Apply filter to email and return match result by evaluating various criteria checks.""" criteria = filter_obj.criteria @@ -1411,9 +1328,7 @@ def _update_filter(self, filter_obj: EmailFilter): def _delete_filter(self, filter_id: str): """Permanently delete filter and its performance metrics from database.""" self._db_execute("DELETE FROM email_filters WHERE filter_id = ?", (filter_id,)) - self._db_execute( - "DELETE FROM filter_performance WHERE filter_id = ?", (filter_id,) - ) + self._db_execute("DELETE FROM filter_performance WHERE filter_id = ?", (filter_id,)) def _disable_filter(self, filter_id: str): """Disable filter without deleting""" @@ -1429,9 +1344,7 @@ def _save_filter_performance(self, performance: FilterPerformance): VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """ # Added microseconds for more unique ID - performance_id = ( - f"{performance.filter_id}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}" - ) + performance_id = f"{performance.filter_id}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}" params = ( performance_id, performance.filter_id, @@ -1490,9 +1403,7 @@ def _get_filter_performance_history( FilterPerformance( filter_id=row["filter_id"], accuracy=row["accuracy"], - precision=row[ - "precision_score" - ], # Note: column name precision_score in DB + precision=row["precision_score"], # Note: column name precision_score in DB recall=row["recall_score"], # Note: column name recall_score in DB f1_score=row["f1_score"], processing_time_ms=row["processing_time_ms"], @@ -1573,16 +1484,12 @@ def _execute_filter_actions( # For category overrides, BSFM used specific action names # We can map them or use a generic "set_category" in NSFM's action definition # Example: action "set_category_finance" from BSFM - if ( - actions.get("action_name") == "set_category_finance" - ): # Hypothetical action name in NSFM + if actions.get("action_name") == "set_category_finance": # Hypothetical action name in NSFM email_data["category_name_override"] = "Finance & Banking" actions_taken_summary.append("Categorized as Finance & Banking") elif actions.get("category_override"): # More generic way email_data["category_name_override"] = actions["category_override"] - actions_taken_summary.append( - f"Categorized as {actions['category_override']}" - ) + actions_taken_summary.append(f"Categorized as {actions['category_override']}") # This part needs to be adapted based on how NSFM actions are structured. # The above are examples based on BSFM's direct manipulations. @@ -1614,9 +1521,7 @@ def apply_filters_to_email_data(self, email_data: Dict[str, Any]) -> Dict[str, A self._update_filter(filter_obj) # Save updated stats to DB # Execute actions defined in the filter - action_descriptions = self._execute_filter_actions( - email_data, filter_obj.actions - ) + action_descriptions = self._execute_filter_actions(email_data, filter_obj.actions) email_actions_summary["filters_matched"].append(filter_obj.name) email_actions_summary["actions_taken"].extend(action_descriptions) @@ -1665,9 +1570,7 @@ def main(): # Evaluate filter performance for filter_obj in filters[:1]: # Test first filter - performance = manager.evaluate_filter_performance( - filter_obj.filter_id, sample_emails - ) + performance = manager.evaluate_filter_performance(filter_obj.filter_id, sample_emails) print( f"Filter {filter_obj.name}: Accuracy={performance.accuracy:.2f}, F1={performance.f1_score:.2f}" ) diff --git a/server/python_nlp/smart_retrieval.py b/server/python_nlp/smart_retrieval.py index c727a7557..5f1a2ef50 100644 --- a/server/python_nlp/smart_retrieval.py +++ b/server/python_nlp/smart_retrieval.py @@ -89,9 +89,7 @@ def __init__(self, checkpoint_db_path: str = "sync_checkpoints.db"): self.gmail_service = None # Ensure service is None if build fails self.logger.error("Gmail service initialization failed.") else: - self.logger.error( - "Failed to obtain valid credentials. Gmail service not initialized." - ) + self.logger.error("Failed to obtain valid credentials. Gmail service not initialized.") # Gmail API quotas and limits self.api_limits = { @@ -158,9 +156,7 @@ def _load_credentials(self) -> Credentials | None: creds = Credentials.from_authorized_user_file(TOKEN_JSON_PATH, SCOPES) self.logger.info(f"Loaded credentials from {TOKEN_JSON_PATH}") except Exception as e: - self.logger.error( - f"Error loading credentials from {TOKEN_JSON_PATH}: {e}" - ) + self.logger.error(f"Error loading credentials from {TOKEN_JSON_PATH}: {e}") return creds def _store_credentials(self, creds: Credentials): @@ -196,14 +192,10 @@ def _authenticate(self) -> Credentials | None: if not flow and os.path.exists(CREDENTIALS_PATH): try: - flow = InstalledAppFlow.from_client_secrets_file( - CREDENTIALS_PATH, SCOPES - ) + flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_PATH, SCOPES) self.logger.info(f"Loaded credentials from {CREDENTIALS_PATH}.") except Exception as e: - self.logger.error( - f"Error loading credentials from {CREDENTIALS_PATH}: {e}" - ) + self.logger.error(f"Error loading credentials from {CREDENTIALS_PATH}: {e}") return None # Cannot proceed without client secrets if not flow: @@ -522,18 +514,14 @@ async def execute_smart_retrieval( query = self.get_incremental_query(strategy, checkpoint) # Execute retrieval for this strategy - self.logger.debug( - f"Calling _execute_strategy_retrieval for '{strategy.name}'" - ) + self.logger.debug(f"Calling _execute_strategy_retrieval for '{strategy.name}'") strategy_result = await self._execute_strategy_retrieval( strategy, query, checkpoint, remaining_api_calls=max_api_calls - api_calls_used, ) - self.logger.debug( - f"Strategy '{strategy.name}' result: {strategy_result}" - ) + self.logger.debug(f"Strategy '{strategy.name}' result: {strategy_result}") # Update results results["strategies_executed"].append( @@ -564,8 +552,7 @@ async def execute_smart_retrieval( last_sync_date=datetime.now(), last_history_id=strategy_result.get("last_history_id", ""), processed_count=( - checkpoint.processed_count - + strategy_result["emails_count"] + checkpoint.processed_count + strategy_result["emails_count"] if checkpoint else strategy_result["emails_count"] ), @@ -591,9 +578,7 @@ async def execute_smart_retrieval( else 0 ), "api_efficiency": ( - results["total_emails_retrieved"] / api_calls_used - if api_calls_used > 0 - else 0 + results["total_emails_retrieved"] / api_calls_used if api_calls_used > 0 else 0 ), } @@ -673,9 +658,7 @@ async def _execute_strategy_retrieval( # Count actual API calls: 1 for list + N for individual message gets messages_in_batch = batch_result.get("messages", []) - batch_api_calls = 1 + len( - messages_in_batch - ) # 1 list call + N get calls + batch_api_calls = 1 + len(messages_in_batch) # 1 list call + N get calls api_calls_for_strategy += batch_api_calls if batch_result.get("error"): @@ -687,9 +670,7 @@ async def _execute_strategy_retrieval( "emails_count": len(emails_retrieved_for_strategy), "api_calls": api_calls_for_strategy, "error": batch_result["error"], - "last_history_id": ( - checkpoint.last_history_id if checkpoint else None - ), + "last_history_id": (checkpoint.last_history_id if checkpoint else None), "next_page_token": current_page_token, } @@ -741,9 +722,7 @@ async def _execute_strategy_retrieval( else 0 ), "api_calls": ( - api_calls_for_strategy - if "api_calls_for_strategy" in locals() - else 0 + api_calls_for_strategy if "api_calls_for_strategy" in locals() else 0 ), "error": str(e), } @@ -801,17 +780,14 @@ async def _fetch_email_batch( "threadId": msg_detail["threadId"], "snippet": msg_detail.get("snippet", ""), "payload": { - "headers": msg_detail.get("payload", {}).get( - "headers", [] - ) + "headers": msg_detail.get("payload", {}).get("headers", []) }, } fetched_messages.append(transformed_message) if msg_detail.get("historyId"): current_msg_hist_id = str(msg_detail.get("historyId")) - if ( - history_id_from_list is None - or current_msg_hist_id > str(history_id_from_list) + if history_id_from_list is None or current_msg_hist_id > str( + history_id_from_list ): history_id_from_list = current_msg_hist_id self.logger.debug( @@ -856,9 +832,7 @@ async def _fetch_email_batch( return { "messages": fetched_messages, "nextPageToken": next_page_token_from_list, - "resultSizeEstimate": list_response.get( - "resultSizeEstimate", len(fetched_messages) - ), + "resultSizeEstimate": list_response.get("resultSizeEstimate", len(fetched_messages)), "historyId": history_id_from_list, } @@ -889,9 +863,7 @@ async def _simulate_gmail_response( }, { "name": "Date", - "value": datetime.now().strftime( - "%a, %d %b %Y %H:%M:%S %z" - ), + "value": datetime.now().strftime("%a, %d %b %Y %H:%M:%S %z"), }, ] }, @@ -904,9 +876,7 @@ async def _simulate_gmail_response( "historyId": f"history_simulated_{datetime.now().timestamp()}", } - if ( - batch_size >= 10 and not page_token - ): # Simplified simulation of next page token + if batch_size >= 10 and not page_token: # Simplified simulation of next page token response["nextPageToken"] = f"token_simulated_{datetime.now().timestamp()}" self.logger.debug(f"Simulated response: {response}") @@ -984,9 +954,7 @@ def _store_daily_stats(self, results: Dict[str, Any]): today, results["total_emails_retrieved"], results["api_calls_used"], - json.dumps( - [s["strategy_name"] for s in results["strategies_executed"]] - ), + json.dumps([s["strategy_name"] for s in results["strategies_executed"]]), json.dumps(results["performance_metrics"]), ), ) @@ -1061,9 +1029,7 @@ def get_retrieval_analytics(self, days: int = 30) -> Dict[str, Any]: "average_daily_retrieval": ( total_retrieved / len(daily_stats) if daily_stats else 0 ), - "api_efficiency": ( - total_retrieved / total_api_calls if total_api_calls > 0 else 0 - ), + "api_efficiency": (total_retrieved / total_api_calls if total_api_calls > 0 else 0), "days_analyzed": len(daily_stats), }, "daily_stats": daily_stats, @@ -1072,9 +1038,7 @@ def get_retrieval_analytics(self, days: int = 30) -> Dict[str, Any]: def optimize_strategies_based_on_performance(self) -> List[RetrievalStrategy]: """Optimize retrieval strategies based on historical performance""" - self.logger.info( - "Attempting to optimize strategies based on performance (last 7 days)." - ) + self.logger.info("Attempting to optimize strategies based on performance (last 7 days).") analytics = self.get_retrieval_analytics(days=7) # Get current strategies @@ -1092,9 +1056,7 @@ def optimize_strategies_based_on_performance(self) -> List[RetrievalStrategy]: perf = strategy_performance_map.get(strategy.name) if perf: - self.logger.debug( - f"Performance data for strategy '{strategy.name}': {perf}" - ) + self.logger.debug(f"Performance data for strategy '{strategy.name}': {perf}") error_rate = perf.get("error_rate", 0) avg_per_sync = perf.get("avg_per_sync", 0) @@ -1174,9 +1136,7 @@ async def run_example_usage(): async def main_cli(): """Command-line interface for Smart Gmail Retriever""" parser = argparse.ArgumentParser(description="Smart Gmail Retriever CLI") - subparsers = parser.add_subparsers( - dest="command", required=True, help="Available commands" - ) + subparsers = parser.add_subparsers(dest="command", required=True, help="Available commands") # list-strategies subparser list_parser = subparsers.add_parser( @@ -1261,9 +1221,7 @@ async def main_cli(): selected_strategies = None if args.strategy_names: all_strategies = retriever.get_optimized_retrieval_strategies() - selected_strategies = [ - s for s in all_strategies if s.name in args.strategy_names - ] + selected_strategies = [s for s in all_strategies if s.name in args.strategy_names] if not selected_strategies: print( json.dumps( diff --git a/server/python_nlp/tests/analysis_components/test_intent_model.py b/server/python_nlp/tests/analysis_components/test_intent_model.py index f67467656..0bf9cc0a8 100644 --- a/server/python_nlp/tests/analysis_components/test_intent_model.py +++ b/server/python_nlp/tests/analysis_components/test_intent_model.py @@ -6,51 +6,57 @@ logging.disable(logging.CRITICAL) + class TestIntentModel(unittest.TestCase): def test_analyze_model_success(self): mock_sklearn_model = MagicMock() - mock_sklearn_model.predict.return_value = ['request'] - mock_sklearn_model.predict_proba.return_value = [[0.1, 0.75, 0.15]] # Example probabilities + mock_sklearn_model.predict.return_value = ["request"] + mock_sklearn_model.predict_proba.return_value = [[0.1, 0.75, 0.15]] # Example probabilities analyzer = IntentModel(intent_model=mock_sklearn_model) result = analyzer.analyze("Could you please send the report?") mock_sklearn_model.predict.assert_called_once_with(["Could you please send the report?"]) - mock_sklearn_model.predict_proba.assert_called_once_with(["Could you please send the report?"]) - self.assertEqual(result['intent'], 'request') - self.assertEqual(result['confidence'], 0.75) - self.assertEqual(result['method_used'], 'model_intent') + mock_sklearn_model.predict_proba.assert_called_once_with( + ["Could you please send the report?"] + ) + self.assertEqual(result["intent"], "request") + self.assertEqual(result["confidence"], 0.75) + self.assertEqual(result["method_used"], "model_intent") def test_analyze_model_fails_fallback_to_regex(self): mock_sklearn_model = MagicMock() mock_sklearn_model.predict.side_effect = Exception("Model error") analyzer = IntentModel(intent_model=mock_sklearn_model) - result = analyzer.analyze("I have a question about the schedule.") # Regex keywords for inquiry/scheduling + result = analyzer.analyze( + "I have a question about the schedule." + ) # Regex keywords for inquiry/scheduling # Regex fallback might pick 'question' (inquiry) or 'schedule' (scheduling) - self.assertIn(result['intent'], ['inquiry', 'scheduling']) - self.assertTrue(0.0 < result['confidence'] < 1.0) - self.assertEqual(result['method_used'], 'fallback_regex_intent') + self.assertIn(result["intent"], ["inquiry", "scheduling"]) + self.assertTrue(0.0 < result["confidence"] < 1.0) + self.assertEqual(result["method_used"], "fallback_regex_intent") def test_analyze_no_model_fallback_to_regex(self): - analyzer = IntentModel(intent_model=None) # No model - result = analyzer.analyze("Thank you for your help.") # Regex keyword for gratitude + analyzer = IntentModel(intent_model=None) # No model + result = analyzer.analyze("Thank you for your help.") # Regex keyword for gratitude - self.assertEqual(result['intent'], 'gratitude') - self.assertEqual(result['method_used'], 'fallback_regex_intent') + self.assertEqual(result["intent"], "gratitude") + self.assertEqual(result["method_used"], "fallback_regex_intent") def test_regex_analysis_specific_intent(self): analyzer = IntentModel(intent_model=None) result = analyzer._analyze_regex("This is a complaint about the service.") - self.assertEqual(result['intent'], 'complaint') + self.assertEqual(result["intent"], "complaint") def test_regex_analysis_informational_if_no_match(self): analyzer = IntentModel(intent_model=None) result = analyzer._analyze_regex("The sky is blue today.") - self.assertEqual(result['intent'], 'informational') - self.assertEqual(result['confidence'], 0.6) # Default for informational regex + self.assertEqual(result["intent"], "informational") + self.assertEqual(result["confidence"], 0.6) # Default for informational regex + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/server/python_nlp/tests/analysis_components/test_sentiment_model.py b/server/python_nlp/tests/analysis_components/test_sentiment_model.py index 43acc9fbb..085e0fc30 100644 --- a/server/python_nlp/tests/analysis_components/test_sentiment_model.py +++ b/server/python_nlp/tests/analysis_components/test_sentiment_model.py @@ -5,28 +5,28 @@ # Ensure imports work from the test directory context. # This might mean setting PYTHONPATH or using relative imports carefully. # Assuming tests are run from a level where 'server' is a package. -from server.python_nlp.analysis_components.sentiment_model import \ - SentimentModel +from server.python_nlp.analysis_components.sentiment_model import SentimentModel # Suppress logging for tests if not needed logging.disable(logging.CRITICAL) + class TestSentimentModel(unittest.TestCase): def test_analyze_model_success(self): mock_sklearn_model = MagicMock() - mock_sklearn_model.predict.return_value = ['positive'] - mock_sklearn_model.predict_proba.return_value = [[0.1, 0.9]] # neutral, positive + mock_sklearn_model.predict.return_value = ["positive"] + mock_sklearn_model.predict_proba.return_value = [[0.1, 0.9]] # neutral, positive analyzer = SentimentModel(sentiment_model=mock_sklearn_model, has_nltk_installed=True) result = analyzer.analyze("This is a great test!") mock_sklearn_model.predict.assert_called_once_with(["This is a great test!"]) mock_sklearn_model.predict_proba.assert_called_once_with(["This is a great test!"]) - self.assertEqual(result['sentiment'], 'positive') - self.assertEqual(result['confidence'], 0.9) - self.assertEqual(result['method_used'], 'model_sentiment') - self.assertAlmostEqual(result['polarity'], 0.9) # For positive, polarity is confidence + self.assertEqual(result["sentiment"], "positive") + self.assertEqual(result["confidence"], 0.9) + self.assertEqual(result["method_used"], "model_sentiment") + self.assertAlmostEqual(result["polarity"], 0.9) # For positive, polarity is confidence def test_analyze_model_fails_fallback_to_textblob(self): mock_sklearn_model = MagicMock() @@ -37,63 +37,75 @@ def test_analyze_model_fails_fallback_to_textblob(self): mock_textblob_instance.sentiment.polarity = 0.8 mock_textblob_instance.sentiment.subjectivity = 0.5 - with patch('server.python_nlp.analysis_components.sentiment_model.TextBlob', return_value=mock_textblob_instance) as mock_textblob_class: + with patch( + "server.python_nlp.analysis_components.sentiment_model.TextBlob", + return_value=mock_textblob_instance, + ) as mock_textblob_class: analyzer = SentimentModel(sentiment_model=mock_sklearn_model, has_nltk_installed=True) result = analyzer.analyze("This is a great test!") mock_textblob_class.assert_called_once_with("This is a great test!") - self.assertEqual(result['sentiment'], 'positive') - self.assertAlmostEqual(result['polarity'], 0.8) - self.assertEqual(result['method_used'], 'fallback_textblob_sentiment') + self.assertEqual(result["sentiment"], "positive") + self.assertAlmostEqual(result["polarity"], 0.8) + self.assertEqual(result["method_used"], "fallback_textblob_sentiment") def test_analyze_model_and_textblob_fail_fallback_to_keyword(self): mock_sklearn_model = MagicMock() mock_sklearn_model.predict.side_effect = Exception("Model error") - with patch('server.python_nlp.analysis_components.sentiment_model.TextBlob', side_effect=Exception("TextBlob error")): + with patch( + "server.python_nlp.analysis_components.sentiment_model.TextBlob", + side_effect=Exception("TextBlob error"), + ): analyzer = SentimentModel(sentiment_model=mock_sklearn_model, has_nltk_installed=True) - result = analyzer.analyze("This is a good test!") # "good" is a positive keyword + result = analyzer.analyze("This is a good test!") # "good" is a positive keyword - self.assertEqual(result['sentiment'], 'positive') - self.assertEqual(result['confidence'], 0.6) # Default for keyword - self.assertEqual(result['method_used'], 'fallback_keyword_sentiment') + self.assertEqual(result["sentiment"], "positive") + self.assertEqual(result["confidence"], 0.6) # Default for keyword + self.assertEqual(result["method_used"], "fallback_keyword_sentiment") def test_analyze_no_model_fallback_to_textblob(self): mock_textblob_instance = MagicMock() mock_textblob_instance.sentiment.polarity = -0.7 mock_textblob_instance.sentiment.subjectivity = 0.4 - with patch('server.python_nlp.analysis_components.sentiment_model.TextBlob', return_value=mock_textblob_instance) as mock_textblob_class: - analyzer = SentimentModel(sentiment_model=None, has_nltk_installed=True) # No model + with patch( + "server.python_nlp.analysis_components.sentiment_model.TextBlob", + return_value=mock_textblob_instance, + ) as mock_textblob_class: + analyzer = SentimentModel(sentiment_model=None, has_nltk_installed=True) # No model result = analyzer.analyze("This is a bad test.") mock_textblob_class.assert_called_once_with("This is a bad test.") - self.assertEqual(result['sentiment'], 'negative') - self.assertAlmostEqual(result['polarity'], -0.7) - self.assertEqual(result['method_used'], 'fallback_textblob_sentiment') + self.assertEqual(result["sentiment"], "negative") + self.assertAlmostEqual(result["polarity"], -0.7) + self.assertEqual(result["method_used"], "fallback_textblob_sentiment") def test_analyze_no_model_nltk_disabled_fallback_to_keyword(self): - analyzer = SentimentModel(sentiment_model=None, has_nltk_installed=False) # NLTK (for TextBlob) disabled + analyzer = SentimentModel( + sentiment_model=None, has_nltk_installed=False + ) # NLTK (for TextBlob) disabled result = analyzer.analyze("This is a neutral test.") - self.assertEqual(result['sentiment'], 'neutral') - self.assertEqual(result['confidence'], 0.5) - self.assertEqual(result['method_used'], 'fallback_keyword_sentiment') + self.assertEqual(result["sentiment"], "neutral") + self.assertEqual(result["confidence"], 0.5) + self.assertEqual(result["method_used"], "fallback_keyword_sentiment") def test_keyword_analysis_positive(self): analyzer = SentimentModel(sentiment_model=None, has_nltk_installed=False) result = analyzer._analyze_keyword("This is good and happy.") - self.assertEqual(result['sentiment'], 'positive') + self.assertEqual(result["sentiment"], "positive") def test_keyword_analysis_negative(self): analyzer = SentimentModel(sentiment_model=None, has_nltk_installed=False) result = analyzer._analyze_keyword("This is bad and sad.") - self.assertEqual(result['sentiment'], 'negative') + self.assertEqual(result["sentiment"], "negative") def test_keyword_analysis_neutral(self): analyzer = SentimentModel(sentiment_model=None, has_nltk_installed=False) result = analyzer._analyze_keyword("This is a test.") - self.assertEqual(result['sentiment'], 'neutral') + self.assertEqual(result["sentiment"], "neutral") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/server/python_nlp/tests/analysis_components/test_topic_model.py b/server/python_nlp/tests/analysis_components/test_topic_model.py index 32f50573b..bc8f75d09 100644 --- a/server/python_nlp/tests/analysis_components/test_topic_model.py +++ b/server/python_nlp/tests/analysis_components/test_topic_model.py @@ -6,51 +6,57 @@ logging.disable(logging.CRITICAL) + class TestTopicModel(unittest.TestCase): def test_analyze_model_success(self): mock_sklearn_model = MagicMock() - mock_sklearn_model.predict.return_value = ['Work & Business'] - mock_sklearn_model.predict_proba.return_value = [[0.1, 0.8, 0.1]] # Example probabilities + mock_sklearn_model.predict.return_value = ["Work & Business"] + mock_sklearn_model.predict_proba.return_value = [[0.1, 0.8, 0.1]] # Example probabilities analyzer = TopicModel(topic_model=mock_sklearn_model) result = analyzer.analyze("Discussing project deadlines.") mock_sklearn_model.predict.assert_called_once_with(["Discussing project deadlines."]) mock_sklearn_model.predict_proba.assert_called_once_with(["Discussing project deadlines."]) - self.assertEqual(result['topic'], 'Work & Business') - self.assertEqual(result['confidence'], 0.8) - self.assertEqual(result['method_used'], 'model_topic') + self.assertEqual(result["topic"], "Work & Business") + self.assertEqual(result["confidence"], 0.8) + self.assertEqual(result["method_used"], "model_topic") def test_analyze_model_fails_fallback_to_keyword(self): mock_sklearn_model = MagicMock() mock_sklearn_model.predict.side_effect = Exception("Model error") analyzer = TopicModel(topic_model=mock_sklearn_model) - result = analyzer.analyze("Let's talk about the invoice and payment.") # Keywords for Finance + result = analyzer.analyze( + "Let's talk about the invoice and payment." + ) # Keywords for Finance - self.assertEqual(result['topic'], 'Finance & Banking') # Based on keyword fallback - self.assertTrue(0.0 < result['confidence'] < 1.0) # Confidence from keyword matching - self.assertEqual(result['method_used'], 'fallback_keyword_topic') + self.assertEqual(result["topic"], "Finance & Banking") # Based on keyword fallback + self.assertTrue(0.0 < result["confidence"] < 1.0) # Confidence from keyword matching + self.assertEqual(result["method_used"], "fallback_keyword_topic") def test_analyze_no_model_fallback_to_keyword(self): - analyzer = TopicModel(topic_model=None) # No model - result = analyzer.analyze("My family vacation was great.") # Keywords for Personal/Travel + analyzer = TopicModel(topic_model=None) # No model + result = analyzer.analyze("My family vacation was great.") # Keywords for Personal/Travel # Keyword logic might pick 'Personal & Family' or 'Travel & Leisure' - self.assertIn(result['topic'], ['Personal & Family', 'Travel & Leisure']) - self.assertEqual(result['method_used'], 'fallback_keyword_topic') + self.assertIn(result["topic"], ["Personal & Family", "Travel & Leisure"]) + self.assertEqual(result["method_used"], "fallback_keyword_topic") def test_keyword_analysis_specific_topic(self): analyzer = TopicModel(topic_model=None) - result = analyzer._analyze_keyword("This email is about a project meeting and presentation.") - self.assertEqual(result['topic'], 'Work & Business') + result = analyzer._analyze_keyword( + "This email is about a project meeting and presentation." + ) + self.assertEqual(result["topic"], "Work & Business") def test_keyword_analysis_general_if_no_keywords(self): analyzer = TopicModel(topic_model=None) result = analyzer._analyze_keyword("This is a simple statement.") - self.assertEqual(result['topic'], 'General') - self.assertEqual(result['confidence'], 0.5) # Default for general + self.assertEqual(result["topic"], "General") + self.assertEqual(result["confidence"], 0.5) # Default for general + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/server/python_nlp/tests/analysis_components/test_urgency_model.py b/server/python_nlp/tests/analysis_components/test_urgency_model.py index ccb4cd076..e75a14370 100644 --- a/server/python_nlp/tests/analysis_components/test_urgency_model.py +++ b/server/python_nlp/tests/analysis_components/test_urgency_model.py @@ -6,11 +6,12 @@ logging.disable(logging.CRITICAL) + class TestUrgencyModel(unittest.TestCase): def test_analyze_model_success(self): mock_sklearn_model = MagicMock() - mock_sklearn_model.predict.return_value = ['high'] + mock_sklearn_model.predict.return_value = ["high"] # Probabilities for [critical, high, low, medium] or whatever order model uses mock_sklearn_model.predict_proba.return_value = [[0.1, 0.8, 0.05, 0.05]] @@ -19,48 +20,49 @@ def test_analyze_model_success(self): mock_sklearn_model.predict.assert_called_once_with(["This needs to be done soon."]) mock_sklearn_model.predict_proba.assert_called_once_with(["This needs to be done soon."]) - self.assertEqual(result['urgency'], 'high') - self.assertEqual(result['confidence'], 0.8) - self.assertEqual(result['method_used'], 'model_urgency') + self.assertEqual(result["urgency"], "high") + self.assertEqual(result["confidence"], 0.8) + self.assertEqual(result["method_used"], "model_urgency") def test_analyze_model_fails_fallback_to_regex(self): mock_sklearn_model = MagicMock() mock_sklearn_model.predict.side_effect = Exception("Model error") analyzer = UrgencyModel(urgency_model=mock_sklearn_model) - result = analyzer.analyze("This is an emergency!") # Regex keyword for critical + result = analyzer.analyze("This is an emergency!") # Regex keyword for critical - self.assertEqual(result['urgency'], 'critical') - self.assertEqual(result['confidence'], 0.9) # Confidence from regex matching - self.assertEqual(result['method_used'], 'fallback_regex_urgency') + self.assertEqual(result["urgency"], "critical") + self.assertEqual(result["confidence"], 0.9) # Confidence from regex matching + self.assertEqual(result["method_used"], "fallback_regex_urgency") def test_analyze_no_model_fallback_to_regex(self): - analyzer = UrgencyModel(urgency_model=None) # No model - result = analyzer.analyze("Please review this when you can.") # Should be low by regex + analyzer = UrgencyModel(urgency_model=None) # No model + result = analyzer.analyze("Please review this when you can.") # Should be low by regex - self.assertEqual(result['urgency'], 'low') - self.assertEqual(result['confidence'], 0.5) # Default for low via regex - self.assertEqual(result['method_used'], 'fallback_regex_urgency') + self.assertEqual(result["urgency"], "low") + self.assertEqual(result["confidence"], 0.5) # Default for low via regex + self.assertEqual(result["method_used"], "fallback_regex_urgency") def test_regex_analysis_critical(self): analyzer = UrgencyModel(urgency_model=None) result = analyzer._analyze_regex("URGENT action required immediately for system failure") - self.assertEqual(result['urgency'], 'critical') + self.assertEqual(result["urgency"], "critical") def test_regex_analysis_high(self): analyzer = UrgencyModel(urgency_model=None) result = analyzer._analyze_regex("This is important and has a deadline approaching.") - self.assertEqual(result['urgency'], 'high') + self.assertEqual(result["urgency"], "high") def test_regex_analysis_medium(self): analyzer = UrgencyModel(urgency_model=None) result = analyzer._analyze_regex("Let's discuss this next week.") - self.assertEqual(result['urgency'], 'medium') + self.assertEqual(result["urgency"], "medium") def test_regex_analysis_low(self): analyzer = UrgencyModel(urgency_model=None) result = analyzer._analyze_regex("FYI only.") - self.assertEqual(result['urgency'], 'low') + self.assertEqual(result["urgency"], "low") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/setup_linting.py b/setup_linting.py index da7fcb8b2..708cec651 100644 --- a/setup_linting.py +++ b/setup_linting.py @@ -15,9 +15,7 @@ def install_packages(): print("Installing code quality packages...") try: - subprocess.check_call( - [sys.executable, "-m", "pip", "install", "--upgrade"] + packages - ) + subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade"] + packages) print("Successfully installed packages.") except subprocess.CalledProcessError as e: print(f"Error installing packages: {e}") @@ -94,10 +92,7 @@ def create_config_files(): existing_content = f.read() # Check if black and isort configs already exist - if ( - "[tool.black]" not in existing_content - and "[tool.isort]" not in existing_content - ): + if "[tool.black]" not in existing_content and "[tool.isort]" not in existing_content: with open("pyproject.toml", "a") as f: f.write("\n\n" + pyproject_toml) else: diff --git a/tests/conftest.py b/tests/conftest.py index abf2478b8..adb835f9f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,8 +3,7 @@ import pytest from server.python_backend.database import get_db # The actual dependency -from server.python_backend.main import \ - app # Assuming 'app' is your FastAPI instance +from server.python_backend.main import app # Assuming 'app' is your FastAPI instance @pytest.fixture(scope="session", autouse=True) diff --git a/tests/test_action_item_extractor.py b/tests/test_action_item_extractor.py index 4bcb72781..ee2468200 100644 --- a/tests/test_action_item_extractor.py +++ b/tests/test_action_item_extractor.py @@ -1,8 +1,7 @@ import unittest from unittest.mock import patch -from server.python_nlp.action_item_extractor import (HAS_NLTK, - ActionItemExtractor) +from server.python_nlp.action_item_extractor import HAS_NLTK, ActionItemExtractor class TestActionItemExtractor(unittest.TestCase): @@ -15,13 +14,9 @@ def test_extract_actions_clear_phrase_with_due_date(self): actions = self.extractor.extract_actions(text) self.assertEqual(len(actions), 1) action = actions[0] - self.assertEqual( - action["action_phrase"], "Please review the attached document by Friday." - ) + self.assertEqual(action["action_phrase"], "Please review the attached document by Friday.") self.assertEqual(action["raw_due_date_text"], "by Friday") - self.assertEqual( - action["context"], "Please review the attached document by Friday." - ) + self.assertEqual(action["context"], "Please review the attached document by Friday.") if HAS_NLTK: self.assertIsNotNone(action["verb"]) # NLTK should find 'review' # self.assertEqual(action['object'], "document") # Object extraction can be tricky @@ -31,13 +26,9 @@ def test_extract_actions_keyword_task(self): actions = self.extractor.extract_actions(text) self.assertEqual(len(actions), 1) action = actions[0] - self.assertTrue( - action["action_phrase"].startswith("Task: John to complete the slides") - ) + self.assertTrue(action["action_phrase"].startswith("Task: John to complete the slides")) self.assertEqual(action["raw_due_date_text"], "by tomorrow") - self.assertEqual( - action["context"], "Task: John to complete the slides by tomorrow." - ) + self.assertEqual(action["context"], "Task: John to complete the slides by tomorrow.") if HAS_NLTK: self.assertIsNotNone(action["verb"]) # NLTK might pick up 'complete' @@ -46,9 +37,7 @@ def test_extract_actions_keyword_action_required(self): actions = self.extractor.extract_actions(text) self.assertEqual(len(actions), 1) action = actions[0] - self.assertEqual( - action["action_phrase"], "Action required: Update the JIRA ticket." - ) + self.assertEqual(action["action_phrase"], "Action required: Update the JIRA ticket.") self.assertIsNone(action["raw_due_date_text"]) self.assertEqual(action["context"], "Action required: Update the JIRA ticket.") if HAS_NLTK: @@ -80,9 +69,7 @@ def test_extract_actions_multiple_action_items(self): if HAS_NLTK: self.assertEqual(actions[0]["verb"], "call") - self.assertEqual( - actions[1]["action_phrase"], "can you send the invoice by EOD?" - ) + self.assertEqual(actions[1]["action_phrase"], "can you send the invoice by EOD?") self.assertEqual(actions[1]["raw_due_date_text"], "by EOD") if HAS_NLTK: self.assertEqual(actions[1]["verb"], "send") @@ -100,9 +87,7 @@ def test_extract_actions_simple_due_date_tomorrow(self): self.assertEqual(len(actions_with_keyword), 1) self.assertEqual(actions_with_keyword[0]["raw_due_date_text"], "by tomorrow") self.assertTrue( - actions_with_keyword[0]["action_phrase"].startswith( - "should Submit the expenses" - ) + actions_with_keyword[0]["action_phrase"].startswith("should Submit the expenses") ) def test_extract_actions_due_date_on_monday(self): @@ -141,25 +126,19 @@ def test_input_with_only_whitespace(self): @patch("server.python_nlp.action_item_extractor.HAS_NLTK", False) def test_extract_actions_without_nltk(self): # This test will run as if NLTK is not installed - extractor_no_nltk = ( - ActionItemExtractor() - ) # Re-initialize to pick up the patched HAS_NLTK + extractor_no_nltk = ActionItemExtractor() # Re-initialize to pick up the patched HAS_NLTK text = "Please review the document." actions = extractor_no_nltk.extract_actions(text) self.assertEqual(len(actions), 1) action = actions[0] self.assertEqual(action["action_phrase"], "Please review the document.") - self.assertIsNone( - action["verb"] - ) # Verb should be None as NLTK is mocked to False + self.assertIsNone(action["verb"]) # Verb should be None as NLTK is mocked to False self.assertIsNone(action["object"]) # Object should be None @patch("server.python_nlp.action_item_extractor.HAS_NLTK", True) @patch("nltk.pos_tag") @patch("nltk.word_tokenize") - def test_extract_actions_with_nltk_mocked_behavior( - self, mock_word_tokenize, mock_pos_tag - ): + def test_extract_actions_with_nltk_mocked_behavior(self, mock_word_tokenize, mock_pos_tag): # This test runs with NLTK assumed present, but mocks its functions mock_word_tokenize.return_value = ["Please", "review", "the", "document", "."] mock_pos_tag.return_value = [ @@ -182,9 +161,7 @@ def test_extract_actions_with_nltk_mocked_behavior( self.assertEqual( action["verb"], "Please" ) # Because "Please" is the first VB as per mock_pos_tag - self.assertEqual( - action["object"], "document" - ) # "document" is the first NN after "Please" + self.assertEqual(action["object"], "document") # "document" is the first NN after "Please" mock_word_tokenize.assert_called_once() # Check if it was called on the relevant part mock_pos_tag.assert_called_once() diff --git a/tests/test_api_actions.py b/tests/test_api_actions.py index 7696702d5..8827fb2ec 100644 --- a/tests/test_api_actions.py +++ b/tests/test_api_actions.py @@ -3,10 +3,8 @@ from fastapi.testclient import TestClient -from server.python_backend.ai_engine import \ - AIAnalysisResult # To help mock the return -from server.python_backend.main import \ - app # Assuming your FastAPI app instance is named 'app' +from server.python_backend.ai_engine import AIAnalysisResult # To help mock the return +from server.python_backend.main import app # Assuming your FastAPI app instance is named 'app' # If your Pydantic models are in main.py, they would be imported via `app` or directly if structured so. # For this test, we might not need to import them if we're just checking response structure. @@ -60,9 +58,7 @@ def test_extract_actions_from_text_success(self, mock_analyze_email): "content": "A test sentence. Please review the document by tomorrow. Thank you. Also, submit the report. And another thing.", } - response = self.client.post( - "/api/actions/extract-from-text", json=request_payload - ) + response = self.client.post("/api/actions/extract-from-text", json=request_payload) self.assertEqual(response.status_code, 200) response_data = response.json() @@ -109,9 +105,7 @@ def test_extract_actions_no_actions_found(self, mock_analyze_email): "content": "This is just a general update, no specific actions required.", } - response = self.client.post( - "/api/actions/extract-from-text", json=request_payload - ) + response = self.client.post("/api/actions/extract-from-text", json=request_payload) self.assertEqual(response.status_code, 200) response_data = response.json() @@ -125,9 +119,7 @@ def test_extract_actions_missing_content(self): # "content" field is missing } - response = self.client.post( - "/api/actions/extract-from-text", json=request_payload - ) + response = self.client.post("/api/actions/extract-from-text", json=request_payload) # FastAPI should return a 422 Unprocessable Entity for Pydantic validation errors self.assertEqual(response.status_code, 422) @@ -135,11 +127,7 @@ def test_extract_actions_missing_content(self): self.assertIn("detail", response_data) # Check that the error detail mentions the 'content' field self.assertTrue( - any( - "content" in error["loc"] - for error in response_data["detail"] - if "loc" in error - ) + any("content" in error["loc"] for error in response_data["detail"] if "loc" in error) ) @patch("server.python_backend.main.ai_engine.analyze_email", new_callable=AsyncMock) @@ -152,16 +140,13 @@ def test_extract_actions_ai_engine_exception(self, mock_analyze_email): "content": "This content will cause an error in the mocked AI engine.", } - response = self.client.post( - "/api/actions/extract-from-text", json=request_payload - ) + response = self.client.post("/api/actions/extract-from-text", json=request_payload) self.assertEqual(response.status_code, 500) response_data = response.json() self.assertIn("detail", response_data) self.assertTrue( - "Failed to extract action items: AI Engine processing error" - in response_data["detail"] + "Failed to extract action items: AI Engine processing error" in response_data["detail"] ) diff --git a/tests/test_category_api.py b/tests/test_category_api.py index c0ec812c8..47d975b0e 100644 --- a/tests/test_category_api.py +++ b/tests/test_category_api.py @@ -28,9 +28,7 @@ def setUp(self): # REMOVE mock_get_db_globally from signature # Reset and configure methods on our instance for each test self.mock_db_instance.reset_mock() - self.mock_db_instance.get_all_categories = ( - AsyncMock() - ) # ensure methods are AsyncMocks + self.mock_db_instance.get_all_categories = AsyncMock() # ensure methods are AsyncMocks self.mock_db_instance.create_category = AsyncMock() def tearDown(self): @@ -54,8 +52,7 @@ def test_get_categories_success(self): "count": 25, }, ] - self.mock_db_instance.get_all_categories.return_value = \ - mock_categories_data + self.mock_db_instance.get_all_categories.return_value = mock_categories_data response = self.client.get("/api/categories") @@ -84,9 +81,7 @@ def test_get_categories_db_error(self): print("Running test_get_categories_db_error") # Ensure the method is an AsyncMock before setting side_effect self.mock_db_instance.get_all_categories = AsyncMock() - self.mock_db_instance.get_all_categories.side_effect = ( - self.async_raise_db_connection_error - ) + self.mock_db_instance.get_all_categories.side_effect = self.async_raise_db_connection_error response = self.client.get("/api/categories") @@ -114,9 +109,7 @@ def test_create_category_success(self): } # count is part of CategoryResponse # Assigning to a temporary variable to avoid E501 created_category_for_mock = mock_created_category - self.mock_db_instance.create_category.return_value = ( - created_category_for_mock - ) + self.mock_db_instance.create_category.return_value = created_category_for_mock response = self.client.post("/api/categories", json=category_data) @@ -132,16 +125,12 @@ def test_create_category_success(self): # where category is CategoryCreate. # The test's category_data is compatible with CategoryCreate. validated_category_data = CategoryCreate(**category_data).model_dump() - self.mock_db_instance.create_category.assert_called_once_with( - validated_category_data - ) + self.mock_db_instance.create_category.assert_called_once_with(validated_category_data) def test_create_category_validation_error_missing_name(self): print("Running test_create_category_validation_error_missing_name") # Missing required 'name' field - category_data = { - "description": "A category without a name", "color": "#123456" - } + category_data = {"description": "A category without a name", "color": "#123456"} response = self.client.post("/api/categories", json=category_data) self.assertEqual(response.status_code, 422) # Unprocessable Entity response_data = response.json() @@ -149,13 +138,10 @@ def test_create_category_validation_error_missing_name(self): # Check for specific error details for missing 'name' found_error = False for error in response_data["detail"]: - if "name" in error.get("loc", []) and \ - error.get("type") == "missing": + if "name" in error.get("loc", []) and error.get("type") == "missing": found_error = True break - self.assertTrue( - found_error, "Validation error for missing name not found." - ) + self.assertTrue(found_error, "Validation error for missing name not found.") def test_create_category_validation_error_invalid_color(self): print("Running test_create_category_validation_error_invalid_color") @@ -180,13 +166,10 @@ def test_create_category_validation_error_invalid_color(self): # Pydantic v2 type error for string is 'string_type' found_error = False for error in response_data["detail"]: - if "color" in error.get("loc", []) and \ - "string_type" in error.get("type", ""): + if "color" in error.get("loc", []) and "string_type" in error.get("type", ""): found_error = True break - self.assertTrue( - found_error, "Validation error for invalid color type not found." - ) + self.assertTrue(found_error, "Validation error for invalid color type not found.") async def async_raise_db_write_error(self, *args, **kwargs): raise Exception("Database write error") @@ -200,9 +183,7 @@ def test_create_category_db_error(self): } # Ensure the method is an AsyncMock self.mock_db_instance.create_category = AsyncMock() - self.mock_db_instance.create_category.side_effect = ( - self.async_raise_db_write_error - ) + self.mock_db_instance.create_category.side_effect = self.async_raise_db_write_error response = self.client.post("/api/categories", json=category_data) @@ -210,9 +191,7 @@ def test_create_category_db_error(self): data = response.json() self.assertIn("Failed to create category", data["detail"]) validated_category_data = CategoryCreate(**category_data).model_dump() - self.mock_db_instance.create_category.assert_called_once_with( - validated_category_data - ) + self.mock_db_instance.create_category.assert_called_once_with(validated_category_data) if __name__ == "__main__": diff --git a/tests/test_dashboard_api.py b/tests/test_dashboard_api.py index 75f3583fa..3f6bd6dbb 100644 --- a/tests/test_dashboard_api.py +++ b/tests/test_dashboard_api.py @@ -6,8 +6,7 @@ from psycopg2 import Error as Psycopg2Error # Import real psycopg2.Error from server.python_backend.main import app, get_db -from server.python_backend.models import \ - DashboardStats # Import specific model - corrected name +from server.python_backend.models import DashboardStats # Import specific model - corrected name # Mock DatabaseManager for dependency injection # This will use the same global mock_db_manager_filter instance if tests are run together, @@ -143,9 +142,7 @@ def test_get_performance_overview_success(self): # self.mock_performance_monitor_patch.stop() # REMOVED # Use self.mock_performance_monitor directly as it's the MagicMock replacing the instance in main - self.mock_performance_monitor.get_real_time_dashboard.return_value = ( - mock_performance_data - ) + self.mock_performance_monitor.get_real_time_dashboard.return_value = mock_performance_data response = self.client.get("/api/performance/overview") diff --git a/tests/test_email_api.py b/tests/test_email_api.py index 00f392f11..5a830db6b 100644 --- a/tests/test_email_api.py +++ b/tests/test_email_api.py @@ -200,9 +200,7 @@ async def async_raise_exception(self, *args, **kwargs): def test_get_email_by_id_db_error(self): print("Running test_get_email_by_id_db_error") - mock_db_manager.get_email_by_id = ( - AsyncMock() - ) # Ensure fresh AsyncMock for this test + mock_db_manager.get_email_by_id = AsyncMock() # Ensure fresh AsyncMock for this test mock_db_manager.get_email_by_id.side_effect = self.async_raise_exception response = self.client.get("/api/emails/1") self.assertEqual(response.status_code, 500) @@ -219,8 +217,8 @@ def test_get_emails_db_error(self): mock_db_manager.get_all_emails.assert_called_once() -from server.python_backend.ai_engine import \ - AIAnalysisResult # Ensure this is imported +from server.python_backend.ai_engine import AIAnalysisResult # Ensure this is imported + # Import Pydantic models if not already at the top from server.python_backend.models import EmailCreate, EmailUpdate @@ -324,7 +322,9 @@ def test_create_email_success( # For the purpose of this test, we are mocking `db.create_email`'s return value. # So, it should be a dict that FastAPI can convert using `EmailResponse` as the response_model. - mock_db_manager.create_email.return_value = created_email_response # This mock is what the endpoint returns after db call + mock_db_manager.create_email.return_value = ( + created_email_response # This mock is what the endpoint returns after db call + ) response = self.client.post("/api/emails", json=email_data) @@ -337,21 +337,15 @@ def test_create_email_success( self.assertEqual(response.status_code, 200) data = response.json() self.assertEqual(data["subject"], "New Email") - self.assertEqual( - data["confidence"], 95 - ) # This matches our created_email_response + self.assertEqual(data["confidence"], 95) # This matches our created_email_response self.assertEqual(data["id"], 100) - mock_analyze_email.assert_called_once_with( - email_data["subject"], email_data["content"] - ) + mock_analyze_email.assert_called_once_with(email_data["subject"], email_data["content"]) mock_apply_filters.assert_called_once() # What `db.create_email` is called with in main.py: expected_db_payload_to_main_create_email_func = { - **EmailCreate( - **email_data - ).model_dump(), # Ensures it's validated by EmailCreate first + **EmailCreate(**email_data).model_dump(), # Ensures it's validated by EmailCreate first "confidence": 95, "categoryId": 1, "labels": ["inbox"], @@ -369,9 +363,7 @@ def test_create_email_success( # would pass to `db.create_email`. # The `email_data` in `main.py` is `email.dict()` (from `EmailCreate`) then updated. - temp_email_create_obj = EmailCreate( - **email_data - ) # Pydantic validation of input + temp_email_create_obj = EmailCreate(**email_data) # Pydantic validation of input db_call_arg = temp_email_create_obj.model_dump() db_call_arg.update( { @@ -456,9 +448,7 @@ def test_create_email_db_error(self, mock_apply_filters, mock_analyze_email): mock_apply_filters.return_value = {"matched_filters": [], "applied_actions": []} mock_db_manager.create_email = AsyncMock() # Ensure fresh AsyncMock - mock_db_manager.create_email.side_effect = ( - self.async_raise_exception - ) # Use helper + mock_db_manager.create_email.side_effect = self.async_raise_exception # Use helper response = self.client.post("/api/emails", json=email_data) self.assertEqual(response.status_code, 500) @@ -496,9 +486,7 @@ def test_update_email_success(self): } response = self.client.put(f"/api/emails/{email_id}", json=email_update_payload) - print( - f"PUT /api/emails/{email_id} Response Status Code: {response.status_code}" - ) + print(f"PUT /api/emails/{email_id} Response Status Code: {response.status_code}") try: print(f"PUT /api/emails/{email_id} Response JSON: {response.json()}") except Exception as e: @@ -512,9 +500,7 @@ def test_update_email_success(self): # await db.update_email(email_id, email_update.dict(exclude_unset=True)) # So, the mock should expect the payload with exclude_unset=True applied. # For `email_update_payload` this is the same. - mock_db_manager.update_email.assert_called_once_with( - email_id, email_update_payload - ) + mock_db_manager.update_email.assert_called_once_with(email_id, email_update_payload) def test_update_email_not_found(self): print("Running test_update_email_not_found") @@ -525,9 +511,7 @@ def test_update_email_not_found(self): response = self.client.put(f"/api/emails/{email_id}", json=email_update_payload) self.assertEqual(response.status_code, 404) self.assertEqual(response.json(), {"detail": "Email not found"}) - mock_db_manager.update_email.assert_called_once_with( - email_id, email_update_payload - ) + mock_db_manager.update_email.assert_called_once_with(email_id, email_update_payload) def test_update_email_validation_error(self): print("Running test_update_email_validation_error") @@ -546,15 +530,11 @@ def test_update_email_db_error(self): email_id = 1 email_update_payload = {"subject": "Updated Subject"} mock_db_manager.update_email = AsyncMock() # Ensure fresh AsyncMock - mock_db_manager.update_email.side_effect = ( - self.async_raise_exception - ) # Use helper + mock_db_manager.update_email.side_effect = self.async_raise_exception # Use helper response = self.client.put(f"/api/emails/{email_id}", json=email_update_payload) self.assertEqual(response.status_code, 500) self.assertIn("Failed to update email", response.json()["detail"]) - mock_db_manager.update_email.assert_called_once_with( - email_id, email_update_payload - ) + mock_db_manager.update_email.assert_called_once_with(email_id, email_update_payload) if __name__ == "__main__": diff --git a/tests/test_filter_api.py b/tests/test_filter_api.py index 82655804f..bce5011fb 100644 --- a/tests/test_filter_api.py +++ b/tests/test_filter_api.py @@ -6,6 +6,7 @@ from psycopg2 import Error as Psycopg2Error # Import psycopg2.Error from server.python_backend.main import app, get_db + # from server.python_backend.models import FilterRequest # Not directly used in this version of the test for payload from server.python_nlp.smart_filters import EmailFilter # Changed import @@ -112,9 +113,7 @@ def test_get_filters_empty(self): def test_get_filters_manager_error(self): print("Running test_get_filters_manager_error") - self.mock_filter_manager.get_all_filters.side_effect = Exception( - "Filter manager error" - ) + self.mock_filter_manager.get_all_filters.side_effect = Exception("Filter manager error") response = self.client.get("/api/filters") @@ -161,9 +160,7 @@ def test_create_filter_success(self): self.assertEqual(data["name"], filter_payload["name"]) self.assertEqual(data["criteria"], filter_payload["criteria"]) - self.assertEqual( - data["actions"], filter_payload["actions"] - ) # Check actions dict + self.assertEqual(data["actions"], filter_payload["actions"]) # Check actions dict self.assertEqual(data["priority"], filter_payload["priority"]) # Check for presence and type of other fields @@ -211,9 +208,7 @@ def test_create_filter_manager_error(self): "actions": {"add_label": "ERROR_LABEL"}, "priority": 1, } - self.mock_filter_manager.add_custom_filter.side_effect = Exception( - "Cannot add filter" - ) + self.mock_filter_manager.add_custom_filter.side_effect = Exception("Cannot add filter") response = self.client.post("/api/filters", json=filter_payload) @@ -232,9 +227,7 @@ def test_create_filter_manager_error(self): def test_generate_intelligent_filters_success(self): print("Running test_generate_intelligent_filters_success") - mock_emails_data = [ - {"id": 1, "subject": "Test Email", "content": "Some content"} - ] + mock_emails_data = [{"id": 1, "subject": "Test Email", "content": "Some content"}] mock_generated_filters = [ { "name": "Intelligent Filter 1", @@ -247,9 +240,7 @@ def test_generate_intelligent_filters_success(self): ] mock_db_manager_filter.get_recent_emails.return_value = mock_emails_data - self.mock_filter_manager.create_intelligent_filters.return_value = ( - mock_generated_filters - ) + self.mock_filter_manager.create_intelligent_filters.return_value = mock_generated_filters response = self.client.post("/api/filters/generate-intelligent") @@ -281,9 +272,7 @@ def test_generate_intelligent_filters_db_error(self): def test_generate_intelligent_filters_manager_error(self): print("Running test_generate_intelligent_filters_manager_error") - mock_emails_data = [ - {"id": 1, "subject": "Test Email", "content": "Some content"} - ] + mock_emails_data = [{"id": 1, "subject": "Test Email", "content": "Some content"}] mock_db_manager_filter.get_recent_emails.return_value = mock_emails_data self.mock_filter_manager.create_intelligent_filters.side_effect = Exception( "Filter generation failed" @@ -304,9 +293,7 @@ def test_prune_filters_success(self): "pruned_count": 2, "details": "Removed 2 ineffective filters", } - self.mock_filter_manager.prune_ineffective_filters.return_value = ( - mock_prune_results - ) + self.mock_filter_manager.prune_ineffective_filters.return_value = mock_prune_results response = self.client.post("/api/filters/prune") @@ -316,9 +303,7 @@ def test_prune_filters_success(self): def test_prune_filters_manager_error(self): print("Running test_prune_filters_manager_error") - self.mock_filter_manager.prune_ineffective_filters.side_effect = Exception( - "Pruning error" - ) + self.mock_filter_manager.prune_ineffective_filters.side_effect = Exception("Pruning error") response = self.client.post("/api/filters/prune") diff --git a/tests/test_gmail_api.py b/tests/test_gmail_api.py index 2e47a8fcb..284572f59 100644 --- a/tests/test_gmail_api.py +++ b/tests/test_gmail_api.py @@ -7,9 +7,10 @@ from googleapiclient.errors import HttpError from server.python_backend.main import ( # Assuming get_db is for DatabaseManager, not directly used here but good to keep consistent - app, get_db) -from server.python_backend.models import (GmailSyncRequest, - SmartRetrievalRequest) + app, + get_db, +) +from server.python_backend.models import GmailSyncRequest, SmartRetrievalRequest # Mock DatabaseManager (though not directly used by these endpoints, it's good practice if other parts of app setup need it) mock_db_manager_gmail = MagicMock() @@ -91,9 +92,7 @@ def test_sync_gmail_success(self): self.assertTrue(data["success"]) self.assertEqual(data["processedCount"], 5) self.assertEqual(data["emailsCreated"], 5) # Approximation in main.py - self.assertEqual( - data["batchInfo"]["batchId"], "batch123" - ) # Changed batch_id to batchId + self.assertEqual(data["batchInfo"]["batchId"], "batch123") # Changed batch_id to batchId self.mock_gmail_service.sync_gmail_emails.assert_called_once_with( max_emails=request_data["maxEmails"], query_filter=request_data["queryFilter"], @@ -113,9 +112,7 @@ def test_sync_gmail_nlp_failure(self): response = self.client.post("/api/gmail/sync", json=request_data) - self.assertEqual( - response.status_code, 200 - ) # Endpoint itself succeeds, but reports failure + self.assertEqual(response.status_code, 200) # Endpoint itself succeeds, but reports failure data = response.json() self.assertFalse(data["success"]) self.assertEqual(data["error"], "NLP service error") @@ -169,17 +166,14 @@ def test_sync_gmail_google_api_error_500(self): def test_sync_gmail_generic_exception(self): print("Running test_sync_gmail_generic_exception") request_data = {"maxEmails": 10} - self.mock_gmail_service.sync_gmail_emails.side_effect = Exception( - "Some unexpected error" - ) + self.mock_gmail_service.sync_gmail_emails.side_effect = Exception("Some unexpected error") response = self.client.post("/api/gmail/sync", json=request_data) self.assertEqual(response.status_code, 500) data = response.json() self.assertTrue( - "Gmail sync failed due to an unexpected error: Some unexpected error" - in data["detail"] + "Gmail sync failed due to an unexpected error: Some unexpected error" in data["detail"] ) self.mock_performance_monitor.record_sync_performance.assert_not_called() @@ -195,9 +189,7 @@ def test_smart_retrieval_success(self): "emails_found": 5, "details": "...", } - self.mock_gmail_service.execute_smart_retrieval.return_value = ( - mock_retrieval_result - ) + self.mock_gmail_service.execute_smart_retrieval.return_value = mock_retrieval_result response = self.client.post("/api/gmail/smart-retrieval", json=request_data) @@ -297,9 +289,7 @@ def test_get_gmail_performance_exception(self): response = self.client.get("/api/gmail/performance") self.assertEqual(response.status_code, 500) - self.assertEqual( - response.json(), {"detail": "Failed to fetch performance metrics"} - ) + self.assertEqual(response.json(), {"detail": "Failed to fetch performance metrics"}) if __name__ == "__main__": diff --git a/tests/test_gmail_service_integration.py b/tests/test_gmail_service_integration.py index f8836cc6c..7550d3b66 100644 --- a/tests/test_gmail_service_integration.py +++ b/tests/test_gmail_service_integration.py @@ -4,8 +4,7 @@ # Assuming AdvancedAIEngine and AIAnalysisResult are importable for type hinting or mocking structure # from server.python_backend.ai_engine import AdvancedAIEngine, AIAnalysisResult -from server.python_nlp.gmail_metadata import \ - GmailMessage # For structuring metadata input +from server.python_nlp.gmail_metadata import GmailMessage # For structuring metadata input from server.python_nlp.gmail_service import GmailAIService @@ -41,9 +40,7 @@ async def test_perform_ai_analysis_includes_action_items(self): ], } # Configure the mock for an async method call - mock_advanced_ai_engine.analyze_email = AsyncMock( - return_value=mock_analysis_output - ) + mock_advanced_ai_engine.analyze_email = AsyncMock(return_value=mock_analysis_output) # Instantiate GmailAIService with the mocked AdvancedAIEngine gmail_service = GmailAIService(advanced_ai_engine=mock_advanced_ai_engine) @@ -58,9 +55,7 @@ async def test_perform_ai_analysis_includes_action_items(self): } # Call the method under test - result_analysis = await gmail_service._perform_ai_analysis( - email_data_for_analysis - ) + result_analysis = await gmail_service._perform_ai_analysis(email_data_for_analysis) # Assertions self.assertIsNotNone(result_analysis) @@ -87,9 +82,7 @@ def test_convert_to_db_format_includes_action_items_in_metadata(self): mock_gmail_metadata.from_address = "sender@example.com" mock_gmail_metadata.subject = "DB Format Test" mock_gmail_metadata.body_plain = "Content with action: please do this." - mock_gmail_metadata.body_html = ( - "

Content with action: please do this.

" # Added - ) + mock_gmail_metadata.body_html = "

Content with action: please do this.

" # Added mock_gmail_metadata.snippet = "Content with action..." mock_gmail_metadata.date = "2023-10-26 10:00:00" mock_gmail_metadata.internal_date = 1672531200000 diff --git a/tests/test_health_check_api.py b/tests/test_health_check_api.py index 4a941bd8f..bc39dbd78 100644 --- a/tests/test_health_check_api.py +++ b/tests/test_health_check_api.py @@ -4,8 +4,7 @@ from fastapi.testclient import TestClient -from server.python_backend.main import \ - app # Assuming your FastAPI app instance is named 'app' +from server.python_backend.main import app # Assuming your FastAPI app instance is named 'app' class TestHealthCheckAPI(unittest.TestCase): @@ -28,9 +27,7 @@ def test_health_check_success(self): # or "2023-10-27T12:34:56.789012+00:00" (UTC) datetime.fromisoformat(data["timestamp"].replace("Z", "+00:00")) except ValueError as e: - self.fail( - f"Timestamp is not a valid ISO 8601 format: {data['timestamp']}. Error: {e}" - ) + self.fail(f"Timestamp is not a valid ISO 8601 format: {data['timestamp']}. Error: {e}") self.assertEqual(data["version"], "2.0.0") # As defined in main.py # Example of how to test a failure scenario if the health check had dependencies @@ -49,12 +46,8 @@ def test_health_check_success(self): # deeper, potentially flaky, mocking of internal FastAPI/Starlette components or datetime. # The existing error handling in the health_check endpoint catches generic Exception. # We can test this generic error handling by forcing an exception during the request. - @patch( - "server.python_backend.main.datetime" - ) # Patch datetime used within health_check - def test_health_check_generic_error( - self, mock_datetime_module - ): # Renamed for clarity + @patch("server.python_backend.main.datetime") # Patch datetime used within health_check + def test_health_check_generic_error(self, mock_datetime_module): # Renamed for clarity print("Running test_health_check_generic_error") # Configure the mock for datetime.now().isoformat() diff --git a/tests/test_nlp_engine.py b/tests/test_nlp_engine.py index aa5176bbc..9fab7f0e4 100644 --- a/tests/test_nlp_engine.py +++ b/tests/test_nlp_engine.py @@ -48,9 +48,7 @@ def setUp(self): self.mock_intent_model = MagicMock() self.mock_intent_model.predict = MagicMock(return_value=["request"]) - self.mock_intent_model.predict_proba = MagicMock( - return_value=[[0.9, 0.05, 0.05]] - ) + self.mock_intent_model.predict_proba = MagicMock(return_value=[[0.9, 0.05, 0.05]]) self.mock_intent_model.classes_ = ["request", "inquiry", "informational"] self.mock_urgency_model = MagicMock() @@ -80,9 +78,7 @@ def side_effect_joblib_load(path): # Patch os.path.exists for model loading self.patcher_os_path_exists = patch("os.path.exists") self.mock_os_path_exists = self.patcher_os_path_exists.start() - self.mock_os_path_exists.return_value = ( - True # Assume all model files exist by default - ) + self.mock_os_path_exists.return_value = True # Assume all model files exist by default # Mock NLTK and TextBlob if necessary if MOCK_NLTK: @@ -125,9 +121,7 @@ def tearDown(self): def test_analyze_email_with_all_models_available(self): """Test analyze_email when all models are loaded.""" subject = "Important meeting update" - content = ( - "Please be advised the meeting time has changed. This is an urgent request." - ) + content = "Please be advised the meeting time has changed. This is an urgent request." result = self.engine.analyze_email(subject, content) @@ -149,9 +143,7 @@ def test_analyze_email_with_all_models_available(self): return_value=None, ) @patch("server.python_nlp.nlp_engine.NLPEngine._analyze_sentiment_textblob") - def test_sentiment_fallback_to_textblob( - self, mock_textblob_method, mock_model_method - ): + def test_sentiment_fallback_to_textblob(self, mock_textblob_method, mock_model_method): """Test sentiment analysis falls back to TextBlob when model fails.""" mock_textblob_method.return_value = { "sentiment": "positive_textblob", @@ -214,16 +206,10 @@ def test_generate_reasoning(self): reasoning = self.engine._generate_reasoning( sentiment_info, topic_info, intent_info, urgency_info ) - self.assertIn( - "Sentiment analysis detected positive sentiment (using AI model)", reasoning - ) - self.assertIn( - "Identified topic: work_business (using fallback: keyword_topic)", reasoning - ) + self.assertIn("Sentiment analysis detected positive sentiment (using AI model)", reasoning) + self.assertIn("Identified topic: work_business (using fallback: keyword_topic)", reasoning) self.assertIn("Detected intent: request (using AI model)", reasoning) - self.assertIn( - "Assessed urgency level: high (using fallback: regex_urgency)", reasoning - ) + self.assertIn("Assessed urgency level: high (using fallback: regex_urgency)", reasoning) # Test neutral/general cases sentiment_info_neutral = {"sentiment": "neutral"} @@ -244,9 +230,7 @@ def test_generate_reasoning(self): def test_preprocess_text(self): text = " Test Text with Punctuation!! " processed = self.engine._preprocess_text(text) - self.assertEqual( - processed, "test text with punctuation!!" - ) # Adjusted expectation + self.assertEqual(processed, "test text with punctuation!!") # Adjusted expectation def test_extract_keywords_nltk_available(self): """Test keyword extraction when NLTK (and TextBlob) is available.""" @@ -297,9 +281,7 @@ def test_extract_keywords_nltk_available(self): self.assertIn("test", keywords) # From individual words self.assertNotIn("this", keywords) # Stopword - @patch( - "server.python_nlp.nlp_engine.HAS_NLTK", False - ) # Simulate NLTK not available + @patch("server.python_nlp.nlp_engine.HAS_NLTK", False) # Simulate NLTK not available def test_extract_keywords_nltk_unavailable( self, ): # Removed mock_has_nltk_false argument @@ -340,7 +322,9 @@ def test_detect_risk_factors(self): risks_spam = self.engine._detect_risk_factors(text_spam) self.assertIn("potential_spam", risks_spam) - text_sensitive = "Please provide your password and social security number ssn for verification." + text_sensitive = ( + "Please provide your password and social security number ssn for verification." + ) risks_sensitive = self.engine._detect_risk_factors(text_sensitive) self.assertIn("sensitive_data", risks_sensitive) @@ -371,9 +355,7 @@ def test_main_health_check(self, mock_parse_args, mock_stdout): @patch("sys.stdout", new_callable=io.StringIO) # Changed to io.StringIO @patch("argparse.ArgumentParser.parse_args") - @patch( - "server.python_nlp.nlp_engine.NLPEngine.analyze_email" - ) # Mock the main analysis method + @patch("server.python_nlp.nlp_engine.NLPEngine.analyze_email") # Mock the main analysis method def test_main_analyze_email(self, mock_analyze_email, mock_parse_args, mock_stdout): mock_parse_args.return_value = MagicMock( health_check=False, diff --git a/tests/test_nlp_engine_integration.py b/tests/test_nlp_engine_integration.py index 2f7f935e8..8b2752cbd 100644 --- a/tests/test_nlp_engine_integration.py +++ b/tests/test_nlp_engine_integration.py @@ -1,7 +1,6 @@ import json import unittest -from unittest.mock import ( # Import MagicMock for AdvancedAIEngine test - MagicMock, patch) +from unittest.mock import MagicMock, patch # Import MagicMock for AdvancedAIEngine test from server.python_backend.ai_engine import AdvancedAIEngine, AIAnalysisResult from server.python_nlp.nlp_engine import NLPEngine @@ -38,14 +37,9 @@ def test_nlp_engine_analyze_email_includes_action_items(self): # Check if one of the expected actions is present phrases = [item["action_phrase"] for item in analysis["action_items"]] self.assertTrue( - any( - "Please complete the task by Monday." in phrase - for phrase in phrases - ) - ) - self.assertTrue( - any("need to also review the report." in phrase for phrase in phrases) + any("Please complete the task by Monday." in phrase for phrase in phrases) ) + self.assertTrue(any("need to also review the report." in phrase for phrase in phrases)) def test_nlp_engine_fallback_analysis_includes_empty_action_items(self): # Test _get_fallback_analysis @@ -54,16 +48,12 @@ def test_nlp_engine_fallback_analysis_includes_empty_action_items(self): self.assertEqual(fallback_result["action_items"], []) # Test _get_simple_fallback_analysis - simple_fallback_result = self.nlp_engine._get_simple_fallback_analysis( - "Subject", "Content" - ) + simple_fallback_result = self.nlp_engine._get_simple_fallback_analysis("Subject", "Content") self.assertIn("action_items", simple_fallback_result) self.assertEqual(simple_fallback_result["action_items"], []) @patch("server.python_backend.ai_engine._execute_async_command") - async def test_advanced_ai_engine_analyze_email_parses_action_items( - self, mock_execute_async - ): + async def test_advanced_ai_engine_analyze_email_parses_action_items(self, mock_execute_async): # Mock the output of the nlp_engine.py script mock_script_output = { "topic": "work_business", @@ -108,9 +98,7 @@ async def test_advanced_ai_engine_analyze_email_parses_action_items( self.assertEqual(len(ai_result.action_items), 1) action_item = ai_result.action_items[0] - self.assertEqual( - action_item["action_phrase"], "Please complete the task by Monday." - ) + self.assertEqual(action_item["action_phrase"], "Please complete the task by Monday.") self.assertEqual(action_item["verb"], "complete") self.assertEqual(action_item["raw_due_date_text"], "by Monday") diff --git a/tests/test_smart_filters.py b/tests/test_smart_filters.py index 33a0ade4b..75ef5aef8 100644 --- a/tests/test_smart_filters.py +++ b/tests/test_smart_filters.py @@ -9,8 +9,7 @@ # Adjust path to import module from parent directory sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -from server.python_nlp.smart_filters import (EmailFilter, FilterPerformance, - SmartFilterManager) +from server.python_nlp.smart_filters import EmailFilter, FilterPerformance, SmartFilterManager class TestSmartFilterManager(unittest.TestCase): @@ -73,21 +72,13 @@ def test_db_initialization(self): # Use an internal, low-level query method of the manager if available, # or a high-level one that would fail if tables don't exist. # Example: try to load a non-existent filter; it should return None, not raise "no such table". - loaded_filter = self.manager._load_filter( - "non_existent_filter_id_for_init_test" - ) - self.assertIsNone( - loaded_filter, "Loading a non-existent filter should return None." - ) + loaded_filter = self.manager._load_filter("non_existent_filter_id_for_init_test") + self.assertIsNone(loaded_filter, "Loading a non-existent filter should return None.") # Additionally, check for other tables if necessary by a simple count or query - perf_records = self.manager._db_fetchall( - "SELECT * FROM filter_performance WHERE 1=0" - ) + perf_records = self.manager._db_fetchall("SELECT * FROM filter_performance WHERE 1=0") self.assertEqual(len(perf_records), 0) - script_records = self.manager._db_fetchall( - "SELECT * FROM google_scripts WHERE 1=0" - ) + script_records = self.manager._db_fetchall("SELECT * FROM google_scripts WHERE 1=0") self.assertEqual(len(script_records), 0) except sqlite3.OperationalError as e: @@ -114,9 +105,7 @@ def test_save_and_load_filter(self): loaded_filter = self.manager._load_filter("test_filter_001") self.assertIsNotNone(loaded_filter) self.assertEqual(loaded_filter.name, "Test Filter") - self.assertEqual( - loaded_filter.criteria["subject_keywords"], ["test", "example"] - ) + self.assertEqual(loaded_filter.criteria["subject_keywords"], ["test", "example"]) def test_load_all_filters(self): """Test loading all filters (initially empty, then after adding some).""" @@ -148,9 +137,7 @@ def test_load_all_filters(self): # and create a new _load_active_filters for clarity. # For now, assuming _load_all_filters fetches everything and test logic should reflect this. - all_db_filters = ( - self.manager._load_all_filters() - ) # This loads all, active or not + all_db_filters = self.manager._load_all_filters() # This loads all, active or not self.assertEqual(len(all_db_filters), 2) # Still 2 in DB is_f1_active = self.manager._is_filter_active_in_db("f1") @@ -181,9 +168,7 @@ def test_apply_filter_to_email_subject_keyword(self): self.assertTrue(self.manager._apply_filter_to_email(filter_obj, email_match)) email_no_match1 = {"subject": "Project report needed"} # Missing urgent - self.assertFalse( - self.manager._apply_filter_to_email(filter_obj, email_no_match1) - ) + self.assertFalse(self.manager._apply_filter_to_email(filter_obj, email_no_match1)) filter_criteria_or = { "subject_keywords": ["urgent", "report"], @@ -204,9 +189,7 @@ def test_apply_filter_to_email_subject_keyword(self): 0, {}, ) - self.assertTrue( - self.manager._apply_filter_to_email(filter_obj_or, email_no_match1) - ) + self.assertTrue(self.manager._apply_filter_to_email(filter_obj_or, email_no_match1)) def test_apply_filter_to_email_from_pattern(self): """Test _apply_filter_to_email for sender pattern matching.""" @@ -230,9 +213,7 @@ def test_apply_filter_to_email_from_pattern(self): self.assertTrue(self.manager._apply_filter_to_email(filter_obj, email_match)) email_no_match = {"senderEmail": "user@external.com"} - self.assertFalse( - self.manager._apply_filter_to_email(filter_obj, email_no_match) - ) + self.assertFalse(self.manager._apply_filter_to_email(filter_obj, email_no_match)) def test_apply_filter_to_email_exclusion(self): """Test _apply_filter_to_email for exclusion patterns.""" @@ -262,17 +243,13 @@ def test_apply_filter_to_email_exclusion(self): self.assertTrue(self.manager._apply_filter_to_email(filter_obj, email_match)) email_no_match_subject = {"subject": "Newsletter update"} # Excluded by subject - self.assertFalse( - self.manager._apply_filter_to_email(filter_obj, email_no_match_subject) - ) + self.assertFalse(self.manager._apply_filter_to_email(filter_obj, email_no_match_subject)) email_no_match_content = { "subject": "System update", "content": "This is an automated message.", } # Excluded by content - self.assertFalse( - self.manager._apply_filter_to_email(filter_obj, email_no_match_content) - ) + self.assertFalse(self.manager._apply_filter_to_email(filter_obj, email_no_match_content)) def test_create_intelligent_filters(self): """Test creation of intelligent filters from email samples.""" @@ -281,9 +258,7 @@ def test_create_intelligent_filters(self): # Mock _should_create_filter to True for specific templates to ensure they are created with patch.object(self.manager, "_should_create_filter", return_value=True): - created_filters = self.manager.create_intelligent_filters( - self.sample_emails - ) + created_filters = self.manager.create_intelligent_filters(self.sample_emails) self.assertGreater(len(created_filters), 0) # Check if filters were saved by trying to load one @@ -317,9 +292,7 @@ def test_evaluate_filter_performance(self): # Email1: subject "Urgent: Project Alpha Deadline", expected_filter_match = True # Email2: subject "Weekly Newsletter...", expected_filter_match = False - performance = self.manager.evaluate_filter_performance( - "perf_test_001", self.sample_emails - ) + performance = self.manager.evaluate_filter_performance("perf_test_001", self.sample_emails) self.assertEqual(performance.filter_id, "perf_test_001") self.assertEqual(performance.emails_processed, len(self.sample_emails)) @@ -377,9 +350,7 @@ def test_evaluate_filter_performance(self): self.assertAlmostEqual(performance.f1_score, 1.5 / 1.75) loaded_filter = self.manager._load_filter("perf_test_001") - self.assertAlmostEqual( - loaded_filter.effectiveness_score, 1.5 / 1.75 - ) # F1 score + self.assertAlmostEqual(loaded_filter.effectiveness_score, 1.5 / 1.75) # F1 score # FP Rate = FP / (FP+TN) if defining by specificity, or FP / Total. Here, it's FP/Total. # FP Rate = FP / Total emails = 0 / 5 = 0 self.assertAlmostEqual(loaded_filter.false_positive_rate, 0) @@ -459,9 +430,7 @@ def test_prune_ineffective_filters_disable_unused(self): results = self.manager.prune_ineffective_filters() self.assertEqual(len(results["disabled_filters"]), 1) self.assertEqual(results["disabled_filters"][0]["filter_id"], fid) - self.assertFalse( - self.manager._is_filter_active_in_db(fid) - ) # Check it's marked inactive + self.assertFalse(self.manager._is_filter_active_in_db(fid)) # Check it's marked inactive self.assertIsNotNone(self.manager._load_filter(fid)) # Still exists @patch("server.python_nlp.smart_filters.SmartFilterManager._get_filter_performance")