import yaml
import requests
import sys
import time
from pathlib import Path
from typing import List, Dict, Tuple

# Configuration
TIMEOUT = 10  # seconds
MAX_RETRIES = 3  # number of attempts before marking as failed
RETRY_DELAY = 2  # seconds between retries
FILES_TO_CHECK = ['guis.yml', 'engines.yml', 'resources.yml', 'servers.yml']
DATA_DIR = Path('_data')


def _fetch_status(url: str) -> int:
    """Return the HTTP status code for ``url``, trying HEAD then GET."""
    response = requests.head(url, timeout=TIMEOUT, allow_redirects=True)
    if response.status_code < 400:
        return response.status_code

    # If HEAD doesn't work, try GET. stream=True defers the body download:
    # only the status code is needed, so never pull a large file just to
    # verify that the link resolves.
    response = requests.get(
        url, timeout=TIMEOUT, allow_redirects=True, stream=True
    )
    try:
        return response.status_code
    finally:
        # A streamed response keeps its connection open until closed.
        response.close()


def check_url(url: str, attempt: int = 1) -> Tuple[bool, str]:
    """
    Check if a URL is accessible, retrying failed attempts.

    Attempts are numbered from 1 to MAX_RETRIES. After every failed attempt
    except the last, a retry notice is printed and the function sleeps for
    RETRY_DELAY seconds. When a retry (attempt number > 1) succeeds, a
    success notice naming the attempt is printed.

    Args:
        url: The URL to check
        attempt: Attempt number to start counting from. Values outside
            1..MAX_RETRIES are clamped so that at least one request is
            always made.

    Returns:
        Tuple of (success, error_message). error_message is "" on success;
        on failure it is the last error seen followed by the attempt count.
    """
    # Guard against a misconfigured MAX_RETRIES (<= 0) and an out-of-range
    # ``attempt``: without clamping, the loop below could run zero times and
    # report a failure without ever contacting the server.
    final_attempt = max(MAX_RETRIES, 1)
    first_attempt = min(max(attempt, 1), final_attempt)
    last_error = ""

    for current_attempt in range(first_attempt, final_attempt + 1):
        # Keep the try body down to the network call so that an error raised
        # while reporting success is not misread as a broken link.
        try:
            status_code = _fetch_status(url)
        except requests.exceptions.Timeout:
            last_error = "Timeout"
        except requests.exceptions.ConnectionError:
            last_error = "Connection error"
        except requests.exceptions.TooManyRedirects:
            last_error = "Too many redirects"
        except requests.exceptions.RequestException as e:
            last_error = f"Request error: {str(e)}"
        except Exception as e:
            # Best-effort checker: one odd URL must not abort the whole run.
            last_error = f"Unexpected error: {str(e)}"
        else:
            if status_code < 400:
                if current_attempt > 1:
                    print(f" ✓ OK (succeeded on attempt {current_attempt})")
                return True, ""
            last_error = f"HTTP {status_code}"

        # If this wasn't the last attempt, wait before retrying
        if current_attempt < final_attempt:
            print(f" ⟳ Attempt {current_attempt} failed ({last_error}), retrying...")
            time.sleep(RETRY_DELAY)

    # All retries exhausted
    return False, f"{last_error} (failed after {final_attempt} attempts)"