diff --git a/README.md b/README.md index cf4e142b..dfe7c224 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,15 @@ - DeepL - 兼容所有支持 OpenAI 接口的模型 +## 当前代码结构 + +- `core/models`:数据结构与最基本模型行为 +- `core/services/feed`:更新、刷新、过滤、输出 +- `core/services/admin`:后台动作与批量修改 +- `core/services/agent`:各类 Agent 的具体执行逻辑 +- `core/services/opml.py`:OPML 导入导出 +- `core/views.py` / `core/actions.py` / `core/management/commands`:入口层 + ## 赞助 感谢以下公司为本项目提供的开源支持。 diff --git a/README_EN.md b/README_EN.md index 6431ed76..2233c44f 100644 --- a/README_EN.md +++ b/README_EN.md @@ -36,6 +36,15 @@ Supported translation engines: More engines coming soon. +## Current Code Structure + +- `core/models`: data structures and minimal model behavior +- `core/services/feed`: update, refresh, filtering, and output logic +- `core/services/admin`: admin actions and batch modification +- `core/services/agent`: concrete agent implementations +- `core/services/opml.py`: OPML import/export +- `core/views.py` / `core/actions.py` / `core/management/commands`: entry layer + ## Sponsors Thanks to the following companies for their open-source support. diff --git a/config/rt_cron b/config/rt_cron index 97f42df7..2884b65b 100644 --- a/config/rt_cron +++ b/config/rt_cron @@ -11,21 +11,6 @@ # 每周一凌晨2点执行一次 feed_updater_for_frequency 任务 0 2 * * 1 root cd /app && /opt/venv/bin/python manage.py feed_updater --frequency 'weekly' >> /var/log/cron.log 2>&1 -# 每周一凌晨2点10分执行一次 digest 生成任务 (monday) -10 2 * * 1 root cd /app && /opt/venv/bin/python manage.py digest_generator --publish-days monday >> /var/log/cron.log 2>&1 -# 每周二凌晨2点10分执行一次 digest 生成任务 (tuesday) -10 2 * * 2 root cd /app && /opt/venv/bin/python manage.py digest_generator --publish-days tuesday >> /var/log/cron.log 2>&1 -# 每周三凌晨2点10分执行一次 digest 生成任务 (wednesday) -10 2 * * 3 root cd /app && /opt/venv/bin/python manage.py digest_generator --publish-days wednesday >> /var/log/cron.log 2>&1 -# 每周四凌晨2点10分执行一次 digest 生成任务 (thursday) -10 2 * * 4 root cd /app && /opt/venv/bin/python manage.py digest_generator --publish-days thursday >> /var/log/cron.log 2>&1 -# 每周五凌晨2点10分执行一次 digest 生成任务 (friday) -10 2 * * 5 root cd /app && /opt/venv/bin/python manage.py digest_generator --publish-days friday >> /var/log/cron.log 2>&1 -# 每周六凌晨2点10分执行一次 digest 生成任务 (saturday) -10 2 * * 6 root cd /app && /opt/venv/bin/python manage.py digest_generator --publish-days saturday >> /var/log/cron.log 2>&1 -# 每周日凌晨2点10分执行一次 digest 生成任务 (sunday) -10 2 * * 0 root cd /app && /opt/venv/bin/python manage.py digest_generator --publish-days sunday >> /var/log/cron.log 2>&1 # 每周运行一次 清理entreis 任务 0 3 * * 1 root cd /app && /opt/venv/bin/python manage.py clean_entries >> /var/log/cron.log 2>&1 - diff --git a/config/settings.py b/config/settings.py index 4f21fa29..a62dcba2 100644 --- a/config/settings.py +++ b/config/settings.py @@ -18,6 +18,13 @@ from django.core.management.utils import get_random_secret_key import os import sys +from core.prompts import ( + DEFAULT_CONTENT_TRANSLATE_PROMPT, + DEFAULT_FILTER_PROMPT, + DEFAULT_SUMMARY_PROMPT, + DEFAULT_TITLE_TRANSLATE_PROMPT, + OUTPUT_FORMAT_FOR_FILTER_PROMPT, +) SITE_URL = os.environ.get("SITE_URL", "http://localhost:8000") USER_MANAGEMENT = os.environ.get("USER_MANAGEMENT") == "1" @@ -280,91 +287,8 @@ }, } -default_title_translate_prompt = "You are a professional, authentic translation engine. Translate only the text into {target_language}, return only the translations, do not explain the original text." 
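The prompt literals deleted from this hunk move into a new `core/prompts.py` module, which the `from core.prompts import (...)` block added at the top of settings.py now references. That module is outside this diff; below is a minimal sketch of its expected shape, assuming it simply hoists the deleted literals under the uppercase names the import expects:

    # core/prompts.py (hypothetical sketch; the module itself is not part of this diff)

    DEFAULT_TITLE_TRANSLATE_PROMPT = (
        "You are a professional, authentic translation engine. Translate only the"
        " text into {target_language}, return only the translations, do not"
        " explain the original text."
    )

    DEFAULT_SUMMARY_PROMPT = (
        "Summarize the following text in {target_language} and return markdown format."
    )

    # DEFAULT_CONTENT_TRANSLATE_PROMPT, DEFAULT_FILTER_PROMPT, and
    # OUTPUT_FORMAT_FOR_FILTER_PROMPT presumably carry over the longer
    # multi-line literals deleted below, unchanged.
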
-default_content_translate_prompt = """ -You are a professional, authentic translation engine specialized in HTML content translation. - -Requirements: -1. Translate only the text content into {target_language} -2. Preserve ALL HTML tags, attributes, and structure completely unchanged -3. Maintain proper context awareness across different HTML elements and their relationships -4. Consider semantic meaning within nested tags and their hierarchical context -5. Ensure translated text fits naturally within the HTML structure -6. Keep inline elements (like , , ) contextually coherent with their surrounding text -7. Maintain consistency in terminology throughout the entire HTML document -8. Return only the translated HTML content without explanations or comments - -Important: Do not modify, remove, or alter any HTML tags, attributes, classes, IDs, or structural elements. Only translate the actual text content between tags. - -""" -default_summary_prompt = ( - "Summarize the following text in {target_language} and return markdown format." -) - -default_digest_prompt = """ -You are an editor-in-chief and expert analyst. Create a concise, objective, and well-structured daily digest from multiple articles. - -OUTPUT FORMAT -3–5 categories (auto-generated, short topic names): -- Header: ## {category} -- Keep only the most important articles in this category -- For each kept article: - - **[Title](LINK_X)** - > 1–2 sentences, present tense, active voice; highlight novel results/decisions/metrics/impacts; avoid repeated phrasing. - -Selection policy (apply strictly): -- Prioritize significance and real-world impact (e.g., regulatory decisions, major launches, security incidents, funding/M&A, peer-reviewed findings) -- Prioritize novelty; ignore incremental or routine updates and marketing/promotional posts -- Prefer credible, primary sources; avoid rumors/speculation -- Deduplicate across sources; if multiple items cover the same event, keep the most comprehensive one and drop the rest -- If a category has no important items, skip the category entirely - -General rules: -- Maintain a neutral, objective tone; no content beyond the provided texts -- Extract facts, named entities, numbers, dates, and concrete outcomes -- Vary wording across items to avoid repetition -- Title rewrite (each item): rewrite into {target_language} objectively (no hype/clickbait/exclamations). Do NOT copy source wording; always paraphrase into a concise, neutral title even if the source is already in {target_language}. -""" - -output_format_for_digest_prompt = """ - -**OUTPUT FORMAT** -- All headers, titles, and summaries must be in {target_language}. -- Return ONLY the digest content in the markdown format -- Use the exact LINK_X placeholders. Do not modify, replace, add, or remove links. Keep the format **[Title](LINK_X)**. -- Do NOT add any explanatory notes, disclaimers, or meta-commentary -- Do NOT provide usage instructions or suggestions -""" - -default_filter_prompt = """ -You are an advanced RSS content curator. Analyze the article following these protocols: - -1. **Cross-article Deduplication**: - - Identify duplicate content using semantic similarity - - For duplicate sets: - • Keep the most comprehensive version - -2. **Ad Exclusion**: - • Discard if any detected: - - Promotional language patterns - - Affiliate links - - Brand mentions >5% of content - - "Sponsored" disclosure - -3. 
**Clickbait Detection** - Discard if headline: - - Uses sensational punctuation (e.g., "SHOCKING!", "You won't BELIEVE...") - - Poses unanswered questions ("What happened next?") - - Employs urgency/scarcity tactics ("Act NOW!") -""" - -output_format_for_filter_prompt = """ - -**Output Requirements** -• Only return "Passed" or "Blocked" based on the above checks. -• ABSOLUTELY NO: - - Explanations - - Metadata - - Discarded IDs - - Additional text -""" +default_title_translate_prompt = DEFAULT_TITLE_TRANSLATE_PROMPT +default_content_translate_prompt = DEFAULT_CONTENT_TRANSLATE_PROMPT +default_summary_prompt = DEFAULT_SUMMARY_PROMPT +default_filter_prompt = DEFAULT_FILTER_PROMPT +output_format_for_filter_prompt = OUTPUT_FORMAT_FOR_FILTER_PROMPT diff --git a/core/actions.py b/core/actions.py index f12e32c0..384bd0ab 100644 --- a/core/actions.py +++ b/core/actions.py @@ -1,22 +1,24 @@ import logging -from datetime import datetime from django.utils import timezone -from ast import literal_eval from django.contrib import admin, messages from django.shortcuts import render, redirect -from django.http import HttpResponse, HttpResponseRedirect +from django.http import HttpResponse from django.db import transaction -from django.urls import reverse from django.utils.translation import gettext_lazy as _ from django.conf import settings -from lxml import etree from utils.modelAdmin_utils import get_all_agent_choices from core.admin import core_admin_site from core.models import Filter, Tag, OpenAIAgent from core.tasks.task_manager import task_manager from .management.commands.feed_updater import update_multiple_feeds from core.cache import cache_tag +from core.services.admin import force_update_feeds, force_update_tags +from core.services.admin.batch import ( + apply_batch_updates, + build_batch_modify_context, +) +from core.services.opml import build_opml_response logger = logging.getLogger(__name__) @@ -58,84 +60,13 @@ def clean_filter_results(modeladmin, request, queryset): def _generate_opml_feed(title_prefix, queryset, get_feed_url_func, filename_prefix): - """ - 生成OPML文件的通用函数 - - Args: - title_prefix (str): OPML标题前缀 - queryset (QuerySet): 要导出的数据集合 - get_feed_url_func (function): 获取feed URL的函数 - filename_prefix (str): 导出文件名前缀 - - Returns: - HttpResponse: 包含OPML文件的响应或错误响应 - """ try: - # 创建根元素 并设置版本 - root = etree.Element("opml", version="2.0") - - # 创建头部 - head = etree.SubElement(root, "head") - etree.SubElement(head, "title").text = f"{title_prefix} | RSSBox" - etree.SubElement(head, "dateCreated").text = datetime.now().strftime( - "%a, %d %b %Y %H:%M:%S %z" - ) - etree.SubElement(head, "ownerName").text = "RSSBox" - - # 创建主体 - body = etree.SubElement(root, "body") - - # 按分类组织订阅源 - categories = {} - for feed in queryset: - feed_tags = list(feed.tags.all()) or [ - None - ] # 如果没有tag,用None表示默认分类 - - for tag in feed_tags: - tag_name = tag.name if tag else "uncategorized" - - # 获取或创建分类大纲 - if tag_name not in categories: - tag_outline = etree.SubElement( - body, "outline", text=tag_name, title=tag_name - ) - categories[tag_name] = tag_outline - else: - tag_outline = categories[tag_name] - - # 获取feed URL - feed_url = get_feed_url_func(feed) - - # 确保所有字段都是字符串,防止None值导致lxml错误 - feed_name = feed.name or "Untitled Feed" - feed_url = feed_url or "" - - # 添加feed条目 - etree.SubElement( - tag_outline, - "outline", - { - "title": feed_name, - "text": feed_name, - "type": "rss", - "xmlUrl": feed_url, - "htmlUrl": feed_url, - }, - ) - - # 生成XML内容 - xml_content = etree.tostring( - root, encoding="utf-8", 
xml_declaration=True, pretty_print=True - ) - - # 创建HTTP响应 - response = HttpResponse(xml_content, content_type="application/xml") - response["Content-Disposition"] = ( - f'attachment; filename="{filename_prefix}_feeds_from_rssbox.opml"' + return build_opml_response( + title_prefix=title_prefix, + queryset=queryset, + get_feed_url_func=get_feed_url_func, + filename_prefix=filename_prefix, ) - return response - except Exception as e: logger.error("OPML export error: %s", str(e), exc_info=True) return HttpResponse("An error occurred during OPML export", status=500) @@ -166,160 +97,41 @@ def export_translated_feed_as_opml(modeladmin, request, queryset): @admin.display(description=_("Force update")) def feed_force_update(modeladmin, request, queryset): logger.info("Call feed_force_update: %s", queryset) - - with transaction.atomic(): - for instance in queryset: - instance.fetch_status = None - instance.translation_status = None - instance.save() - - feeds = queryset - task_manager.submit_task("Force Update Feeds", update_multiple_feeds, feeds) + force_update_feeds( + queryset, + task_manager=task_manager, + update_multiple_feeds_func=update_multiple_feeds, + ) @admin.display(description=_("Recombine related feeds.")) def tag_force_update(modeladmin, request, queryset): logger.info("Call tag_force_update: %s", queryset) - - with transaction.atomic(): - for instance in queryset: - task_manager.submit_task( - "Force Update Tags", cache_tag, instance.slug, "t", "xml" - ) - task_manager.submit_task( - "Force Update Tags", cache_tag, instance.slug, "t", "json" - ) - instance.last_updated = timezone.now() - instance.save() + force_update_tags( + queryset, + task_manager=task_manager, + cache_tag_func=cache_tag, + now_func=timezone.now, + ) @admin.display(description=_("Batch modification")) def feed_batch_modify(modeladmin, request, queryset): if "apply" in request.POST: logger.info("Apply feed_batch_modify") - post_data = request.POST - fields = { - "update_frequency": "update_frequency_value", - "max_posts": "max_posts_value", - "translator": "translator_value", - "target_language": "target_language_value", - "translation_display": "translation_display_value", - "summarizer": "summarizer_value", - "summary_detail": "summary_detail_value", - "additional_prompt": "additional_prompt_value", - "fetch_article": "fetch_article", - "tags": "tags_value", - "translate_title": "translate_title", - "translate_content": "translate_content", - "summary": "summary", - "filter": "filter_value", - } - field_types = { - "update_frequency": int, - "max_posts": int, - "target_language": str, - "translation_display": int, - "summary_detail": float, - "additional_prompt": str, - "fetch_article": literal_eval, - "translate_title": literal_eval, - "translate_content": literal_eval, - "summary": literal_eval, - } - translate_title = request.POST.get("translate_title", "Keep") - translate_content = request.POST.get("translate_content", "Keep") - summary = request.POST.get("summary", "Keep") - - match translate_title: - case "Keep": - pass - case "True": - queryset.update(translate_title=True) - case "False": - queryset.update(translate_title=False) - - match translate_content: - case "Keep": - pass - case "True": - queryset.update(translate_content=True) - case "False": - queryset.update(translate_content=False) - - match summary: - case "Keep": - pass - case "True": - queryset.update(summary=True) - case "False": - queryset.update(summary=False) - - update_fields = {} - for field, value_field in fields.items(): - value = 
post_data.get(value_field) - if post_data.get(field, "Keep") != "Keep" and value: - match field: - case "translator": - content_type_id, object_id = map(int, value.split(":")) - queryset.update(translator_content_type_id=content_type_id) - queryset.update(translator_object_id=object_id) - case "summarizer": - # 直接使用 ForeignKey ID 进行更新 - queryset.update(summarizer_id=int(value)) - case "tags": - tag_values = post_data.getlist( - "tags_value" - ) # 获取所有选中的 tag IDs(可能是多选) - if tag_values: - tag_ids = [int(id) for id in tag_values] # 转换成整数列表 - for feed in queryset: - feed.tags.set(tag_ids) # 批量更新每个 Feed 的 tags - case "filter": - filter_values = post_data.getlist("filter_value") - if filter_values: - filter_ids = [int(id) for id in filter_values] - for obj in queryset: - obj.filters.set(filter_ids) - case _: - update_fields[field] = field_types.get(field, str)(value) - - if update_fields: - queryset.update(**update_fields) + apply_batch_updates(queryset, request.POST) return redirect(request.get_full_path()) - translator_choices = get_all_agent_choices() - # 为 summarizer 提供直接的选择项,格式为 (id, name) - summary_engine_choices = [ - (str(agent.id), agent.name) for agent in OpenAIAgent.objects.filter(valid=True) - ] - filter_choices = [(f"{filter.id}", filter.name) for filter in Filter.objects.all()] - tags_choices = [(f"{tag.id}", tag.name) for tag in Tag.objects.all()] return render( request, "admin/feed_batch_modify.html", - context={ - **core_admin_site.each_context(request), - "items": queryset, - "translator_choices": translator_choices, - "target_language_choices": settings.TRANSLATION_LANGUAGES, - "summary_engine_choices": summary_engine_choices, - "filter_choices": filter_choices, - "tags_choices": tags_choices, - "update_frequency_choices": [ - (5, "5 min"), - (15, "15 min"), - (30, "30 min"), - (60, "hourly"), - (1440, "daily"), - (10080, "weekly"), - ], - }, + context=build_batch_modify_context( + queryset, + get_all_agent_choices_func=get_all_agent_choices, + openai_agent_model=OpenAIAgent, + filter_model=Filter, + tag_model=Tag, + settings_module=settings, + admin_context=core_admin_site.each_context(request), + ), ) - - -# @admin.display(description=_("Create Digest")) -def create_digest(self, request, queryset): - selected_ids = queryset.values_list("id", flat=True) - ids_string = ",".join(str(id) for id in selected_ids) - url = reverse("admin:core_digest_add") - return HttpResponseRedirect(f"{url}?feed_ids={ids_string}") diff --git a/core/admin/__init__.py b/core/admin/__init__.py index 62802de5..e7c3af27 100644 --- a/core/admin/__init__.py +++ b/core/admin/__init__.py @@ -4,4 +4,3 @@ from core.admin.feed_admin import * from core.admin.filter_admin import * from core.admin.tag_admin import * -from core.admin.digest_admin import * diff --git a/core/admin/admin_site.py b/core/admin/admin_site.py index c418514c..bdcf2d37 100644 --- a/core/admin/admin_site.py +++ b/core/admin/admin_site.py @@ -11,7 +11,7 @@ ) from django.shortcuts import redirect, render -from core.models import Feed, Filter, Tag, Digest +from core.models import Feed, Filter, Tag class CoreAdminSite(AdminSite): @@ -64,20 +64,6 @@ def get_app_list(self, request, app_label=None): "add_url": "/core/tag/add/", "view_only": False, }, - { - "model": Digest, - "name": "Digests", - "object_name": "Digest", - "perms": { - "add": True, - "change": True, - "delete": True, - "view": True, - }, - "admin_url": "/core/digest/", - "add_url": "/core/digest/add/", - "view_only": False, - }, ], }, { diff --git a/core/admin/digest_admin.py 
b/core/admin/digest_admin.py deleted file mode 100644 index 538aacf4..00000000 --- a/core/admin/digest_admin.py +++ /dev/null @@ -1,362 +0,0 @@ -from django.contrib import admin -from django.utils.html import format_html, mark_safe -from django.utils.translation import gettext_lazy as _ -from django.urls import reverse -from django import forms -from django.contrib import messages -from core.models.digest import Digest -from core.models.agent import OpenAIAgent -from core.admin.admin_site import core_admin_site -from utils.modelAdmin_utils import status_icon - - -class PublishDaysWidget(forms.MultipleChoiceField): - """Custom widget for selecting publish days using checkboxes.""" - - def __init__(self, *args, **kwargs): - choices = [ - ("monday", _("Monday")), - ("tuesday", _("Tuesday")), - ("wednesday", _("Wednesday")), - ("thursday", _("Thursday")), - ("friday", _("Friday")), - ("saturday", _("Saturday")), - ("sunday", _("Sunday")), - ] - kwargs["choices"] = choices - kwargs["widget"] = forms.CheckboxSelectMultiple - kwargs["required"] = False - super().__init__(*args, **kwargs) - - def prepare_value(self, value): - """Convert JSON list to list of selected values.""" - if value is None or value == "": - return [] - if isinstance(value, list): - return value - if isinstance(value, str): - # Handle legacy comma-separated string format - return [day.strip().lower() for day in value.split(",") if day.strip()] - return value - - def clean(self, value): - """Convert list of selected values back to JSON list.""" - if not value: - return [] - # Return as sorted list - return sorted(value) - - def has_changed(self, initial, data): - """Override has_changed to handle list comparison properly.""" - if initial is None: - initial = [] - elif isinstance(initial, str): - # Handle legacy comma-separated string format - initial = self.prepare_value(initial) - - if data is None: - data = [] - - # Ensure both are lists for comparison - if not isinstance(initial, list): - initial = list(initial) if initial else [] - if not isinstance(data, list): - data = list(data) if data else [] - - # Convert to sets for comparison (order doesn't matter) - initial_set = set(str(x) for x in initial) - data_set = set(str(x) for x in data) - - return initial_set != data_set - - -class DigestAdminForm(forms.ModelForm): - """Custom form for Digest admin to restrict summarizer choices.""" - - publish_days = PublishDaysWidget( - label=_("Publish Days"), - help_text=_("Select which days of the week this digest should be published"), - ) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # Limit summarizer choices to only valid OpenAI agents - if "summarizer" in self.fields: - self.fields["summarizer"].queryset = OpenAIAgent.objects.filter(valid=True) - self.fields["summarizer"].empty_label = _("Select a valid OpenAI agent...") - - class Meta: - model = Digest - fields = "__all__" - - -@admin.register(Digest, site=core_admin_site) -class DigestAdmin(admin.ModelAdmin): - """ - Admin interface for Digest model. - - Provides comprehensive management of AI digest configurations including - tag selection, AI agent assignment, and prompt customization. 
- """ - - change_form_template = "admin/change_form_with_tabs.html" - form = DigestAdminForm - autocomplete_fields = ["tags"] - - list_display = [ - "name", - "show_url", - "generation_status", - "publish_days_display", - "show_tags", - "last_generated", - ] - - list_filter = [ - "is_active", - "status", - "days_range", - "created_at", - "last_generated", - ] - - search_fields = [ - "name", - "slug", - "description", - "tags__name", - ] - - filter_horizontal = [ - "tags", - ] - - fieldsets = ( - ( - _("Basic Information"), - { - "fields": ( - "name", - "slug", - "description", - "is_active", - "show_log", - ) - }, - ), - ( - _("Content Configuration"), - { - "fields": ( - "tags", - "target_language", - "days_range", - ) - }, - ), - ( - _("Publishing Schedule"), - { - "fields": ("publish_days",), - }, - ), - ( - _("Agent Configuration"), - { - "fields": ( - "summarizer", - "prompt", - ), - }, - ), - ( - _("Status"), - { - "fields": ( - "status", - "last_generated", - "total_tokens", - ), - }, - ), - ) - - readonly_fields = [ - "status", - "last_generated", - "created_at", - "updated_at", - "total_tokens", - "show_log", - ] - - actions = ["generate_digest_action"] - - @admin.display(description=_("Status")) - def generation_status(self, obj): - """Display generation status with visual indicator.""" - if not obj.is_active: - return "⏸️" - return status_icon(obj.status) - - generation_status.admin_order_field = "status" - - @admin.display(description=_("Log")) - def show_log(self, obj): - return format_html( - """ -
-            <details>
-                <summary>show</summary>
-                {0}
-            </details>
-
-            """,
-            mark_safe(obj.log),
-        )
-
-    @admin.display(description=_("URL"))
-    def show_url(self, obj):
-        """Display URL with link."""
-        return format_html(
-            '<a href="/core/digest/rss/{}">rss</a> | '
-            '<a href="/core/digest/rss/{}?format=json">json</a>',
-            obj.slug,
-            obj.slug,
-        )
-
-    @admin.display(description=_("Tags"))
-    def tag_list(self, obj):
-        """Display associated tags as a comma-separated list."""
-        tags = obj.tags.all()[:3]  # Show first 3 tags
-        tag_names = [tag.name for tag in tags]
-        if obj.tags.count() > 3:
-            tag_names.append(f"... (+{obj.tags.count() - 3} more)")
-        return ", ".join(tag_names) if tag_names else "-"
-
-    @admin.display(description=_("tags"))
-    def show_tags(self, obj):
-        if not obj.tags.exists():  # obj.tags returns a QuerySet; bool(obj.tags) is always True, because a QuerySet object is always considered truthy
-            return "-"
-        tags_html = "<br>".join(
-            f"#{t.name}"
-            for t in obj.tags.all()
-        )
-        return format_html("{}", mark_safe(tags_html))
-
-    @admin.display(description=_("AI Agent"))
-    def summarizer_name(self, obj):
-        """Display summarizer agent name with link."""
-        if obj.summarizer:
-            url = reverse(
-                f"admin:core_{obj.summarizer._meta.model_name}_change",
-                args=[obj.summarizer.pk],
-            )
-            return format_html('<a href="{}">{}</a>', url, obj.summarizer.name)
-        return _("No agent assigned")
-
-    @admin.display(description=_("Publish Days"))
-    def publish_days_display(self, obj):
-        """Display publish days as abbreviated weekday names."""
-        days = obj.get_publish_days_list()
-        if not days:
-            return _("No days selected")
-
-        # Abbreviate day names
-        day_abbrevs = {
-            "Monday": "Mon",
-            "Tuesday": "Tue",
-            "Wednesday": "Wed",
-            "Thursday": "Thu",
-            "Friday": "Fri",
-            "Saturday": "Sat",
-            "Sunday": "Sun",
-        }
-
-        abbrev_days = [day_abbrevs.get(day, day) for day in days]
-        return ", ".join(abbrev_days)
-
-    @admin.display(description=_("Generate selected Digests"))
-    def generate_digest_action(self, request, queryset):
-        """Generate digests for selected items."""
-        from core.tasks.generate_digests import DigestGenerator
-        from core.tasks.task_manager import task_manager
-        import time
-
-        # Only process active digests
-        active_digests = queryset.filter(is_active=True)
-
-        if not active_digests:
-            self.message_user(
-                request,
-                _("No active digests selected. Only active digests can be generated."),
-                level=messages.WARNING,
-            )
-            return
-
-        success_count = 0
-        error_count = 0
-
-        for digest in active_digests:
-            try:
-                # Generate unique task name
-                task_name = f"digest_generation_{digest.id}_{int(time.time())}"
-                digest_generator = DigestGenerator(digest)
-                # Submit task to background execution
-                digest.status = None
-                digest.save()
-                future = task_manager.submit_task(task_name, digest_generator.generate)
-
-                success_count += 1
-
-            except Exception as e:
-                error_count += 1
-                self.message_user(
-                    request,
-                    _("Failed to generate digest '{}': {}").format(digest.name, str(e)),
-                    level=messages.ERROR,
-                )
-
-        if success_count > 0:
-            self.message_user(
-                request,
-                _("Successfully started generation for {} digest(s).").format(
-                    success_count
-                ),
-                level=messages.SUCCESS,
-            )
-
-        if error_count > 0:
-            self.message_user(
-                request,
-                _("Failed to generate {} digest(s).").format(error_count),
-                level=messages.ERROR,
-            )
-
-    def get_queryset(self, request):
-        """Optimize queryset with prefetch_related for better performance."""
-        return (
-            super()
-            .get_queryset(request)
-            .prefetch_related(
-                "tags",
-                "summarizer",
-            )
-        )
-
-    def save_model(self, request, obj, form, change):
-        """Custom save logic if needed."""
-        super().save_model(request, obj, form, change)
-
-        if not change:  # New object
-            # Generate digest immediately
-            from core.tasks.generate_digests import DigestGenerator
-
-            digest_generator = DigestGenerator(obj)
-            digest_generator.generate()
-            self.message_user(
-                request,
-                _("Digest '{}' created successfully. 
Generated immediately.").format( - obj.name - ), - level="success", - ) diff --git a/core/admin/feed_admin.py b/core/admin/feed_admin.py index 7b8a3d90..76baab32 100644 --- a/core/admin/feed_admin.py +++ b/core/admin/feed_admin.py @@ -17,8 +17,8 @@ ) from utils.modelAdmin_utils import status_icon from core.tasks.task_manager import task_manager +from core.services.feed import run_feed_update from core.views import import_opml -from core.management.commands.feed_updater import update_single_feed from core.admin import core_admin_site logger = logging.getLogger(__name__) @@ -128,15 +128,6 @@ class FeedAdmin(admin.ModelAdmin): ] list_per_page = 20 - def get_queryset(self, request): - """ - 过滤掉系统生成的 Digest Feed,只显示用户添加的普通 Feed。 - Digest Feed 的 feed_url 包含 '/core/digest/rss/' 路径。 - """ - queryset = super().get_queryset(request) - # 过滤掉 Digest Feed - return queryset.exclude(author="RSSBox Digest") - def get_urls(self): urls = super().get_urls() custom_urls = [ @@ -202,7 +193,7 @@ def save_model(self, request, obj, form, change): def _submit_feed_update_task(self, feed): task_id = task_manager.submit_task( - f"update_feed_{feed.slug}", update_single_feed, feed + f"update_feed_{feed.slug}", run_feed_update, feed ) logger.info(f"Submitted feed update task after commit: {task_id}") diff --git a/core/cache.py b/core/cache.py index 6251943d..22183095 100644 --- a/core/cache.py +++ b/core/cache.py @@ -1,14 +1,15 @@ import logging from django.core.cache import cache -from django.utils import timezone -from django.conf import settings -from lxml import etree -import mistune -from feedgen.feed import FeedGenerator -from core.models import Feed, Entry, Tag +from core.models import Feed from utils.text_handler import set_translation_display -from .models import Feed +from core.services.feed.rendering import ( + add_atom_entry as service_add_atom_entry, + build_atom_feed as service_build_atom_feed, + finalize_atom_feed as service_finalize_atom_feed, + render_feed_content, + render_tag_content, +) logger = logging.getLogger(__name__) @@ -61,231 +62,35 @@ def cache_tag(tag: str, feed_type="t", format="xml"): return atom_feed -def cache_digest(slug: str, format: str = "xml"): - logger.debug(f"Start cache_digest for {slug} with format {format}") - cache_key = f"cache_digest_{slug}_{format}" +def _build_atom_feed(*args, **kwargs): + return service_build_atom_feed(*args, **kwargs) - from .models import Digest - digest = Digest.objects.get(slug=slug) - digest_feed = digest.get_digest_feed() - - atom_feed = generate_atom_feed(digest_feed, "t") - if not atom_feed: - return None - - ttl = digest_feed.update_frequency or 86400 - cache.set(cache_key, atom_feed, ttl) - logger.debug(f"Cached successfully with key {cache_key}") - return atom_feed - - -def _build_atom_feed( - feed_id, title, author, link, subtitle, language, updated, pubdate=None -): - """构建Atom Feed的基本结构""" - updated_time = updated or pubdate or timezone.now() - # 确保必要字段有值:updated, title, id - fg = FeedGenerator() - fg.id(str(feed_id)) - # 如果没有title,使用时间作为备用,转换到本地时区 - if not title: - local_time = timezone.localtime(updated_time) - title = local_time.strftime("%Y-%m-%d %H:%M:%S") - fg.title(title) - fg.author({"name": author or "Unknown"}) - fg.link(href=link, rel="alternate") - fg.subtitle(subtitle or "") - fg.language(language or "") - fg.updated(updated_time) - fg.pubDate(pubdate or updated_time) - - return fg - - -def _add_atom_entry(fg, entry, feed_type, translation_display=None): - """向Atom Feed添加条目""" - pubdate = entry.pubdate or timezone.now() - 
updated = entry.updated or pubdate
-    summary = entry.original_summary
-
-    # Process the title and content
-    title = entry.original_title
-    content = entry.original_content or ""
-
-    if feed_type == "t":
-        if entry.translated_title:
-            title = set_translation_display(
-                entry.original_title,
-                entry.translated_title,
-                translation_display or entry.feed.translation_display,
-            )
-
-        if entry.translated_content:
-            content = set_translation_display(
-                entry.original_content,
-                entry.translated_content,
-                translation_display or entry.feed.translation_display,
-                "<br>---------------<br>",
-            )
-
-        if entry.ai_summary:
-            html_summary = (
-                f"{mistune.html(entry.ai_summary)}<br>---------------<br>
" - ) - content = html_summary + content - - summary = content or "" # 确保summary始终是全文内容 - - # 创建条目 - fe = fg.add_entry() - # 如果没有title,使用时间作为备用,转换到本地时区 - if not title: - local_time = timezone.localtime(updated) - title = local_time.strftime("%Y-%m-%d %H:%M:%S") - fe.title(title) - fe.link(href=entry.link or "", rel="alternate") - fe.author({"name": entry.author or "Unknown"}) - fe.id(entry.guid or entry.link) - fe.content(content, type="html") - fe.summary(summary, type="html") - fe.updated(updated) - fe.pubDate(pubdate) - - # 处理附件 - if entry.enclosures_xml: - try: - xml = etree.fromstring(entry.enclosures_xml) - for enclosure in xml.iter("enclosure"): - fe.enclosure( - url=enclosure.get("href"), - type=enclosure.get("type"), - length=enclosure.get("length"), - ) - except Exception as e: - logger.error(f"Error parsing enclosures for entry {entry.id}: {str(e)}") - - return fe +def _add_atom_entry(*args, **kwargs): + return service_add_atom_entry(*args, **kwargs) def generate_atom_feed(feed: Feed, feed_type="t"): - """生成单个Feed的Atom格式""" - if not feed: - logger.error("generate_atom_feed: feed is None") - return None - - try: - # 构建基础Feed - fg = _build_atom_feed( - feed_id=feed.id, - title=feed.name, - author=feed.author, - link=feed.link or feed.feed_url, - subtitle=feed.subtitle, - language=feed.language, - updated=feed.updated, - pubdate=feed.pubdate, - ) - - # 添加所有条目 - entries = feed.filtered_entries if feed_type == "t" else feed.entries.all() - if entries is None: - return [] - - for entry in reversed(entries.order_by("-pubdate")[: feed.max_posts]): - _add_atom_entry(fg, entry, feed_type, feed.translation_display) - - # 生成最终XML - return _finalize_atom_feed(fg) - - except Exception as e: - logging.exception(f"generate_atom_feed error {feed.feed_url}: {str(e)}") - return None + return render_feed_content( + feed, + feed_type=feed_type, + build_feed_func=_build_atom_feed, + add_entry_func=_add_atom_entry, + finalize_func=_finalize_atom_feed, + render_logger=logger, + ) def merge_feeds_into_one_atom(tag: str, feeds: list[Feed], feed_type="t"): - """合并多个Feeds生成单个Atom Feed""" - type_str = "Original" if feed_type == "o" else "Translated" - feed_id = f"urn:merged-tag-{tag}-{type_str}-feeds" - feed_title = f"{type_str} #{tag} tag Feeds" - - # 构建基础Feed - fg = _build_atom_feed( - feed_id=feed_id, - title=feed_title, - author=feed_title, - link=settings.SITE_URL, - subtitle=f"Combined {type_str} {tag} Feeds", - language="en", - updated=timezone.now(), + return render_tag_content( + tag, + feeds, + feed_type=feed_type, + build_feed_func=_build_atom_feed, + add_entry_func=_add_atom_entry, + finalize_func=_finalize_atom_feed, ) - # 收集所有条目 - all_entries = [] - entry_ids = [] # 用于存储所有条目的ID - for feed in feeds: - # 添加Feed作为分类 - fg.category(term=str(feed.id), label=feed.name, scheme=feed.feed_url) - # 收集当前feed的条目 - entries = feed.entries.all() # tag的条目不走feed的filter,因为tag有自己的filter - if not entries: - continue - - for entry in reversed(entries.order_by("-pubdate")[: feed.max_posts]): - sort_time = entry.pubdate or entry.updated or timezone.now() - all_entries.append((sort_time, entry)) - entry_ids.append(entry.id) - - # 按时间降序排序(最新的在最前面) - all_entries.sort(key=lambda x: x[0], reverse=True) - - # 获取tag filter对象 - tag_filters = Tag.objects.get(slug=tag).filters.all() - - # 开始过滤 - 使用批量查询优化性能 - if not tag_filters: - # 没有过滤器,直接使用所有条目 - filtered_entries = [entry for (_, entry) in all_entries] - else: - # 批量获取所有条目ID的QuerySet - base_qs = Entry.objects.filter(id__in=entry_ids) - - # 应用所有过滤器(链式应用) - filtered_qs = 
base_qs - for filter_obj in tag_filters: - filtered_qs = filter_obj.apply_filter(filtered_qs) - - # 获取通过过滤的条目ID集合 - passed_ids = set(filtered_qs.values_list("id", flat=True)) - - # 构建过滤后的条目列表(保持原排序) - filtered_entries = [ - entry for (_, entry) in all_entries if entry.id in passed_ids - ] - - # 更新Feed时间为最新条目时间 - if filtered_entries: - # 第一个条目是最新的(因为已按时间降序排序) - latest_time = all_entries[0][0] - fg.updated(latest_time) - - # 添加所有条目(最多100条) - for entry in filtered_entries[:100]: - _add_atom_entry(fg, entry, feed_type) - - # 生成最终XML - return _finalize_atom_feed(fg) - def _finalize_atom_feed(fg): - """生成最终的Atom XML字符串""" - atom_string = fg.atom_str(pretty=False) - root = etree.fromstring(atom_string) - tree = etree.ElementTree(root) - pi = etree.ProcessingInstruction( - "xml-stylesheet", 'type="text/xsl" href="/static/rss.xsl"' - ) - root.addprevious(pi) - return etree.tostring( - tree, pretty_print=True, xml_declaration=True, encoding="utf-8" - ).decode() + return service_finalize_atom_feed(fg) diff --git a/core/management/commands/digest_generator.py b/core/management/commands/digest_generator.py deleted file mode 100644 index e0b67e4c..00000000 --- a/core/management/commands/digest_generator.py +++ /dev/null @@ -1,122 +0,0 @@ -from django.core.management.base import BaseCommand -from core.models.digest import Digest -from core.tasks.generate_digests import DigestGenerator -import logging -import sys - -logger = logging.getLogger(__name__) - - -class Command(BaseCommand): - """ - Django management command to generate AI digest content. - - Usage: - python manage.py generate_digests --publish-days monday # Generate digests for Monday - python manage.py generate_digests --publish-days tuesday # Generate digests for Tuesday - """ - - help = "Generate AI digest content for specified publish days" - - def add_arguments(self, parser): - parser.add_argument( - "--publish-days", - type=str, - required=True, - help="Specify publish days to generate digests for (e.g., monday, tuesday)", - ) - - def handle(self, *args, **options): - publish_days = options.get("publish_days") - - # Validate publish_days parameter - valid_days = [ - "monday", - "tuesday", - "wednesday", - "thursday", - "friday", - "saturday", - "sunday", - ] - if publish_days.lower() not in valid_days: - self.stderr.write( - self.style.ERROR( - f"Error: Invalid publish_days. 
Valid options: {', '.join(valid_days)}" - ) - ) - sys.exit(1) - - # Get digests to process based on publish_days - # Use JSON_EXTRACT for SQLite compatibility - digests = Digest.objects.filter(is_active=True).extra( - where=["JSON_EXTRACT(publish_days, '$') LIKE ?"], - params=[f"%{publish_days.lower()}%"], - ) - - if not digests: - self.stdout.write( - self.style.WARNING( - f"No active digests found for publish_days: {publish_days}" - ) - ) - return - - self.stdout.write( - f"Found {len(digests)} digest(s) to process for {publish_days}" - ) - - results = [] - for digest in digests: - # # Check if generation is needed - # if not digest.should_generate_today(): - # self.stdout.write( - # self.style.WARNING(f'Skipping {digest.name} - already generated today or inactive') - # ) - # continue - - # Generate digest - self.stdout.write(f"Generating digest: {digest.name}") - - try: - generator = DigestGenerator(digest) - result = generator.generate(force=False) - - if result["success"]: - self.stdout.write( - self.style.SUCCESS( - f'✓ Successfully generated "{digest.name}" ' - f"({result.get('articles_processed', 0)} articles processed)" - ) - ) - results.append(("success", digest.name)) - else: - self.stdout.write( - self.style.ERROR( - f'✗ Failed to generate "{digest.name}": {result["error"]}' - ) - ) - results.append(("failed", digest.name, result["error"])) - - except Exception as e: - error_msg = str(e) - self.stdout.write( - self.style.ERROR(f'✗ Error generating "{digest.name}": {error_msg}') - ) - results.append(("error", digest.name, error_msg)) - logger.error(f"Digest generation error for {digest.name}: {e}") - - # Summary - successful = len([r for r in results if r[0] == "success"]) - failed = len(results) - successful - - self.stdout.write("\n" + "=" * 50) - self.stdout.write(f"Generation Summary:") - self.stdout.write(f" Successful: {successful}") - self.stdout.write(f" Failed: {failed}") - - if failed > 0: - self.stdout.write("\nFailed digests:") - for result in results: - if result[0] != "success": - self.stdout.write(f" - {result[1]}: {result[2]}") diff --git a/core/management/commands/feed_updater.py b/core/management/commands/feed_updater.py index edc059ff..478974ad 100644 --- a/core/management/commands/feed_updater.py +++ b/core/management/commands/feed_updater.py @@ -1,6 +1,5 @@ import logging import sys -from itertools import chain import time import os from concurrent.futures import wait @@ -11,6 +10,7 @@ from core.tasks.translate_feeds import handle_feeds_translation from core.tasks.summarize_feeds import handle_feeds_summary from django.db import close_old_connections +from core.services.feed import run_feed_update, refresh_updated_content from core.tasks.task_manager import task_manager from core.cache import cache_rss, cache_tag @@ -82,36 +82,15 @@ def handle(self, *args, **options): def update_single_feed(feed: Feed): - """在后台线程中执行feed更新""" - try: - # 确保在新线程中创建新的数据库连接 - close_old_connections() - - try: - logger.info(f"Starting feed update: {feed.name}") - - handle_single_feed_fetch(feed) - # task_manager.update_progress(feed_id, 50) - # 执行更新操作 - if feed.translate_title: - handle_feeds_translation([feed], target_field="title") - if feed.translate_content: - handle_feeds_translation([feed], target_field="content") - if feed.summary: - handle_feeds_summary([feed]) - - logger.info(f"Completed feed update: {feed.name}") - - return True - except Feed.DoesNotExist: - logger.error(f"Feed not found: ID {feed.name}") - return False - except Exception as e: - logger.exception(f"Error 
updating feed ID {feed.name}: {str(e)}") - return False - finally: - # 确保关闭数据库连接 - close_old_connections() + """Backward-compatible wrapper around the feed pipeline.""" + return run_feed_update( + feed, + fetch_func=handle_single_feed_fetch, + translate_func=handle_feeds_translation, + summarize_func=handle_feeds_summary, + close_connections=close_old_connections, + pipeline_logger=logger, + ) def update_multiple_feeds(feeds: list): @@ -143,33 +122,14 @@ def update_multiple_feeds(feeds: list): except Exception as e: logger.warning(f"A feed update task resulted in an exception: {e}") - # 所有任务完成后执行缓存操作 - # Note: 'feeds' is a list materialized from an iterator, so it's safe to iterate again. - for feed in feeds: - try: - cache_rss(feed.slug, feed_type="o", format="xml") - cache_rss(feed.slug, feed_type="o", format="json") - cache_rss(feed.slug, feed_type="t", format="xml") - cache_rss(feed.slug, feed_type="t", format="json") - except Exception as e: - logger.error( - f"{time.time()}: Failed to cache RSS for {feed.slug}: {str(e)}" - ) - - # 获取所有 feeds 关联的 tags(去重) - tag_ids = set( - chain.from_iterable( - feed.tags.values_list("id", flat=True) for feed in feeds - ) + refresh_updated_content( + feeds, + tag_model=Tag, + cache_rss_func=cache_rss, + cache_tag_func=cache_tag, + logger=logger, + time_func=time.time, ) - tags = Tag.objects.filter(id__in=tag_ids) - for tag in tags: - try: - cache_tag(tag.slug, feed_type="o", format="xml") - cache_tag(tag.slug, feed_type="t", format="xml") - cache_tag(tag.slug, feed_type="t", format="json") - except Exception as e: - logger.error(f"Failed to cache tag {tag.slug}: {str(e)}") except Exception as e: logger.exception("Command update_multiple_feeds failed: %s", str(e)) diff --git a/core/migrations/0037_remove_digest.py b/core/migrations/0037_remove_digest.py new file mode 100644 index 00000000..63ea5a8a --- /dev/null +++ b/core/migrations/0037_remove_digest.py @@ -0,0 +1,22 @@ +from django.db import migrations + + +def remove_digest_generated_feeds(apps, schema_editor): + Feed = apps.get_model("core", "Feed") + Feed.objects.filter(author="RSSBox Digest").delete() + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0036_openaiagent_merge_system_prompt"), + ] + + operations = [ + migrations.RunPython( + remove_digest_generated_feeds, + migrations.RunPython.noop, + ), + migrations.DeleteModel( + name="Digest", + ), + ] diff --git a/core/models/__init__.py b/core/models/__init__.py index a108240d..47fbce56 100644 --- a/core/models/__init__.py +++ b/core/models/__init__.py @@ -9,4 +9,3 @@ TestAgent, ) from core.models.tag import Tag -from core.models.digest import Digest diff --git a/core/models/agent.py b/core/models/agent.py index 4e14346d..3e875dab 100644 --- a/core/models/agent.py +++ b/core/models/agent.py @@ -13,6 +13,34 @@ import json from urllib import request, parse from core.tasks.task_manager import task_manager +from core.services.agent.openai import ( + openai_advanced_default, + openai_completions, + openai_detect_model_limit, + openai_filter, + openai_init, + openai_summarize, + openai_translate, + openai_validate, + openai_wait_for_rate_limit, +) +from core.services.agent.deepl import ( + deepl_init, + deepl_translate, + deepl_validate, +) +from core.services.agent.libretranslate import ( + libretranslate_api_languages, + libretranslate_api_request, + libretranslate_api_translate, + libretranslate_translate, + libretranslate_validate, +) +from core.services.agent.test_agent import ( + testagent_filter, + 
testagent_summarize, + testagent_translate, +) logger = logging.getLogger(__name__) @@ -58,11 +86,6 @@ class Meta: def __str__(self): return self.name - -def openai_advanced_default(): - return {"temperature": 0.2,"reasoning_effort":"minimal"} - - class OpenAIAgent(Agent): # https://platform.openai.com/docs/api-reference/chat is_ai = models.BooleanField(default=True, editable=False) @@ -108,180 +131,41 @@ class Meta: verbose_name_plural = "OpenAI" def _init(self): - return OpenAI( - api_key=self.api_key, - base_url=self.base_url, - timeout=settings.OPENAI_API_TIMEOUT, - max_retries=settings.OPENAI_API_MAX_RETRIES, + return openai_init( + self, + openai_client_cls=OpenAI, + settings_module=settings, ) def validate(self) -> bool: - if self.api_key: - try: - client = self._init() - # 应用速率限制 - self._wait_for_rate_limit() - - system_prompt = "You must only reply with exactly one character: 1" - user_content = "1" - - # 根据 merge_system_prompt 的值决定消息格式 - if self.merge_system_prompt: - merged_content = f"{system_prompt}\n\n{user_content}" - messages = [ - {"role": "user", "content": merged_content} - ] - else: - messages = [ - { - "role": "system", - "content": system_prompt, - }, - {"role": "user", "content": user_content}, - ] - - res = client.with_options( - max_retries=settings.OPENAI_API_MAX_RETRIES - ).chat.completions.create( - extra_headers=self.EXTRA_HEADERS, - model=self.model, - messages=messages, - # max_tokens=50, - max_completion_tokens=50, - ) - # 有些第三方源在key或url错误的情况下,并不会抛出异常代码,而是返回html广告,因此添加该行。 - fr = res.choices[0].finish_reason - # 提交后台任务检测模型限制 - if self.max_tokens == 0: - task_manager.submit_task( - f"detect_model_limit_{self.model}_{self.id}", - self.detect_model_limit, - force=True, - ) - logger.info( - f"Submitted background task to detect model limit for {self.model}" - ) - self.log = "" - self.valid = True - return True - except Exception as e: - logger.error("OpenAIAgent validate ->%s", e) - self.log = f"{timezone.now()}: {str(e)}" - self.valid = False - return False - finally: - self.save(update_fields=["log", "valid"]) + return openai_validate( + self, + init_client=self._init, + wait_for_rate_limit=self._wait_for_rate_limit, + task_submit=task_manager.submit_task, + logger=logger, + settings_module=settings, + timezone_module=timezone, + save_func=self.save, + ) def detect_model_limit(self, force=False) -> int: - """通过二分搜索来高效检测模型实际限制""" - if not force and self.max_tokens != 0: - return self.max_tokens - - initial_model = self.model - initial_max_tokens = self.max_tokens - - # 二分搜索找到确切限制 - def binary_search_limit(low, high): - """使用二分搜索找到确切的token限制""" - if high - low <= 256: # 当范围足够小时,返回低值作为安全限制 - return low - - mid = (low + high) // 2 - - try: - # 应用速率限制 - self._wait_for_rate_limit() - # 使用最小的测试内容减少token消耗 - response = self._init().chat.completions.create( - extra_headers=self.EXTRA_HEADERS, - model=self.model, - messages=[ - { - "role": "system", - "content": "You must only reply with exactly one character: 1", - }, - {"role": "user", "content": "1"}, - ], - # max_tokens=mid, - max_completion_tokens=mid, - temperature=0, # 确保结果一致性 - stop=[",", "\n", " ", ".", "1"], - ) - if response.choices[0].finish_reason == "stop": - # 成功调用,尝试更高的限制 - return binary_search_limit(mid, high) - - except Exception as e: - error_str = str(e).lower() - if any( - keyword in error_str - for keyword in ["maximum", "limit", "tokens", "context", "length"] - ): - # 遇到限制错误,降低上限 - return binary_search_limit(low, mid) - else: - # 其他错误(如API错误),使用保守值 - logger.warning( - f"Detect model limit when 
non-limit error occurs: {e}" - ) - return low - - # 直接使用二分搜索 - final_limit = binary_search_limit(4096, 1000000) - self.max_tokens = final_limit - - if self.pk is None: - return final_limit - - updated = type(self).objects.filter( - pk=self.pk, - model=initial_model, - max_tokens=initial_max_tokens, - ).update(max_tokens=final_limit) - if updated: - return final_limit - - current_max_tokens = ( - type(self).objects.filter(pk=self.pk).values_list("max_tokens", flat=True).first() + return openai_detect_model_limit( + self, + force=force, + init_client=self._init, + wait_for_rate_limit=self._wait_for_rate_limit, + logger=logger, ) - if current_max_tokens is not None: - self.max_tokens = current_max_tokens - return current_max_tokens - - return final_limit def _wait_for_rate_limit(self): - """等待直到满足速率限制条件""" - if self.rate_limit_rpm <= 0: - return # 无速率限制 - - # 生成基于当前分钟的缓存键 - current_minute = datetime.datetime.now().strftime("%Y%m%d%H%M") - cache_key = f"openai_rate_limit_{self.id}_{current_minute}" - - # 获取当前计数或初始化为0 - request_count = cache.get(cache_key, 0) - - # 计算等待时间(如果超过限制) - if request_count >= self.rate_limit_rpm: - # 计算到下一分钟开始的时间 - now = datetime.datetime.now() - next_minute = now.replace(second=0, microsecond=0) + datetime.timedelta( - minutes=1 - ) - wait_seconds = (next_minute - now).total_seconds() - - # 添加一点缓冲确保时间窗口切换 - wait_seconds += 0.1 - logger.info(f"Rate limit reached. Waiting {wait_seconds:.2f} seconds...") - time.sleep(wait_seconds) - - # 重置计数(新分钟开始) - cache.delete(cache_key) - return - - # 增加计数并设置过期时间(确保在下一分钟开始时过期) - cache.set(cache_key, request_count + 1, timeout=60) + return openai_wait_for_rate_limit( + self, + cache_backend=cache, + datetime_module=datetime, + sleep_func=time.sleep, + logger=logger, + ) def completions( self, @@ -291,150 +175,22 @@ def completions( _is_chunk: bool = False, # 内部参数,用于标记是否为分块调用 **kwargs, ) -> dict: - client = self._init() - tokens = 0 - result_text = "" - log_updated = False - - try: - if user_prompt: - system_prompt += f"\n\n{user_prompt}" - - # 应用速率限制 - self._wait_for_rate_limit() - - if self.merge_system_prompt: - merged_content = f"{system_prompt}\n\n{text}" - messages = [ - {"role": "user", "content": merged_content} - ] - # 当合并 system prompt 时,system_prompt_tokens 为 0(因为已包含在 merged_content 中) - system_prompt_tokens = 0 - input_tokens = get_token_count(merged_content) - else: - messages = [ - { - "role": "system", - "content": system_prompt, - }, - { - "role": "user", - "content": text - } - ] - # 计算系统提示的token占用 - system_prompt_tokens = get_token_count(system_prompt) - input_tokens = get_token_count(system_prompt) + get_token_count(text) - - # 获取最大可用token数(保留buffer) - if self.max_tokens == 0: - task_manager.submit_task( - f"detect_model_limit_{self.model}_{self.id}", - self.detect_model_limit, - force=True, - ) - raise ValueError( - "max_tokens is not set, Please wait for the model limit detection to complete" - ) - - # 计算最大可用token数 - # 当 merge_system_prompt=True 时,system_prompt 已包含在 merged_content 中,所以只需要考虑 text 的 token - if self.merge_system_prompt: - # 合并模式下,需要从总限制中减去 system_prompt 的 token(因为它会占用输入空间) - system_prompt_token_cost = get_token_count(system_prompt) - max_usable_tokens = ( - self.max_tokens - system_prompt_token_cost - 100 - ) # 100 token buffer - else: - max_usable_tokens = ( - self.max_tokens - system_prompt_tokens - 100 - ) # 100 token buffer - - # 检查文本长度是否需要分块 - if get_token_count(text) > max_usable_tokens: - logger.info( - f"Text too large ({get_token_count(text)} tokens), chunking..." 
- ) - - # 使用自适应分块 - chunks = adaptive_chunking( - text, - target_chunks=max(1, int(len(text) / max_usable_tokens)), - min_chunk_size=500, - max_chunk_size=max_usable_tokens, - ) - - # 分块翻译 - translated_chunks = [] - for chunk in chunks: - result = self.completions( - text=chunk, - system_prompt=system_prompt, - user_prompt=user_prompt, - _is_chunk=True, # 标记为分块调用 - **kwargs, - ) - translated_chunks.append(result["text"]) - tokens += result["tokens"] - - result_text = " ".join(translated_chunks) - return {"text": result_text, "tokens": tokens} - - # 计算合理的输出token限制 - # input_tokens 已经在上面根据 merge_system_prompt 的情况正确计算了,直接使用 - # 输出token限制 = 模型总限制 - 输入token - 安全缓冲 - output_token_limit = int(max(4096, (self.max_tokens - input_tokens) * 0.8)) - - # 正常流程 - adv_params = self.advanced_params or {} - if not isinstance(adv_params, dict): - adv_params = {} - - call_kwargs = {**adv_params} - # 仅在未显式提供时设置安全的默认/限制 - if ( - "max_completion_tokens" not in call_kwargs - and "max_tokens" not in call_kwargs - ): - call_kwargs["max_completion_tokens"] = output_token_limit - - res = client.with_options( - max_retries=settings.OPENAI_API_MAX_RETRIES - ).chat.completions.create( - extra_headers=self.EXTRA_HEADERS, - model=self.model, - messages=messages, - **call_kwargs, - ) - if ( - res.choices - and res.choices[0].finish_reason == "stop" - and res.choices[0].message.content - ): - result_text = res.choices[0].message.content - logger.debug(f"[{self.name}]: {result_text[:50]}...") - else: - # 安全获取 finish_reason,避免在 choices 为空时抛出异常 - finish_reason = None - if res.choices: - try: - finish_reason = res.choices[0].finish_reason - except Exception: - finish_reason = None - logger.warning( - f"[{self.name}]: Failed to complete request:[{finish_reason or 'unknown'}]" - ) - - tokens = res.usage.total_tokens if getattr(res, "usage", None) else 0 - except Exception as e: - self.log = f"{timezone.now()}: {str(e)}" - log_updated = True - logger.error(f"{self.name}: {e}") - - if not _is_chunk and log_updated: - self.save(update_fields=["log"]) - - return {"text": result_text, "tokens": tokens} + return openai_completions( + self, + text, + system_prompt=system_prompt, + user_prompt=user_prompt, + _is_chunk=_is_chunk, + init_client=self._init, + wait_for_rate_limit=self._wait_for_rate_limit, + task_submit=task_manager.submit_task, + logger=logger, + settings_module=settings, + get_token_count_func=get_token_count, + adaptive_chunking_func=adaptive_chunking, + save_func=self.save, + **kwargs, + ) def translate( self, @@ -444,56 +200,38 @@ def translate( text_type: str = "title", **kwargs, ) -> dict: - logger.info(f">>>Start Translate [{target_language}]: {text[:50]}...") - system_prompt = ( - self.title_translate_prompt - if text_type == "title" - else self.content_translate_prompt - ).replace("{target_language}", target_language) - - return self.completions( - text, system_prompt=system_prompt, user_prompt=user_prompt, **kwargs + return openai_translate( + self, + text, + target_language, + user_prompt=user_prompt, + text_type=text_type, + completions_func=self.completions, + logger=logger, + **kwargs, ) def summarize(self, text: str, target_language: str, **kwargs) -> dict: - logger.info(f">>> Start Summarize [{target_language}]: {text[:50]}...") - system_prompt = self.summary_prompt.replace( - "{target_language}", target_language + return openai_summarize( + self, + text, + target_language, + completions_func=self.completions, + logger=logger, + **kwargs, ) - return self.completions(text, system_prompt=system_prompt, 
**kwargs) - - def digester( - self, - text: str, - system_prompt: str, - digest_name: str, - date: str, - **kwargs, - ) -> dict: - logger.info(f">>> Start Digesting [{digest_name}]-{date}") - # prompt = system_prompt + settings.output_format_for_digest_prompt - return self.completions(text, system_prompt=system_prompt, **kwargs) def filter(self, text: str, system_prompt: str, **kwargs) -> dict: - logger.info(f">>> Start Filter: {text[:50]}...") - passed = False - tokens = 0 - results = self.completions( + return openai_filter( + self, text, - system_prompt=system_prompt + settings.output_format_for_filter_prompt, + system_prompt, + completions_func=self.completions, + logger=logger, + settings_module=settings, **kwargs, ) - if results["text"] and "Passed" in results["text"]: - logger.info(">>> Filter Passed") - passed = True - tokens = results["tokens"] - else: - logger.info(">>> Filter Blocked") - passed = False - - return {"passed": passed, "tokens": tokens} - class DeepLAgent(Agent): # https://github.com/DeepLcom/deepl-python @@ -528,51 +266,30 @@ class Meta: verbose_name_plural = "DeepL" def _init(self): - return deepl.Translator( - self.api_key, server_url=self.server_url, proxy=self.proxy + return deepl_init( + self, + translator_cls=deepl.Translator, ) def validate(self) -> bool: - is_valid = False - try: - translator = self._init() - usage = translator.get_usage() - if usage.character.valid: - self.log = "" - is_valid = True - except Exception as e: - logger.error("DeepLTranslator validate ->%s", e) - self.log = f"{timezone.now()}: {str(e)}" - is_valid = False - finally: - self.valid = is_valid - self.save() - return is_valid + return deepl_validate( + self, + init_client=self._init, + logger=logger, + timezone_module=timezone, + save_func=self.save, + ) def translate(self, text: str, target_language: str, **kwargs) -> dict: - logger.info(">>> DeepL Translate [%s]: %s", target_language, text) - target_code = self.language_code_map.get(target_language, None) - translated_text = "" - try: - if target_code is None: - logger.error( - "DeepLTranslator->Not support target language:%s", target_language - ) - translator = self._init() - resp = translator.translate_text( - text, - target_lang=target_code, - preserve_formatting=True, - split_sentences="nonewlines", - tag_handling="html", - ) - translated_text = resp.text - except Exception as e: - logger.error("DeepLTranslator->%s: %s", e, text) - self.log = f"{timezone.now()}: {str(e)}" - finally: - self.save() - return {"text": translated_text, "characters": len(text)} + return deepl_translate( + self, + text, + target_language, + init_client=self._init, + logger=logger, + timezone_module=timezone, + save_func=self.save, + ) class LibreTranslateAgent(Agent): @@ -624,87 +341,56 @@ def __init__(self, *args, **kwargs): def _api_request( self, endpoint: str, params: dict = None, method: str = "POST" ) -> any: - """ - Handles sending requests to the configured LibreTranslate server endpoint. 
- """ - try: - url = self.server_url - if not url.endswith("/"): - url += "/" - full_url = f"{url}{endpoint}" - - query_params = params or {} - if self.api_key: - query_params["api_key"] = self.api_key - - data = parse.urlencode(query_params).encode("utf-8") - req = request.Request(full_url, data=data, method=method) - req.add_header("accept", "application/json") - req.add_header("Content-Type", "application/x-www-form-urlencoded") - req.add_header("User-Agent", "LibreTranslateAgent/1.0") - - with request.urlopen(req, timeout=settings.LT_TIMEOUT) as response: - response_str = response.read().decode("utf-8") - return json.loads(response_str) - except Exception as e: - raise ConnectionError(f"_api_request {str(e)}") # e.reason + return libretranslate_api_request( + self, + endpoint, + params=params, + method=method, + request_module=request, + parse_module=parse, + json_module=json, + settings_module=settings, + ) def _api_translate( self, q: str, source: str, target: str, format: str = "html" ) -> str: - """Calls the /translate endpoint.""" - params = {"q": q, "source": source, "target": target, "format": format} - response_data = self._api_request("translate", params=params, method="POST") - - if "error" in response_data: - raise Exception(f"_api_translate Error: {response_data['error']}") - - return response_data.get("translatedText", "") + return libretranslate_api_translate( + self, + q, + source, + target, + format=format, + api_request_func=self._api_request, + ) def _api_languages(self) -> list: - """Calls the /languages endpoint.""" - # Languages endpoint requires a GET request - return self._api_request("languages", method="GET") + return libretranslate_api_languages( + self, + api_request_func=self._api_request, + ) # -------------------------------- # Agent Methods # -------------------------------- def validate(self) -> bool: - is_valid = False - try: - self._api_languages() - self.log = "" - is_valid = True - except Exception as e: - self.log = f"{timezone.now()}: {str(e)}" - is_valid = False - finally: - self.valid = is_valid - self.save() - return is_valid + return libretranslate_validate( + self, + api_languages_func=self._api_languages, + timezone_module=timezone, + save_func=self.save, + ) def translate(self, text: str, target_language: str, **kwargs) -> dict: - target_code = self.language_map.get(target_language) - if not target_code: - self.log += ( - f"{timezone.now()}: Not support target language: {target_language}" - ) - logger.error( - f"LibreTranslateAgent->Not support target language: {target_language}" - ) - self.save() - return {"text": "", "characters": 0} - - try: - translated_text = self._api_translate( - q=text, source="auto", target=target_code, format="html" - ) - return {"text": translated_text, "characters": len(text)} - except Exception as e: - logger.error("LibreTranslateAgent->: %s", str(e)) - self.log = f"{timezone.now()}: {str(e)}" - self.save() - return {"text": "", "characters": 0} + return libretranslate_translate( + self, + text, + target_language, + api_translate_func=self._api_translate, + logger=logger, + timezone_module=timezone, + save_func=self.save, + ) class Meta: verbose_name = "LibreTranslate" @@ -726,18 +412,29 @@ def validate(self) -> bool: return True def translate(self, text: str, target_language: str, **kwargs) -> dict: - logger.info(">>> Test Translate [%s]: %s", target_language, text) - time.sleep(self.interval) - return {"text": self.translated_text, "tokens": 10, "characters": len(text)} + return testagent_translate( + self, + 
text, + target_language, + logger=logger, + sleep_func=time.sleep, + ) def summarize(self, text: str, target_language: str, **kwargs) -> dict: - logger.info(">>> Test Summarize [%s]: %s", target_language, text) - time.sleep(self.interval) - return {"text": self.translated_text, "tokens": 10, "characters": len(text)} + return testagent_summarize( + self, + text, + target_language, + logger=logger, + sleep_func=time.sleep, + ) def filter(self, text: str, **kwargs): - logger.info(">>> Test Filter") import random - time.sleep(self.interval) - return {"passed": random.choice([True, False]), "tokens": 10} + return testagent_filter( + self, + logger=logger, + sleep_func=time.sleep, + random_choice=random.choice, + ) diff --git a/core/models/digest.py b/core/models/digest.py deleted file mode 100644 index 0bae1caa..00000000 --- a/core/models/digest.py +++ /dev/null @@ -1,292 +0,0 @@ -from django.db import models -from django.utils.translation import gettext_lazy as _ -from django.utils import timezone -from config import settings -import uuid - -from core.models.feed import Feed -from django.db.models.signals import post_delete -from django.dispatch import receiver - - -class Digest(models.Model): - """ - Digest model for generating AI-powered daily/weekly briefings from RSS feeds. - - This model stores configuration for automatic digest generation, including - which tags to monitor, how many articles to process, and what AI agent to use - for content generation. - """ - - name = models.CharField( - max_length=200, - verbose_name=_("Name"), - help_text=_("Name of the digest (e.g., 'Tech Daily', 'Weekly Summary')"), - ) - - slug = models.SlugField( - unique=True, - blank=True, - null=True, - verbose_name=_("Slug"), - help_text=_("URL-friendly version of the name, auto-generated"), - ) - - description = models.TextField( - blank=True, - verbose_name=_("Description"), - help_text=_("Optional description of what this digest covers"), - ) - - status = models.BooleanField( - _("Generation Status"), - null=True, - editable=False, - help_text=_("Whether the last generation was successful"), - ) - - tags = models.ManyToManyField( - "Tag", - related_name="digests", - verbose_name=_("Tags"), - help_text=_( - "Tags to include in this digest. Articles from feeds with these tags will be processed" - ), - ) - - summarizer = models.ForeignKey( - "OpenAIAgent", - on_delete=models.CASCADE, - limit_choices_to={"valid": True}, - verbose_name=_("AI Summarizer"), - help_text=_( - "OpenAI agent to use for generating digest content (only valid agents are shown)" - ), - ) - - days_range = models.IntegerField( - default=1, - verbose_name=_("Days Range"), - help_text=_( - "Number of days to look back for articles (1 = yesterday only, 7 = past week)" - ), - ) - - target_language = models.CharField( - max_length=50, - choices=settings.TRANSLATION_LANGUAGES, - default=settings.DEFAULT_TARGET_LANGUAGE, - verbose_name=_("Target Language"), - help_text=_("Language of the generated digest output"), - ) - - prompt = models.TextField( - default=settings.default_digest_prompt, - verbose_name=_("Prompt"), - help_text=_( - "AI prompt for generating digest content. 
Use {digest_name}, {date}, {description}, {target_language} as placeholders" - ), - ) - - is_active = models.BooleanField( - default=True, - verbose_name=_("Active"), - help_text=_("Whether this digest should be automatically generated"), - ) - - last_generated = models.DateTimeField( - null=True, - blank=True, - verbose_name=_("Last Generated"), - help_text=_("When this digest was last generated"), - ) - - publish_days = models.JSONField( - default=list, - verbose_name=_("Publish Days"), - help_text=_("Days of week to publish (e.g., ['monday', 'tuesday', 'friday'])"), - ) - - created_at = models.DateTimeField( - auto_now_add=True, - verbose_name=_("Created At"), - ) - - updated_at = models.DateTimeField( - auto_now=True, - verbose_name=_("Updated At"), - ) - - log = models.TextField( - _("Log"), - default="", - blank=True, - null=True, - help_text=_("Log for the digest, useful for debugging"), - ) - - total_tokens = models.IntegerField(_("Tokens Cost"), default=0) - - class Meta: - verbose_name = _("Digest") - verbose_name_plural = _("Digests") - ordering = ["-updated_at"] - - def __str__(self): - return self.name - - def save(self, *args, **kwargs): - if not self.slug: - self.slug = uuid.uuid5( - uuid.NAMESPACE_URL, - f"{self.name}:{self.target_language}:{settings.SECRET_KEY}", - ).hex - super(Digest, self).save(*args, **kwargs) - - def should_generate_today(self): - """ - Check if digest should be generated today based on last generation time and publish days. - - Returns: - bool: True if digest should be generated, False otherwise - """ - if not self.is_active: - return False - - # Check if today is a publish day - if not self.is_publish_day(): - return False - - if not self.last_generated: - return True - - today = timezone.now().date() - last_gen_date = self.last_generated.date() - - return today > last_gen_date - - def is_publish_day(self, date=None): - """ - Check if the given date (or today) is a publish day for this digest. - - Args: - date: datetime.date object, defaults to today - - Returns: - bool: True if this is a publish day, False otherwise - """ - if date is None: - date = timezone.now().date() - - # Get weekday name (Monday, Tuesday, etc.) - weekday_name = date.strftime("%A").lower() - - # Check if this day is in the publish_days list - return weekday_name in (self.publish_days or []) - - def get_publish_days_list(self): - """ - Get list of enabled publish days as weekday names. - - Returns: - list: List of weekday names (e.g., ['Monday', 'Tuesday']) - """ - if not self.publish_days: - return [] - - # Convert to proper case weekday names - weekday_map = { - "monday": "Monday", - "tuesday": "Tuesday", - "wednesday": "Wednesday", - "thursday": "Thursday", - "friday": "Friday", - "saturday": "Saturday", - "sunday": "Sunday", - } - - enabled_days = [] - for day in self.publish_days: - if day and day.lower() in weekday_map: - enabled_days.append(weekday_map[day.lower()]) - - return enabled_days - - def get_articles_for_digest(self): - """ - Get articles that should be included in this digest. 
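The deleted `should_generate_today`/`is_publish_day` pair reduces to a weekday-name membership test plus a date comparison. A condensed, framework-free sketch of the same checks:

```python
# Illustrative restatement of the removed scheduling logic.
import datetime


def is_publish_day(publish_days, date=None):
    date = date or datetime.date.today()
    return date.strftime("%A").lower() in (publish_days or [])


def should_generate(publish_days, last_generated, today=None):
    today = today or datetime.date.today()
    if not is_publish_day(publish_days, today):
        return False
    return last_generated is None or today > last_generated


assert is_publish_day(["monday"], datetime.date(2024, 1, 1))  # a Monday
```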
- - Returns: - QuerySet: Entry objects filtered by tags and date range - """ - from core.models.entry import Entry - from datetime import timedelta - - end_date = timezone.now() - start_date = end_date - timedelta(days=self.days_range+1) #由于是凌晨2点,所以包含今天影响不大 - - # 正确获取digest tags对应的feeds的entries - digest_tags = self.tags.all() - if not digest_tags.exists(): - return Entry.objects.none() - - # 获取所有相关feeds - related_feeds = [tag.feeds.all() for tag in digest_tags] - feed_ids = set() - for feeds in related_feeds: - feed_ids.update(feeds.values_list("id", flat=True)) - - if not feed_ids: - return Entry.objects.none() - - # 从这些feeds获取entries - entries = ( - Entry.objects.filter( - feed_id__in=feed_ids, pubdate__gte=start_date, pubdate__lte=end_date - ) - .distinct() - .order_by("-pubdate") - ) - - return entries - - def get_digest_feed(self) -> Feed: - """获取或创建 Digest 专用 Feed""" - feed_url = f"{settings.SITE_URL.rstrip('/')}/rss/digest/{self.slug}" - - defaults = { - "name": f"{self.name}", - "subtitle": f"{self.description}", - "link": f"{settings.SITE_URL.rstrip('/')}/rss/digest/{self.slug}", - "author": "RSSBox Digest", - "language": self.target_language, - "update_frequency": 1440, - "fetch_article": False, - "translate_title": False, - "translate_content": False, - "summary": False, - "target_language": self.target_language, - } - - return Feed.objects.get_or_create( - feed_url=feed_url, - target_language=self.target_language, - defaults=defaults, - )[0] - - -@receiver(post_delete, sender=Digest) -def delete_associated_feed_and_entries(sender, instance: "Digest", **kwargs): - """ - 当删除一个 Digest 时,删除其专用 Feed,从而通过 Entry.feed 的 CASCADE 删除相关 Entries。 - 注意:不要使用 get_or_create 以避免误创建。 - """ - try: - feed_url = f"{settings.SITE_URL.rstrip('/')}/rss/digest/{instance.slug}" - Feed.objects.filter( - feed_url=feed_url, - target_language=instance.target_language, - ).delete() - except Exception: - # 静默失败以避免阻断 Digest 删除流程;实际错误会在上层日志中体现 - pass diff --git a/core/models/feed.py b/core/models/feed.py index c8edc9ac..4a6756b0 100644 --- a/core/models/feed.py +++ b/core/models/feed.py @@ -7,6 +7,7 @@ from django.utils.translation import gettext_lazy as _ from django.core.validators import MinValueValidator, MaxValueValidator from tagulous.models import TagField +from core.services.feed.filters import apply_feed_filters class Feed(models.Model): @@ -231,7 +232,4 @@ def get_translation_display(self): @property def filtered_entries(self): - queryset = self.entries.all() - for filter_obj in self.filters.all(): - queryset = filter_obj.apply_filter(queryset) - return queryset + return apply_feed_filters(self, self.entries.all()) diff --git a/core/models/filter.py b/core/models/filter.py index 8a44353f..05b2ed7c 100644 --- a/core/models/filter.py +++ b/core/models/filter.py @@ -2,9 +2,13 @@ from django.db import models from django.utils.translation import gettext_lazy as _ from tagulous.models import TagField -from utils import text_handler -import json from config import settings +from core.services.feed.filters import ( + apply_ai_filter as service_apply_ai_filter, + apply_filter as service_apply_filter, + apply_keywords_filter as service_apply_keywords_filter, + needs_re_evaluation as service_needs_re_evaluation, +) logger = logging.getLogger(__name__) @@ -92,111 +96,16 @@ class Meta: verbose_name_plural = _("Filter") def apply_keywords_filter(self, queryset): - """ - 应用过滤器到查询集,检查文本内容是否包含标签关键词 - :param queryset: 要过滤的查询集 - :return: 过滤后的查询集 - """ - keywords = self.keywords.values_list("name", flat=True) - - if not 
keywords: - return queryset.none() if self.operation == self.INCLUDE else queryset - - # 构建查询条件:内容包含任何关键词 - query = models.Q() - for keyword in keywords: - if self.filter_original_title: - query |= models.Q(original_title__icontains=keyword) - if self.filter_original_content: - query |= models.Q(original_content__icontains=keyword) - if self.filter_translated_title: - query |= models.Q(translated_title__icontains=keyword) - if self.filter_translated_content: - query |= models.Q(translated_content__icontains=keyword) - - if self.operation == self.INCLUDE: - # 包含模式:只显示包含任何关键词的内容 - return queryset.filter(query).distinct() - else: - # 排除模式:隐藏包含任何关键词的内容 - return queryset.exclude(query).distinct() + return service_apply_keywords_filter(self, queryset) def apply_ai_filter(self, queryset): - """ - 应用AI过滤器到查询集,使用AI代理处理内容 - :param queryset: 要过滤的查询集 - :return: 过滤后的查询集 - """ - passed_ids = [] - tokens = 0 - for entry in queryset: - # 尝试获取缓存结果 - result, created = FilterResult.objects.get_or_create( - filter=self, - entry=entry, - ) - - # 检查是否需要重新评估 - if created or self.needs_re_evaluation(result, entry): - # 准备要发送给AI的内容 - json_data = {} - if self.filter_original_title: - json_data["original_title"] = entry.original_title - if self.filter_original_content: - json_data["original_content"] = text_handler.clean_content( - entry.original_content - ) - if self.filter_translated_title: - json_data["translated_title"] = entry.translated_title - if self.filter_translated_content: - json_data["translated_content"] = text_handler.clean_content( - entry.translated_content - ) - - text_str = json.dumps(json_data, ensure_ascii=False) - passed = None - if self.agent: - filter_results = self.agent.filter( - text=text_str, system_prompt=self.filter_prompt - ) - passed = filter_results["passed"] - tokens += filter_results["tokens"] - result.passed = passed - result.save() - else: - passed = result.passed - - if passed: - passed_ids.append(entry.id) - - # 过滤出通过的项目 - return queryset.filter(id__in=passed_ids), tokens + return service_apply_ai_filter(self, queryset) def apply_filter(self, queryset): - tokens = 0 - # 优先尝试使用关键字过滤 - if self.filter_method in [self.KEYWORD_ONLY, self.BOTH]: - queryset = self.apply_keywords_filter(queryset) - - # 检查是否需要AI过滤 - if self.filter_method in [self.AI_ONLY, self.BOTH] and self.agent: - queryset, tokens = self.apply_ai_filter(queryset) - - if tokens > 0: - self.total_tokens += tokens - self.save() - - return queryset + return service_apply_filter(self, queryset) def needs_re_evaluation(self, result, entry): - """检查缓存是否失效""" - # 1. 如果从未评估过 - if result.passed is None: - return True - - # 2. 检查条目内容是否更新 - if entry.updated and entry.updated > result.last_updated: - return True + return service_needs_re_evaluation(result, entry) def save(self, *args, **kwargs): """ diff --git a/core/prompts.py b/core/prompts.py new file mode 100644 index 00000000..928f9320 --- /dev/null +++ b/core/prompts.py @@ -0,0 +1,59 @@ +DEFAULT_TITLE_TRANSLATE_PROMPT = ( + "You are a professional, authentic translation engine. Translate only the text " + "into {target_language}, return only the translations, do not explain the " + "original text." +) + +DEFAULT_CONTENT_TRANSLATE_PROMPT = """ +You are a professional, authentic translation engine specialized in HTML content translation. + +Requirements: +1. Translate only the text content into {target_language} +2. Preserve ALL HTML tags, attributes, and structure completely unchanged +3. 
Maintain proper context awareness across different HTML elements and their relationships +4. Consider semantic meaning within nested tags and their hierarchical context +5. Ensure translated text fits naturally within the HTML structure +6. Keep inline elements (like , , ) contextually coherent with their surrounding text +7. Maintain consistency in terminology throughout the entire HTML document +8. Return only the translated HTML content without explanations or comments + +Important: Do not modify, remove, or alter any HTML tags, attributes, classes, IDs, or structural elements. Only translate the actual text content between tags. + +""" + +DEFAULT_SUMMARY_PROMPT = ( + "Summarize the following text in {target_language} and return markdown format." +) + +DEFAULT_FILTER_PROMPT = """ +You are an advanced RSS content curator. Analyze the article following these protocols: + +1. **Cross-article Deduplication**: + - Identify duplicate content using semantic similarity + - For duplicate sets: + • Keep the most comprehensive version + +2. **Ad Exclusion**: + • Discard if any detected: + - Promotional language patterns + - Affiliate links + - Brand mentions >5% of content + - "Sponsored" disclosure + +3. **Clickbait Detection** + Discard if headline: + - Uses sensational punctuation (e.g., "SHOCKING!", "You won't BELIEVE...") + - Poses unanswered questions ("What happened next?") + - Employs urgency/scarcity tactics ("Act NOW!") +""" + +OUTPUT_FORMAT_FOR_FILTER_PROMPT = """ + +**Output Requirements** +• Only return "Passed" or "Blocked" based on the above checks. +• ABSOLUTELY NO: + - Explanations + - Metadata + - Discarded IDs + - Additional text +""" diff --git a/core/services/__init__.py b/core/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/core/services/admin/__init__.py b/core/services/admin/__init__.py new file mode 100644 index 00000000..95e1b9c0 --- /dev/null +++ b/core/services/admin/__init__.py @@ -0,0 +1,3 @@ +from core.services.admin.actions import force_update_feeds, force_update_tags + +__all__ = ["force_update_feeds", "force_update_tags"] diff --git a/core/services/admin/actions.py b/core/services/admin/actions.py new file mode 100644 index 00000000..638c4277 --- /dev/null +++ b/core/services/admin/actions.py @@ -0,0 +1,40 @@ +from django.db import transaction +from django.utils import timezone + + +def force_update_feeds( + queryset, + *, + task_manager, + update_multiple_feeds_func, +): + with transaction.atomic(): + for instance in queryset: + instance.fetch_status = None + instance.translation_status = None + instance.save() + + task_manager.submit_task( + "Force Update Feeds", + update_multiple_feeds_func, + queryset, + ) + + +def force_update_tags( + queryset, + *, + task_manager, + cache_tag_func, + now_func=timezone.now, +): + with transaction.atomic(): + for instance in queryset: + task_manager.submit_task( + "Force Update Tags", cache_tag_func, instance.slug, "t", "xml" + ) + task_manager.submit_task( + "Force Update Tags", cache_tag_func, instance.slug, "t", "json" + ) + instance.last_updated = now_func() + instance.save() diff --git a/core/services/admin/batch.py b/core/services/admin/batch.py new file mode 100644 index 00000000..c0f19683 --- /dev/null +++ b/core/services/admin/batch.py @@ -0,0 +1,132 @@ +from ast import literal_eval + + +def apply_batch_updates(queryset, post_data): + getlist = ( + post_data.getlist + if hasattr(post_data, "getlist") + else lambda key: post_data.get(key, []) + ) + + fields = { + "update_frequency": 
"update_frequency_value", + "max_posts": "max_posts_value", + "translator": "translator_value", + "target_language": "target_language_value", + "translation_display": "translation_display_value", + "summarizer": "summarizer_value", + "summary_detail": "summary_detail_value", + "additional_prompt": "additional_prompt_value", + "fetch_article": "fetch_article", + "tags": "tags_value", + "translate_title": "translate_title", + "translate_content": "translate_content", + "summary": "summary", + "filter": "filter_value", + } + field_types = { + "update_frequency": int, + "max_posts": int, + "target_language": str, + "translation_display": int, + "summary_detail": float, + "additional_prompt": str, + "fetch_article": literal_eval, + "translate_title": literal_eval, + "translate_content": literal_eval, + "summary": literal_eval, + } + + translate_title = post_data.get("translate_title", "Keep") + translate_content = post_data.get("translate_content", "Keep") + summary = post_data.get("summary", "Keep") + + match translate_title: + case "True": + queryset.update(translate_title=True) + case "False": + queryset.update(translate_title=False) + + match translate_content: + case "True": + queryset.update(translate_content=True) + case "False": + queryset.update(translate_content=False) + + match summary: + case "True": + queryset.update(summary=True) + case "False": + queryset.update(summary=False) + + update_fields = {} + for field, value_field in fields.items(): + value = post_data.get(value_field) + if post_data.get(field, "Keep") != "Keep" and value: + match field: + case "translator": + content_type_id, object_id = map(int, value.split(":")) + queryset.update(translator_content_type_id=content_type_id) + queryset.update(translator_object_id=object_id) + case "summarizer": + queryset.update(summarizer_id=int(value)) + case "tags": + tag_values = getlist("tags_value") + if tag_values: + tag_ids = [int(current_id) for current_id in tag_values] + for feed in queryset: + feed.tags.set(tag_ids) + case "filter": + filter_values = getlist("filter_value") + if filter_values: + filter_ids = [int(current_id) for current_id in filter_values] + for obj in queryset: + obj.filters.set(filter_ids) + case _: + update_fields[field] = field_types.get(field, str)(value) + + if update_fields: + queryset.update(**update_fields) + + +def build_batch_modify_context( + queryset, + *, + get_all_agent_choices_func, + openai_agent_model, + filter_model, + tag_model, + settings_module, + admin_context, +): + translator_choices = get_all_agent_choices_func() + summary_engine_choices = [ + (str(agent.id), agent.name) + for agent in openai_agent_model.objects.filter(valid=True) + ] + filter_choices = [ + (f"{filter_obj.id}", filter_obj.name) + for filter_obj in filter_model.objects.all() + ] + tags_choices = [ + (f"{tag.id}", tag.name) + for tag in tag_model.objects.all() + ] + + return { + **admin_context, + "items": queryset, + "translator_choices": translator_choices, + "target_language_choices": settings_module.TRANSLATION_LANGUAGES, + "summary_engine_choices": summary_engine_choices, + "filter_choices": filter_choices, + "tags_choices": tags_choices, + "update_frequency_choices": [ + (5, "5 min"), + (15, "15 min"), + (30, "30 min"), + (60, "hourly"), + (1440, "daily"), + (10080, "weekly"), + ], + } diff --git a/core/services/agent/__init__.py b/core/services/agent/__init__.py new file mode 100644 index 00000000..2d183f6d --- /dev/null +++ b/core/services/agent/__init__.py @@ -0,0 +1 @@ +from core.services.agent import 
deepl, libretranslate, openai, test_agent diff --git a/core/services/agent/deepl.py b/core/services/agent/deepl.py new file mode 100644 index 00000000..d5743186 --- /dev/null +++ b/core/services/agent/deepl.py @@ -0,0 +1,50 @@ +def deepl_init(agent, *, translator_cls): + return translator_cls( + agent.api_key, + server_url=agent.server_url, + proxy=agent.proxy, + ) + + +def deepl_validate(agent, *, init_client, logger, timezone_module, save_func): + is_valid = False + try: + translator = init_client() + usage = translator.get_usage() + if usage.character.valid: + agent.log = "" + is_valid = True + except Exception as exc: + logger.error("DeepLTranslator validate ->%s", exc) + agent.log = f"{timezone_module.now()}: {str(exc)}" + is_valid = False + finally: + agent.valid = is_valid + save_func() + return is_valid + + +def deepl_translate(agent, text, target_language, *, init_client, logger, timezone_module, save_func): + logger.info(">>> DeepL Translate [%s]: %s", target_language, text) + target_code = agent.language_code_map.get(target_language, None) + translated_text = "" + try: + if target_code is None: + logger.error( + "DeepLTranslator->Not support target language:%s", target_language + ) + translator = init_client() + resp = translator.translate_text( + text, + target_lang=target_code, + preserve_formatting=True, + split_sentences="nonewlines", + tag_handling="html", + ) + translated_text = resp.text + except Exception as exc: + logger.error("DeepLTranslator->%s: %s", exc, text) + agent.log = f"{timezone_module.now()}: {str(exc)}" + finally: + save_func() + return {"text": translated_text, "characters": len(text)} diff --git a/core/services/agent/libretranslate.py b/core/services/agent/libretranslate.py new file mode 100644 index 00000000..aac91d27 --- /dev/null +++ b/core/services/agent/libretranslate.py @@ -0,0 +1,93 @@ +def libretranslate_api_request( + agent, + endpoint, + *, + params=None, + method="POST", + request_module, + parse_module, + json_module, + settings_module, +): + try: + url = agent.server_url + if not url.endswith("/"): + url += "/" + full_url = f"{url}{endpoint}" + + query_params = params or {} + if agent.api_key: + query_params["api_key"] = agent.api_key + + data = parse_module.urlencode(query_params).encode("utf-8") + req = request_module.Request(full_url, data=data, method=method) + req.add_header("accept", "application/json") + req.add_header("Content-Type", "application/x-www-form-urlencoded") + req.add_header("User-Agent", "LibreTranslateAgent/1.0") + + with request_module.urlopen(req, timeout=settings_module.LT_TIMEOUT) as response: + response_str = response.read().decode("utf-8") + return json_module.loads(response_str) + except Exception as exc: + raise ConnectionError(f"_api_request {str(exc)}") + + +def libretranslate_api_translate(agent, q, source, target, *, format="html", api_request_func): + params = {"q": q, "source": source, "target": target, "format": format} + response_data = api_request_func("translate", params=params, method="POST") + if "error" in response_data: + raise Exception(f"_api_translate Error: {response_data['error']}") + return response_data.get("translatedText", "") + + +def libretranslate_api_languages(agent, *, api_request_func): + return api_request_func("languages", method="GET") + + +def libretranslate_validate(agent, *, api_languages_func, timezone_module, save_func): + is_valid = False + try: + api_languages_func() + agent.log = "" + is_valid = True + except Exception as exc: + agent.log = f"{timezone_module.now()}: 
{str(exc)}" + is_valid = False + finally: + agent.valid = is_valid + save_func() + return is_valid + + +def libretranslate_translate( + agent, + text, + target_language, + *, + api_translate_func, + logger, + timezone_module, + save_func, +): + target_code = agent.language_map.get(target_language) + if not target_code: + agent.log += f"{timezone_module.now()}: Not support target language: {target_language}" + logger.error( + f"LibreTranslateAgent->Not support target language: {target_language}" + ) + save_func() + return {"text": "", "characters": 0} + + try: + translated_text = api_translate_func( + q=text, + source="auto", + target=target_code, + format="html", + ) + return {"text": translated_text, "characters": len(text)} + except Exception as exc: + logger.error("LibreTranslateAgent->: %s", str(exc)) + agent.log = f"{timezone_module.now()}: {str(exc)}" + save_func() + return {"text": "", "characters": 0} diff --git a/core/services/agent/openai.py b/core/services/agent/openai.py new file mode 100644 index 00000000..22c8d9ef --- /dev/null +++ b/core/services/agent/openai.py @@ -0,0 +1,367 @@ +def openai_advanced_default(): + return {"temperature": 0.2, "reasoning_effort": "minimal"} + + +def openai_init(agent, *, openai_client_cls, settings_module): + return openai_client_cls( + api_key=agent.api_key, + base_url=agent.base_url, + timeout=settings_module.OPENAI_API_TIMEOUT, + max_retries=settings_module.OPENAI_API_MAX_RETRIES, + ) + + +def openai_wait_for_rate_limit( + agent, + *, + cache_backend, + datetime_module, + sleep_func, + logger, +): + if agent.rate_limit_rpm <= 0: + return + + current_minute = datetime_module.datetime.now().strftime("%Y%m%d%H%M") + cache_key = f"openai_rate_limit_{agent.id}_{current_minute}" + request_count = cache_backend.get(cache_key, 0) + + if request_count >= agent.rate_limit_rpm: + now = datetime_module.datetime.now() + next_minute = now.replace(second=0, microsecond=0) + datetime_module.timedelta( + minutes=1 + ) + wait_seconds = (next_minute - now).total_seconds() + 0.1 + logger.info(f"Rate limit reached. 
Waiting {wait_seconds:.2f} seconds...") + sleep_func(wait_seconds) + cache_backend.delete(cache_key) + return + + cache_backend.set(cache_key, request_count + 1, timeout=60) + + +def openai_validate( + agent, + *, + init_client, + wait_for_rate_limit, + task_submit, + logger, + settings_module, + timezone_module, + save_func, +): + if not agent.api_key: + return None + + try: + client = init_client() + wait_for_rate_limit() + + system_prompt = "You must only reply with exactly one character: 1" + user_content = "1" + if agent.merge_system_prompt: + merged_content = f"{system_prompt}\n\n{user_content}" + messages = [{"role": "user", "content": merged_content}] + else: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_content}, + ] + + res = client.with_options( + max_retries=settings_module.OPENAI_API_MAX_RETRIES + ).chat.completions.create( + extra_headers=agent.EXTRA_HEADERS, + model=agent.model, + messages=messages, + max_completion_tokens=50, + ) + _ = res.choices[0].finish_reason + if agent.max_tokens == 0: + task_submit( + f"detect_model_limit_{agent.model}_{agent.id}", + agent.detect_model_limit, + force=True, + ) + logger.info( + f"Submitted background task to detect model limit for {agent.model}" + ) + agent.log = "" + agent.valid = True + return True + except Exception as exc: + logger.error("OpenAIAgent validate ->%s", exc) + agent.log = f"{timezone_module.now()}: {str(exc)}" + agent.valid = False + return False + finally: + save_func(update_fields=["log", "valid"]) + + +def openai_detect_model_limit( + agent, + *, + force=False, + init_client, + wait_for_rate_limit, + logger, +): + if not force and agent.max_tokens != 0: + return agent.max_tokens + + initial_model = agent.model + initial_max_tokens = agent.max_tokens + + def binary_search_limit(low, high): + if high - low <= 256: + return low + + mid = (low + high) // 2 + try: + wait_for_rate_limit() + response = init_client().chat.completions.create( + extra_headers=agent.EXTRA_HEADERS, + model=agent.model, + messages=[ + { + "role": "system", + "content": "You must only reply with exactly one character: 1", + }, + {"role": "user", "content": "1"}, + ], + max_completion_tokens=mid, + temperature=0, + stop=[",", "\n", " ", ".", "1"], + ) + if response.choices[0].finish_reason == "stop": + return binary_search_limit(mid, high) + except Exception as exc: + error_str = str(exc).lower() + if any( + keyword in error_str + for keyword in ["maximum", "limit", "tokens", "context", "length"] + ): + return binary_search_limit(low, mid) + logger.warning(f"Detect model limit when non-limit error occurs: {exc}") + return low + + final_limit = binary_search_limit(4096, 1000000) + agent.max_tokens = final_limit + + if agent.pk is None: + return final_limit + + updated = type(agent).objects.filter( + pk=agent.pk, + model=initial_model, + max_tokens=initial_max_tokens, + ).update(max_tokens=final_limit) + if updated: + return final_limit + + current_max_tokens = ( + type(agent) + .objects.filter(pk=agent.pk) + .values_list("max_tokens", flat=True) + .first() + ) + if current_max_tokens is not None: + agent.max_tokens = current_max_tokens + return current_max_tokens + + return final_limit + + +def openai_completions( + agent, + text, + *, + system_prompt=None, + user_prompt=None, + _is_chunk=False, + init_client, + wait_for_rate_limit, + task_submit, + logger, + settings_module, + get_token_count_func, + adaptive_chunking_func, + save_func, + **kwargs, +): + client = init_client() + tokens = 0 + 
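`openai_detect_model_limit` binary-searches the largest `max_completion_tokens` the model will accept, treating limit-flavored errors as a signal to search lower. The search loop in isolation, with a fake acceptance probe standing in for the API:

```python
# Condensed sketch of the detection strategy; 256 is the same convergence
# window the service uses, and fake probes replace real API calls.
def find_limit(api_accepts, low=4096, high=1_000_000):
    while high - low > 256:
        mid = (low + high) // 2
        if api_accepts(mid):
            low = mid   # request succeeded: the ceiling is higher
        else:
            high = mid  # limit-style error: the ceiling is lower
    return low


true_limit = 128_000
assert abs(find_limit(lambda n: n <= true_limit) - true_limit) <= 256
```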
result_text = "" + log_updated = False + + try: + if user_prompt: + system_prompt += f"\n\n{user_prompt}" + + wait_for_rate_limit() + + if agent.merge_system_prompt: + merged_content = f"{system_prompt}\n\n{text}" + messages = [{"role": "user", "content": merged_content}] + system_prompt_tokens = 0 + input_tokens = get_token_count_func(merged_content) + else: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": text}, + ] + system_prompt_tokens = get_token_count_func(system_prompt) + input_tokens = get_token_count_func(system_prompt) + get_token_count_func(text) + + if agent.max_tokens == 0: + task_submit( + f"detect_model_limit_{agent.model}_{agent.id}", + agent.detect_model_limit, + force=True, + ) + raise ValueError( + "max_tokens is not set, Please wait for the model limit detection to complete" + ) + + if agent.merge_system_prompt: + system_prompt_token_cost = get_token_count_func(system_prompt) + max_usable_tokens = agent.max_tokens - system_prompt_token_cost - 100 + else: + max_usable_tokens = agent.max_tokens - system_prompt_tokens - 100 + + if get_token_count_func(text) > max_usable_tokens: + logger.info( + f"Text too large ({get_token_count_func(text)} tokens), chunking..." + ) + chunks = adaptive_chunking_func( + text, + target_chunks=max(1, int(len(text) / max_usable_tokens)), + min_chunk_size=500, + max_chunk_size=max_usable_tokens, + ) + translated_chunks = [] + for chunk in chunks: + result = openai_completions( + agent, + chunk, + system_prompt=system_prompt, + user_prompt=user_prompt, + _is_chunk=True, + init_client=init_client, + wait_for_rate_limit=wait_for_rate_limit, + task_submit=task_submit, + logger=logger, + settings_module=settings_module, + get_token_count_func=get_token_count_func, + adaptive_chunking_func=adaptive_chunking_func, + save_func=save_func, + **kwargs, + ) + translated_chunks.append(result["text"]) + tokens += result["tokens"] + + return {"text": " ".join(translated_chunks), "tokens": tokens} + + output_token_limit = int(max(4096, (agent.max_tokens - input_tokens) * 0.8)) + adv_params = agent.advanced_params or {} + if not isinstance(adv_params, dict): + adv_params = {} + + call_kwargs = {**adv_params} + if ( + "max_completion_tokens" not in call_kwargs + and "max_tokens" not in call_kwargs + ): + call_kwargs["max_completion_tokens"] = output_token_limit + + res = client.with_options( + max_retries=settings_module.OPENAI_API_MAX_RETRIES + ).chat.completions.create( + extra_headers=agent.EXTRA_HEADERS, + model=agent.model, + messages=messages, + **call_kwargs, + ) + if ( + res.choices + and res.choices[0].finish_reason == "stop" + and res.choices[0].message.content + ): + result_text = res.choices[0].message.content + logger.debug(f"[{agent.name}]: {result_text[:50]}...") + else: + finish_reason = None + if res.choices: + try: + finish_reason = res.choices[0].finish_reason + except Exception: + finish_reason = None + logger.warning( + f"[{agent.name}]: Failed to complete request:[{finish_reason or 'unknown'}]" + ) + + tokens = res.usage.total_tokens if getattr(res, "usage", None) else 0 + except Exception as exc: + from django.utils import timezone + + agent.log = f"{timezone.now()}: {str(exc)}" + log_updated = True + logger.error(f"{agent.name}: {exc}") + + if not _is_chunk and log_updated: + save_func(update_fields=["log"]) + + return {"text": result_text, "tokens": tokens} + + +def openai_translate( + agent, + text, + target_language, + *, + user_prompt=None, + text_type="title", + completions_func, + logger, 
+ **kwargs, +): + logger.info(f">>>Start Translate [{target_language}]: {text[:50]}...") + system_prompt = ( + agent.title_translate_prompt + if text_type == "title" + else agent.content_translate_prompt + ).replace("{target_language}", target_language) + return completions_func( + text, + system_prompt=system_prompt, + user_prompt=user_prompt, + **kwargs, + ) + + +def openai_summarize(agent, text, target_language, *, completions_func, logger, **kwargs): + logger.info(f">>> Start Summarize [{target_language}]: {text[:50]}...") + system_prompt = agent.summary_prompt.replace("{target_language}", target_language) + return completions_func(text, system_prompt=system_prompt, **kwargs) + + +def openai_filter(agent, text, system_prompt, *, completions_func, logger, settings_module, **kwargs): + logger.info(f">>> Start Filter: {text[:50]}...") + passed = False + tokens = 0 + results = completions_func( + text, + system_prompt=system_prompt + settings_module.output_format_for_filter_prompt, + **kwargs, + ) + + if results["text"] and "Passed" in results["text"]: + logger.info(">>> Filter Passed") + passed = True + tokens = results["tokens"] + else: + logger.info(">>> Filter Blocked") + passed = False + + return {"passed": passed, "tokens": tokens} diff --git a/core/services/agent/test_agent.py b/core/services/agent/test_agent.py new file mode 100644 index 00000000..45f64b38 --- /dev/null +++ b/core/services/agent/test_agent.py @@ -0,0 +1,16 @@ +def testagent_translate(agent, text, target_language, *, logger, sleep_func): + logger.info(">>> Test Translate [%s]: %s", target_language, text) + sleep_func(agent.interval) + return {"text": agent.translated_text, "tokens": 10, "characters": len(text)} + + +def testagent_summarize(agent, text, target_language, *, logger, sleep_func): + logger.info(">>> Test Summarize [%s]: %s", target_language, text) + sleep_func(agent.interval) + return {"text": agent.translated_text, "tokens": 10, "characters": len(text)} + + +def testagent_filter(agent, *, logger, sleep_func, random_choice): + logger.info(">>> Test Filter") + sleep_func(agent.interval) + return {"passed": random_choice([True, False]), "tokens": 10} diff --git a/core/services/feed/__init__.py b/core/services/feed/__init__.py new file mode 100644 index 00000000..b3fd7512 --- /dev/null +++ b/core/services/feed/__init__.py @@ -0,0 +1,23 @@ +from importlib import import_module + + +_EXPORTS = { + "run_feed_update": ("core.services.feed.pipeline", "run_feed_update"), + "refresh_updated_content": ("core.services.feed.refresh", "refresh_updated_content"), + "render_feed_content": ("core.services.feed.rendering", "render_feed_content"), + "render_tag_content": ("core.services.feed.rendering", "render_tag_content"), + "build_feed_response": ("core.services.feed.response", "build_feed_response"), +} + +__all__ = list(_EXPORTS) + + +def __getattr__(name): + if name not in _EXPORTS: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + module_name, attribute_name = _EXPORTS[name] + module = import_module(module_name) + value = getattr(module, attribute_name) + globals()[name] = value + return value diff --git a/core/services/feed/filters.py b/core/services/feed/filters.py new file mode 100644 index 00000000..cab746e5 --- /dev/null +++ b/core/services/feed/filters.py @@ -0,0 +1,120 @@ +import json + +from django.db import models +from utils import text_handler + + +def needs_re_evaluation(result, entry): + if result.passed is None: + return True + + if entry.updated and entry.updated > 
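`core/services/feed/__init__.py` above leans on PEP 562's module-level `__getattr__`: exports resolve on first attribute access and are memoized, so importing the package stays cheap. A self-contained toy of the same mechanism:

```python
# Toy lazy module built at runtime; the real package does the same thing with
# a plain __init__.py and globals()[name] = value for memoization.
import sys
import types

lazy = types.ModuleType("lazy_demo")
lazy._EXPORTS = {"sqrt": ("math", "sqrt")}


def _module_getattr(name):
    if name not in lazy._EXPORTS:
        raise AttributeError(f"module 'lazy_demo' has no attribute {name!r}")
    module_name, attr = lazy._EXPORTS[name]
    value = getattr(__import__(module_name), attr)
    setattr(lazy, name, value)  # memoize: the next access skips __getattr__
    return value


lazy.__getattr__ = _module_getattr
sys.modules["lazy_demo"] = lazy

import lazy_demo
assert lazy_demo.sqrt(9) == 3.0
```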
result.last_updated: + return True + + return False + + +def apply_keywords_filter(filter_obj, queryset): + keywords = filter_obj.keywords.values_list("name", flat=True) + + if not keywords: + return queryset.none() if filter_obj.operation == filter_obj.INCLUDE else queryset + + query = models.Q() + for keyword in keywords: + if filter_obj.filter_original_title: + query |= models.Q(original_title__icontains=keyword) + if filter_obj.filter_original_content: + query |= models.Q(original_content__icontains=keyword) + if filter_obj.filter_translated_title: + query |= models.Q(translated_title__icontains=keyword) + if filter_obj.filter_translated_content: + query |= models.Q(translated_content__icontains=keyword) + + if filter_obj.operation == filter_obj.INCLUDE: + return queryset.filter(query).distinct() + return queryset.exclude(query).distinct() + + +def apply_ai_filter(filter_obj, queryset): + from core.models.filter import FilterResult + + passed_ids = [] + tokens = 0 + for entry in queryset: + result, created = FilterResult.objects.get_or_create( + filter=filter_obj, + entry=entry, + ) + + if created or needs_re_evaluation(result, entry): + json_data = {} + if filter_obj.filter_original_title: + json_data["original_title"] = entry.original_title + if filter_obj.filter_original_content: + json_data["original_content"] = text_handler.clean_content( + entry.original_content + ) + if filter_obj.filter_translated_title: + json_data["translated_title"] = entry.translated_title + if filter_obj.filter_translated_content: + json_data["translated_content"] = text_handler.clean_content( + entry.translated_content + ) + + text_str = json.dumps(json_data, ensure_ascii=False) + passed = None + if filter_obj.agent: + filter_results = filter_obj.agent.filter( + text=text_str, + system_prompt=filter_obj.filter_prompt, + ) + passed = filter_results["passed"] + tokens += filter_results["tokens"] + result.passed = passed + result.save() + else: + passed = result.passed + + if passed: + passed_ids.append(entry.id) + + return queryset.filter(id__in=passed_ids), tokens + + +def apply_filter(filter_obj, queryset): + tokens = 0 + if filter_obj.filter_method in [filter_obj.KEYWORD_ONLY, filter_obj.BOTH]: + queryset = apply_keywords_filter(filter_obj, queryset) + + if filter_obj.filter_method in [filter_obj.AI_ONLY, filter_obj.BOTH] and filter_obj.agent: + queryset, tokens = apply_ai_filter(filter_obj, queryset) + + if tokens > 0: + filter_obj.total_tokens += tokens + filter_obj.save() + + return queryset + + +def apply_feed_filters(feed, queryset): + for filter_obj in feed.filters.all(): + queryset = apply_filter(filter_obj, queryset) + return queryset + + +def apply_tag_filters(tag_slug, entry_ids, all_entries): + from core.models.entry import Entry + from core.models.tag import Tag + + tag_filters = Tag.objects.get(slug=tag_slug).filters.all() + + if not tag_filters: + return [entry for (_, entry) in all_entries] + + filtered_qs = Entry.objects.filter(id__in=entry_ids) + for filter_obj in tag_filters: + filtered_qs = apply_filter(filter_obj, filtered_qs) + + passed_ids = set(filtered_qs.values_list("id", flat=True)) + return [entry for (_, entry) in all_entries if entry.id in passed_ids] diff --git a/core/services/feed/pipeline.py b/core/services/feed/pipeline.py new file mode 100644 index 00000000..51ba3c6d --- /dev/null +++ b/core/services/feed/pipeline.py @@ -0,0 +1,50 @@ +import logging + +from django.db import close_old_connections + +from core.models import Feed +from core.tasks.fetch_feeds import 
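`apply_keywords_filter` accumulates one OR-chain of `Q` objects across every enabled field/keyword pair, then filters or excludes depending on the operation. The same accumulation with plain predicates instead of the ORM; the entries and field names are illustrative:

```python
def build_predicate(keywords, fields):
    def matches(entry: dict) -> bool:
        # OR across all keyword/field pairs, like the chained Q objects.
        return any(
            keyword.lower() in (entry.get(field) or "").lower()
            for keyword in keywords
            for field in fields
        )
    return matches


include = build_predicate(["rust"], ["original_title", "translated_title"])
entries = [
    {"original_title": "Rust 1.80 released", "translated_title": ""},
    {"original_title": "Weekly recap", "translated_title": ""},
]
kept = [e for e in entries if include(e)]         # INCLUDE mode
dropped = [e for e in entries if not include(e)]  # EXCLUDE mode
assert len(kept) == 1 and len(dropped) == 1
```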
handle_single_feed_fetch +from core.tasks.translate_feeds import handle_feeds_translation +from core.tasks.summarize_feeds import handle_feeds_summary + +logger = logging.getLogger(__name__) + + +def run_feed_update( + feed: Feed, + *, + fetch_func=None, + translate_func=None, + summarize_func=None, + close_connections=None, + pipeline_logger=None, +) -> bool: + fetch_func = fetch_func or handle_single_feed_fetch + translate_func = translate_func or handle_feeds_translation + summarize_func = summarize_func or handle_feeds_summary + close_connections = close_connections or close_old_connections + pipeline_logger = pipeline_logger or logger + + try: + close_connections() + try: + pipeline_logger.info(f"Starting feed update: {feed.name}") + + fetch_func(feed) + if feed.translate_title: + translate_func([feed], target_field="title") + if feed.translate_content: + translate_func([feed], target_field="content") + if feed.summary: + summarize_func([feed]) + + pipeline_logger.info(f"Completed feed update: {feed.name}") + return True + except Feed.DoesNotExist: + pipeline_logger.error(f"Feed not found: ID {feed.name}") + return False + except Exception as exc: + pipeline_logger.exception(f"Error updating feed ID {feed.name}: {str(exc)}") + return False + finally: + close_connections() diff --git a/core/services/feed/refresh.py b/core/services/feed/refresh.py new file mode 100644 index 00000000..f2dfa361 --- /dev/null +++ b/core/services/feed/refresh.py @@ -0,0 +1,76 @@ +from itertools import chain +import logging +import time + +from core.models import Tag +from core.cache import cache_rss, cache_tag + +logger = logging.getLogger(__name__) + + +def refresh_feed_caches( + feeds, + *, + cache_rss_func=None, + logger=None, + time_func=None, +): + cache_rss_func = cache_rss_func or cache_rss + logger = logger or logging.getLogger(__name__) + time_func = time_func or time.time + + for feed in feeds: + try: + cache_rss_func(feed.slug, feed_type="o", format="xml") + cache_rss_func(feed.slug, feed_type="o", format="json") + cache_rss_func(feed.slug, feed_type="t", format="xml") + cache_rss_func(feed.slug, feed_type="t", format="json") + except Exception as exc: + logger.error( + f"{time_func()}: Failed to cache RSS for {feed.slug}: {str(exc)}" + ) + + +def get_related_tags(feeds, *, tag_model=None): + tag_model = tag_model or Tag + tag_ids = set( + chain.from_iterable(feed.tags.values_list("id", flat=True) for feed in feeds) + ) + return tag_model.objects.filter(id__in=tag_ids) + + +def refresh_tag_caches(tags, *, cache_tag_func=None, logger=None): + cache_tag_func = cache_tag_func or cache_tag + logger = logger or logging.getLogger(__name__) + + for tag in tags: + try: + cache_tag_func(tag.slug, feed_type="o", format="xml") + cache_tag_func(tag.slug, feed_type="t", format="xml") + cache_tag_func(tag.slug, feed_type="t", format="json") + except Exception as exc: + logger.error(f"Failed to cache tag {tag.slug}: {str(exc)}") + + +def refresh_updated_content( + feeds, + *, + tag_model=None, + cache_rss_func=None, + cache_tag_func=None, + logger=None, + time_func=None, +): + logger = logger or logging.getLogger(__name__) + refresh_feed_caches( + feeds, + cache_rss_func=cache_rss_func, + logger=logger, + time_func=time_func, + ) + tags = get_related_tags(feeds, tag_model=tag_model) + refresh_tag_caches( + tags, + cache_tag_func=cache_tag_func, + logger=logger, + ) diff --git a/core/services/feed/rendering.py b/core/services/feed/rendering.py new file mode 100644 index 00000000..f2a0a032 --- /dev/null +++ 
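`run_feed_update` defaults each stage to its production task but accepts injected replacements, which makes the fetch, translate, summarize ordering testable without Django. A stripped-down sketch with recorded stages; all names are illustrative:

```python
def run_pipeline(feed, *, fetch=None, translate=None, summarize=None):
    # Default-or-injected collaborators, like run_feed_update.
    fetch = fetch or (lambda f: None)
    translate = translate or (lambda fs, target_field: None)
    summarize = summarize or (lambda fs: None)

    fetch(feed)
    if feed["translate_title"]:
        translate([feed], target_field="title")
    if feed["summary"]:
        summarize([feed])
    return True


calls = []
run_pipeline(
    {"translate_title": True, "summary": False},
    fetch=lambda f: calls.append("fetch"),
    translate=lambda fs, target_field: calls.append(f"translate:{target_field}"),
    summarize=lambda fs: calls.append("summarize"),
)
assert calls == ["fetch", "translate:title"]
```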
b/core/services/feed/rendering.py @@ -0,0 +1,200 @@ +import logging + +import mistune +from django.conf import settings +from django.utils import timezone +from feedgen.feed import FeedGenerator +from lxml import etree + +from utils.text_handler import set_translation_display + +logger = logging.getLogger(__name__) + + +def build_atom_feed( + feed_id, + title, + author, + link, + subtitle, + language, + updated, + pubdate=None, +): + updated_time = updated or pubdate or timezone.now() + fg = FeedGenerator() + fg.id(str(feed_id)) + if not title: + local_time = timezone.localtime(updated_time) + title = local_time.strftime("%Y-%m-%d %H:%M:%S") + fg.title(title) + fg.author({"name": author or "Unknown"}) + fg.link(href=link, rel="alternate") + fg.subtitle(subtitle or "") + fg.language(language or "") + fg.updated(updated_time) + fg.pubDate(pubdate or updated_time) + return fg + + +def add_atom_entry(fg, entry, feed_type, translation_display=None, entry_logger=None): + entry_logger = entry_logger or logger + pubdate = entry.pubdate or timezone.now() + updated = entry.updated or pubdate + summary = entry.original_summary + title = entry.original_title + content = entry.original_content or "" + + if feed_type == "t": + if entry.translated_title: + title = set_translation_display( + entry.original_title, + entry.translated_title, + translation_display or entry.feed.translation_display, + ) + + if entry.translated_content: + content = set_translation_display( + entry.original_content, + entry.translated_content, + translation_display or entry.feed.translation_display, + "
<br />---------------<br />", + ) + + if entry.ai_summary: + html_summary = f"{mistune.html(entry.ai_summary)}<br />---------------<br />
" + content = html_summary + content + + summary = content or "" + + fe = fg.add_entry() + if not title: + local_time = timezone.localtime(updated) + title = local_time.strftime("%Y-%m-%d %H:%M:%S") + fe.title(title) + fe.link(href=entry.link or "", rel="alternate") + fe.author({"name": entry.author or "Unknown"}) + fe.id(entry.guid or entry.link) + fe.content(content, type="html") + fe.summary(summary, type="html") + fe.updated(updated) + fe.pubDate(pubdate) + + if entry.enclosures_xml: + try: + xml = etree.fromstring(entry.enclosures_xml) + for enclosure in xml.iter("enclosure"): + fe.enclosure( + url=enclosure.get("href"), + type=enclosure.get("type"), + length=enclosure.get("length"), + ) + except Exception as exc: + entry_logger.error(f"Error parsing enclosures for entry {entry.id}: {str(exc)}") + + return fe + + +def finalize_atom_feed(fg): + atom_string = fg.atom_str(pretty=False) + root = etree.fromstring(atom_string) + tree = etree.ElementTree(root) + pi = etree.ProcessingInstruction( + "xml-stylesheet", 'type="text/xsl" href="/static/rss.xsl"' + ) + root.addprevious(pi) + return etree.tostring( + tree, pretty_print=True, xml_declaration=True, encoding="utf-8" + ).decode() + + +def render_feed_content( + feed, + feed_type="t", + *, + build_feed_func=build_atom_feed, + add_entry_func=add_atom_entry, + finalize_func=finalize_atom_feed, + render_logger=None, +): + render_logger = render_logger or logger + if not feed: + render_logger.error("generate_atom_feed: feed is None") + return None + + try: + fg = build_feed_func( + feed_id=feed.id, + title=feed.name, + author=feed.author, + link=feed.link or feed.feed_url, + subtitle=feed.subtitle, + language=feed.language, + updated=feed.updated, + pubdate=feed.pubdate, + ) + + entries = feed.filtered_entries if feed_type == "t" else feed.entries.all() + if entries is None: + return [] + + for entry in reversed(entries.order_by("-pubdate")[: feed.max_posts]): + add_entry_func( + fg, + entry, + feed_type, + feed.translation_display, + entry_logger=render_logger, + ) + + return finalize_func(fg) + except Exception as exc: + render_logger.exception(f"generate_atom_feed error {feed.feed_url}: {str(exc)}") + return None + + +def render_tag_content( + tag, + feeds, + feed_type="t", + *, + build_feed_func=build_atom_feed, + add_entry_func=add_atom_entry, + finalize_func=finalize_atom_feed, +): + from core.services.feed.filters import apply_tag_filters + + type_str = "Original" if feed_type == "o" else "Translated" + fg = build_feed_func( + feed_id=f"urn:merged-tag-{tag}-{type_str}-feeds", + title=f"{type_str} #{tag} tag Feeds", + author=f"{type_str} #{tag} tag Feeds", + link=settings.SITE_URL, + subtitle=f"Combined {type_str} {tag} Feeds", + language="en", + updated=timezone.now(), + ) + + all_entries = [] + entry_ids = [] + for feed in feeds: + fg.category(term=str(feed.id), label=feed.name, scheme=feed.feed_url) + entries = feed.entries.all() + if not entries: + continue + + for entry in reversed(entries.order_by("-pubdate")[: feed.max_posts]): + sort_time = entry.pubdate or entry.updated or timezone.now() + all_entries.append((sort_time, entry)) + entry_ids.append(entry.id) + + all_entries.sort(key=lambda item: item[0], reverse=True) + filtered_entries = apply_tag_filters(tag, entry_ids, all_entries) + + if filtered_entries: + fg.updated(all_entries[0][0]) + + for entry in filtered_entries[:100]: + add_entry_func(fg, entry, feed_type) + + return finalize_func(fg) diff --git a/core/services/feed/response.py b/core/services/feed/response.py new 
file mode 100644 index 00000000..35cf6888 --- /dev/null +++ b/core/services/feed/response.py @@ -0,0 +1,24 @@ +from django.http import JsonResponse, StreamingHttpResponse +from feed2json import feed2json + + +def build_feed_response(atom_feed, filename, format="xml"): + if format == "json": + if not atom_feed: + return JsonResponse({"error": "No feed data available"}, status=404) + return JsonResponse(feed2json(atom_feed)) + + def stream_content(): + if not atom_feed: + yield b"No feed data available" + return + chunk_size = 4096 + for index in range(0, len(atom_feed), chunk_size): + yield atom_feed[index : index + chunk_size] + + response = StreamingHttpResponse( + stream_content(), + content_type="application/xml; charset=utf-8", + ) + response["Content-Disposition"] = f"inline; filename={filename}.xml" + return response diff --git a/core/services/opml.py b/core/services/opml.py new file mode 100644 index 00000000..3cb4c440 --- /dev/null +++ b/core/services/opml.py @@ -0,0 +1,84 @@ +from datetime import datetime + +from django.http import HttpResponse +from lxml import etree + +from core.models import Feed, Tag + + +def import_opml_content(opml_content: bytes) -> int: + parser = etree.XMLParser(resolve_entities=False) + root = etree.fromstring(opml_content, parser=parser) + body = root.find("body") + + if body is None: + raise ValueError("Invalid OPML: Missing body element") + + created_count = 0 + + def process_outlines(outlines, tag_name: str = None): + nonlocal created_count + for outline in outlines: + if "xmlUrl" in outline.attrib: + feed, created = Feed.objects.get_or_create( + feed_url=outline.get("xmlUrl"), + defaults={ + "name": outline.get("title") or outline.get("text") + }, + ) + if created: + created_count += 1 + if tag_name: + tag_obj, _ = Tag.objects.get_or_create(name=tag_name) + feed.tags.add(tag_obj) + elif outline.find("outline") is not None: + next_tag_name = outline.get("text") or outline.get("title") + process_outlines(outline.findall("outline"), next_tag_name) + + process_outlines(body.findall("outline")) + return created_count + + +def build_opml_response(title_prefix, queryset, get_feed_url_func, filename_prefix): + root = etree.Element("opml", version="2.0") + + head = etree.SubElement(root, "head") + etree.SubElement(head, "title").text = f"{title_prefix} | RSSBox" + etree.SubElement(head, "dateCreated").text = datetime.now().strftime( + "%a, %d %b %Y %H:%M:%S %z" + ) + etree.SubElement(head, "ownerName").text = "RSSBox" + + body = etree.SubElement(root, "body") + categories = {} + for feed in queryset: + feed_tags = list(feed.tags.all()) or [None] + for tag in feed_tags: + tag_name = tag.name if tag else "uncategorized" + if tag_name not in categories: + categories[tag_name] = etree.SubElement( + body, "outline", text=tag_name, title=tag_name + ) + + feed_url = get_feed_url_func(feed) or "" + feed_name = feed.name or "Untitled Feed" + etree.SubElement( + categories[tag_name], + "outline", + { + "title": feed_name, + "text": feed_name, + "type": "rss", + "xmlUrl": feed_url, + "htmlUrl": feed_url, + }, + ) + + xml_content = etree.tostring( + root, encoding="utf-8", xml_declaration=True, pretty_print=True + ) + response = HttpResponse(xml_content, content_type="application/xml") + response["Content-Disposition"] = ( + f'attachment; filename="{filename_prefix}_feeds_from_rssbox.opml"' + ) + return response diff --git a/core/tasks/generate_digests.py b/core/tasks/generate_digests.py deleted file mode 100644 index d0875dd3..00000000 --- 
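`import_opml_content` walks nested `<outline>` elements recursively: a node with `xmlUrl` becomes a feed, and a container node contributes the tag name for everything beneath it. The traversal over a tiny OPML document:

```python
# Self-contained sketch of the recursive outline walk; the OPML snippet and
# URL are illustrative.
from lxml import etree

OPML = b"""<opml version="2.0"><body>
  <outline text="news">
    <outline text="Example" xmlUrl="https://example.com/rss"/>
  </outline>
</body></opml>"""

found = []


def walk(outlines, tag=None):
    for node in outlines:
        if "xmlUrl" in node.attrib:
            found.append((node.get("xmlUrl"), tag))
        elif node.find("outline") is not None:
            walk(node.findall("outline"), node.get("text"))


walk(etree.fromstring(OPML).find("body").findall("outline"))
assert found == [("https://example.com/rss", "news")]
```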
a/core/tasks/generate_digests.py +++ /dev/null @@ -1,542 +0,0 @@ -import logging -from django.utils import timezone - -from core.models.digest import Digest -from core.models.entry import Entry -from utils.text_handler import get_token_count -from config import settings -from core.cache import cache_digest - -logger = logging.getLogger(__name__) - - -class DigestGenerator: - """ - AI-powered digest content generator. - - Processes articles from specified tags and generates comprehensive - daily/weekly briefings using OpenAI agents. - """ - - def __init__(self, digest: Digest): - self.digest = digest - self.articles = [] - # self.temp_translations = temp_translations or {} - - def prepare_articles(self): - """ - Fetch and prepare articles for digest generation. - - Returns: - list: Processed article data - """ - logger.info(f"Preparing articles for digest '{self.digest.name}'") - # Get articles based on digest configuration - articles = self.digest.get_articles_for_digest() - - processed_articles = [] - for entry in articles: - summary = entry.ai_summary or "" - - # Use temporary translation if available, otherwise fallback to Entry translation - title = entry.original_title or entry.translated_title or "No title" - - processed_articles.append( - { - "title": title, - "link": entry.link, - "summary": summary, - "published": entry.pubdate, - "author": entry.author or "Unknown", - } - ) - - self.articles = processed_articles - - return processed_articles - - def build_prompt(self): - """ - Build AI prompt with article data. - - Returns: - tuple: (articles_list, system_prompt, url_mapping) - 分离的文章内容、系统提示和URL映射 - """ - logger.info(f"Building prompt for digest '{self.digest.name}'") - # Prepare articles text with URL optimization - articles_list = [] - url_mapping = {} # 存储占位符到真实URL的映射 - - for i, article in enumerate(self.articles, 1): - # 使用markdown格式的占位符,让AI更容易保留 - url_placeholder = f"LINK_{i}" - url_mapping[url_placeholder] = article["link"] - - articles_list.append(f""" -Original Title: {article["title"]} -Link: {url_placeholder} -Published: {article["published"]} -Summary: {article["summary"]} -""") - - # Replace placeholders in system prompt using simple string replacement - output_format_for_digest_prompt = ( - settings.output_format_for_digest_prompt.replace( - "{digest_name}", self.digest.name - ) - .replace("{date}", timezone.now().strftime("%Y-%m-%d")) - .replace("{target_language}", self.digest.target_language) - .replace("{description}",self.digest.description) - ) - system_prompt = self.digest.prompt + output_format_for_digest_prompt - - # 计算token消耗,如果超出限制,则进行分块 - safe_tokens = self.digest.summarizer.max_tokens * 0.65 - available_tokens = safe_tokens - get_token_count(system_prompt) # 预留给输出 - - articles: list[str] = self._chunk_articles_by_token_limit( - articles_list, available_tokens - ) - return articles, system_prompt, url_mapping - - def generate(self, force: bool = False): - """ - Generate digest content using AI. 
- - Returns: - dict: Generation result with success status and content - """ - logger.info(f"Generating digest '{self.digest.name}'") - try: - # Pre-steps previously handled in generate_digest() - # 1) Create temporary translations for digest-specific language - # to ensure consistent language in digest output without - # polluting Entry translations - # self.temp_translations = _ensure_entries_have_translated_titles(self.digest) or {} - - # 2) Ensure all entries have AI summaries before generating digest - now = timezone.now() - _ensure_entries_have_summaries(self.digest) - - self.prepare_articles() - - if not self.articles: - local_now = timezone.localtime(now) - self.digest.log += f"{local_now.strftime('%Y-%m-%d %H:%M:%S')} No articles were found within the specified range.\n" - return {"success": False, "error": "No articles were found within the specified range."} - - # Build prompt - 分离文章内容、系统提示和URL映射 - articles_list, system_prompt, url_mapping = self.build_prompt() - - # Call AI agent - logger.info(f"Calling AI agent for digest '{self.digest.name}'") - logger.info(f"Total articles to digest: {len(articles_list)}") - digests_list = [] - final_digest = "" - for articles_text in articles_list: - logger.info(f"Digesting article") - result = self.digest.summarizer.digester( - text=articles_text, # 合并后的多篇文章内容 - system_prompt=system_prompt, # 处理指令 - digest_name=self.digest.name, - date=now.strftime("%Y-%m-%d"), - ) - - if result.get("text"): - logger.info(f"Digested article") - # 补充URL:将占位符替换回真实URL - final_content = result["text"] - # final_digest += "\n" + final_content - digests_list.append(final_content) - else: - logger.warning(f"Failed to digest article") - self.digest.total_tokens += result.get("tokens", 0) - - # Only for test - digests_list += digests_list - if len(digests_list) > 1: - logger.info( - f"Final digest has {len(digests_list)} digests, need to merge to one digest" - ) - # TODO: 是否还需要再次调用summarizer来合并总结 或者 直接合并digests_list - result = self.digest.summarizer.digester( - text="\n".join(digests_list), - system_prompt=system_prompt, - digest_name=self.digest.name, - date=now.strftime("%Y-%m-%d"), - ) - if result.get("text"): - logger.info(f"Merged digest") - final_digest = result["text"] - else: - logger.warning(f"Failed to merge digest") - else: - final_digest = digests_list[0] - - for placeholder, real_url in url_mapping.items(): - placeholder = f"({placeholder})" - real_url = f"({real_url})" - if placeholder in final_digest: - final_digest = final_digest.replace(placeholder, real_url) - logger.info(f"Replaced placeholder {placeholder} with URL") - - # 将生成内容保存为一个 Entry,写入 ai_summary - self.digest.last_generated = now - - # 获取/创建 Digest 专用 Feed - digest_feed = self.digest.get_digest_feed() - - # 创建一条新的摘要 Entry - entry = Entry.objects.create( - feed=digest_feed, - link=f"{settings.SITE_URL.rstrip('/')}/core/digest/{self.digest.slug}", - author=self.digest.name or "Digest", - pubdate=now, - updated=now, - guid=f"digest:{self.digest.id}:{int(now.timestamp())}", - original_title=f"{self.digest.name} | {timezone.localtime(now).strftime('%Y-%m-%d %H:%M')}", - translated_title=None, - original_content=None, - translated_content=None, - original_summary=None, - ai_summary=final_digest, - ) - - self.digest.status = True # Set status to success - cache_digest(self.digest.slug, "xml") - cache_digest(self.digest.slug, "json") - local_now = timezone.localtime(now) - self.digest.log += f"{local_now.strftime('%Y-%m-%d %H:%M:%S')} Digest generation successful for {self.digest.name}\n" - 
return { - "success": True, - "entry_id": entry.id, - } - except Exception as e: - logger.error(f"Digest generation failed for {self.digest.name}: {e}") - local_now = timezone.localtime(now) - self.digest.log += f"{local_now.strftime('%Y-%m-%d %H:%M:%S')} Digest generation failed for {self.digest.name}: {e}\n" - # Mark status as failed - self.digest.status = False - return {"success": False, "error": str(e)} - finally: - self.digest.save() - - def _chunk_articles_by_token_limit( - self, articles_list: list[str], max_tokens - ) -> list[str]: - """ - Split articles text into chunks. - Returns [articles_text, articles_text, ...] where each articles_text is <= max_tokens - """ - logger.info(f"Chunking articles for digest '{self.digest.name}'") - # 都是 AI summary,通常不会很长;按 token 限制合并为若干块 - chunks: list[str] = [] - current_chunk: str = "" - current_tokens: int = 0 - max_tokens = int(max_tokens) if max_tokens else 0 - logger.info( - f"Total articles to chunk: {len(articles_list)}; token limit per chunk: {max_tokens}" - ) - for articles_text in articles_list: - article_tokens = get_token_count(articles_text) - if ( - current_chunk - and current_tokens + article_tokens > max_tokens - and max_tokens > 0 - ): - # 关闭当前块,开启新块 - chunks.append(current_chunk) - current_chunk = articles_text - current_tokens = article_tokens - logger.debug("Started a new chunk due to token limit") - else: - # 追加到当前块 - if current_chunk: - current_chunk += "\n" + articles_text - current_tokens += article_tokens - else: - current_chunk = articles_text - current_tokens = article_tokens - # 收尾,把最后的块加入 - if current_chunk: - chunks.append(current_chunk) - logger.info(f"Total chunks produced: {len(chunks)}") - return chunks - - -def _ensure_entries_have_translated_titles(digest: Digest): - """ - Generate temporary translations for digest entries without caching to Entry model. - - This ensures consistent language in digest output by creating temporary translations - to the digest's target language. Translations are NOT saved to Entry.translated_title - to avoid polluting Feed-specific translations. - - Only processes articles within the digest's days_range to avoid unnecessary work. - - Args: - digest: The Digest instance - - Returns: - dict: Mapping of entry_id to translated_title for digest use - """ - from core.models.feed import Feed - from core.tasks.utils import auto_retry - - # Get articles for digest within the specified days_range - all_articles = list(digest.get_articles_for_digest()) - - if not all_articles: - logger.info(f"No articles found for digest '{digest.name}'") - return {} - - logger.info( - f"Processing {len(all_articles)} entries for temporary title translation to {digest.target_language}..." 
- ) - - # Create temporary translation cache for this digest - temp_translations = {} - - # Group entries by feed to process efficiently - feed_ids = set(entry.feed_id for entry in all_articles) - candidate_feeds = Feed.objects.filter(id__in=feed_ids) - - if not candidate_feeds.exists(): - logger.warning(f"Found {len(all_articles)} entries but their feeds don't exist") - return - - # Process temporary translation for each entry - digest_tokens = 0 # Tokens for digest-specific translations - total_tokens = 0 # Tokens for feed translations - total_characters = 0 - translated_count = 0 - feeds_to_update = {} # Track feed token usage - use_digest_summarizer = False - - for entry in all_articles: - try: - # Find the feed for this entry - feed = next((f for f in candidate_feeds if f.id == entry.feed_id), None) - if not feed: - continue - - # First priority: use existing translated_title if available - if entry.translated_title and entry.translated_title.strip(): - temp_translations[entry.id] = entry.translated_title - continue - - # No existing translation - determine translator - if not feed.translator: - use_digest_summarizer = True - translator = digest.summarizer - else: - translator = feed.translator - - # Determine what title to use based on language match - if feed.target_language == digest.target_language: - # Same language - trigger Feed translation to get translated_title - from core.tasks.translate_feeds import _translate_entry_title - - metrics = _translate_entry_title( - entry=entry, - target_language=feed.target_language, - engine=translator, - ) - - total_tokens += metrics["tokens"] - total_characters += metrics["characters"] - - # Save the translation to Entry for future use - if metrics["tokens"] > 0: - entry.save(update_fields=["translated_title"]) - translated_count += 1 - - temp_translations[entry.id] = ( - entry.translated_title or entry.original_title - ) - - else: - if not entry.original_title: - # Fallback to original title if no translator or no content - # temp_translations[entry.id] = entry.original_title - continue - - # Perform temporary translation (not saved to Entry) - logger.debug( - f"[Digest Temp Translation] Translating title for entry {entry.id}" - ) - result = auto_retry( - translator.translate, - max_retries=3, - text=entry.original_title, - target_language=digest.target_language, - text_type="title", - ) - - if result and result.get("text"): - temp_translations[entry.id] = result.get("text") - digest_tokens += result.get("tokens", 0) - total_characters += result.get("characters", 0) - translated_count += 1 - else: - # Fallback to original title - temp_translations[entry.id] = entry.original_title - # Update digest token counts if we used digest's summarizer - if use_digest_summarizer: - digest.total_tokens += total_tokens - digest.save() - else: - feed.total_tokens += total_tokens - feed.total_characters += total_characters - feed.save() - - except Exception as e: - logger.error( - f"Error creating temporary translation for entry {entry.id}: {e}" - ) - digest.log += ( - f"Error creating temporary translation for entry {entry.id}: {e}\n" - ) - digest.status = False - digest.save() - # Fallback to original title - temp_translations[entry.id] = entry.original_title or "No title" - return temp_translations - - -def _ensure_entries_have_summaries(digest: Digest): - """ - Ensure all entries that will be included in the digest have AI summaries. - - This is a critical dependency - digest quality depends on having proper AI summaries - for all entries. 
Without this, the digest would use fallback content which is - much lower quality. - - Only processes articles within the digest's days_range to avoid unnecessary work. - - Args: - digest: The Digest instance - """ - from core.tasks.summarize_feeds import _summarize_entry - from core.models.entry import Entry - import gc - - # Get articles for digest within the specified days_range - all_articles = list(digest.get_articles_for_digest()) - - if not all_articles: - logger.info(f"No articles found for digest '{digest.name}'") - return - - # Filter entries that need summaries - prioritize existing ai_summary - entries_without_summary = [ - entry - for entry in all_articles - if not entry.ai_summary or entry.ai_summary.strip() == "" - ] - - if not entries_without_summary: - logger.info(f"All entries for digest '{digest.name}' already have AI summaries") - return - - logger.info( - f"Found {len(entries_without_summary)} entries without AI summaries. Generating summaries..." - ) - - # Process each entry directly - no need to group by feed - entries_to_save = [] - total_tokens = 0 - BATCH_SIZE = 5 # Memory-efficient batch size - use_digest_summarizer = False - - for idx, entry in enumerate(entries_without_summary): - try: - # Determine which summarizer to use - if entry.feed.summarizer: - summarizer = entry.feed.summarizer - target_language = entry.feed.target_language - summary_detail = entry.feed.summary_detail or 0.0 - elif digest.summarizer: - # Use digest's summarizer as fallback - summarizer = digest.summarizer - target_language = digest.target_language - summary_detail = 0.0 # Default detail level for digest fallback - use_digest_summarizer = True - logger.info( - f"Using digest summarizer for entry '{entry.original_title}' " - f"from feed '{entry.feed.name}' that doesn't have its own summarizer" - ) - else: - logger.warning( - f"Entry '{entry.original_title}' from feed '{entry.feed.name}' " - f"has no summarizer and digest has no fallback summarizer" - ) - continue - - logger.info( - f"[{idx + 1}/{len(entries_without_summary)}] Processing: {entry.original_title}" - ) - - # Generate summary for this entry directly - summary, entry_tokens = _summarize_entry( - entry=entry, - summarizer=summarizer, - target_language=target_language, - min_chunk_size=summarizer.min_size(), - max_chunk_size=summarizer.max_size(), - summarize_recursively=True, - max_context_chunks=4, - max_context_tokens=summarizer.max_tokens, - chunk_delimiter=".", - max_chunks_per_entry=20, - summary_detail=summary_detail, - ) - - entry.ai_summary = summary - total_tokens += entry_tokens - entries_to_save.append(entry) - - logger.info( - f"Completed summary for '{entry.original_title}' - Tokens: {entry_tokens}" - ) - - # Periodically save progress with smaller batch size - if len(entries_to_save) >= BATCH_SIZE: - _save_progress_batch( - entries_to_save, digest, total_tokens, use_digest_summarizer - ) - total_tokens = 0 - entries_to_save = [] - - # Force garbage collection - gc.collect() - - except Exception as e: - logger.error( - f"Error generating summary for entry '{entry.original_title}': {e}" - ) - digest.log += ( - f"Error generating summary for entry '{entry.original_title}': {e}\n" - ) - digest.status = False - digest.save() - entry.ai_summary = f"[Summary failed: {str(e)}]" - entries_to_save.append(entry) - - if entries_to_save: - _save_progress_batch( - entries_to_save, digest, total_tokens, use_digest_summarizer - ) - - -def _save_progress_batch(entries_to_save, digest, total_tokens, use_digest_summarizer): - 
"""Save progress with memory cleanup.""" - if entries_to_save: - from core.models.entry import Entry - - Entry.objects.bulk_update(entries_to_save, fields=["ai_summary"]) - del entries_to_save - - if total_tokens > 0 and use_digest_summarizer: - digest.total_tokens += total_tokens - digest.save() diff --git a/core/tests/test_actions.py b/core/tests/test_actions.py index 5a7cdb74..8a86feb8 100644 --- a/core/tests/test_actions.py +++ b/core/tests/test_actions.py @@ -16,8 +16,8 @@ feed_force_update, tag_force_update, feed_batch_modify, - create_digest, ) +from .. import actions as actions_module from unittest.mock import patch @@ -220,21 +220,9 @@ def test_export_opml_actions(self): root.find("head/title").text, "Translated Feeds | RSSBox" ) - @patch("core.actions.reverse") - def test_create_digest_action(self, mock_reverse): - """Test create digest action.""" - mock_reverse.return_value = "/admin/core/digest/add/" - feed2 = Feed.objects.create( - name="Feed 2", feed_url="https://example2.com/rss.xml" - ) - queryset = Feed.objects.filter(id__in=[self.feed.id, feed2.id]) - - response = create_digest(self.modeladmin, self.factory.get("/"), queryset) - - self.assertEqual(response.status_code, 302) - expected_ids = f"{self.feed.id},{feed2.id}" - self.assertIn(f"feed_ids={expected_ids}", response.url) - mock_reverse.assert_called_once_with("admin:core_digest_add") + def test_create_digest_action_removed(self): + """Digest action should no longer be exposed.""" + self.assertFalse(hasattr(actions_module, "create_digest")) def test_opml_edge_cases(self): """Test OPML generation edge cases and error handling.""" @@ -255,7 +243,9 @@ def test_opml_edge_cases(self): self.assertEqual(len(category_outline.findall("outline")), 2) # Test exception handling - with patch("core.actions.etree.Element", side_effect=Exception("Test error")): + with patch( + "core.actions.build_opml_response", side_effect=Exception("Test error") + ): with patch("core.actions.logger.error") as mock_logger: response = _generate_opml_feed( "Test", queryset, lambda f: f.feed_url, "test" diff --git a/core/tests/test_admin_site.py b/core/tests/test_admin_site.py index 51ca0dc4..34b30fbe 100644 --- a/core/tests/test_admin_site.py +++ b/core/tests/test_admin_site.py @@ -35,12 +35,12 @@ def test_get_app_list(self): # First app section - core models first_app = app_list[0] self.assertEqual(first_app["app_label"], "core") - # Allow optional Digest model; must at least contain Feed and Tag first_models = first_app["models"] self.assertGreaterEqual(len(first_models), 2) object_names = {m.get("object_name") for m in first_models} self.assertIn("Feed", object_names) self.assertIn("Tag", object_names) + self.assertNotIn("Digest", object_names) # Verify model entries exist and have required fields for model_entry in first_app["models"]: diff --git a/core/tests/test_agent_service_structure.py b/core/tests/test_agent_service_structure.py new file mode 100644 index 00000000..dccfb97a --- /dev/null +++ b/core/tests/test_agent_service_structure.py @@ -0,0 +1,105 @@ +from unittest.mock import patch + +from django.test import TestCase + +from core.models.agent import ( + DeepLAgent, + LibreTranslateAgent, + OpenAIAgent, + TestAgent, +) + + +class AgentServiceStructureTests(TestCase): + def test_agent_service_modules_exist(self): + from core.services.agent import deepl, libretranslate, openai, test_agent + + self.assertTrue(hasattr(openai, "openai_validate")) + self.assertTrue(hasattr(openai, "openai_completions")) + self.assertTrue(hasattr(openai, 
"openai_detect_model_limit")) + self.assertTrue(hasattr(openai, "openai_wait_for_rate_limit")) + self.assertTrue(hasattr(deepl, "deepl_validate")) + self.assertTrue(hasattr(deepl, "deepl_translate")) + self.assertTrue(hasattr(libretranslate, "libretranslate_validate")) + self.assertTrue(hasattr(libretranslate, "libretranslate_translate")) + self.assertTrue(hasattr(test_agent, "testagent_translate")) + + @patch("core.models.agent.openai_validate", return_value=True) + def test_openai_validate_delegates_to_service(self, mock_service): + agent = OpenAIAgent.objects.create(name="Delegation OpenAI", api_key="key") + + result = agent.validate() + + self.assertTrue(result) + mock_service.assert_called_once() + self.assertEqual(mock_service.call_args[0][0], agent) + + @patch("core.models.agent.openai_completions", return_value={"text": "ok", "tokens": 1}) + def test_openai_completions_delegates_to_service(self, mock_service): + agent = OpenAIAgent.objects.create( + name="Delegation OpenAI Completions", + api_key="key", + max_tokens=1000, + ) + + result = agent.completions("hello", system_prompt="sys") + + self.assertEqual(result["text"], "ok") + mock_service.assert_called_once() + self.assertEqual(mock_service.call_args[0][0], agent) + + @patch("core.models.agent.deepl_validate", return_value=True) + def test_deepl_validate_delegates_to_service(self, mock_service): + agent = DeepLAgent.objects.create(name="Delegation DeepL", api_key="key") + + result = agent.validate() + + self.assertTrue(result) + mock_service.assert_called_once() + self.assertEqual(mock_service.call_args[0][0], agent) + + @patch("core.models.agent.deepl_translate", return_value={"text": "ok", "characters": 3}) + def test_deepl_translate_delegates_to_service(self, mock_service): + agent = DeepLAgent.objects.create(name="Delegation DeepL Translate", api_key="key") + + result = agent.translate("hey", "English") + + self.assertEqual(result["text"], "ok") + mock_service.assert_called_once() + self.assertEqual(mock_service.call_args[0][0], agent) + + @patch("core.models.agent.libretranslate_validate", return_value=True) + def test_libretranslate_validate_delegates_to_service(self, mock_service): + agent = LibreTranslateAgent.objects.create( + name="Delegation LibreValidate", + server_url="https://example.com", + ) + + result = agent.validate() + + self.assertTrue(result) + mock_service.assert_called_once() + self.assertEqual(mock_service.call_args[0][0], agent) + + @patch("core.models.agent.libretranslate_translate", return_value={"text": "ok", "characters": 3}) + def test_libretranslate_translate_delegates_to_service(self, mock_service): + agent = LibreTranslateAgent.objects.create( + name="Delegation LibreTranslate", + server_url="https://example.com", + ) + + result = agent.translate("hey", "English") + + self.assertEqual(result["text"], "ok") + mock_service.assert_called_once() + self.assertEqual(mock_service.call_args[0][0], agent) + + @patch("core.models.agent.testagent_translate", return_value={"text": "ok", "tokens": 1, "characters": 3}) + def test_testagent_translate_delegates_to_service(self, mock_service): + agent = TestAgent.objects.create(name="Delegation TestAgent") + + result = agent.translate("hey", "English") + + self.assertEqual(result["text"], "ok") + mock_service.assert_called_once() + self.assertEqual(mock_service.call_args[0][0], agent) diff --git a/core/tests/test_feed_action_service.py b/core/tests/test_feed_action_service.py new file mode 100644 index 00000000..c37fc46e --- /dev/null +++ 
b/core/tests/test_feed_action_service.py @@ -0,0 +1,58 @@ +from django.test import TestCase +from unittest.mock import Mock + +from core.models import Feed, Tag + + +class FeedActionServiceTests(TestCase): + def setUp(self): + self.feed = Feed.objects.create( + name="Action Feed", + feed_url="https://example.com/action.xml", + fetch_status=True, + translation_status=True, + ) + self.tag = Tag.objects.create(name="Action Tag") + + def test_force_update_feeds_resets_status_and_submits_task(self): + from core.services.admin import force_update_feeds + + task_manager = Mock() + update_multiple_feeds_func = Mock() + + force_update_feeds( + Feed.objects.filter(id=self.feed.id), + task_manager=task_manager, + update_multiple_feeds_func=update_multiple_feeds_func, + ) + + self.feed.refresh_from_db() + self.assertIsNone(self.feed.fetch_status) + self.assertIsNone(self.feed.translation_status) + task_manager.submit_task.assert_called_once() + args = task_manager.submit_task.call_args[0] + self.assertEqual(args[0], "Force Update Feeds") + self.assertIs(args[1], update_multiple_feeds_func) + self.assertEqual(list(args[2]), list(Feed.objects.filter(id=self.feed.id))) + + def test_force_update_tags_updates_timestamp_and_submits_tasks(self): + from core.services.admin import force_update_tags + + task_manager = Mock() + cache_tag_func = Mock() + + force_update_tags( + Tag.objects.filter(id=self.tag.id), + task_manager=task_manager, + cache_tag_func=cache_tag_func, + ) + + self.tag.refresh_from_db() + self.assertIsNotNone(self.tag.last_updated) + self.assertEqual(task_manager.submit_task.call_count, 2) + task_manager.submit_task.assert_any_call( + "Force Update Tags", cache_tag_func, self.tag.slug, "t", "xml" + ) + task_manager.submit_task.assert_any_call( + "Force Update Tags", cache_tag_func, self.tag.slug, "t", "json" + ) diff --git a/core/tests/test_feed_admin.py b/core/tests/test_feed_admin.py index 37f33574..dbf23147 100644 --- a/core/tests/test_feed_admin.py +++ b/core/tests/test_feed_admin.py @@ -298,6 +298,8 @@ def setUp(self): @patch("core.tasks.task_manager.task_manager.submit_task") def test_submit_feed_update_task(self, mock_submit_task): """Test _submit_feed_update_task method (lines 190-193).""" + from core.services.feed import run_feed_update + mock_submit_task.return_value = "task-123" self.admin._submit_feed_update_task(self.feed) @@ -305,6 +307,7 @@ def test_submit_feed_update_task(self, mock_submit_task): mock_submit_task.assert_called_once() args = mock_submit_task.call_args self.assertEqual(args[0][0], f"update_feed_{self.feed.slug}") + self.assertIs(args[0][1], run_feed_update) def test_simple_update_frequency_cases(self): """Test simple_update_frequency for different time intervals.""" diff --git a/core/tests/test_feed_batch_service.py b/core/tests/test_feed_batch_service.py new file mode 100644 index 00000000..95d14483 --- /dev/null +++ b/core/tests/test_feed_batch_service.py @@ -0,0 +1,59 @@ +import uuid + +from django.test import TestCase + +from core.models import Feed, Filter, OpenAIAgent, Tag + + +class FeedBatchServiceTests(TestCase): + def setUp(self): + self.feed = Feed.objects.create( + name="Batch Feed", + feed_url="https://example.com/batch.xml", + ) + + def test_apply_batch_updates_boolean_and_scalar_fields(self): + from core.services.admin.batch import apply_batch_updates + + apply_batch_updates( + Feed.objects.filter(id=self.feed.id), + { + "translate_title": "True", + "summary": "False", + "update_frequency": "Change", + "update_frequency_value": "60", + }, + ) + + 
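# Payload convention exercised above (an observation on apply_batch_updates, not
# new behavior): boolean fields arrive as the strings "True"/"False", while fields
# taking a new value send the sentinel "Change" together with a matching
# "<field>_value" entry, e.g. "update_frequency" + "update_frequency_value".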
self.feed.refresh_from_db() + self.assertTrue(self.feed.translate_title) + self.assertFalse(self.feed.summary) + self.assertEqual(self.feed.update_frequency, 60) + + def test_apply_batch_updates_tags_filters_and_summarizer(self): + from core.services.admin.batch import apply_batch_updates + + tag = Tag.objects.create(name="Batch Tag") + filter_obj = Filter.objects.create(name="Batch Filter") + agent = OpenAIAgent.objects.create( + name=f"Batch Agent {uuid.uuid4()}", + api_key="test-key", + valid=True, + ) + + apply_batch_updates( + Feed.objects.filter(id=self.feed.id), + { + "tags": "Change", + "tags_value": [str(tag.id)], + "filter": "Change", + "filter_value": [str(filter_obj.id)], + "summarizer": "Change", + "summarizer_value": str(agent.id), + }, + ) + + self.feed.refresh_from_db() + self.assertIn(tag, self.feed.tags.all()) + self.assertIn(filter_obj, self.feed.filters.all()) + self.assertEqual(self.feed.summarizer_id, agent.id) diff --git a/core/tests/test_feed_filters.py b/core/tests/test_feed_filters.py new file mode 100644 index 00000000..2821b71e --- /dev/null +++ b/core/tests/test_feed_filters.py @@ -0,0 +1,51 @@ +from unittest.mock import Mock, patch + +from django.test import TestCase + +from core.models import Entry, Feed, Filter + + +class FeedFiltersServiceTests(TestCase): + def setUp(self): + self.feed = Feed.objects.create(feed_url="https://example.com/filter-service.xml") + self.entry1 = Entry.objects.create( + feed=self.feed, + link="https://example.com/entry1", + original_title="Python Article", + original_content="Python content", + ) + self.entry2 = Entry.objects.create( + feed=self.feed, + link="https://example.com/entry2", + original_title="Rust Article", + original_content="Rust content", + ) + + def test_apply_feed_filters_applies_all_attached_filters(self): + from core.services.feed.filters import apply_feed_filters + + filter_obj = Filter.objects.create( + name="Python Only", + keywords="Python", + filter_method=Filter.KEYWORD_ONLY, + operation=Filter.INCLUDE, + ) + self.feed.filters.add(filter_obj) + + result = apply_feed_filters(self.feed, self.feed.entries.all()) + + self.assertIn(self.entry1, result) + self.assertNotIn(self.entry2, result) + + @patch("core.models.feed.apply_feed_filters") + def test_feed_filtered_entries_property_delegates_to_service(self, mock_apply): + queryset = self.feed.entries.all() + mock_apply.return_value = queryset + + result = self.feed.filtered_entries + + self.assertEqual(result, queryset) + mock_apply.assert_called_once() + args = mock_apply.call_args[0] + self.assertEqual(args[0], self.feed) + self.assertEqual(list(args[1]), list(self.feed.entries.all())) diff --git a/core/tests/test_feed_pipeline.py b/core/tests/test_feed_pipeline.py new file mode 100644 index 00000000..1f0648d2 --- /dev/null +++ b/core/tests/test_feed_pipeline.py @@ -0,0 +1,66 @@ +from unittest.mock import patch + +from django.test import TestCase + +from core.models import Feed + + +class FeedPipelineTests(TestCase): + def setUp(self): + self.feed = Feed.objects.create( + feed_url="https://example.com/pipeline.xml", + name="Pipeline Feed", + translate_title=True, + translate_content=True, + summary=True, + ) + + @patch("core.services.feed.pipeline.close_old_connections") + @patch("core.services.feed.pipeline.handle_feeds_summary") + @patch("core.services.feed.pipeline.handle_feeds_translation") + @patch("core.services.feed.pipeline.handle_single_feed_fetch") + def test_run_feed_update_executes_requested_steps_in_order( + self, + mock_fetch, + 
mock_translate, + mock_summary, + mock_close_connections, + ): + from core.services.feed import run_feed_update + + self.assertTrue(run_feed_update(self.feed)) + + self.assertEqual( + [call.args for call in mock_translate.call_args_list], + [ + ([self.feed],), + ([self.feed],), + ], + ) + self.assertEqual( + [call.kwargs for call in mock_translate.call_args_list], + [ + {"target_field": "title"}, + {"target_field": "content"}, + ], + ) + mock_fetch.assert_called_once_with(self.feed) + mock_summary.assert_called_once_with([self.feed]) + self.assertGreaterEqual(mock_close_connections.call_count, 2) + + @patch("core.services.feed.pipeline.close_old_connections") + @patch("core.services.feed.pipeline.handle_single_feed_fetch") + @patch("core.services.feed.pipeline.logger") + def test_run_feed_update_returns_false_when_fetch_fails( + self, + mock_logger, + mock_fetch, + mock_close_connections, + ): + from core.services.feed import run_feed_update + + mock_fetch.side_effect = RuntimeError("boom") + + self.assertFalse(run_feed_update(self.feed)) + mock_logger.exception.assert_called_once() + self.assertGreaterEqual(mock_close_connections.call_count, 2) diff --git a/core/tests/test_feed_refresh.py b/core/tests/test_feed_refresh.py new file mode 100644 index 00000000..f0b9244b --- /dev/null +++ b/core/tests/test_feed_refresh.py @@ -0,0 +1,89 @@ +from unittest.mock import Mock, call + +from django.test import SimpleTestCase + + +class FeedRefreshTests(SimpleTestCase): + def setUp(self): + self.feed1 = Mock(slug="feed-1") + self.feed2 = Mock(slug="feed-2") + self.feed1.tags.values_list.return_value = [1, 2] + self.feed2.tags.values_list.return_value = [2, 3] + + def test_refresh_feed_caches_calls_all_output_variants(self): + from core.services.feed.refresh import refresh_feed_caches + + cache_rss_func = Mock() + + refresh_feed_caches( + [self.feed1, self.feed2], + cache_rss_func=cache_rss_func, + ) + + expected_calls = [ + call("feed-1", feed_type="o", format="xml"), + call("feed-1", feed_type="o", format="json"), + call("feed-1", feed_type="t", format="xml"), + call("feed-1", feed_type="t", format="json"), + call("feed-2", feed_type="o", format="xml"), + call("feed-2", feed_type="o", format="json"), + call("feed-2", feed_type="t", format="xml"), + call("feed-2", feed_type="t", format="json"), + ] + cache_rss_func.assert_has_calls(expected_calls) + + def test_get_related_tags_deduplicates_ids_before_query(self): + from core.services.feed.refresh import get_related_tags + + tag_model = Mock() + tag_model.objects.filter.return_value = ["tag-1", "tag-2", "tag-3"] + + result = get_related_tags( + [self.feed1, self.feed2], + tag_model=tag_model, + ) + + self.assertEqual(result, ["tag-1", "tag-2", "tag-3"]) + tag_model.objects.filter.assert_called_once_with(id__in={1, 2, 3}) + + def test_refresh_tag_caches_calls_all_output_variants(self): + from core.services.feed.refresh import refresh_tag_caches + + cache_tag_func = Mock() + tags = [Mock(slug="tag-1"), Mock(slug="tag-2")] + + refresh_tag_caches(tags, cache_tag_func=cache_tag_func) + + expected_calls = [ + call("tag-1", feed_type="o", format="xml"), + call("tag-1", feed_type="t", format="xml"), + call("tag-1", feed_type="t", format="json"), + call("tag-2", feed_type="o", format="xml"), + call("tag-2", feed_type="t", format="xml"), + call("tag-2", feed_type="t", format="json"), + ] + cache_tag_func.assert_has_calls(expected_calls) + + def test_refresh_updated_content_logs_and_continues_on_cache_errors(self): + from core.services.feed import 
refresh_updated_content + + tag_model = Mock() + tag_model.objects.filter.return_value = [Mock(slug="tag-1")] + cache_rss_func = Mock(side_effect=[None, Exception("rss failed"), None, None]) + cache_tag_func = Mock(side_effect=[Exception("tag failed")]) + logger = Mock() + time_func = Mock(return_value=1234567890) + + refresh_updated_content( + [self.feed1], + tag_model=tag_model, + cache_rss_func=cache_rss_func, + cache_tag_func=cache_tag_func, + logger=logger, + time_func=time_func, + ) + + logger.error.assert_any_call( + "1234567890: Failed to cache RSS for feed-1: rss failed" + ) + logger.error.assert_any_call("Failed to cache tag tag-1: tag failed") diff --git a/core/tests/test_feed_rendering.py b/core/tests/test_feed_rendering.py new file mode 100644 index 00000000..2e7e14dc --- /dev/null +++ b/core/tests/test_feed_rendering.py @@ -0,0 +1,111 @@ +import json + +from django.test import TestCase +from django.utils import timezone + +from core.models import Feed, Entry, Tag + + +class FeedRenderingTests(TestCase): + def setUp(self): + self.tag = Tag.objects.create(name="render-tag") + self.feed = Feed.objects.create( + name="Render Feed", + feed_url="https://example.com/render.xml", + slug="render-feed", + ) + self.feed.tags.add(self.tag) + Entry.objects.create( + feed=self.feed, + link="https://example.com/article", + guid="render-guid", + original_title="Original Title", + translated_title="翻译标题", + original_content="
<p>Original Content</p>
", + translated_content="
<p>翻译内容</p>
", + ai_summary="Summary", + pubdate=timezone.now(), + ) + + def test_render_feed_content_original_and_translated(self): + from core.services.feed import render_feed_content + + original = render_feed_content(self.feed, feed_type="o") + translated = render_feed_content(self.feed, feed_type="t") + + self.assertIn("Original Title", original) + self.assertNotIn("翻译标题", original) + self.assertIn("翻译标题", translated) + self.assertIn("Summary", translated) + + def test_render_tag_content_merges_multiple_feeds(self): + from core.services.feed import render_tag_content + + second_feed = Feed.objects.create( + name="Render Feed 2", + feed_url="https://example.com/render2.xml", + slug="render-feed-2", + ) + second_feed.tags.add(self.tag) + Entry.objects.create( + feed=second_feed, + link="https://example.com/article2", + guid="render-guid-2", + original_title="Second Title", + original_content="
<p>Second Content</p>
", + pubdate=timezone.now(), + ) + + merged = render_tag_content(self.tag.slug, Feed.objects.filter(tags=self.tag)) + + self.assertIn("翻译标题", merged) + self.assertIn("Second Title", merged) + + +class FeedResponseTests(TestCase): + def test_build_feed_response_returns_json(self): + from core.services.feed import build_feed_response + + response = build_feed_response( + "JSON Feed", + "json-feed", + format="json", + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(json.loads(response.content)["title"], "JSON Feed") + + def test_build_feed_response_returns_xml_stream(self): + from core.services.feed import build_feed_response + + response = build_feed_response( + "XML Feed", + "xml-feed", + format="xml", + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(response["Content-Type"], "application/xml; charset=utf-8") + self.assertEqual( + b"".join(response.streaming_content), + b"XML Feed", + ) + + def test_build_feed_response_returns_404_json_when_empty(self): + from core.services.feed import build_feed_response + + response = build_feed_response(None, "missing-feed", format="json") + + self.assertEqual(response.status_code, 404) + self.assertEqual(json.loads(response.content)["error"], "No feed data available") + + def test_build_feed_response_returns_error_xml_when_empty(self): + from core.services.feed import build_feed_response + + response = build_feed_response(None, "missing-feed", format="xml") + + self.assertEqual(response.status_code, 200) + self.assertEqual( + b"".join(response.streaming_content), + b"No feed data available", + ) diff --git a/core/tests/test_opml_service.py b/core/tests/test_opml_service.py new file mode 100644 index 00000000..057b1dd1 --- /dev/null +++ b/core/tests/test_opml_service.py @@ -0,0 +1,58 @@ +from django.test import TestCase + +from core.models import Feed, Tag + + +class OpmlServiceTests(TestCase): + def test_import_opml_content_creates_feed(self): + from core.services.opml import import_opml_content + + created_count = import_opml_content( + b""" + + + + + + """ + ) + + self.assertEqual(created_count, 1) + self.assertTrue(Feed.objects.filter(feed_url="http://example.com/feed1.xml").exists()) + + def test_import_opml_content_creates_nested_tags(self): + from core.services.opml import import_opml_content + + created_count = import_opml_content( + b""" + + + + + + + + """ + ) + + self.assertEqual(created_count, 1) + feed = Feed.objects.get(feed_url="http://example.com/technews.xml") + self.assertTrue(feed.tags.filter(name="News").exists()) + + def test_build_opml_response_groups_feeds_by_tag(self): + from core.services.opml import build_opml_response + + tag = Tag.objects.create(name="Tech") + feed = Feed.objects.create(name="Feed", feed_url="http://example.com/feed.xml") + feed.tags.add(tag) + + response = build_opml_response( + title_prefix="Test Export", + queryset=Feed.objects.filter(id=feed.id), + get_feed_url_func=lambda current_feed: current_feed.feed_url, + filename_prefix="test", + ) + + self.assertEqual(response.status_code, 200) + self.assertIn("test_feeds_from_rssbox.opml", response["Content-Disposition"]) + self.assertIn(b"Test Export | RSSBox", response.content) diff --git a/core/tests/test_prompts.py b/core/tests/test_prompts.py new file mode 100644 index 00000000..a9b090fc --- /dev/null +++ b/core/tests/test_prompts.py @@ -0,0 +1,28 @@ +from django.test import SimpleTestCase + + +class PromptDefaultsTests(SimpleTestCase): + def test_settings_prompt_defaults_are_reexported_from_prompts_module(self): + 
from config import settings + from core.prompts import ( + DEFAULT_CONTENT_TRANSLATE_PROMPT, + DEFAULT_FILTER_PROMPT, + DEFAULT_SUMMARY_PROMPT, + DEFAULT_TITLE_TRANSLATE_PROMPT, + OUTPUT_FORMAT_FOR_FILTER_PROMPT, + ) + + self.assertEqual( + settings.default_title_translate_prompt, + DEFAULT_TITLE_TRANSLATE_PROMPT, + ) + self.assertEqual( + settings.default_content_translate_prompt, + DEFAULT_CONTENT_TRANSLATE_PROMPT, + ) + self.assertEqual(settings.default_summary_prompt, DEFAULT_SUMMARY_PROMPT) + self.assertEqual(settings.default_filter_prompt, DEFAULT_FILTER_PROMPT) + self.assertEqual( + settings.output_format_for_filter_prompt, + OUTPUT_FORMAT_FOR_FILTER_PROMPT, + ) diff --git a/core/tests/test_service_structure.py b/core/tests/test_service_structure.py new file mode 100644 index 00000000..767cf8e9 --- /dev/null +++ b/core/tests/test_service_structure.py @@ -0,0 +1,53 @@ +from django.test import SimpleTestCase + + +class ServiceStructureTests(SimpleTestCase): + def test_feed_service_exports(self): + from core.services.feed import ( + build_feed_response, + refresh_updated_content, + render_feed_content, + render_tag_content, + run_feed_update, + ) + + self.assertTrue(callable(run_feed_update)) + self.assertTrue(callable(refresh_updated_content)) + self.assertTrue(callable(render_feed_content)) + self.assertTrue(callable(render_tag_content)) + self.assertTrue(callable(build_feed_response)) + + def test_feed_service_hides_internal_helpers(self): + import core.services.feed as feed_services + + for name in [ + "apply_ai_filter", + "apply_feed_filters", + "apply_filter", + "apply_keywords_filter", + "apply_tag_filters", + "needs_re_evaluation", + "add_atom_entry", + "build_atom_feed", + "finalize_atom_feed", + ]: + with self.subTest(name=name): + self.assertFalse(hasattr(feed_services, name)) + + def test_admin_service_exports(self): + from core.services.admin import force_update_feeds, force_update_tags + + self.assertTrue(callable(force_update_feeds)) + self.assertTrue(callable(force_update_tags)) + + def test_admin_service_hides_batch_helpers(self): + import core.services.admin as admin_services + + self.assertFalse(hasattr(admin_services, "apply_batch_updates")) + self.assertFalse(hasattr(admin_services, "build_batch_modify_context")) + + def test_opml_service_exports(self): + from core.services.opml import build_opml_response, import_opml_content + + self.assertTrue(callable(import_opml_content)) + self.assertTrue(callable(build_opml_response)) diff --git a/core/tests/test_views.py b/core/tests/test_views.py index 3baf1226..5f402acb 100644 --- a/core/tests/test_views.py +++ b/core/tests/test_views.py @@ -2,7 +2,7 @@ from django.http import Http404, JsonResponse from unittest.mock import patch, MagicMock from django.core.files.uploadedfile import InMemoryUploadedFile -from django.urls import reverse +from django.urls import reverse, NoReverseMatch from django.contrib.messages.storage.fallback import FallbackStorage import io import json @@ -141,39 +141,43 @@ def test_import_opml_invalid_file(self): self.assertEqual(Feed.objects.count(), initial_feed_count) self.assertIn("Invalid OPML: Missing body element", [str(m) for m in messages]) - @patch("core.views.feed2json") + def test_digest_routes_removed(self): + """Digest URLs should no longer be registered.""" + with self.assertRaises(NoReverseMatch): + reverse("core:digest_rss", kwargs={"slug": "removed"}) + + with self.assertRaises(NoReverseMatch): + reverse("core:digest_json", kwargs={"slug": "removed"}) + + with 
self.assertRaises(NoReverseMatch): + reverse("core:digest_view", kwargs={"slug": "removed"}) + @patch("core.views.cache") @patch("core.views.cache_rss") - def test_rss_view_json_format(self, mock_cache_rss, mock_cache, mock_feed2json): + def test_rss_view_json_format(self, mock_cache_rss, mock_cache): """Test the rss view with format='json'.""" mock_cache.get.return_value = None # Cache miss mock_cache_rss.return_value = ( "Test Feed" ) - mock_feed2json.return_value = {"title": "JSON Feed"} request = self.factory.get(f"/rss/{self.feed.slug}") response = rss(request, self.feed.slug, feed_type="o", format="json") mock_cache_rss.assert_called_once_with(self.feed.slug, "o", "json") - mock_feed2json.assert_called_once_with( - "Test Feed" - ) self.assertIsInstance(response, JsonResponse) self.assertEqual(response.status_code, 200) json_content = json.loads(response.content) - self.assertEqual(json_content["title"], "JSON Feed") + self.assertEqual(json_content["title"], "Test Feed") - @patch("core.views.feed2json") @patch("core.views.cache") @patch("core.views.cache_rss") def test_rss_view_json_format_no_feed_data( - self, mock_cache_rss, mock_cache, mock_feed2json + self, mock_cache_rss, mock_cache ): """Test the rss view with format='json' when no feed data is available.""" mock_cache.get.return_value = None # Cache miss mock_cache_rss.return_value = None # No feed data - mock_feed2json.return_value = {"title": "JSON Feed"} request = self.factory.get(f"/rss/{self.feed.slug}") response = rss(request, self.feed.slug, format="json") @@ -285,7 +289,6 @@ def test_import_opml_invalid_file_type(self): response = import_opml(request) self.assertEqual(response.status_code, 302) - # The mock file will cause XML parsing error, so we check for that instead self.assertTrue(any("XML syntax error:" in str(m) for m in messages)) def test_import_opml_xml_syntax_error(self): @@ -314,9 +317,9 @@ def test_import_opml_general_exception(self): request = self.factory.post("/fake-url", {"opml_file": opml_file}) messages = self._setup_request_with_messages(request) - # Mock Feed.objects.get_or_create to raise an exception - with patch("core.views.Feed.objects.get_or_create") as mock_get_or_create: - mock_get_or_create.side_effect = Exception("Database error") + # Mock service to raise an exception + with patch("core.views.import_opml_content") as mock_import_opml_content: + mock_import_opml_content.side_effect = Exception("Database error") initial_feed_count = Feed.objects.count() import_opml(request) diff --git a/core/urls.py b/core/urls.py index c67431b3..e8f4e1ca 100644 --- a/core/urls.py +++ b/core/urls.py @@ -48,27 +48,6 @@ "json//", views.rss, kwargs={"feed_type": "t", "format": "json"} ), path("import_opml/", views.import_opml, name="import_opml"), - # Digest URLs - path("digest/view/", views.digest_view, name="digest_view"), - path("digest/view//", views.digest_view, name="digest_view"), - path( - "digest/json/", - views.digest, - kwargs={"format": "json"}, - name="digest_json", - ), - path( - "digest/json//", - views.digest, - kwargs={"format": "json"}, - name="digest_json", - ), - path( - "digest/", views.digest, kwargs={"format": "xml"}, name="digest_rss" - ), - path( - "digest//", views.digest, kwargs={"format": "xml"}, name="digest_rss" - ), path("", views.rss, kwargs={"feed_type": "t", "format": "xml"}), path("/", views.rss, kwargs={"feed_type": "t", "format": "xml"}), ] diff --git a/core/views.py b/core/views.py index fdef2988..77e765cb 100644 --- a/core/views.py +++ b/core/views.py @@ -1,19 +1,17 @@ 
import logging -from django.http import HttpResponse, StreamingHttpResponse, JsonResponse +from django.http import HttpResponse from django.utils.encoding import smart_str -from django.utils import timezone from django.core.cache import cache from django.views.decorators.http import condition -from .models import Feed, Tag, Digest -from django.shortcuts import redirect, get_object_or_404 +from .models import Feed, Tag +from django.shortcuts import redirect from django.contrib import messages from django.core.files.uploadedfile import InMemoryUploadedFile -from lxml import etree from django.utils.translation import gettext_lazy as _ -from feed2json import feed2json -import mistune -from .cache import cache_rss, cache_tag, cache_digest +from .cache import cache_rss, cache_tag +from core.services.opml import import_opml_content +from core.services.feed import build_feed_response logger = logging.getLogger(__name__) @@ -49,93 +47,23 @@ def _get_etag(request, feed_slug, feed_type="t", **kwargs): return etag -def _make_response(atom_feed, filename, format="xml"): - if format == "json": - # 如果需要返回 JSON 格式 - if not atom_feed: - return JsonResponse({"error": "No feed data available"}, status=404) - feed_json = feed2json(atom_feed) - response = JsonResponse(feed_json) - else: - # 使用生成器函数实现流式传输 - def stream_content(): - if not atom_feed: - yield b"No feed data available" - return - chunk_size = 4096 # 每次发送4KB - for i in range(0, len(atom_feed), chunk_size): - yield atom_feed[i : i + chunk_size] - - response = StreamingHttpResponse( - stream_content(), # 使用生成器 - content_type="application/xml; charset=utf-8", - ) - response["Content-Disposition"] = f"inline; filename={filename}.xml" - return response - - -def _get_digest_modified(request, slug: str, **kwargs): - try: - digest = Digest.objects.get(slug=slug) - return digest.last_generated - except Digest.DoesNotExist: - return None - - -def _get_digest_etag(request, slug: str, **kwargs): - try: - digest = Digest.objects.get(slug=slug) - return digest.last_generated.isoformat() if digest.last_generated else None - except Digest.DoesNotExist: - return None - - def import_opml(request): if request.method == "POST": opml_file = request.FILES.get("opml_file") if opml_file and isinstance(opml_file, InMemoryUploadedFile): try: - # 直接读取字节数据(lxml 支持二进制解析) - opml_content = opml_file.read() - - # 使用安全的 lxml 解析器解析 OPML - parser = etree.XMLParser(resolve_entities=False) - root = etree.fromstring(opml_content, parser=parser) - body = root.find("body") - - if body is None: - messages.error(request, _("Invalid OPML: Missing body element")) - return redirect("admin:core_feed_changelist") - - # 递归处理所有 outline 节点 - def process_outlines(outlines, tag: str = None): - for outline in outlines: - # 检查是否为 feed(有 xmlUrl 属性) - if "xmlUrl" in outline.attrib: - feed, created = Feed.objects.get_or_create( - feed_url=outline.get("xmlUrl"), - defaults={ - "name": outline.get("title") or outline.get("text") - }, - ) - if tag: - tag_obj, _ = Tag.objects.get_or_create(name=tag) - feed.tags.add(tag_obj) - # 处理嵌套结构(新类别) - elif outline.find("outline") is not None: - new_tag = outline.get("text") or outline.get("title") - process_outlines(outline.findall("outline"), new_tag) - - # 从 body 开始处理顶级 outline - process_outlines(body.findall("outline")) - + import_opml_content(opml_file.read()) messages.success(request, _("OPML file imported successfully.")) - except etree.XMLSyntaxError as e: - messages.error(request, _("XML syntax error: {}").format(str(e))) + except ValueError as e: + 
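# import_opml_content raises ValueError for structurally invalid OPML
# (e.g. a missing body element), so the message is shown to the admin as-is.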
messages.error(request, _(str(e))) except Exception as e: - messages.error( - request, _("Error importing OPML file: {}").format(str(e)) - ) + message = str(e) + if "XMLSyntaxError" in type(e).__name__: + messages.error(request, _("XML syntax error: {}").format(message)) + else: + messages.error( + request, _("Error importing OPML file: {}").format(message) + ) else: messages.error(request, _("Please upload a valid OPML file.")) @@ -155,7 +83,7 @@ def rss(request, feed_slug, feed_type="t", format="xml"): else: logger.debug(f"Cache HIT for key: {cache_key}") - return _make_response(content, feed_slug, format) + return build_feed_response(content, feed_slug, format) except Exception as e: logger.warning(f"Feed not found {feed_slug}: {str(e)}") return HttpResponse( @@ -179,140 +107,10 @@ def tag(request, tag: str, feed_type="t", format="xml"): content = cache_tag(tag, feed_type, format) else: logger.debug(f"Cache HIT for key: {cache_key}") - return _make_response(content, tag, format) + return build_feed_response(content, tag, format) except Exception as e: logger.warning("tag not found: %s / %s", tag, str(e)) return HttpResponse( status=404, content="Feed not found, Maybe it's still in progress, Please try again later.", ) - - -def digest_view(request, slug): - """Display digest content as HTML page.""" - digest = get_object_or_404(Digest, slug=slug) - - # 获取最新一条摘要 Entry - digest_feed = digest.get_digest_feed() - latest = digest_feed.entries.order_by("-pubdate", "-id").first() - if not latest or not latest.ai_summary: - return HttpResponse( - status=404, - content="No digest content available. Please generate the digest first.", - ) - - # Convert markdown to HTML - # md = markdown.Markdown(extensions=['extra', 'codehilite', 'tables', 'toc']) - html_content = mistune.html(latest.ai_summary) - - # Format last_generated time with timezone conversion - if digest.last_generated: - local_time = timezone.localtime(digest.last_generated) - generated_time = local_time.strftime("%Y-%m-%d %H:%M:%S") - else: - generated_time = "Never" - - # Create HTML response - html_response = f""" - - - - - - {digest.name} - - - -
-    <div>
-        Generated: {generated_time}<br>
-        Tags: {", ".join([tag.name for tag in digest.tags.all()])}<br>
-        Days Range: {digest.days_range} days
-    </div>
-    <div>
-        {html_content}
-    </div>
- - - """ - - return HttpResponse(html_response, content_type="text/html; charset=utf-8") - - -@condition(etag_func=_get_digest_etag, last_modified_func=_get_digest_modified) -def digest(request, slug, format="xml"): - """Return digest as ATOM/JSON feed, with caching.""" - slug = smart_str(slug) - try: - cache_key = f"cache_digest_{slug}_{format}" - content = cache.get(cache_key) - if content is None: - logger.debug(f"Cache MISS for key: {cache_key}") - content = cache_digest(slug, format) - else: - logger.debug(f"Cache HIT for key: {cache_key}") - - return _make_response(content, slug, format) - except Exception as e: - logger.warning(f"Digest not found {slug}: {str(e)}") - return HttpResponse( - status=404, - content="Digest not found, or not generated yet.", - ) diff --git a/docs/docs/architecture.en.md b/docs/docs/architecture.en.md new file mode 100644 index 00000000..691ec6d9 --- /dev/null +++ b/docs/docs/architecture.en.md @@ -0,0 +1,79 @@ +--- +title: Architecture +summary: Current RSSBox code structure and responsibilities +--- + +# Architecture + +RSSBox is currently organized into these main layers: + +## `core/models` + +Stores data structures and minimal model behavior. + +- `feed.py`: RSS source configuration, status, and usage stats +- `entry.py`: article content +- `filter.py`: filter configuration and filter result cache +- `agent.py`: configuration models for translation and summary agents +- `tag.py`: tags and aggregated feed configuration + +## `core/services` + +Contains the actual application logic. + +### `core/services/feed` + +All Feed-related workflow logic lives here: + +- `pipeline.py`: single-feed update pipeline +- `refresh.py`: cache refresh and tag aggregation refresh +- `filters.py`: filter rules for feeds and tags +- `rendering.py`: RSS/Atom content generation +- `response.py`: RSS/JSON response wrapping + +### `core/services/admin` + +Admin action logic: + +- `actions.py`: force update and tag recombination +- `batch.py`: batch modification + +### `core/services/agent` + +Concrete implementations for agent behavior: + +- `openai.py` +- `deepl.py` +- `libretranslate.py` +- `test_agent.py` + +### `core/services/opml.py` + +OPML import and export. + +## `core/views.py` + +Only handles requests, cache lookup, service calls, and responses. + +## `core/actions.py` + +Only keeps Django admin action entry points and redirect/render behavior. + +## `core/management/commands` + +Command entry layer. +The main command is `feed_updater.py`, which schedules the update workflow. + +## `core/cache.py` + +Cache entry layer. +It manages cache keys and delegates content generation to services. + +## Maintenance rules + +Future changes should follow these rules: + +1. Views and commands should stay as entry points, not hold large workflow logic. +2. Models should keep configuration and minimal behavior; complex workflows belong in `services`. +3. Rules of the same kind should live in one place only. +4. New behavior should add tests first, then service-layer implementation. 
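To make rule 1 concrete, here is a minimal sketch of the entry-layer shape that `core/views.py` follows after this change — the view name `rss_entry` and the cache-key layout are illustrative assumptions, while `cache_rss` and `build_feed_response` are the real helpers introduced in this diff:

```python
import logging

from django.core.cache import cache
from django.http import HttpResponse

from core.cache import cache_rss  # cache entry layer
from core.services.feed import build_feed_response  # response wrapping service

logger = logging.getLogger(__name__)


def rss_entry(request, feed_slug, feed_type="t", format="xml"):
    """Hypothetical entry point: cache lookup, service delegation, response."""
    try:
        # Illustrative key layout; core/cache.py owns the real cache keys.
        cache_key = f"cache_rss_{feed_slug}_{feed_type}_{format}"
        content = cache.get(cache_key)
        if content is None:
            # Cache miss: the cache layer regenerates content via core/services/feed.
            content = cache_rss(feed_slug, feed_type, format)
        return build_feed_response(content, feed_slug, format)
    except Exception as e:
        logger.warning(f"Feed not found {feed_slug}: {e}")
        return HttpResponse(status=404, content="Feed not found.")
```

The view holds no rendering or filtering logic of its own; anything that fails inside the service layer surfaces as a plain 404, matching the behavior of the real `rss` and `tag` views.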
diff --git a/docs/docs/architecture.md b/docs/docs/architecture.md new file mode 100644 index 00000000..70b707f6 --- /dev/null +++ b/docs/docs/architecture.md @@ -0,0 +1,79 @@ +--- +title: 代码结构 +summary: 当前 RSSBox 的主要代码分层与职责 +--- + +# 代码结构 + +RSSBox 当前的代码主要分成这几层: + +## `core/models` + +保存数据结构和最基本的模型行为。 + +- `feed.py`:RSS 源配置、状态和统计信息 +- `entry.py`:文章内容 +- `filter.py`:过滤器配置和过滤结果缓存 +- `agent.py`:各类翻译/摘要 Agent 的配置模型 +- `tag.py`:标签与聚合源配置 + +## `core/services` + +保存真正的业务处理逻辑。 + +### `core/services/feed` + +和 Feed 主流程相关的处理都在这里: + +- `pipeline.py`:单个 Feed 的更新流程 +- `refresh.py`:更新后的缓存刷新与标签聚合刷新 +- `filters.py`:Feed 与 Tag 的过滤规则 +- `rendering.py`:RSS/Atom 输出内容生成 +- `response.py`:RSS/JSON 返回格式包装 + +### `core/services/admin` + +后台动作相关的处理: + +- `actions.py`:强制更新、标签重组 +- `batch.py`:批量修改 + +### `core/services/agent` + +不同 Agent 的具体执行逻辑: + +- `openai.py` +- `deepl.py` +- `libretranslate.py` +- `test_agent.py` + +### `core/services/opml.py` + +OPML 导入与导出。 + +## `core/views.py` + +只负责接收请求、读取缓存、调用服务并返回响应。 + +## `core/actions.py` + +只负责 Django admin action 入口与页面跳转,不再保存复杂业务逻辑。 + +## `core/management/commands` + +命令入口层。 +当前重点命令是 `feed_updater.py`,负责按频率调度更新流程。 + +## `core/cache.py` + +缓存入口层。 +负责读写缓存键,并调用服务层生成输出内容。 + +## 维护原则 + +后续改动优先遵守这几点: + +1. 页面和命令只做入口,不直接写大段业务逻辑。 +2. 模型保留配置和最小行为,复杂流程放进 `services`。 +3. 同一类规则只放一个位置,避免“改一处漏两处”。 +4. 新功能优先先补测试,再进入服务层实现。 diff --git a/docs/docs/faq.en.md b/docs/docs/faq.en.md index c15943c6..d0cd8441 100644 --- a/docs/docs/faq.en.md +++ b/docs/docs/faq.en.md @@ -36,11 +36,6 @@ If deployed using other methods, wait for 5 minutes and if the problem persists, 1. Check if AI engine is correctly configured and valid 2. Check if filter prompts are clear and specific -### AI Digest Generation Failed -1. Check if RSS sources are correctly created and associated with tags -2. Confirm that AI engine used for digest generation is working properly -3. Check if tags have sufficient content (recommend 3-5 active sources) - ### Tag System Issues 1. Check if RSS sources are correctly associated with tags 2. Confirm that associated sources have new content updates @@ -81,4 +76,4 @@ If this environment variable is not set, the system defaults to UTC timezone. If Cloudflare's DNS proxy is enabled, you need to select Full for encryption mode on Cloudflare's SSL/TLS page. ### Still can't resolve the issue? -Please [Submit an Issue](https://github.com/versun/rssbox/issues) or provide feedback in the [Telegram Group](https://t.me/rssboxapp) \ No newline at end of file +Please [Submit an Issue](https://github.com/versun/rssbox/issues) or provide feedback in the [Telegram Group](https://t.me/rssboxapp) diff --git a/docs/docs/faq.md b/docs/docs/faq.md index bf0a557f..2fd9d540 100644 --- a/docs/docs/faq.md +++ b/docs/docs/faq.md @@ -33,11 +33,6 @@ 1. 检查AI引擎是否正确配置且有效 2. 检查过滤提示词是否明确具体 -### AI日报生成失败 -1. 检查是否正确创建并关联RSS源到标签 -2. 确认日报生成使用的AI引擎正常工作 -3. 检查标签下是否有足够的内容(建议3-5个活跃源) - ### 标签系统问题 1. 检查是否正确关联了RSS源到标签 2. 确认关联的源有新内容更新 diff --git a/docs/docs/guide.en.md b/docs/docs/guide.en.md index 03a6c9ff..07fe71b5 100644 --- a/docs/docs/guide.en.md +++ b/docs/docs/guide.en.md @@ -60,29 +60,6 @@ Through the tag system you can: - Organize multiple related RSS sources together - Create topic-categorized aggregated sources - Apply unified filtering rules -- Facilitate subsequent AI digest generation - -### AI Digest Generation - -**AI Digest is a highlight feature of RSSBox**, intelligently aggregating multiple information sources: - -#### Configuration Steps -1. 
**Create tags and associate RSS sources** - - Create new tags in the admin interface (e.g., "Tech News", "AI Information", etc.) - - Associate relevant RSS sources to corresponding tags - - Ensure associated sources have regularly updated content - -2. **Configure digest settings** - - Enter digest management page - - Create new digest configuration: - - Name: Daily Tech Information - - Description: Intelligent analysis of global tech news - - Associated tags: Select created tags - - AI Engine: Select valid translation engine as summary generator - - Publishing days: Can check Monday through Sunday, starts generating around 2 AM - -3. **Subscribe to generated digests** - - After configuration is complete, the system will automatically generate digests at specified times ## 📋 Practical Tips diff --git a/docs/docs/guide.md b/docs/docs/guide.md index e1f26ebf..a535eca1 100644 --- a/docs/docs/guide.md +++ b/docs/docs/guide.md @@ -60,29 +60,6 @@ - 将多个相关RSS源组织在一起 - 创建主题分类的聚合源 - 统一应用过滤规则 -- 便于后续AI日报生成 - -### AI日报生成 - -**AI日报是RSSBox的亮点功能**,可以智能聚合多个信息源: - -#### 配置步骤 -1. **创建标签并关联RSS源** - - 在管理界面中创建新标签(如"科技新闻"、"AI资讯"等) - - 将相关的RSS源关联到对应标签 - - 确保关联的源有定期更新的内容 - -2. **配置日报设置** - - 进入日报管理页面 - - 创建新的日报配置: - - 名称:每日科技资讯 - - 描述:汇聚全球科技新闻的智能分析 - - 关联标签:选择已创建的标签 - - AI引擎:选择有效的翻译引擎作为摘要生成器 - - 发布天数:可勾选周一至周日,固定在凌晨2点左右开始生成 - -3. **订阅生成的日报** - - 配置完成后,系统会在指定时间自动生成日报 ## 📋 实用技巧 diff --git a/docs/docs/index.en.md b/docs/docs/index.en.md index 546fccb5..9250030d 100644 --- a/docs/docs/index.en.md +++ b/docs/docs/index.en.md @@ -10,7 +10,7 @@ summary: Smarter RSS management, more efficient information access

Formerly RSS Translator

[中文](/) | [English](/en)
-Breaking language barriers for global information access. RSSBox helps you automatically translate foreign RSS content, supporting multi-engine translation, intelligent filtering, and AI digest generation for a one-stop cross-language information solution.
+Breaking language barriers for global information access. RSSBox helps you automatically translate foreign RSS content, supporting multi-engine translation and intelligent filtering for a one-stop cross-language information solution.

Official website: [rssbox.app](https://rssbox.app)

@@ -27,12 +27,6 @@ Official website: [rssbox.app](https://rssbox.app)
- **AI Smart Filtering** - Semantic understanding-based intelligent content filtering, say goodbye to information overload
- **Multi-layer Filtering** - Set independent filtering rules for each source

-### 📊 AI Digest Generation
-- **Intelligent Aggregation** - Automatically collect content from multiple information sources
-- **Topic Classification** - AI automatically identifies and categorizes news topics
-- **Custom Digests** - Generate structured daily information summaries
-- **Tag Management** - Merge multiple RSS sources into unified topics through tags
-
### ⚡ Efficient Management
- **Batch Operations** - Support batch updating, translation, and RSS source management
- **Real-time Monitoring** - View translation status and processing progress
diff --git a/docs/docs/index.md b/docs/docs/index.md
index 24594c6c..e7914758 100644
--- a/docs/docs/index.md
+++ b/docs/docs/index.md
@@ -10,7 +10,7 @@ summary: 让RSS管理更智能,让信息获取更高效

原 RSS Translator

[中文](/) | [English](/en)
-让语言不再成为获取全球资讯的障碍。RSSBox帮助您自动翻译外语RSS内容,支持多引擎翻译、智能过滤和AI日报生成,为跨语言信息获取提供一站式解决方案。
+让语言不再成为获取全球资讯的障碍。RSSBox帮助您自动翻译外语RSS内容,支持多引擎翻译和智能过滤,为跨语言信息获取提供一站式解决方案。

官方网站:[rssbox.app](https://rssbox.app)

@@ -27,12 +27,6 @@ summary: 让RSS管理更智能,让信息获取更高效
- **AI智能过滤** - 基于语义理解的智能内容筛选,告别信息过载
- **多层过滤** - 可为每个源设置独立的过滤规则

-### 📊 AI日报生成
-- **智能聚合** - 自动收集多个信息源的内容
-- **主题分类** - AI自动识别和分类新闻主题
-- **定制日报** - 生成结构化的每日资讯摘要
-- **标签管理** - 通过标签将多个RSS源合并为统一主题
-
### ⚡ 高效管理
- **批量操作** - 支持批量更新、翻译和管理RSS源
- **实时监控** - 查看翻译状态和处理进度
diff --git a/docs/docs/refactor.en.md b/docs/docs/refactor.en.md
new file mode 100644
index 00000000..988707b3
--- /dev/null
+++ b/docs/docs/refactor.en.md
@@ -0,0 +1,60 @@
+---
+title: Refactor Notes
+summary: Main structural changes from the refactor and where to maintain things now
+---
+
+# Refactor Notes
+
+The goal of this refactor was simple: views, models, admin actions, commands, and services should each do one kind of work.
+
+## Main changes
+
+### 1. Digest removed
+
+- Removed Digest code, routes, admin entry points, scheduled tasks, and related documentation
+- Kept cleanup logic for old Digest-generated data
+
+### 2. Feed workflow unified
+
+- Single-feed update workflow moved to `core/services/feed/pipeline.py`
+- Post-update refresh logic moved to `core/services/feed/refresh.py`
+
+### 3. Output and filtering unified
+
+- Feed rendering moved to `core/services/feed/rendering.py`
+- RSS/JSON response wrapping moved to `core/services/feed/response.py`
+- Feed and tag filtering rules moved to `core/services/feed/filters.py`
+
+### 4. Admin actions unified
+
+- Force update and tag recombination moved to `core/services/admin/actions.py`
+- Batch modification moved to `core/services/admin/batch.py`
+
+### 5. Agent execution logic extracted
+
+- OpenAI / DeepL / LibreTranslate / TestAgent execution logic moved to `core/services/agent/`
+- `core/models/agent.py` now mainly keeps fields and thin wrappers
+
+### 6. Prompt defaults extracted
+
+- Default prompts moved to `core/prompts.py`
+- `config/settings.py` now keeps configuration plus compatibility exports
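+
+A minimal sketch of the compatibility-export pattern, assuming lowercase legacy names (the exact set of re-exported names lives in `config/settings.py`; this is an illustration, not a verbatim copy):
+
+```python
+# config/settings.py (sketch): prompt defaults now live in core/prompts.py,
+# and settings only re-exports them so existing imports keep working.
+from core.prompts import (
+    DEFAULT_SUMMARY_PROMPT,
+    DEFAULT_TITLE_TRANSLATE_PROMPT,
+)
+
+# Backwards-compatible aliases for code that still imports the old
+# lowercase names from settings (alias names assumed for illustration).
+default_title_translate_prompt = DEFAULT_TITLE_TRANSLATE_PROMPT
+default_summary_prompt = DEFAULT_SUMMARY_PROMPT
+```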
+
+## Current maintenance entry points
+
+If you need to change something later:
+
+- Feed update flow: `core/services/feed/pipeline.py`
+- Refresh and cache behavior: `core/services/feed/refresh.py`
+- Output generation: `core/services/feed/rendering.py` and `response.py`
+- Filtering rules: `core/services/feed/filters.py`
+- Admin actions: `core/services/admin/`
+- OPML: `core/services/opml.py`
+- Agent behavior: `core/services/agent/`
+
+## Rules to keep
+
+1. Views and commands stay as entry points only
+2. Models keep data and minimal behavior; complex logic belongs in services
+3. Rules of the same kind should live in one place only
+4. New behavior gets tests first, then a service-layer implementation
diff --git a/docs/docs/refactor.md b/docs/docs/refactor.md
new file mode 100644
index 00000000..594508e7
--- /dev/null
+++ b/docs/docs/refactor.md
@@ -0,0 +1,62 @@
+---
+title: 重构说明
+summary: 本次结构重构后的重点变化与维护入口
+---
+
+# 重构说明
+
+这次重构的目标很直接:让页面、模型、后台动作、命令、服务各自只负责自己的事情。
+
+## 已完成的主要调整
+
+### 1. 移除 Digest
+
+- 删除了 Digest 功能的代码、路由、后台入口、定时任务和文档说明
+- 清理了与 Digest 相关的旧数据迁移逻辑
+
+### 2. 统一 Feed 主流程
+
+- 单个 Feed 的更新流程集中到 `core/services/feed/pipeline.py`
+- 批量更新后的刷新逻辑集中到 `core/services/feed/refresh.py`
+
+### 3. 统一输出与过滤
+
+- 输出生成集中到 `core/services/feed/rendering.py`
+- RSS/JSON 响应包装集中到 `core/services/feed/response.py`
+- Feed 和 Tag 的过滤规则集中到 `core/services/feed/filters.py`
+
+### 4. 统一后台动作
+
+- 强制更新、标签重组集中到 `core/services/admin/actions.py`
+- 批量修改集中到 `core/services/admin/batch.py`
+
+### 5. 抽离 Agent 执行逻辑
+
+- OpenAI / DeepL / LibreTranslate / TestAgent 的执行细节集中到 `core/services/agent/`
+- `core/models/agent.py` 现在只保留字段和薄包装
+
+### 6. 抽离提示词
+
+- 默认提示词迁到 `core/prompts.py`
+- `config/settings.py` 只保留配置和对外兼容变量
+
+## 当前维护入口
+
+如果以后要继续改:
+
+- 改 Feed 更新:看 `core/services/feed/pipeline.py`
+- 改刷新与缓存:看 `core/services/feed/refresh.py`
+- 改输出格式:看 `core/services/feed/rendering.py` 和 `response.py`
+- 改过滤规则:看 `core/services/feed/filters.py`
+- 改后台动作:看 `core/services/admin/`
+- 改 OPML:看 `core/services/opml.py`
+- 改 Agent 行为:看 `core/services/agent/`
+
+## 当前原则
+
+后续改动优先保持这几条:
+
+1. 页面和命令只做入口,不直接写复杂流程
+2. 模型保留数据和最少行为,复杂逻辑放服务层
+3. 同一类规则只放一个位置
+4. 新行为先写测试,再落到服务层
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index dd0bef89..c678a1df 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -36,6 +36,8 @@ plugins:
一键部署: One Click
Docker安装: Docker
使用说明: Guide
+ 代码结构: Architecture
+ 重构说明: Refactor Notes
翻译服务: Translate Services
环境变量配置: Environment Variables
常见问题: FAQ
@@ -50,6 +52,8 @@ nav:
- Docker安装: install/docker.md
- 一键部署: install/one-click.md
- 使用说明: guide.md
+ - 代码结构: architecture.md
+ - 重构说明: refactor.md
- 翻译服务: translator.md
- 环境变量配置: config.md
- 常见问题: faq.md