Enhanced multiple features and fixed issues (#86)

mmmay0722 · web-flow · commit 693737ac19f1 · 2025-11-06T16:34:33.000+08:00
* Enhance multiple features and fix issues:
- Add dynamic page resolution to handle new tabs/windows
- Separate warning/failed counts in reports and output
- Improve UX tool to use dynamic page reference
- Fix page reference updates after navigation events
- Add error handling for screenshot timeouts

* fix: set full_page to True for error screenshot
diff --git a/webqa-agent.py b/webqa-agent.py
@@ -379,6 +379,7 @@ async def run_tests(cfg):
         if result_count:
             print(f"🔢 Total evaluations: {result_count.get('total', 0)}")
             print(f"✅ Passed: {result_count.get('passed', 0)}")
+            print(f"⚠️  Warning: {result_count.get('warning', 0)}")
             print(f"❌ Failed: {result_count.get('failed', 0)}")
 
         if html_report_path:
diff --git a/webqa_agent/__init__.py b/webqa_agent/__init__.py
@@ -0,0 +1,5 @@
+__all__ = [
+]
+
+__version__ = "0.2.0"
+
diff --git a/webqa_agent/actions/action_handler.py b/webqa_agent/actions/action_handler.py
diff --git a/webqa_agent/browser/driver.py b/webqa_agent/browser/driver.py
@@ -104,6 +104,18 @@ def get_page(self):
         except Exception as e:
             logging.error("Failed to get Driver instance: %s", e, exc_info=True)
             raise
+    
+    async def get_url(self):
+        """Return the current page's URL and title as a ``(url, title)`` tuple."""
+        try:
+            if self.page is None:
+                raise RuntimeError("No active page. Did you call create_browser?")
+            url = self.page.url
+            title = await self.page.title()
+            return url, title
+        except Exception as e:
+            logging.error("Failed to get URL: %s", e, exc_info=True)
+            raise
 
     async def get_new_page(self):
         """Switches to the most recently opened page in the browser.
@@ -115,8 +127,8 @@ async def get_new_page(self):
             pages = self.context.pages
             logging.debug(f"page number: {len(pages)}")
             if len(pages) > 1:
-                logging.debug("New page detected.")
                 self.page = pages[-1]
+                logging.debug(f"New page detected, page index: {len(pages) - 1}")
                 return self.page
             else:
                 return self.page
diff --git a/webqa_agent/executor/result_aggregator.py b/webqa_agent/executor/result_aggregator.py
@@ -59,17 +59,30 @@ async def aggregate_results(self, test_session: ParallelTestSession) -> Dict[str
             for sub in (r.sub_tests or [])
             if sub.status == TestStatus.PASSED
         )
+        warning_sub_tests = sum(
+            1
+            for r in test_session.test_results.values()
+            for sub in (r.sub_tests or [])
+            if sub.status == TestStatus.WARNING
+        )
+        failed_sub_tests = sum(
+            1
+            for r in test_session.test_results.values()
+            for sub in (r.sub_tests or [])
+            if sub.status == TestStatus.FAILED
+        )
         critical_sub_tests = total_sub_tests - passed_sub_tests  # 未通过即视为关键问题
         
-        logging.debug(f"Debug: total_sub_tests={total_sub_tests}, passed_sub_tests={passed_sub_tests}, critical_sub_tests={critical_sub_tests}")
+        logging.debug(f"Debug: total_sub_tests={total_sub_tests}, passed_sub_tests={passed_sub_tests}, warning_sub_tests={warning_sub_tests}, failed_sub_tests={failed_sub_tests}, critical_sub_tests={critical_sub_tests}")
 
         # Build content for executive summary tab
         executive_content = {
             "executiveSummary": "",
             "statistics": [
                 {"label": self._get_text('assessment_categories'), "value": str(total_sub_tests), "colorClass": "var(--warning-color)"},
                 {"label": self._get_text('passed_count'), "value": str(passed_sub_tests), "colorClass": "var(--success-color)"},
-                {"label": self._get_text('failed_count'), "value": str(critical_sub_tests), "colorClass": "var(--failure-color)"},
+                {"label": self._get_text('warning_count'), "value": str(warning_sub_tests), "colorClass": "var(--warning-color)"},
+                {"label": self._get_text('failed_count'), "value": str(failed_sub_tests), "colorClass": "var(--failure-color)"},
             ]
         }
 
@@ -110,7 +123,8 @@ def dict_to_text(d, indent=0):
             "count":{
                 "total": total_sub_tests,
                 "passed": passed_sub_tests,
-                "failed": critical_sub_tests,
+                "warning": warning_sub_tests,
+                "failed": failed_sub_tests,
             }
         }
 
diff --git a/webqa_agent/static/i18n/en-US.json b/webqa_agent/static/i18n/en-US.json
@@ -7,6 +7,7 @@
     "issue_list_note": "Note: This list summarizes all detected \"failed\" and \"warning\" items",
     "assessment_categories": "Test Cases",
     "passed_count": "Passed",
+    "warning_count": "Warning",
     "failed_count": "Failed",
     "test_failed_prefix": "Test Failed: ",
     "execution_error_prefix": "Execution Error: ",
diff --git a/webqa_agent/static/i18n/zh-CN.json b/webqa_agent/static/i18n/zh-CN.json
@@ -7,6 +7,7 @@
     "issue_list_note": "注：此列表汇总了所有检测到的\"失败\"和\"警告\"项",
     "assessment_categories": "评估类别",
     "passed_count": "通过数",
+    "warning_count": "警告数",
     "failed_count": "失败数",
     "test_failed_prefix": "测试不通过: ",
     "execution_error_prefix": "执行异常: ",
diff --git a/webqa_agent/testers/case_gen/agents/execute_agent.py b/webqa_agent/testers/case_gen/agents/execute_agent.py
@@ -650,12 +650,13 @@ async def agent_worker_node(state: dict, config: dict) -> dict:
     )
     logging.debug(f"LLM configured: {llm_config.get('model')} at {llm_config.get('base_url')}")
 
-    page = ui_tester_instance.driver.get_page()
     # Instantiate tools with correct parameters
+    # Note: All tools now use ui_tester_instance to dynamically get page,
+    # which ensures correct page reference after get_new_page operations
     tools = [
         UITool(ui_tester_instance=ui_tester_instance),
         UIAssertTool(ui_tester_instance=ui_tester_instance),
-        UIUXViewportTool(page=page, llm_config=llm_config, case_recorder=case_recorder),
+        UIUXViewportTool(ui_tester_instance=ui_tester_instance, llm_config=llm_config, case_recorder=case_recorder),
     ]
     logging.debug(f"Tools initialized: {[tool.name for tool in tools]}")
 
@@ -867,22 +868,20 @@ def extract_path(u):
         dp = DeepCrawler(page)
         await dp.crawl(highlight=True, viewport_only=True)
         screenshot = await ui_tester_instance._actions.b64_page_screenshot(
-            full_page=False, file_name="agent_step_vision", save_to_log=False
+            file_name="agent_step_vision", save_to_log=False
         )
         await dp.remove_marker()
         logging.debug("Generated highlighted screenshot for the agent.")
         # ------------------------------------
 
         # Create a new message with the current step's instruction and visual context
-        step_message = HumanMessage(
-            content=[
-                {"type": "text", "text": formatted_instruction},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": f"{screenshot}", "detail": "low"},
-                },
-            ]
-        )
+        step_content = [{"type": "text", "text": formatted_instruction}]
+        if screenshot:
+            step_content.append({
+                "type": "image_url",
+                "image_url": {"url": f"{screenshot}", "detail": "low"},
+            })
+        step_message = HumanMessage(content=step_content)
 
         # The agent's history includes all prior messages
         current_messages = messages + [step_message]
@@ -949,11 +948,13 @@ def extract_path(u):
             messages.append(AIMessage(content=tool_output))
             
             # Check for warnings in the tool output (e.g., UX issues)
-            if "[warning]" in tool_output.lower():
+            # Check both agent output and raw tool result from intermediate steps
+            intermediate_output = safe_get_intermediate_step(result, index=0, subindex=1, default="")
+            combined_output = f"{tool_output}\n{intermediate_output}"
+            if "[warning]" in combined_output.lower():
                 warning_steps.append(i + 1)
                 logging.info(f"Step {i+1} completed with warnings (e.g., UX issues detected)")
 
-            intermediate_output = safe_get_intermediate_step(result, index=0, subindex=1, default="")
             is_failure = "[failure]" in intermediate_output.lower() or "failed" in tool_output.lower()
 
             # Check if this is an ELEMENT_NOT_FOUND error (potentially recoverable)
@@ -988,7 +989,7 @@ def extract_path(u):
                             # Get current page screenshot for LLM analysis
                             try:
                                 recovery_screenshot = await ui_tester_instance._actions.b64_page_screenshot(
-                                    full_page=False, file_name="recovery_screenshot", save_to_log=False
+                                    file_name="recovery_screenshot", save_to_log=False
                                 )
                             except Exception as e:
                                 logging.error(f"Failed to capture recovery screenshot: {e}")
@@ -1083,7 +1084,7 @@ def extract_path(u):
                         try:
                             # Capture screenshot for visual context after successful step execution
                             logging.debug("Capturing screenshot for dynamic step generation context")
-                            screenshot = await ui_tester_instance._actions.b64_page_screenshot(full_page=False)
+                            screenshot = await ui_tester_instance._actions.b64_page_screenshot()
                             
                             # Enhance objective with generation context for smarter LLM decision-making
                             enhanced_objective = case.get("objective", "")
diff --git a/webqa_agent/testers/case_gen/tools/ux_tool.py b/webqa_agent/testers/case_gen/tools/ux_tool.py
@@ -22,7 +22,7 @@ class UIUXViewportTool(BaseTool):
         "Performs two UX checks in the current viewport: (1) Typo/grammar/text accuracy using page text; "
         "(2) Layout/visual rendering using screenshot + viewport structure. Returns both analyses."
     )
-    page: Any = Field(..., description="Playwright Page instance")
+    ui_tester_instance: Any = Field(..., description="UITester instance to access driver and page")
     llm_config: dict | None = Field(default=None, description="LLM configuration for independent client")
     case_recorder: Any | None = Field(default=None, description="Optional CentralCaseRecorder to record ux_verify step")
 
@@ -52,21 +52,64 @@ def _annotate_b64_image(self, image_b64: str, rect: List[int]) -> str:
             return image_b64
 
     async def _arun(self, assertion: str) -> str:
-        if not self.page:
-            return "[FAILURE] Error: Page instance not provided for UX collection."
+        if not self.ui_tester_instance:
+            return "[FAILURE] Error: UITester instance not provided for UX collection."
 
         try:
             logging.debug(f"Executing UX verification: {assertion}")
 
-            dp = DeepCrawler(self.page)
-            # Viewport-only crawl; do NOT scroll; collect text-rich structure
-            crawl_result = await dp.crawl(highlight=False, filter_text=True, viewport_only=True, include_styles=True)
+            # Dynamically get current page from driver (handles get_new_page updates)
+            page = self.ui_tester_instance.driver.get_page()
+
+            dp = DeepCrawler(page)
+            # Crawl for interactive elements with layout info (for layout check)
+            crawl_result = await dp.crawl(highlight=False, filter_text=False, viewport_only=False, include_styles=True)
             id_map = crawl_result.raw_dict()
-            viewport_structure = dp.get_text()
+            
+            # Get full page text directly from page for text/typo check (more comprehensive)
+            viewport_structure = await page.evaluate("""
+                () => {
+                    // Extract all visible text from the page
+                    const textElements = [];
+                    const walker = document.createTreeWalker(
+                        document.body,
+                        NodeFilter.SHOW_TEXT,
+                        {
+                            acceptNode: function(node) {
+                                const parent = node.parentElement;
+                                if (!parent) return NodeFilter.FILTER_REJECT;
+                                
+                                // Skip script, style, and hidden elements
+                                const style = window.getComputedStyle(parent);
+                                if (style.display === 'none' || 
+                                    style.visibility === 'hidden' || 
+                                    parent.tagName === 'SCRIPT' || 
+                                    parent.tagName === 'STYLE') {
+                                    return NodeFilter.FILTER_REJECT;
+                                }
+                                
+                                const text = node.textContent.trim();
+                                return text.length > 0 ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT;
+                            }
+                        }
+                    );
+                    
+                    let node;
+                    while (node = walker.nextNode()) {
+                        const text = node.textContent.trim();
+                        if (text && text.length > 0) {
+                            textElements.push(text);
+                        }
+                    }
+                    
+                    // Deduplicate and return as JSON
+                    return JSON.stringify([...new Set(textElements)]);
+                }
+            """)
             logging.debug(f"Viewport Text Structure: {viewport_structure}")
 
             screenshot = None
-            img_bytes = await self.page.screenshot(full_page=False)
+            img_bytes = await page.screenshot(full_page=True)
             screenshot = f"data:image/png;base64,{base64.b64encode(img_bytes).decode('utf-8')}"
 
             try:
@@ -127,7 +170,7 @@ async def _arun(self, assertion: str) -> str:
                 "- Conciseness: Keep each error description concise and direct, avoid explanations.\n"
             )
 
-            logging.debug(f"UX text typo analysis prompt: {text_prompt}")
+            # logging.debug(f"UX text typo analysis prompt: {text_prompt}")
 
             typo_response = await llm_client.get_llm_response(
                 LLMPrompt.page_default_prompt,
@@ -146,7 +189,7 @@ async def _arun(self, assertion: str) -> str:
             # 2) Layout/visual analysis (screenshot + structure)
             layout_prompt = self._build_layout_prompt(layout_user_case, id_map, len(screenshot))
 
-            logging.debug(f"UX layout analysis prompt: {layout_prompt}")
+            # logging.debug(f"UX layout analysis prompt: {layout_prompt}")
 
             images = [screenshot] if isinstance(screenshot, str) else None
             layout_response = await llm_client.get_llm_response(
diff --git a/webqa_agent/testers/function_tester.py b/webqa_agent/testers/function_tester.py

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +__all__ = [
 +]
++
 +__version__ = "0.2.0"
++