Skip to content

Commit 693737a

Browse files
authored
Enhanced multiple features and fixed issues (#86)
* Enhanced multiple features and fixed issues:
  - Add dynamic page resolution to handle new tabs/windows
  - Separate warning/failed counts in reports and output
  - Improve UX tool to use dynamic page reference
  - Fix page reference updates after navigation events
  - Add error handling for screenshot timeouts
* fix: set full_page to True for error screenshot
1 parent 05096c6 commit 693737a

File tree

10 files changed

+222
-175
lines changed

10 files changed

+222
-175
lines changed

webqa-agent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ async def run_tests(cfg):
379379
if result_count:
380380
print(f"🔢 Total evaluations: {result_count.get('total', 0)}")
381381
print(f"✅ Passed: {result_count.get('passed', 0)}")
382+
print(f"⚠️ Warning: {result_count.get('warning', 0)}")
382383
print(f"❌ Failed: {result_count.get('failed', 0)}")
383384

384385
if html_report_path:

webqa_agent/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
__all__ = [
2+
]
3+
4+
__version__ = "0.2.0"
5+

webqa_agent/actions/action_handler.py

Lines changed: 84 additions & 39 deletions
Large diffs are not rendered by default.

webqa_agent/browser/driver.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,18 @@ def get_page(self):
104104
except Exception as e:
105105
logging.error("Failed to get Driver instance: %s", e, exc_info=True)
106106
raise
107+
108+
async def get_url(self):
109+
"""Returns: the current page URL and title."""
110+
try:
111+
if self.page is None:
112+
raise RuntimeError("No active page. Did you call create_browser?")
113+
url = self.page.url
114+
title = await self.page.title()
115+
return url, title
116+
except Exception as e:
117+
logging.error("Failed to get URL: %s", e, exc_info=True)
118+
raise
107119

108120
async def get_new_page(self):
109121
"""Switches to the most recently opened page in the browser.
@@ -115,8 +127,8 @@ async def get_new_page(self):
115127
pages = self.context.pages
116128
logging.debug(f"page number: {len(pages)}")
117129
if len(pages) > 1:
118-
logging.debug("New page detected.")
119130
self.page = pages[-1]
131+
logging.debug(f"New page detected, page index: {len(pages) - 1}")
120132
return self.page
121133
else:
122134
return self.page

webqa_agent/executor/result_aggregator.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,17 +59,30 @@ async def aggregate_results(self, test_session: ParallelTestSession) -> Dict[str
5959
for sub in (r.sub_tests or [])
6060
if sub.status == TestStatus.PASSED
6161
)
62+
warning_sub_tests = sum(
63+
1
64+
for r in test_session.test_results.values()
65+
for sub in (r.sub_tests or [])
66+
if sub.status == TestStatus.WARNING
67+
)
68+
failed_sub_tests = sum(
69+
1
70+
for r in test_session.test_results.values()
71+
for sub in (r.sub_tests or [])
72+
if sub.status == TestStatus.FAILED
73+
)
6274
critical_sub_tests = total_sub_tests - passed_sub_tests # 未通过即视为关键问题
6375

64-
logging.debug(f"Debug: total_sub_tests={total_sub_tests}, passed_sub_tests={passed_sub_tests}, critical_sub_tests={critical_sub_tests}")
76+
logging.debug(f"Debug: total_sub_tests={total_sub_tests}, passed_sub_tests={passed_sub_tests}, warning_sub_tests={warning_sub_tests}, failed_sub_tests={failed_sub_tests}, critical_sub_tests={critical_sub_tests}")
6577

6678
# Build content for executive summary tab
6779
executive_content = {
6880
"executiveSummary": "",
6981
"statistics": [
7082
{"label": self._get_text('assessment_categories'), "value": str(total_sub_tests), "colorClass": "var(--warning-color)"},
7183
{"label": self._get_text('passed_count'), "value": str(passed_sub_tests), "colorClass": "var(--success-color)"},
72-
{"label": self._get_text('failed_count'), "value": str(critical_sub_tests), "colorClass": "var(--failure-color)"},
84+
{"label": self._get_text('warning_count'), "value": str(warning_sub_tests), "colorClass": "var(--warning-color)"},
85+
{"label": self._get_text('failed_count'), "value": str(failed_sub_tests), "colorClass": "var(--failure-color)"},
7386
]
7487
}
7588

@@ -110,7 +123,8 @@ def dict_to_text(d, indent=0):
110123
"count":{
111124
"total": total_sub_tests,
112125
"passed": passed_sub_tests,
113-
"failed": critical_sub_tests,
126+
"warning": warning_sub_tests,
127+
"failed": failed_sub_tests,
114128
}
115129
}
116130

webqa_agent/static/i18n/en-US.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"issue_list_note": "Note: This list summarizes all detected \"failed\" and \"warning\" items",
88
"assessment_categories": "Test Cases",
99
"passed_count": "Passed",
10+
"warning_count": "Warning",
1011
"failed_count": "Failed",
1112
"test_failed_prefix": "Test Failed: ",
1213
"execution_error_prefix": "Execution Error: ",

webqa_agent/static/i18n/zh-CN.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"issue_list_note": "注:此列表汇总了所有检测到的\"失败\"\"警告\"",
88
"assessment_categories": "评估类别",
99
"passed_count": "通过数",
10+
"warning_count": "警告数",
1011
"failed_count": "失败数",
1112
"test_failed_prefix": "测试不通过: ",
1213
"execution_error_prefix": "执行异常: ",

webqa_agent/testers/case_gen/agents/execute_agent.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -650,12 +650,13 @@ async def agent_worker_node(state: dict, config: dict) -> dict:
650650
)
651651
logging.debug(f"LLM configured: {llm_config.get('model')} at {llm_config.get('base_url')}")
652652

653-
page = ui_tester_instance.driver.get_page()
654653
# Instantiate tools with correct parameters
654+
# Note: All tools now use ui_tester_instance to dynamically get page,
655+
# which ensures correct page reference after get_new_page operations
655656
tools = [
656657
UITool(ui_tester_instance=ui_tester_instance),
657658
UIAssertTool(ui_tester_instance=ui_tester_instance),
658-
UIUXViewportTool(page=page, llm_config=llm_config, case_recorder=case_recorder),
659+
UIUXViewportTool(ui_tester_instance=ui_tester_instance, llm_config=llm_config, case_recorder=case_recorder),
659660
]
660661
logging.debug(f"Tools initialized: {[tool.name for tool in tools]}")
661662

@@ -867,22 +868,20 @@ def extract_path(u):
867868
dp = DeepCrawler(page)
868869
await dp.crawl(highlight=True, viewport_only=True)
869870
screenshot = await ui_tester_instance._actions.b64_page_screenshot(
870-
full_page=False, file_name="agent_step_vision", save_to_log=False
871+
file_name="agent_step_vision", save_to_log=False
871872
)
872873
await dp.remove_marker()
873874
logging.debug("Generated highlighted screenshot for the agent.")
874875
# ------------------------------------
875876

876877
# Create a new message with the current step's instruction and visual context
877-
step_message = HumanMessage(
878-
content=[
879-
{"type": "text", "text": formatted_instruction},
880-
{
881-
"type": "image_url",
882-
"image_url": {"url": f"{screenshot}", "detail": "low"},
883-
},
884-
]
885-
)
878+
step_content = [{"type": "text", "text": formatted_instruction}]
879+
if screenshot:
880+
step_content.append({
881+
"type": "image_url",
882+
"image_url": {"url": f"{screenshot}", "detail": "low"},
883+
})
884+
step_message = HumanMessage(content=step_content)
886885

887886
# The agent's history includes all prior messages
888887
current_messages = messages + [step_message]
@@ -949,11 +948,13 @@ def extract_path(u):
949948
messages.append(AIMessage(content=tool_output))
950949

951950
# Check for warnings in the tool output (e.g., UX issues)
952-
if "[warning]" in tool_output.lower():
951+
# Check both agent output and raw tool result from intermediate steps
952+
intermediate_output = safe_get_intermediate_step(result, index=0, subindex=1, default="")
953+
combined_output = f"{tool_output}\n{intermediate_output}"
954+
if "[warning]" in combined_output.lower():
953955
warning_steps.append(i + 1)
954956
logging.info(f"Step {i+1} completed with warnings (e.g., UX issues detected)")
955957

956-
intermediate_output = safe_get_intermediate_step(result, index=0, subindex=1, default="")
957958
is_failure = "[failure]" in intermediate_output.lower() or "failed" in tool_output.lower()
958959

959960
# Check if this is an ELEMENT_NOT_FOUND error (potentially recoverable)
@@ -988,7 +989,7 @@ def extract_path(u):
988989
# Get current page screenshot for LLM analysis
989990
try:
990991
recovery_screenshot = await ui_tester_instance._actions.b64_page_screenshot(
991-
full_page=False, file_name="recovery_screenshot", save_to_log=False
992+
file_name="recovery_screenshot", save_to_log=False
992993
)
993994
except Exception as e:
994995
logging.error(f"Failed to capture recovery screenshot: {e}")
@@ -1083,7 +1084,7 @@ def extract_path(u):
10831084
try:
10841085
# Capture screenshot for visual context after successful step execution
10851086
logging.debug("Capturing screenshot for dynamic step generation context")
1086-
screenshot = await ui_tester_instance._actions.b64_page_screenshot(full_page=False)
1087+
screenshot = await ui_tester_instance._actions.b64_page_screenshot()
10871088

10881089
# Enhance objective with generation context for smarter LLM decision-making
10891090
enhanced_objective = case.get("objective", "")

webqa_agent/testers/case_gen/tools/ux_tool.py

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class UIUXViewportTool(BaseTool):
2222
"Performs two UX checks in the current viewport: (1) Typo/grammar/text accuracy using page text; "
2323
"(2) Layout/visual rendering using screenshot + viewport structure. Returns both analyses."
2424
)
25-
page: Any = Field(..., description="Playwright Page instance")
25+
ui_tester_instance: Any = Field(..., description="UITester instance to access driver and page")
2626
llm_config: dict | None = Field(default=None, description="LLM configuration for independent client")
2727
case_recorder: Any | None = Field(default=None, description="Optional CentralCaseRecorder to record ux_verify step")
2828

@@ -52,21 +52,64 @@ def _annotate_b64_image(self, image_b64: str, rect: List[int]) -> str:
5252
return image_b64
5353

5454
async def _arun(self, assertion: str) -> str:
55-
if not self.page:
56-
return "[FAILURE] Error: Page instance not provided for UX collection."
55+
if not self.ui_tester_instance:
56+
return "[FAILURE] Error: UITester instance not provided for UX collection."
5757

5858
try:
5959
logging.debug(f"Executing UX verification: {assertion}")
6060

61-
dp = DeepCrawler(self.page)
62-
# Viewport-only crawl; do NOT scroll; collect text-rich structure
63-
crawl_result = await dp.crawl(highlight=False, filter_text=True, viewport_only=True, include_styles=True)
61+
# Dynamically get current page from driver (handles get_new_page updates)
62+
page = self.ui_tester_instance.driver.get_page()
63+
64+
dp = DeepCrawler(page)
65+
# Crawl for interactive elements with layout info (for layout check)
66+
crawl_result = await dp.crawl(highlight=False, filter_text=False, viewport_only=False, include_styles=True)
6467
id_map = crawl_result.raw_dict()
65-
viewport_structure = dp.get_text()
68+
69+
# Get full page text directly from page for text/typo check (more comprehensive)
70+
viewport_structure = await page.evaluate("""
71+
() => {
72+
// Extract all visible text from the page
73+
const textElements = [];
74+
const walker = document.createTreeWalker(
75+
document.body,
76+
NodeFilter.SHOW_TEXT,
77+
{
78+
acceptNode: function(node) {
79+
const parent = node.parentElement;
80+
if (!parent) return NodeFilter.FILTER_REJECT;
81+
82+
// Skip script, style, and hidden elements
83+
const style = window.getComputedStyle(parent);
84+
if (style.display === 'none' ||
85+
style.visibility === 'hidden' ||
86+
parent.tagName === 'SCRIPT' ||
87+
parent.tagName === 'STYLE') {
88+
return NodeFilter.FILTER_REJECT;
89+
}
90+
91+
const text = node.textContent.trim();
92+
return text.length > 0 ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT;
93+
}
94+
}
95+
);
96+
97+
let node;
98+
while (node = walker.nextNode()) {
99+
const text = node.textContent.trim();
100+
if (text && text.length > 0) {
101+
textElements.push(text);
102+
}
103+
}
104+
105+
// Deduplicate and return as JSON
106+
return JSON.stringify([...new Set(textElements)]);
107+
}
108+
""")
66109
logging.debug(f"Viewport Text Structure: {viewport_structure}")
67110

68111
screenshot = None
69-
img_bytes = await self.page.screenshot(full_page=False)
112+
img_bytes = await page.screenshot(full_page=True)
70113
screenshot = f"data:image/png;base64,{base64.b64encode(img_bytes).decode('utf-8')}"
71114

72115
try:
@@ -127,7 +170,7 @@ async def _arun(self, assertion: str) -> str:
127170
"- Conciseness: Keep each error description concise and direct, avoid explanations.\n"
128171
)
129172

130-
logging.debug(f"UX text typo analysis prompt: {text_prompt}")
173+
# logging.debug(f"UX text typo analysis prompt: {text_prompt}")
131174

132175
typo_response = await llm_client.get_llm_response(
133176
LLMPrompt.page_default_prompt,
@@ -146,7 +189,7 @@ async def _arun(self, assertion: str) -> str:
146189
# 2) Layout/visual analysis (screenshot + structure)
147190
layout_prompt = self._build_layout_prompt(layout_user_case, id_map, len(screenshot))
148191

149-
logging.debug(f"UX layout analysis prompt: {layout_prompt}")
192+
# logging.debug(f"UX layout analysis prompt: {layout_prompt}")
150193

151194
images = [screenshot] if isinstance(screenshot, str) else None
152195
layout_response = await llm_client.get_llm_response(

0 commit comments

Comments
 (0)