Skip to content

Commit 223fc03

Browse files
authored
ci: fix 3 CI failures on self-hosted runner (#62)
1. Exclude tests/integration/saas/ from the full test suite — it requires a private SaaS worker (localhost:8787) that is not available in CI. This eliminates 45 errors and prevents timeouts on Python 3.9/3.10/3.12/3.13.
2. Pin the Python version in the quick-check and post-merge jobs via --python $DEFAULT_PYTHON_VERSION. The self-hosted runner has 3.14 installed, causing uv to select it over 3.12 and triggering stale Rust target-cache collisions with maturin.
3. Fix test_l1_cache_speedup_ratio_validation — it asserted that hits are faster than misses, but L1 hits include LRU bookkeeping (move_to_end) that misses skip. It now validates that both paths are sub-microsecond and that hit overhead is bounded (< 2x miss).
1 parent cf4fa00 commit 223fc03

2 files changed

Lines changed: 29 additions & 20 deletions

File tree

.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
             venv-${{ runner.os }}-py${{ env.DEFAULT_PYTHON_VERSION }}-

       - name: Install dependencies (if not cached)
-        run: uv sync --group dev
+        run: uv sync --python ${{ env.DEFAULT_PYTHON_VERSION }} --group dev

       - name: Check Python formatting
         run: uv run ruff format --check .
@@ -131,6 +131,7 @@ jobs:
         run: |
           uv run pytest tests/ -m "not slow" \
             --ignore=tests/fuzzing \
+            --ignore=tests/integration/saas \
             --cov=src/cachekit \
             --cov-report=xml \
             --cov-report=term \
@@ -180,7 +181,7 @@ jobs:
           workspaces: rust

       - name: Install dependencies (if not cached)
-        run: uv sync --group dev
+        run: uv sync --python ${{ env.DEFAULT_PYTHON_VERSION }} --group dev

       - name: Scan Python dependencies for CVEs
         run: uv run pip-audit --desc

tests/performance/test_statistical_rigor.py

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@

 from cachekit.l1_cache import L1Cache

-from .stats_utils import benchmark_with_gc_handling, speedup_ratio
+from .stats_utils import benchmark_with_gc_handling


 @pytest.fixture(scope="session", autouse=True)
@pytest.fixture(scope="session", autouse=True)
@@ -251,10 +251,15 @@ def test_confidence_intervals_matter() -> None:

 @pytest.mark.performance
 def test_l1_cache_speedup_ratio_validation() -> None:
-    """Validate that L1 cache hits are significantly faster than misses.
+    """Validate that L1 cache hit and miss are both sub-microsecond.

-    This demonstrates the value of L1 caching - hits should be 2x+ faster
-    than misses because hits avoid the L2 backend roundtrip.
+    In L1-only mode (no L2 backend), hits and misses are both fast dict
+    operations. Hits include LRU bookkeeping (move_to_end), so they may
+    be slightly slower than misses — this is expected.
+
+    Real speedup (10-20x) only manifests when L2 backend (Redis/SaaS)
+    adds network latency to misses. This test validates absolute
+    performance bounds for both paths.
     """
     cache = L1Cache(max_memory_mb=100)
@@ -288,22 +293,25 @@ def miss_fn() -> None:
         unit="ns",
     )

-    # Calculate speedup ratio
-    hit_samples = [hit_result.mean] * 100  # Synthetic for ratio calculation
-    miss_samples = [miss_result.mean] * 100
-    ratio, interpretation = speedup_ratio(miss_samples, hit_samples)
-
-    print("\nSpeedup Ratio Analysis:")
+    print("\nL1 Performance Analysis:")
     print(f"  Hit mean: {hit_result.mean:.0f}ns")
     print(f"  Miss mean: {miss_result.mean:.0f}ns")
-    print(f"  Speedup: {ratio:.1f}x {interpretation}")
-
-    # Validate speedup is meaningful (hits should be faster than misses)
-    # Note: In this L1-only test, both hit and miss are fast dict operations (~450-500ns).
-    # With a real L2 Redis backend, hits would be 10-20x faster (avoiding network).
-    # We validate >1.0x (measurable) rather than 2.0x (more visible with L2 overhead).
-    assert ratio > 1.0, f"L1 hits should be faster than misses, got {ratio:.1f}x"
-    print(f"✅ Speedup ratio validated: {ratio:.1f}x (hits are measurably faster)")
+
+    # Both operations must be sub-microsecond (1000ns)
+    assert hit_result.p95 < 1000, f"L1 hit p95 {hit_result.p95:.0f}ns exceeds 1000ns target"
+    assert miss_result.p95 < 1000, f"L1 miss p95 {miss_result.p95:.0f}ns exceeds 1000ns target"
+
+    # Hit overhead from LRU bookkeeping should be bounded (< 2x miss time)
+    # Hits do more work (move_to_end), so they can be slower — but not drastically
+    if miss_result.mean > 0:
+        overhead_ratio = hit_result.mean / miss_result.mean
+        print(f"  Hit/Miss ratio: {overhead_ratio:.2f}x")
+        assert overhead_ratio < 2.0, (
+            f"L1 hit overhead too high: {overhead_ratio:.1f}x miss time "
+            f"(hit={hit_result.mean:.0f}ns, miss={miss_result.mean:.0f}ns)"
+        )
+
+    print("✅ L1 cache performance validated (both paths sub-microsecond)")


 @pytest.mark.performance

0 commit comments

Comments (0)