Skip to content

Commit 078eeb9

Browse files
trevor-emchen-sentry
authored and committed
fix(integrations): cache empty repo 409 responses to reduce GitHub API calls (#109153)
Cache the result when GitHub's Git Trees API returns a 409 ("Git Repository is empty") instead of re-fetching on every task run. Previously, when `get_tree` hit a 409 for an empty repo, the exception propagated without writing to cache. This meant every subsequent `auto_source_code_config` task run would re-fetch the same empty repos, wasting API calls and contributing to GitHub rate limiting. For large orgs with many empty repos, this adds up to thousands of unnecessary API calls per day. Now we catch `ApiConflictError` in `get_cached_repo_files`, cache an empty list with the same TTL as successful results (24h + staggered offset), and return early. The existing error handling in `_populate_trees` is unaffected since the exception no longer propagates. Pairs with #109134 which records these 409s as halts instead of failures in the interaction event lifecycle. Refs SENTRY-5K7F
1 parent 44a475d commit 078eeb9

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

src/sentry/integrations/source_code_management/repo_trees.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from sentry.integrations.services.integration import RpcOrganizationIntegration
99
from sentry.issues.auto_source_code_config.utils.platform import get_supported_extensions
10-
from sentry.shared_integrations.exceptions import ApiError, IntegrationError
10+
from sentry.shared_integrations.exceptions import ApiConflictError, ApiError, IntegrationError
1111
from sentry.utils import metrics
1212
from sentry.utils.cache import cache
1313

@@ -199,7 +199,17 @@ def get_cached_repo_files(
199199
repo_files: list[str] = cache.get(key, [])
200200
if use_api:
201201
# Cache miss – fetch from API
202-
tree = self.get_client().get_tree(repo_full_name, tree_sha)
202+
try:
203+
tree = self.get_client().get_tree(repo_full_name, tree_sha)
204+
except ApiConflictError:
205+
# Empty repos return 409 — cache the empty result so we don't
206+
# keep burning API calls on repos we know have no files.
207+
logger.info(
208+
"Caching empty files result for repo",
209+
extra={"repo": repo_full_name},
210+
)
211+
cache.set(key, [], self.CACHE_SECONDS + shifted_seconds)
212+
tree = None
203213
if tree:
204214
# Keep files; discard directories
205215
repo_files = [node["path"] for node in tree if node["type"] == "blob"]

tests/sentry/integrations/github/test_integration.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -899,10 +899,12 @@ def get_installation_helper(self) -> GitHubIntegration:
899899

900900
def _expected_trees(self, repo_info_list=None):
901901
result = {}
902-
# bar and baz are defined to fail, thus, do not show up in the default case
902+
# bar (409 empty repo) returns an empty RepoTree since we cache the result
903+
# baz (404) still fails and is excluded
903904
list = repo_info_list or [
904905
("xyz", "master", ["src/xyz.py"]),
905906
("foo", "master", ["src/sentry/api/endpoints/auth_login.py"]),
907+
("bar", "main", []),
906908
]
907909
for repo, branch, files in list:
908910
result[f"{self.gh_org}/{repo}"] = RepoTree(
@@ -983,6 +985,7 @@ def test_get_trees_for_org_prevent_exhaustion_some_repos(self) -> None:
983985
("xyz", "master", ["src/xyz.py"]),
984986
# Now that the rate limit is reset we should get files for foo
985987
("foo", "master", ["src/sentry/api/endpoints/auth_login.py"]),
988+
("bar", "main", []),
986989
]
987990
)
988991

@@ -1007,12 +1010,14 @@ def test_get_trees_for_org_rate_limit_401(self) -> None:
10071010
)
10081011

10091012
# This time the rate limit will not fail, thus, it will fetch the trees
1013+
# bar (409 empty repo) now returns an empty RepoTree since we cache the empty result
10101014
self.set_rate_limit()
10111015
trees = installation.get_trees_for_org()
10121016
assert trees == self._expected_trees(
10131017
[
10141018
("xyz", "master", ["src/xyz.py"]),
10151019
("foo", "master", ["src/sentry/api/endpoints/auth_login.py"]),
1020+
("bar", "main", []),
10161021
]
10171022
)
10181023

@@ -1059,8 +1064,9 @@ def test_get_trees_for_org_makes_API_requests_before_MAX_CONNECTION_ERRORS_is_hi
10591064
# xyz is missing because its request errors
10601065
# foo has data because its API request is made in spite of xyz's error
10611066
("foo", "master", ["src/sentry/api/endpoints/auth_login.py"]),
1062-
# bar and baz are missing because their API requests throw errors for
1063-
# other reasons in the default mock responses
1067+
# bar (409 empty repo) is present with empty files since we cache the result
1068+
# baz (404) is missing because its API request throws an error
1069+
("bar", "main", []),
10641070
]
10651071
)
10661072

0 commit comments

Comments
 (0)