class OrganizationRepositoryEndpoint(OrganizationEndpoint):
    """
    Organization-scoped base endpoint for routes that carry a ``repo_id``.

    ``convert_args`` swaps the raw ``repo_id`` URL kwarg for a ``repo``
    kwarg holding the matching ``Repository`` row, so handler methods
    receive the model instance directly.
    """

    permission_classes = (OrganizationIntegrationsPermission,)

    def convert_args(
        self,
        request: Request,
        organization_id_or_slug: int | str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> tuple[tuple[Any, ...], dict[str, Any]]:
        """
        Resolve the organization (via the parent class) and the repository.

        :raises ResourceDoesNotExist: when no repository with the given id
            exists in the resolved organization, or when the lookup rejects
            the id with a ``ValueError``.
        """
        resolved_args, resolved_kwargs = super().convert_args(
            request, organization_id_or_slug, *args, **kwargs
        )
        org: Organization = resolved_kwargs["organization"]
        raw_repo_id = resolved_kwargs.pop("repo_id")

        # Filtering on organization_id keeps repositories that belong to
        # other organizations invisible to this request.
        try:
            repository = Repository.objects.get(id=raw_repo_id, organization_id=org.id)
        except (Repository.DoesNotExist, ValueError):
            raise ResourceDoesNotExist

        resolved_kwargs["repo"] = repository
        return resolved_args, resolved_kwargs
"organizations:integrations-github-platform-detection", + organization, + actor=request.user, + ): + return Response(status=404) + + if ( + not repo.integration_id + or repo.provider != f"integrations:{IntegrationProviderSlug.GITHUB}" + ): + return Response( + {"detail": "Platform detection is only supported for GitHub repositories."}, + status=400, + ) + + integration = integration_service.get_integration(integration_id=repo.integration_id) + if integration is None: + return Response({"detail": "GitHub integration not found."}, status=400) + + org_integration = integration_service.get_organization_integration( + integration_id=repo.integration_id, organization_id=organization.id + ) + if org_integration is None: + return Response( + {"detail": "GitHub integration is not configured for this organization."}, + status=400, + ) + + client = GitHubApiClient(integration=integration, org_integration_id=org_integration.id) + + try: + platforms = detect_platforms(client=client, repo=repo.name) + except (ApiError, ValueError): + logger.exception( + "integrations.github.platform_detection_failed", + extra={"repo_id": repo.id, "repo_name": repo.name}, + ) + return Response({"detail": "Failed to detect platforms from GitHub."}, status=502) + + return Response({"platforms": platforms}) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index c2a2007310e77f..3732153ad3554b 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -504,6 +504,16 @@ def get_repo(self, repo: str) -> Any: """ return self.get(f"/repos/{repo}") + def get_languages(self, repo: str) -> dict[str, int]: + """ + https://docs.github.com/en/rest/repos/repos#list-repository-languages + + :param repo: "owner/repo" format + :returns: {"Python": 50000, "JavaScript": 30000, ...} + Keys are GitHub Linguist names, values are bytes of code. 
+ """ + return self.get(f"/repos/{repo}/languages") + # https://docs.github.com/en/rest/rate-limit?apiVersion=2022-11-28 def get_rate_limit(self, specific_resource: str = "core") -> GithubRateLimitInfo: """This gives information of the current rate limit""" diff --git a/src/sentry/integrations/github/platform_detection.py b/src/sentry/integrations/github/platform_detection.py new file mode 100644 index 00000000000000..32ee1a961564e8 --- /dev/null +++ b/src/sentry/integrations/github/platform_detection.py @@ -0,0 +1,322 @@ +from __future__ import annotations + +import logging +from base64 import b64decode +from typing import TYPE_CHECKING, TypedDict + +from sentry.shared_integrations.exceptions import ApiError +from sentry.utils import json + +if TYPE_CHECKING: + from sentry.integrations.github.client import GitHubBaseClient + +logger = logging.getLogger(__name__) + +# GitHub Linguist name → Sentry base platform ID +GITHUB_LANGUAGE_TO_SENTRY_PLATFORM: dict[str, str] = { + "Python": "python", + "JavaScript": "javascript", + "TypeScript": "javascript", + "Java": "java", + "Kotlin": "kotlin", + "Swift": "swift", + "Objective-C": "apple-ios", + "Objective-C++": "apple-ios", + "Go": "go", + "Ruby": "ruby", + "PHP": "php", + "Rust": "rust", + "C#": "dotnet", + "Dart": "dart", + "Elixir": "elixir", + "C": "native", + "C++": "native", + "Perl": "perl", +} + +# Languages with no Sentry SDK — filtered out of detection results +IGNORED_LANGUAGES = frozenset( + { + "Shell", + "Makefile", + "Dockerfile", + "HTML", + "CSS", + "SCSS", + "Less", + "Vim Script", + "Emacs Lisp", + "Nix", + "Starlark", + "HCL", + "Jsonnet", + "Batchfile", + "PowerShell", + "CMake", + "M4", + "Roff", + "TeX", + "XSLT", + "PLpgSQL", + "PLSQL", + "TSQL", + } +) + + +class DetectedPlatform(TypedDict): + platform: str # Sentry platform ID, e.g. "python-django" + language: str # GitHub Linguist name, e.g. 
"Python" + bytes: int # Bytes of code in that language + confidence: str # "high" (framework detected) or "medium" (language only) + + +# Maps base_platform -> list of (manifest_file, {dependency_name: sentry_platform_id}) +FRAMEWORK_DETECTORS: dict[str, list[tuple[str, dict[str, str]]]] = { + "javascript": [ + ( + "package.json", + { + "next": "javascript-nextjs", + "react": "javascript-react", + "vue": "javascript-vue", + "@angular/core": "javascript-angular", + "svelte": "javascript-svelte", + "remix": "javascript-remix", + "nuxt": "javascript-nuxt", + "express": "node-express", + "hono": "node-hono", + "koa": "node-koa", + }, + ), + ], + "python": [ + ( + "requirements.txt", + { + "django": "python-django", + "flask": "python-flask", + "fastapi": "python-fastapi", + "starlette": "python-starlette", + "celery": "python-celery", + "tornado": "python-tornado", + }, + ), + ( + "pyproject.toml", + { + "django": "python-django", + "flask": "python-flask", + "fastapi": "python-fastapi", + "starlette": "python-starlette", + "celery": "python-celery", + "tornado": "python-tornado", + }, + ), + ( + "Pipfile", + { + "django": "python-django", + "flask": "python-flask", + "fastapi": "python-fastapi", + "starlette": "python-starlette", + "celery": "python-celery", + "tornado": "python-tornado", + }, + ), + ], + "ruby": [ + ( + "Gemfile", + { + "rails": "ruby-rails", + }, + ), + ], + "php": [ + ( + "composer.json", + { + "laravel/framework": "php-laravel", + "symfony/": "php-symfony", + }, + ), + ], + "java": [ + ( + "build.gradle", + { + "spring-boot": "java-spring-boot", + "spring-framework": "java-spring", + }, + ), + ( + "pom.xml", + { + "spring-boot": "java-spring-boot", + "spring-framework": "java-spring", + }, + ), + ], + "go": [ + ( + "go.mod", + { + "echo": "go-echo", + "gin": "go-gin", + "fiber": "go-fiber", + }, + ), + ], +} + + +def _get_repo_file_content( + client: GitHubBaseClient, repo: str, path: str, ref: str | None = None +) -> str | None: + """Fetch a 
def _json_dependency_names(content: str, sections: tuple[str, ...]) -> set[str]:
    """
    Collect the dependency names declared in a JSON manifest.

    Returns the union of the keys found under each of ``sections``. Content
    that is not valid JSON, is not a JSON object, or whose sections are not
    objects contributes nothing, so callers never see an exception.
    """
    try:
        manifest = json.loads(content)
    except (json.JSONDecodeError, TypeError, ValueError):
        return set()
    if not isinstance(manifest, dict):
        # Valid JSON, but e.g. a list or a string: there are no sections to read.
        return set()

    names: set[str] = set()
    for section in sections:
        declared = manifest.get(section)
        # A null or otherwise malformed section must not hide the other sections.
        if isinstance(declared, dict):
            names.update(declared)
    return names


def _mentions_dependency(lowered_content: str, dep_name: str) -> bool:
    """
    Return True if ``dep_name`` occurs in ``lowered_content`` as a whole token.

    ``lowered_content`` must already be lower-cased. The name may not be
    directly preceded or followed by an ASCII letter or digit, so "gin"
    matches "gin-gonic/gin" but not "logging" or "plugin". Separators such as
    "-", "_", "/", ":" and quotes still count as boundaries, which keeps
    matches like "spring-boot" inside "spring-boot-starter".
    """
    import re  # local import: this module does not import ``re`` at file level

    needle = dep_name.lower()
    # Only anchor the sides of the name that are alphanumeric themselves;
    # a name ending in a separator (e.g. "symfony/") is meant as a prefix.
    leading = r"(?<![a-z0-9])" if needle[:1].isalnum() else ""
    trailing = r"(?![a-z0-9])" if needle[-1:].isalnum() else ""
    return re.search(f"{leading}{re.escape(needle)}{trailing}", lowered_content) is not None


def _detect_frameworks_from_content(
    content: str,
    manifest_file: str,
    dependency_map: dict[str, str],
) -> list[str]:
    """
    Check manifest file content for known framework dependencies.

    :param content: Decoded text of the manifest file.
    :param manifest_file: File name of the manifest; selects the parsing strategy.
        ``package.json`` and ``composer.json`` are parsed as JSON, everything
        else is scanned as plain text.
    :param dependency_map: Maps a dependency name to the Sentry platform ID
        it implies.
    :returns: Platform IDs whose dependency was found, in ``dependency_map``
        order. Empty when nothing matches or the manifest cannot be parsed.
    """
    if manifest_file == "package.json":
        # npm dependency names are matched exactly against the declared keys.
        declared = _json_dependency_names(content, ("dependencies", "devDependencies"))
        return [
            platform_id for dep_name, platform_id in dependency_map.items() if dep_name in declared
        ]

    if manifest_file == "composer.json":
        # Composer names are prefix-matched so that a vendor prefix such as
        # "symfony/" covers every package published under that vendor.
        declared = _json_dependency_names(content, ("require", "require-dev"))
        return [
            platform_id
            for dep_name, platform_id in dependency_map.items()
            if any(pkg_name.startswith(dep_name) for pkg_name in declared)
        ]

    # Text-based manifest files: requirements.txt, Gemfile, Pipfile,
    # pyproject.toml, build.gradle, pom.xml, go.mod
    lowered_content = content.lower()
    return [
        platform_id
        for dep_name, platform_id in dependency_map.items()
        if _mentions_dependency(lowered_content, dep_name)
    ]
def detect_platforms(
    client: GitHubBaseClient,
    repo: str,
    ref: str | None = None,
) -> list[DetectedPlatform]:
    """
    Detect Sentry platforms for a GitHub repository.

    Calls the GitHub Languages API, maps languages to Sentry platform IDs,
    and attempts framework refinement via manifest file inspection.

    :param client: GitHub client used for the languages and contents requests.
    :param repo: Repository in "owner/repo" format.
    :param ref: Optional git ref forwarded to the manifest file lookups.
    :returns: Platforms ordered by relevance (bytes of code, descending).
        For each language, detected frameworks ("high" confidence) come
        before the base platform ("medium" confidence). Each platform ID
        appears at most once, attributed to the largest language that maps
        to it.
    :raises ApiError: Presumably propagated when the languages request
        itself fails; confirm against ``client.get_languages``.
    """
    languages = client.get_languages(repo)

    # Sort explicitly rather than relying on the order of the API response.
    # sorted() is stable, so languages with equal byte counts keep the order
    # in which they were received.
    by_size = sorted(languages.items(), key=lambda item: item[1], reverse=True)

    results: list[DetectedPlatform] = []
    seen_platforms: set[str] = set()

    for language, byte_count in by_size:
        if language in IGNORED_LANGUAGES:
            continue

        base_platform = GITHUB_LANGUAGE_TO_SENTRY_PLATFORM.get(language)
        if base_platform is None:
            continue

        # Several languages share one base platform (TypeScript/JavaScript,
        # C/C++, ...). Once that platform has been reported, its frameworks
        # have been reported too, so repeating the manifest lookups would only
        # spend GitHub API requests on results that are discarded.
        if base_platform in seen_platforms:
            continue

        for framework_id in detect_framework(client, repo, base_platform, ref):
            if framework_id in seen_platforms:
                continue
            seen_platforms.add(framework_id)
            results.append(
                DetectedPlatform(
                    platform=framework_id,
                    language=language,
                    bytes=byte_count,
                    confidence="high",
                )
            )

        # The base platform is always offered as a fallback after any
        # more specific framework matches.
        seen_platforms.add(base_platform)
        results.append(
            DetectedPlatform(
                platform=base_platform,
                language=language,
                bytes=byte_count,
                confidence="medium",
            )
        )

    return results
+from __future__ import annotations + +from base64 import b64encode +from datetime import timedelta +from unittest import mock + +import responses +from django.utils import timezone + +from sentry.models.repository import Repository +from sentry.testutils.cases import APITestCase + +FEATURE_FLAG = "organizations:integrations-github-platform-detection" + + +class OrganizationRepositoryPlatformsGetTest(APITestCase): + endpoint = "sentry-api-0-organization-repository-platforms" + + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + + ten_days = timezone.now() + timedelta(days=10) + self.integration = self.create_integration( + organization=self.organization, + provider="github", + name="Github Test Org", + external_id="1", + metadata={ + "access_token": "12345token", + "expires_at": ten_days.strftime("%Y-%m-%dT%H:%M:%S"), + }, + ) + self.repo = Repository.objects.create( + organization_id=self.organization.id, + name="Test-Organization/foo", + url="https://github.com/Test-Organization/foo", + provider="integrations:github", + external_id="123", + integration_id=self.integration.id, + ) + + def test_feature_flag_required(self) -> None: + response = self.get_response(self.organization.slug, self.repo.id) + assert response.status_code == 404 + + @mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1") + @responses.activate + def test_detects_platforms(self, get_jwt: mock.MagicMock) -> None: + responses.add( + method=responses.GET, + url="https://api.github.com/repos/Test-Organization/foo/languages", + json={"Python": 50000, "JavaScript": 30000}, + status=200, + ) + # 404 for all manifest file lookups (no framework detection) + for manifest in ("requirements.txt", "pyproject.toml", "Pipfile", "package.json"): + responses.add( + method=responses.GET, + url=f"https://api.github.com/repos/Test-Organization/foo/contents/{manifest}", + json={"message": "Not Found"}, + status=404, + ) + + with self.feature(FEATURE_FLAG): + 
response = self.get_success_response( + self.organization.slug, self.repo.id, status_code=200 + ) + + assert response.data == { + "platforms": [ + { + "platform": "python", + "language": "Python", + "bytes": 50000, + "confidence": "medium", + }, + { + "platform": "javascript", + "language": "JavaScript", + "bytes": 30000, + "confidence": "medium", + }, + ] + } + + @mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1") + @responses.activate + def test_detects_framework(self, get_jwt: mock.MagicMock) -> None: + responses.add( + method=responses.GET, + url="https://api.github.com/repos/Test-Organization/foo/languages", + json={"Python": 50000}, + status=200, + ) + # Root directory listing with requirements.txt so framework detection can find it + responses.add( + method=responses.GET, + url="https://api.github.com/repos/Test-Organization/foo/contents", + json=[{"name": "requirements.txt", "type": "file"}], + status=200, + ) + + requirements_content = b64encode(b"Django==4.2\ncelery>=5.0\n").decode() + responses.add( + method=responses.GET, + url="https://api.github.com/repos/Test-Organization/foo/contents/requirements.txt", + json={"content": requirements_content}, + status=200, + ) + + with self.feature(FEATURE_FLAG): + response = self.get_success_response( + self.organization.slug, self.repo.id, status_code=200 + ) + + assert response.data == { + "platforms": [ + { + "platform": "python-django", + "language": "Python", + "bytes": 50000, + "confidence": "high", + }, + { + "platform": "python-celery", + "language": "Python", + "bytes": 50000, + "confidence": "high", + }, + { + "platform": "python", + "language": "Python", + "bytes": 50000, + "confidence": "medium", + }, + ] + } + + def test_repo_not_found(self) -> None: + with self.feature(FEATURE_FLAG): + response = self.get_response(self.organization.slug, 99999) + assert response.status_code == 404 + + def test_non_github_repo(self) -> None: + repo = Repository.objects.create( + 
organization_id=self.organization.id, + name="non-github-repo", + provider="integrations:bitbucket", + external_id="456", + ) + + with self.feature(FEATURE_FLAG): + response = self.get_response(self.organization.slug, repo.id) + assert response.status_code == 400 + assert "only supported for GitHub" in response.data["detail"] + + def test_github_enterprise_repo_rejected(self) -> None: + repo = Repository.objects.create( + organization_id=self.organization.id, + name="enterprise-repo", + provider="integrations:github_enterprise", + external_id="999", + integration_id=self.integration.id, + ) + + with self.feature(FEATURE_FLAG): + response = self.get_response(self.organization.slug, repo.id) + assert response.status_code == 400 + assert "only supported for GitHub" in response.data["detail"] + + def test_repo_without_integration(self) -> None: + repo = Repository.objects.create( + organization_id=self.organization.id, + name="orphan-repo", + provider="integrations:github", + external_id="789", + integration_id=None, + ) + + with self.feature(FEATURE_FLAG): + response = self.get_response(self.organization.slug, repo.id) + assert response.status_code == 400 + + def test_other_orgs_repo_not_accessible(self) -> None: + other_org = self.create_organization(name="other-org") + other_repo = Repository.objects.create( + organization_id=other_org.id, + name="Test-Organization/secret", + provider="integrations:github", + external_id="secret", + integration_id=self.integration.id, + ) + + with self.feature(FEATURE_FLAG): + response = self.get_response(self.organization.slug, other_repo.id) + assert response.status_code == 404 + + @mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1") + @responses.activate + def test_github_api_error_returns_502(self, get_jwt: mock.MagicMock) -> None: + responses.add( + method=responses.GET, + url="https://api.github.com/repos/Test-Organization/foo/languages", + json={"message": "Server Error"}, + status=500, + ) + + 
with self.feature(FEATURE_FLAG): + response = self.get_response(self.organization.slug, self.repo.id) + assert response.status_code == 502 + assert "Failed to detect" in response.data["detail"] diff --git a/tests/sentry/integrations/github/test_platform_detection.py b/tests/sentry/integrations/github/test_platform_detection.py new file mode 100644 index 00000000000000..c796e53283cc06 --- /dev/null +++ b/tests/sentry/integrations/github/test_platform_detection.py @@ -0,0 +1,359 @@ +from __future__ import annotations + +from base64 import b64encode +from unittest import mock + +import pytest + +from sentry.integrations.github.platform_detection import ( + GITHUB_LANGUAGE_TO_SENTRY_PLATFORM, + DetectedPlatform, + _detect_frameworks_from_content, + _get_repo_file_content, + detect_framework, + detect_platforms, +) +from sentry.shared_integrations.exceptions import ApiError +from sentry.utils import json + + +class TestGithubLanguageMapping: + def test_python_maps_to_python(self) -> None: + assert GITHUB_LANGUAGE_TO_SENTRY_PLATFORM["Python"] == "python" + + def test_typescript_maps_to_javascript(self) -> None: + assert GITHUB_LANGUAGE_TO_SENTRY_PLATFORM["TypeScript"] == "javascript" + + def test_javascript_maps_to_javascript(self) -> None: + assert GITHUB_LANGUAGE_TO_SENTRY_PLATFORM["JavaScript"] == "javascript" + + def test_csharp_maps_to_dotnet(self) -> None: + assert GITHUB_LANGUAGE_TO_SENTRY_PLATFORM["C#"] == "dotnet" + + def test_objectivec_maps_to_apple_ios(self) -> None: + assert GITHUB_LANGUAGE_TO_SENTRY_PLATFORM["Objective-C"] == "apple-ios" + + def test_unmapped_language_returns_none(self) -> None: + assert GITHUB_LANGUAGE_TO_SENTRY_PLATFORM.get("Haskell") is None + + +class TestDetectFrameworksFromContent: + def test_package_json_detects_next(self) -> None: + content = json.dumps({"dependencies": {"next": "^14.0.0", "react": "^18.0.0"}}) + result = _detect_frameworks_from_content( + content, "package.json", {"next": "javascript-nextjs", "react": 
"javascript-react"} + ) + assert "javascript-nextjs" in result + assert "javascript-react" in result + + def test_package_json_checks_dev_dependencies(self) -> None: + content = json.dumps({"devDependencies": {"svelte": "^4.0.0"}}) + result = _detect_frameworks_from_content( + content, "package.json", {"svelte": "javascript-svelte"} + ) + assert result == ["javascript-svelte"] + + def test_package_json_no_match(self) -> None: + content = json.dumps({"dependencies": {"lodash": "^4.0.0"}}) + result = _detect_frameworks_from_content( + content, "package.json", {"next": "javascript-nextjs"} + ) + assert result == [] + + def test_package_json_invalid_json(self) -> None: + result = _detect_frameworks_from_content( + "not valid json", "package.json", {"next": "javascript-nextjs"} + ) + assert result == [] + + def test_requirements_txt_detects_django(self) -> None: + content = "Django==4.2\ncelery>=5.0\nredis\n" + result = _detect_frameworks_from_content( + content, + "requirements.txt", + {"django": "python-django", "celery": "python-celery"}, + ) + assert "python-django" in result + assert "python-celery" in result + + def test_requirements_txt_case_insensitive(self) -> None: + content = "Flask==3.0\n" + result = _detect_frameworks_from_content( + content, "requirements.txt", {"flask": "python-flask"} + ) + assert result == ["python-flask"] + + def test_gemfile_detects_rails(self) -> None: + content = 'gem "rails", "~> 7.0"\ngem "pg"\n' + result = _detect_frameworks_from_content(content, "Gemfile", {"rails": "ruby-rails"}) + assert result == ["ruby-rails"] + + def test_composer_json_detects_laravel(self) -> None: + content = json.dumps({"require": {"laravel/framework": "^10.0"}}) + result = _detect_frameworks_from_content( + content, "composer.json", {"laravel/framework": "php-laravel"} + ) + assert result == ["php-laravel"] + + def test_composer_json_prefix_match_symfony(self) -> None: + content = json.dumps({"require": {"symfony/framework-bundle": "^6.0"}}) + result = 
_detect_frameworks_from_content( + content, "composer.json", {"symfony/": "php-symfony"} + ) + assert result == ["php-symfony"] + + def test_go_mod_detects_gin(self) -> None: + content = "module example.com/myapp\n\nrequire github.com/gin-gonic/gin v1.9.1\n" + result = _detect_frameworks_from_content(content, "go.mod", {"gin": "go-gin"}) + assert result == ["go-gin"] + + def test_build_gradle_detects_spring_boot(self) -> None: + content = ( + "dependencies {\n implementation 'org.springframework.boot:spring-boot-starter'\n}\n" + ) + result = _detect_frameworks_from_content( + content, "build.gradle", {"spring-boot": "java-spring-boot"} + ) + assert result == ["java-spring-boot"] + + +def _make_b64_response(content: str) -> dict: + """Helper to create a GitHub contents API response with base64-encoded content.""" + return {"content": b64encode(content.encode()).decode()} + + +class TestGetRepoFileContent: + def test_returns_decoded_content(self) -> None: + client = mock.MagicMock() + client.get.return_value = _make_b64_response("hello world") + + result = _get_repo_file_content(client, "owner/repo", "README.md") + + assert result == "hello world" + + def test_returns_none_on_api_error(self) -> None: + client = mock.MagicMock() + client.get.side_effect = ApiError("Not Found", code=404) + + assert _get_repo_file_content(client, "owner/repo", "missing.txt") is None + + def test_returns_none_on_missing_content_key(self) -> None: + client = mock.MagicMock() + client.get.return_value = {"name": "file.txt"} + + assert _get_repo_file_content(client, "owner/repo", "file.txt") is None + + def test_returns_none_on_invalid_base64(self) -> None: + client = mock.MagicMock() + client.get.return_value = {"content": "not-valid-base64!!!"} + + assert _get_repo_file_content(client, "owner/repo", "file.txt") is None + + def test_returns_none_on_binary_content(self) -> None: + client = mock.MagicMock() + # Valid base64 but decodes to invalid UTF-8 + client.get.return_value = {"content": 
b64encode(b"\x80\x81\x82").decode()} + + assert _get_repo_file_content(client, "owner/repo", "binary.bin") is None + + def test_returns_none_on_directory_listing(self) -> None: + client = mock.MagicMock() + # GitHub returns a list (not a dict) when path is a directory + client.get.return_value = [{"name": "file.txt", "type": "file"}] + + assert _get_repo_file_content(client, "owner/repo", "some-dir") is None + + +class TestDetectFramework: + def test_detects_python_django(self) -> None: + client = mock.MagicMock() + client.get.return_value = _make_b64_response("Django==4.2\ncelery>=5.0\n") + + result = detect_framework(client, "owner/repo", "python") + + assert "python-django" in result + assert "python-celery" in result + + def test_falls_back_when_manifest_not_found(self) -> None: + client = mock.MagicMock() + client.get.side_effect = ApiError("Not Found", code=404) + + result = detect_framework(client, "owner/repo", "python") + + assert result == [] + + def test_stops_after_first_manifest_with_results(self) -> None: + client = mock.MagicMock() + client.get.return_value = _make_b64_response("Django==4.2\n") + + result = detect_framework(client, "owner/repo", "python") + + assert result == ["python-django"] + # Should have only called get() once (for requirements.txt), + # not continued to pyproject.toml + assert client.get.call_count == 1 + + def test_tries_next_manifest_when_first_has_no_match(self) -> None: + client = mock.MagicMock() + + def side_effect(path, params=None): + if "requirements.txt" in path: + return _make_b64_response("some-unrelated-package\n") + if "pyproject.toml" in path: + return _make_b64_response('[project]\ndependencies = ["flask"]\n') + raise ApiError("Not Found", code=404) + + client.get.side_effect = side_effect + + result = detect_framework(client, "owner/repo", "python") + + assert result == ["python-flask"] + + def test_unknown_platform_returns_empty(self) -> None: + client = mock.MagicMock() + result = detect_framework(client, 
"owner/repo", "unknown-platform") + assert result == [] + client.get.assert_not_called() + + def test_deduplicates_results(self) -> None: + client = mock.MagicMock() + # package.json with both react in deps and devDeps + content = json.dumps( + {"dependencies": {"react": "^18.0.0"}, "devDependencies": {"react": "^18.0.0"}} + ) + client.get.return_value = _make_b64_response(content) + + result = detect_framework(client, "owner/repo", "javascript") + + assert result.count("javascript-react") == 1 + + +class TestDetectPlatforms: + def test_detects_single_language_repo(self) -> None: + client = mock.MagicMock() + client.get_languages.return_value = {"Python": 50000} + client.get.side_effect = ApiError("Not Found", code=404) + + result = detect_platforms(client, "owner/repo") + + assert len(result) == 1 + assert result[0] == DetectedPlatform( + platform="python", + language="Python", + bytes=50000, + confidence="medium", + ) + + def test_detects_multi_language_repo(self) -> None: + client = mock.MagicMock() + client.get_languages.return_value = {"Python": 50000, "JavaScript": 30000} + client.get.side_effect = ApiError("Not Found", code=404) + + result = detect_platforms(client, "owner/repo") + + platforms = [r["platform"] for r in result] + assert "python" in platforms + assert "javascript" in platforms + + def test_filters_ignored_languages(self) -> None: + client = mock.MagicMock() + client.get_languages.return_value = { + "Python": 50000, + "Shell": 5000, + "Makefile": 1000, + "Dockerfile": 500, + } + client.get.side_effect = ApiError("Not Found", code=404) + + result = detect_platforms(client, "owner/repo") + + languages = [r["language"] for r in result] + assert "Python" in languages + assert "Shell" not in languages + assert "Makefile" not in languages + assert "Dockerfile" not in languages + + def test_framework_detection_gives_high_confidence(self) -> None: + client = mock.MagicMock() + client.get_languages.return_value = {"Python": 50000} + + def 
get_side_effect(path, params=None): + if "requirements.txt" in path: + return _make_b64_response("Django==4.2\n") + raise ApiError("Not Found", code=404) + + client.get.side_effect = get_side_effect + + result = detect_platforms(client, "owner/repo") + + django_result = next(r for r in result if r["platform"] == "python-django") + assert django_result["confidence"] == "high" + + python_result = next(r for r in result if r["platform"] == "python") + assert python_result["confidence"] == "medium" + + def test_preserves_github_ordering(self) -> None: + client = mock.MagicMock() + # GitHub returns languages ordered by bytes descending + client.get_languages.return_value = {"Python": 50000, "Go": 30000, "Ruby": 10000} + client.get.side_effect = ApiError("Not Found", code=404) + + result = detect_platforms(client, "owner/repo") + + assert result[0]["language"] == "Python" + assert result[1]["language"] == "Go" + assert result[2]["language"] == "Ruby" + + def test_typescript_and_javascript_deduplicated(self) -> None: + client = mock.MagicMock() + client.get_languages.return_value = {"TypeScript": 40000, "JavaScript": 10000} + client.get.side_effect = ApiError("Not Found", code=404) + + result = detect_platforms(client, "owner/repo") + + platforms = [r["platform"] for r in result] + assert platforms.count("javascript") == 1 + + def test_empty_repo_returns_empty(self) -> None: + client = mock.MagicMock() + client.get_languages.return_value = {} + + result = detect_platforms(client, "owner/repo") + + assert result == [] + + def test_only_ignored_languages_returns_empty(self) -> None: + client = mock.MagicMock() + client.get_languages.return_value = {"Shell": 5000, "Makefile": 1000} + + result = detect_platforms(client, "owner/repo") + + assert result == [] + + @pytest.mark.parametrize( + ("language", "expected_platform"), + [ + ("Python", "python"), + ("JavaScript", "javascript"), + ("TypeScript", "javascript"), + ("Java", "java"), + ("Kotlin", "kotlin"), + ("Swift", 
"swift"), + ("Go", "go"), + ("Ruby", "ruby"), + ("PHP", "php"), + ("Rust", "rust"), + ("C#", "dotnet"), + ("Dart", "dart"), + ("Elixir", "elixir"), + ], + ) + def test_all_mapped_languages_detected(self, language: str, expected_platform: str) -> None: + client = mock.MagicMock() + client.get_languages.return_value = {language: 10000} + client.get.side_effect = ApiError("Not Found", code=404) + + result = detect_platforms(client, "owner/repo") + + assert len(result) >= 1 + assert result[0]["platform"] == expected_platform