diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index 68d0304..1bc105c 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -19,9 +19,10 @@ README_PATH = Path(__file__).parent.parent / "README.md" GRAPHQL_URL = "https://api.github.com/graphql" BATCH_SIZE = 50 -# Allowlist for valid GitHub owner/repo name characters. -# GitHub usernames and repo names only allow letters, digits, hyphens, underscores, and dots. -_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9._-]+$") +# GitHub usernames: alphanumeric and hyphens, must start/end with alphanumeric. +_GITHUB_OWNER_RE = re.compile(r"^[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?$") +# GitHub repo names: alphanumeric, hyphens, underscores, dots, must start with alphanumeric. +_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$") def extract_github_repos(text: str) -> set[str]: @@ -50,7 +51,7 @@ def build_graphql_query(repos: list[str]) -> str: parts = [] for i, repo in enumerate(repos): owner, name = repo.split("/", 1) - if not _GITHUB_NAME_RE.match(owner) or not _GITHUB_NAME_RE.match(name): + if not _GITHUB_OWNER_RE.match(owner) or not _GITHUB_NAME_RE.match(name): continue parts.append( f'repo_{i}: repository(owner: "{owner}", name: "{name}") ' diff --git a/website/tests/test_fetch_github_stars.py b/website/tests/test_fetch_github_stars.py index 10d6478..e26f74b 100644 --- a/website/tests/test_fetch_github_stars.py +++ b/website/tests/test_fetch_github_stars.py @@ -98,6 +98,38 @@ class TestBuildGraphqlQuery: assert "good" in query assert "bad" not in query + def test_skips_graphql_injection_in_owner(self): + query = build_graphql_query(['org"){evil}/repo']) + assert query == "" + + def test_skips_graphql_injection_in_name(self): + query = build_graphql_query(['org/repo"){evil}']) + assert query == "" + + def test_skips_owner_starting_with_hyphen(self): + query = build_graphql_query(["-bad/repo"]) + assert query == "" + + def test_skips_owner_starting_with_dot(self): + query = build_graphql_query([".bad/repo"]) + assert query == "" + + def test_skips_repo_starting_with_dot(self): + query = build_graphql_query(["org/.hidden"]) + assert query == "" + + def test_allows_repo_with_dots_and_underscores(self): + query = build_graphql_query(["org/my_repo.py"]) + assert 'name: "my_repo.py"' in query + + def test_allows_hyphenated_owner(self): + query = build_graphql_query(["my-org/repo"]) + assert 'owner: "my-org"' in query + + def test_skips_owner_with_underscore(self): + query = build_graphql_query(["bad_owner/repo"]) + assert query == "" + class TestParseGraphqlResponse: def test_parses_star_count_and_owner(self):