From 1ae889b4fd4c1a3e0b35acd6e10ee2da16f294e6 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Mon, 30 Mar 2026 15:03:06 +0800 Subject: [PATCH] fix: stricter GitHub owner/repo regexes and injection tests Split _GITHUB_NAME_RE into separate owner and repo patterns. Owner regex now rejects leading/trailing hyphens and dots (matching GitHub's actual username rules). Repo regex requires alphanumeric start but allows dots and underscores anywhere after. New tests cover GraphQL injection attempts, invalid leading chars, and valid hyphenated/underscore/dot combinations. Co-Authored-By: Claude --- website/fetch_github_stars.py | 9 ++++--- website/tests/test_fetch_github_stars.py | 32 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index 68d0304..1bc105c 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -19,9 +19,10 @@ README_PATH = Path(__file__).parent.parent / "README.md" GRAPHQL_URL = "https://api.github.com/graphql" BATCH_SIZE = 50 -# Allowlist for valid GitHub owner/repo name characters. -# GitHub usernames and repo names only allow letters, digits, hyphens, underscores, and dots. -_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9._-]+$") +# GitHub usernames: alphanumeric and hyphens, must start/end with alphanumeric. +_GITHUB_OWNER_RE = re.compile(r"^[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?$") +# GitHub repo names: alphanumeric, hyphens, underscores, dots, must start with alphanumeric. +_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$") def extract_github_repos(text: str) -> set[str]: @@ -50,7 +51,7 @@ def build_graphql_query(repos: list[str]) -> str: parts = [] for i, repo in enumerate(repos): owner, name = repo.split("/", 1) - if not _GITHUB_NAME_RE.match(owner) or not _GITHUB_NAME_RE.match(name): + if not _GITHUB_OWNER_RE.match(owner) or not _GITHUB_NAME_RE.match(name): continue parts.append( f'repo_{i}: repository(owner: "{owner}", name: "{name}") ' diff --git a/website/tests/test_fetch_github_stars.py b/website/tests/test_fetch_github_stars.py index 10d6478..e26f74b 100644 --- a/website/tests/test_fetch_github_stars.py +++ b/website/tests/test_fetch_github_stars.py @@ -98,6 +98,38 @@ class TestBuildGraphqlQuery: assert "good" in query assert "bad" not in query + def test_skips_graphql_injection_in_owner(self): + query = build_graphql_query(['org"){evil}/repo']) + assert query == "" + + def test_skips_graphql_injection_in_name(self): + query = build_graphql_query(['org/repo"){evil}']) + assert query == "" + + def test_skips_owner_starting_with_hyphen(self): + query = build_graphql_query(["-bad/repo"]) + assert query == "" + + def test_skips_owner_starting_with_dot(self): + query = build_graphql_query([".bad/repo"]) + assert query == "" + + def test_skips_repo_starting_with_dot(self): + query = build_graphql_query(["org/.hidden"]) + assert query == "" + + def test_allows_repo_with_dots_and_underscores(self): + query = build_graphql_query(["org/my_repo.py"]) + assert 'name: "my_repo.py"' in query + + def test_allows_hyphenated_owner(self): + query = build_graphql_query(["my-org/repo"]) + assert 'owner: "my-org"' in query + + def test_skips_owner_with_underscore(self): + query = build_graphql_query(["bad_owner/repo"]) + assert query == "" + class TestParseGraphqlResponse: def test_parses_star_count_and_owner(self):