fix: stricter GitHub owner/repo regexes and injection tests

Split _GITHUB_NAME_RE into separate owner and repo patterns.
Owner regex now allows only alphanumerics and hyphens and rejects a
leading or trailing hyphen; dots and underscores are rejected anywhere
(in line with GitHub's username rules). Repo regex requires an
alphanumeric start but allows hyphens, dots, and underscores anywhere after.

New tests cover GraphQL injection attempts, invalid leading chars,
and valid hyphenated/underscore/dot combinations.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen 2026-03-30 15:03:06 +08:00
parent 87c5f3bde9
commit 1ae889b4fd
No known key found for this signature in database
GPG Key ID: B93DE4F003C33630
2 changed files with 37 additions and 4 deletions

View File

@ -19,9 +19,10 @@ README_PATH = Path(__file__).parent.parent / "README.md"
GRAPHQL_URL = "https://api.github.com/graphql" GRAPHQL_URL = "https://api.github.com/graphql"
BATCH_SIZE = 50 BATCH_SIZE = 50
# Allowlist for valid GitHub owner/repo name characters. # GitHub usernames: alphanumeric and hyphens, must start/end with alphanumeric.
# GitHub usernames and repo names only allow letters, digits, hyphens, underscores, and dots. _GITHUB_OWNER_RE = re.compile(r"^[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?$")
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9._-]+$") # GitHub repo names: alphanumeric, hyphens, underscores, dots, must start with alphanumeric.
# Repo-name allowlist: first character must be alphanumeric; later characters
# may also be hyphens, underscores, or dots.
# Anchored with \Z instead of $: in Python, $ also matches just before a
# trailing newline, so "repo\n" would pass validation and the newline would be
# interpolated into the generated GraphQL string.
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*\Z")
def extract_github_repos(text: str) -> set[str]: def extract_github_repos(text: str) -> set[str]:
@ -50,7 +51,7 @@ def build_graphql_query(repos: list[str]) -> str:
parts = [] parts = []
for i, repo in enumerate(repos): for i, repo in enumerate(repos):
owner, name = repo.split("/", 1) owner, name = repo.split("/", 1)
if not _GITHUB_NAME_RE.match(owner) or not _GITHUB_NAME_RE.match(name): if not _GITHUB_OWNER_RE.match(owner) or not _GITHUB_NAME_RE.match(name):
continue continue
parts.append( parts.append(
f'repo_{i}: repository(owner: "{owner}", name: "{name}") ' f'repo_{i}: repository(owner: "{owner}", name: "{name}") '

View File

@ -98,6 +98,38 @@ class TestBuildGraphqlQuery:
assert "good" in query assert "good" in query
assert "bad" not in query assert "bad" not in query
def test_skips_graphql_injection_in_owner(self):
    """An owner carrying a quote and GraphQL syntax must produce an empty query."""
    malicious_repos = ['org"){evil}/repo']
    assert build_graphql_query(malicious_repos) == ""
def test_skips_graphql_injection_in_name(self):
    """A repo name carrying a quote and GraphQL syntax must produce an empty query."""
    malicious_repos = ['org/repo"){evil}']
    assert build_graphql_query(malicious_repos) == ""
def test_skips_owner_starting_with_hyphen(self):
    """An owner whose first character is a hyphen must produce an empty query."""
    rejected = ["-bad/repo"]
    assert build_graphql_query(rejected) == ""
def test_skips_owner_starting_with_dot(self):
    """An owner whose first character is a dot must produce an empty query."""
    rejected = [".bad/repo"]
    assert build_graphql_query(rejected) == ""
def test_skips_repo_starting_with_dot(self):
    """A repo name whose first character is a dot must produce an empty query."""
    rejected = ["org/.hidden"]
    assert build_graphql_query(rejected) == ""
def test_allows_repo_with_dots_and_underscores(self):
    """A repo name containing an underscore and a dot must appear in the query."""
    rendered = build_graphql_query(["org/my_repo.py"])
    expected_fragment = 'name: "my_repo.py"'
    assert expected_fragment in rendered
def test_allows_hyphenated_owner(self):
    """An owner with an interior hyphen must appear in the query."""
    rendered = build_graphql_query(["my-org/repo"])
    expected_fragment = 'owner: "my-org"'
    assert expected_fragment in rendered
def test_skips_owner_with_underscore(self):
    """An owner containing an underscore must produce an empty query."""
    rejected = ["bad_owner/repo"]
    assert build_graphql_query(rejected) == ""
class TestParseGraphqlResponse: class TestParseGraphqlResponse:
def test_parses_star_count_and_owner(self): def test_parses_star_count_and_owner(self):