Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/auto-label.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Auto Label
on:
pull_request:
types: [opened, reopened, synchronized]
permissions:
issues: write
pull-requests: read
jobs:
label:
runs-on: ubuntu-latest
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/issue-triage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ name: Issue Triage
on:
issues:
types: [opened]
permissions:
issues: write
jobs:
triage:
runs-on: ubuntu-latest
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/pr-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
types: [opened, reopened, synchronize, edited]
permissions:
issues: write
pull-requests: write
jobs:
validate:
runs-on: ubuntu-latest
Expand All @@ -17,7 +18,7 @@ jobs:
if (pr.title.length < 10) {
issues.push('❌ PR title too short (minimum 10 characters)');
}
if (!/^(feat|fix|docs|style|refactor|test|chore|perf|ci|build|revert)(\(.+\))?:/.test(pr.title)) {
if (!/^([\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]\s*)?(feat|fix|docs|style|refactor|test|chore|perf|ci|build|revert)(\(.+\))?:|^(⚡\s*)?Performance improvement/u.test(pr.title)) {
issues.push('⚠️ PR title should follow conventional commits format');
}

Expand Down
132 changes: 76 additions & 56 deletions src/youtube_extension/backend/deployment_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pathlib import Path
from typing import Any, Optional

import requests
import httpx

from youtube_extension.backend.deploy import deploy_project as _adapter_deploy

Expand Down Expand Up @@ -493,43 +493,45 @@ async def _create_github_repository(self, repo_name: str, project_config: dict[s
"Accept": "application/vnd.github.v3+json"
}

# Get user info
user_response = requests.get("https://api.github.com/user", headers=headers)
if user_response.status_code != 200:
raise Exception(f"Failed to get GitHub user info: {user_response.text}")

user_data = user_response.json()
username = user_data["login"]

# Create repository
repo_data = {
"name": repo_name,
"description": f"Generated by UVAI from YouTube tutorial - {project_config.get('title', 'Unknown')}",
"private": False,
"auto_init": True,
"has_issues": True,
"has_projects": True,
"has_wiki": False
}
# Use httpx.AsyncClient for non-blocking HTTP requests
Copy link
Contributor

@vercel vercel bot Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

httpx.AsyncClient instances created without explicit timeout configuration

Fix on Vercel

async with httpx.AsyncClient() as client:
# Get user info
user_response = await client.get("https://api.github.com/user", headers=headers)
if user_response.status_code != 200:
raise Exception(f"Failed to get GitHub user info: {user_response.text}")
Comment on lines +499 to +501
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Instead of manually checking the status code and raising a generic Exception, it's more idiomatic to use httpx's raise_for_status() method. This raises a specific httpx.HTTPStatusError on any non-2xx response (not only 4xx/5xx), which provides more context and makes error handling more robust. This pattern can also be applied to the repo_response check on lines 528-532.

Suggested change
user_response = await client.get("https://api.github.com/user", headers=headers)
if user_response.status_code != 200:
raise Exception(f"Failed to get GitHub user info: {user_response.text}")
try:
user_response = await client.get("https://api.github.com/user", headers=headers)
user_response.raise_for_status()
except httpx.HTTPStatusError as exc:
raise Exception(f"Failed to get GitHub user info: {exc.response.text}") from exc


user_data = user_response.json()
username = user_data["login"]

# Create repository
repo_data = {
"name": repo_name,
"description": f"Generated by UVAI from YouTube tutorial - {project_config.get('title', 'Unknown')}",
"private": False,
"auto_init": True,
"has_issues": True,
"has_projects": True,
"has_wiki": False
}

response = requests.post(
"https://api.github.com/user/repos",
headers=headers,
json=repo_data
)
response = await client.post(
"https://api.github.com/user/repos",
headers=headers,
json=repo_data
)

if response.status_code not in [201, 422]: # 422 if repo already exists
raise Exception(f"Failed to create GitHub repository: {response.text}")
if response.status_code not in [201, 422]: # 422 if repo already exists
raise Exception(f"Failed to create GitHub repository: {response.text}")

if response.status_code == 422:
# Repository already exists, get its info
repo_response = requests.get(f"https://api.github.com/repos/{username}/{repo_name}", headers=headers)
if repo_response.status_code == 200:
repo_info = repo_response.json()
if response.status_code == 422:
# Repository already exists, get its info
repo_response = await client.get(f"https://api.github.com/repos/{username}/{repo_name}", headers=headers)
if repo_response.status_code == 200:
repo_info = repo_response.json()
else:
raise Exception(f"Repository exists but can't access it: {repo_response.text}")
Copy link
Contributor

@vercel vercel bot Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing error handling when parsing JSON response from GitHub API calls - unhandled JSONDecodeError on invalid responses

Fix on Vercel

else:
raise Exception(f"Repository exists but can't access it: {repo_response.text}")
else:
repo_info = response.json()
repo_info = response.json()
Copy link
Contributor

@vercel vercel bot Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing validation for required fields in GitHub API response causes KeyError if fields are absent

Fix on Vercel


return {
"repo_name": repo_name,
Expand All @@ -549,30 +551,27 @@ async def _upload_to_github(self, project_path: str, repo_name: str) -> dict[str
"Accept": "application/vnd.github.v3+json"
}

# Get user info
user_response = requests.get("https://api.github.com/user", headers=headers)
user_data = user_response.json()
username = user_data["login"]
async with httpx.AsyncClient() as client:
Copy link
Contributor

@vercel vercel bot Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing timeout configuration for httpx.AsyncClient in GitHub deployment methods leaves httpx's default 5-second timeout in effect, so slow requests (e.g., large file uploads) time out after 5 seconds.

Fix on Vercel

# Get user info
user_response = await client.get("https://api.github.com/user", headers=headers)
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In _upload_to_github, user_response is parsed as JSON without checking status_code. If the token is invalid or GitHub returns an error payload, this will raise a KeyError on login and hide the real cause. Check for a 200 response (and raise a clear exception) before accessing user_data["login"].

Suggested change
user_response = await client.get("https://api.github.com/user", headers=headers)
user_response = await client.get("https://api.github.com/user", headers=headers)
if user_response.status_code != 200:
raise Exception(
f"GitHub authentication failed: {user_response.status_code} - {user_response.text}"
)

Copilot uses AI. Check for mistakes.
user_data = user_response.json()
Copy link
Contributor

@vercel vercel bot Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing error handling when parsing JSON response from GitHub API user endpoint

Fix on Vercel

username = user_data["login"]
Comment on lines +556 to +558
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The _upload_to_github function does not check the HTTP status code after a GitHub API call, which can cause a KeyError on failure instead of a clear error message.
Severity: MEDIUM

Suggested Fix

Before parsing the JSON response in _upload_to_github, add a check for the HTTP status code. If user_response.status_code is not 200, raise an exception with a descriptive error message, similar to the implementation in _create_github_repository. For example: if user_response.status_code != 200: raise Exception(f"Failed to get GitHub user info: {user_response.text}").

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/youtube_extension/backend/deployment_manager.py#L556-L558

Potential issue: The `_upload_to_github` function calls the GitHub API to get user info
but fails to check the HTTP status code of the response before parsing it. If the API
call fails (e.g., due to an invalid token, rate limiting, or server issues), the
response will be an error object without a `"login"` key. Attempting to access
`user_data["login"]` will then raise a `KeyError`. This is inconsistent with the
`_create_github_repository` function, which correctly handles this case. While the
exception is caught upstream, the resulting error message is a misleading `KeyError`
instead of a clear indication of the API failure.

Did we get this right? 👍 / 👎 to inform future reviews.


uploaded_files = []
project_path_obj = Path(project_path)
uploaded_files = []
project_path_obj = Path(project_path)

# Directories to exclude from GitHub upload (standard .gitignore patterns)
EXCLUDED_DIRS = {'node_modules', '.next', '.git', '__pycache__', '.vercel', 'dist', '.turbo'}
# Directories to exclude from GitHub upload (standard .gitignore patterns)
EXCLUDED_DIRS = {'node_modules', '.next', '.git', '__pycache__', '.vercel', 'dist', '.turbo'}

def should_skip_path(path: Path) -> bool:
"""Check if any parent directory is in the exclusion list"""
return any(part in EXCLUDED_DIRS for part in path.parts)
def should_skip_path(path: Path) -> bool:
"""Check if any parent directory is in the exclusion list"""
return any(part in EXCLUDED_DIRS for part in path.parts)

# Upload each file
for file_path in project_path_obj.rglob("*"):
# Skip excluded directories and dotfiles
if should_skip_path(file_path.relative_to(project_path_obj)):
continue
if file_path.is_file() and not file_path.name.startswith('.'):
try:
relative_path = file_path.relative_to(project_path_obj)
# Read all files to upload concurrently to improve performance further
upload_tasks = []

async def upload_file(client, file_path, relative_path):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The repository's style guide requires strict type hinting for all functions. The inner helper function upload_file is missing type hints. Adding them will improve code clarity and maintainability.

Suggested change
async def upload_file(client, file_path, relative_path):
async def upload_file(client: httpx.AsyncClient, file_path: Path, relative_path: Path) -> Optional[str]:
References
  1. The style guide requires all functions to have strict type hinting. (link)

try:
# Read file content
with open(file_path, 'rb') as f:
content = f.read()
Comment on lines 575 to 577
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

upload_file() performs synchronous disk I/O (open(...).read()) inside an async function. With many uploads this can still block the event loop and reduce the concurrency gains from switching to httpx. Consider moving the file read/encode to a thread (e.g., asyncio.to_thread) or using an async file reader so HTTP awaits aren’t delayed by filesystem reads.

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The upload_file function reads the entire content of each file into memory using f.read(), which, especially with concurrent uploads, can lead to significant memory consumption and a Denial of Service (DoS) vulnerability if large files are processed. Additionally, this synchronous file operation blocks the asyncio event loop, undermining the performance gains from using an async HTTP client. Consider using aiofiles for non-blocking file I/O to remove the event-loop bottleneck; note that `await f.read()` still loads the whole file into memory, so the memory exhaustion issue additionally requires a file-size limit.

Suggested change
content = f.read()
import aiofiles
async with aiofiles.open(file_path, 'rb') as f:
content = await f.read()

Expand All @@ -587,15 +586,36 @@ def should_skip_path(path: Path) -> bool:
}

upload_url = f"https://api.github.com/repos/{username}/{repo_name}/contents/{relative_path}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The relative_path of files is concatenated directly into the GitHub API URL without URL encoding. If a filename contains special characters such as ?, #, or spaces, they will not be properly handled by the API, potentially leading to URL manipulation. For example, a filename containing ?branch=main could inadvertently target a different branch than intended. Additionally, on Windows systems, the path separator \ will be used, which is not compatible with the GitHub API's expected / separator.

response = requests.put(upload_url, headers=headers, json=file_data)
response = await client.put(upload_url, headers=headers, json=file_data)
Copy link
Contributor

@vercel vercel bot Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Path object used directly in GitHub API URL without converting to forward slashes, causing invalid URLs on Windows systems

Fix on Vercel


if response.status_code in [201, 200]:
uploaded_files.append(str(relative_path))
return str(relative_path)
else:
logger.warning(f"Failed to upload {relative_path}: {response.text}")

return None
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

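upload_file() catches broad Exception. On Python 3.7 and earlier this also catches asyncio.CancelledError and prevents task cancellation from propagating (e.g., during request shutdown); since Python 3.8, CancelledError derives from BaseException and is not caught by `except Exception`. If older interpreters must be supported, re-raise CancelledError explicitly and only swallow/log non-cancellation exceptions.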

Suggested change
return None
return None
except asyncio.CancelledError:
# Propagate cancellation so that calling code can handle shutdown correctly
raise

Copilot uses AI. Check for mistakes.
Comment on lines 591 to +595
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With uploads now running concurrently, hitting GitHub secondary rate limits (403) or abuse detection is more likely. Currently failures are only logged and the deployment can still report success with missing files. Consider detecting 403/429 responses and applying retry/backoff (or failing the GitHub deployment) so callers don’t get a "success" repo that’s only partially uploaded.

Copilot uses AI. Check for mistakes.
except Exception as e:
logger.warning(f"Error uploading {file_path}: {e}")
return None

# Collect tasks
for file_path in project_path_obj.rglob("*"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-critical critical

The _upload_to_github method is vulnerable to path traversal because it uses the project_path argument directly to read files from the filesystem. If project_path is influenced by user input (e.g., via the project_type field in the API), an attacker can use traversal sequences like ../../ to point it to sensitive system directories (e.g., /etc). The system will then recursively read and upload all accessible files from that directory to a GitHub repository, leading to critical data exfiltration.

# Skip excluded directories and dotfiles
if should_skip_path(file_path.relative_to(project_path_obj)):
continue
if file_path.is_file() and not file_path.name.startswith('.'):
relative_path = file_path.relative_to(project_path_obj)
upload_tasks.append(upload_file(client, file_path, relative_path))

# Run uploads concurrently with a semaphore to avoid overwhelming the GitHub API
# GitHub does not publish an exact secondary rate limit for concurrent writes, so cap in-flight uploads at a conservative 10.
semaphore = asyncio.Semaphore(10)

Comment on lines +609 to +612
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The concurrent upload implementation builds upload_tasks for every file and then gates execution with a semaphore. For large repos this can still create thousands of coroutine objects at once and increase memory pressure. Consider a bounded producer/consumer (worker pool pulling from a queue) so you only keep ~N in-flight uploads.

Copilot uses AI. Check for mistakes.
async def run_with_semaphore(coro):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The repository's style guide requires strict type hinting for all functions. The inner helper function run_with_semaphore is missing type hints. Adding them will improve code clarity and maintainability.

Suggested change
async def run_with_semaphore(coro):
async def run_with_semaphore(coro: asyncio.Coroutine) -> Any:
References
  1. The style guide requires all functions to have strict type hinting. (link)

async with semaphore:
return await coro

results = await asyncio.gather(*(run_with_semaphore(task) for task in upload_tasks))
uploaded_files = [res for res in results if res is not None]

return {
"files_uploaded": len(uploaded_files),
Expand Down