-
Notifications
You must be signed in to change notification settings - Fork 39
fix: address Gemini code review feedback on profile README generation #3963
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -650,28 +650,69 @@ _lang_badge() { | |||||
| return 0 | ||||||
| } | ||||||
|
|
||||||
| # --- Sanitize a string for safe use in markdown --- | ||||||
| # Strips characters that could break markdown link/image syntax | ||||||
| _sanitize_md() { | ||||||
| local input="$1" | ||||||
| # Remove markdown-breaking characters: [ ] ( ) and backticks | ||||||
| local sanitized | ||||||
| sanitized="${input//[\[\]()]/}" | ||||||
| sanitized="${sanitized//\`/}" | ||||||
| echo "$sanitized" | ||||||
| return 0 | ||||||
| } | ||||||
|
|
||||||
| # --- Validate a URL for safe embedding in markdown --- | ||||||
| # Rejects javascript: URIs, non-http(s) schemes, and markdown-breaking chars | ||||||
| _sanitize_url() { | ||||||
| local url="$1" | ||||||
| # Must start with http:// or https:// | ||||||
| if [[ "$url" != http://* && "$url" != https://* ]]; then | ||||||
| echo "" | ||||||
| return 0 | ||||||
| fi | ||||||
| # Reject URLs containing markdown-breaking characters or whitespace | ||||||
| if [[ "$url" == *'('* || "$url" == *')'* || "$url" == *'['* || "$url" == *']'* || "$url" == *' '* ]]; then | ||||||
| echo "" | ||||||
| return 0 | ||||||
| fi | ||||||
| echo "$url" | ||||||
| return 0 | ||||||
| } | ||||||
|
|
||||||
| # --- Generate rich profile README from GitHub data --- | ||||||
| _generate_rich_readme() { | ||||||
| local gh_user="$1" | ||||||
| local readme_path="$2" | ||||||
|
|
||||||
| # Fetch user profile | ||||||
| # Fetch user profile — single jq pass for all fields | ||||||
| local user_json | ||||||
| user_json=$(gh api "users/${gh_user}" 2>/dev/null) || user_json="{}" | ||||||
| user_json=$(gh api "users/${gh_user}") || user_json="{}" | ||||||
| local display_name bio blog twitter | ||||||
| display_name=$(echo "$user_json" | jq -r '.name // empty' 2>/dev/null) | ||||||
| IFS=$'\t' read -r display_name bio blog twitter < <( | ||||||
| echo "$user_json" | jq -r '[ | ||||||
| ((.name // "") | gsub("[\\t\\n]"; " ")), | ||||||
| ((.bio // "") | gsub("[\\t\\n]"; " ")), | ||||||
| (if .blog != null and .blog != "" then (.blog | gsub("[\\t\\n]"; "")) else "" end), | ||||||
| (if .twitter_username != null and .twitter_username != "" then (.twitter_username | gsub("[\\t\\n]"; "")) else "" end) | ||||||
| ] | join("\t")' || printf '\t\t\t\n' | ||||||
| ) | ||||||
| display_name="${display_name:-$gh_user}" | ||||||
| bio=$(echo "$user_json" | jq -r '.bio // empty' 2>/dev/null) | ||||||
| blog=$(echo "$user_json" | jq -r 'select(.blog != null and .blog != "") | .blog' 2>/dev/null) | ||||||
| twitter=$(echo "$user_json" | jq -r 'select(.twitter_username != null and .twitter_username != "") | .twitter_username' 2>/dev/null) | ||||||
|
|
||||||
| # Sanitize user-controlled fields | ||||||
| display_name=$(_sanitize_md "$display_name") | ||||||
| bio=$(_sanitize_md "$bio") | ||||||
| blog=$(_sanitize_url "$blog") | ||||||
| # twitter is used as a path component, strip non-alphanumeric/underscore | ||||||
| twitter="${twitter//[^a-zA-Z0-9_]/}" | ||||||
|
|
||||||
| # Fetch repos and detect languages | ||||||
| local repos_json | ||||||
| repos_json=$(gh api "users/${gh_user}/repos?per_page=100&sort=updated" 2>/dev/null) || repos_json="[]" | ||||||
| repos_json=$(gh api "users/${gh_user}/repos?per_page=100&sort=updated") || repos_json="[]" | ||||||
|
|
||||||
| # Unique languages from all repos (sorted) | ||||||
| local languages | ||||||
| languages=$(echo "$repos_json" | jq -r '[.[].language | select(. != null)] | unique | .[]' 2>/dev/null) | ||||||
| languages=$(echo "$repos_json" | jq -r '[.[].language | select(. != null)] | unique | .[]') | ||||||
|
|
||||||
| # Build badge line | ||||||
| local badges="" | ||||||
|
|
@@ -686,39 +727,39 @@ _generate_rich_readme() { | |||||
| badges="${badges}"''$'\n' | ||||||
| badges="${badges}"''$'\n' | ||||||
|
|
||||||
| # Build own repos section (non-fork, non-profile, with description) | ||||||
| local own_repos="" | ||||||
| while IFS= read -r row; do | ||||||
| [[ -z "$row" ]] && continue | ||||||
| local rname rdesc rurl | ||||||
| rname=$(echo "$row" | jq -r '.name') | ||||||
| rdesc=$(echo "$row" | jq -r '.description // "No description"') | ||||||
| rurl=$(echo "$row" | jq -r '.html_url') | ||||||
| own_repos="${own_repos}- **[${rname}](${rurl})** -- ${rdesc}"$'\n' | ||||||
| done < <(echo "$repos_json" | jq -c ".[] | select(.fork == false and .name != \"${gh_user}\")") | ||||||
|
|
||||||
| # Build contributions section (forks with description) | ||||||
| # Build own repos section — single jq pass (no loop) | ||||||
| local own_repos | ||||||
| own_repos=$(echo "$repos_json" | jq -r --arg user "$gh_user" ' | ||||||
| [.[] | select(.fork == false and .name != $user)] | | ||||||
| map("- **[\(.name | gsub("[\\[\\]()]"; ""))](\(.html_url))** -- \(.description // "No description" | gsub("[\\[\\]()]"; ""))") | | ||||||
| .[] | ||||||
| ') | ||||||
|
coderabbitai[bot] marked this conversation as resolved.
|
||||||
|
|
||||||
| # Build contributions section — batch-fetch parent URLs for forks | ||||||
| local fork_names | ||||||
| fork_names=$(echo "$repos_json" | jq -r '.[] | select(.fork == true) | .name') | ||||||
| local contrib_repos="" | ||||||
| while IFS= read -r row; do | ||||||
| [[ -z "$row" ]] && continue | ||||||
| local rname rdesc rurl | ||||||
| rname=$(echo "$row" | jq -r '.name') | ||||||
| rdesc=$(echo "$row" | jq -r '.description // "No description"') | ||||||
| rurl=$(echo "$row" | jq -r '.html_url') | ||||||
| # Try to get parent repo URL | ||||||
| local parent_url | ||||||
| parent_url=$(gh api "repos/${gh_user}/${rname}" --jq '.parent.html_url // empty' 2>/dev/null) | ||||||
| if [[ -n "$parent_url" ]]; then | ||||||
| contrib_repos="${contrib_repos}- **[${rname}](${parent_url})** -- ${rdesc}"$'\n' | ||||||
| else | ||||||
| if [[ -n "$fork_names" ]]; then | ||||||
| # Fetch all fork details in parallel (up to 6 concurrent) to get parent URLs | ||||||
| local fork_details | ||||||
| fork_details=$(echo "$fork_names" | xargs -P 6 -I{} gh api "repos/${gh_user}/{}" --jq ' | ||||||
| "\(.name)\t\(.description // "No description" | gsub("[\\t\\n]"; " "))\t\(.parent.html_url // .html_url)" | ||||||
| ' || true) | ||||||
| while IFS=$'\t' read -r rname rdesc rurl; do | ||||||
| [[ -z "$rname" ]] && continue | ||||||
| rname=$(_sanitize_md "$rname") | ||||||
| rdesc=$(_sanitize_md "$rdesc") | ||||||
| contrib_repos="${contrib_repos}- **[${rname}](${rurl})** -- ${rdesc}"$'\n' | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Repository names and descriptions for forks are not sanitized before being embedded in the markdown output. This allows for markdown injection from user-controlled repository data.
Suggested change
References
|
||||||
| fi | ||||||
| done < <(echo "$repos_json" | jq -c '.[] | select(.fork == true)') | ||||||
| done <<<"$fork_details" | ||||||
| fi | ||||||
|
|
||||||
| # Build connect section | ||||||
| local connect="" | ||||||
| if [[ -n "$blog" ]]; then | ||||||
| connect="${connect}[](${blog})"$'\n' | ||||||
| local blog_display | ||||||
| blog_display="${blog##*//}" | ||||||
| blog_display=$(_sanitize_md "$blog_display") | ||||||
| connect="${connect}[](${blog})"$'\n' | ||||||
| fi | ||||||
| if [[ -n "$twitter" ]]; then | ||||||
| connect="${connect}[](https://twitter.com/${twitter})"$'\n' | ||||||
|
|
@@ -747,7 +788,7 @@ _generate_rich_readme() { | |||||
| if [[ -n "$own_repos" ]]; then | ||||||
| echo "## Projects" | ||||||
| echo "" | ||||||
| printf '%s' "$own_repos" | ||||||
| printf '%s\n' "$own_repos" | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The
Suggested change
|
||||||
| echo "" | ||||||
| fi | ||||||
| # Contributions | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The script uses tab characters as delimiters when processing data from the GitHub API. Since fields like
bioanddescriptionare user-controlled and can contain tabs, an attacker can inject tabs to shift fields and control variables likeblogorrurl. This can be used to bypass sanitization or inject malicious URLs into the markdown output. Consider sanitizing user-controlled fields for the delimiter character (tab) before joining them injq.