From 94b2f5b04295cf5537c0a7b8f3525a7dc2e7ef1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SAL=C4=B0H=20=C3=96ZKARA?= <58659931+salihozkara@users.noreply.github.com> Date: Tue, 21 Oct 2025 10:04:42 +0300 Subject: [PATCH] Refactor SEO workflow to use GitHub API for changed files Replaces git diff logic with GitHub API to reliably detect added/modified markdown files in docs/en/ from merged PRs. Improves branch creation and file processing steps, ensuring only relevant files are handled for SEO updates. --- .github/workflows/auto-add-seo.yml | 124 ++++++++++++++++------------- 1 file changed, 69 insertions(+), 55 deletions(-) diff --git a/.github/workflows/auto-add-seo.yml b/.github/workflows/auto-add-seo.yml index b8829f869c..c56079af25 100644 --- a/.github/workflows/auto-add-seo.yml +++ b/.github/workflows/auto-add-seo.yml @@ -23,7 +23,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - ref: ${{ github.event.pull_request.merge_commit_sha }} + ref: ${{ github.event.pull_request.base.ref }} fetch-depth: 0 token: ${{ secrets.GITHUB_TOKEN }} @@ -36,78 +36,90 @@ jobs: run: | pip install openai + - name: Get changed markdown files from merged PR using GitHub API + id: changed-files + uses: actions/github-script@v7 + with: + script: | + const prNumber = ${{ github.event.pull_request.number }}; + + // Get all files changed in the PR with pagination + const allFiles = []; + let page = 1; + let hasMore = true; + + while (hasMore) { + const { data: files } = await github.rest.pulls.listFiles({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + per_page: 100, + page: page + }); + + allFiles.push(...files); + hasMore = files.length === 100; + page++; + } + + console.log(`Total files changed in PR: ${allFiles.length}`); + + // Filter for only added/modified markdown files in docs/en/ + const changedMdFiles = allFiles + .filter(file => + (file.status === 'added' || file.status === 'modified') && + file.filename.startsWith('docs/en/') && 
+ file.filename.endsWith('.md') + ) + .map(file => file.filename); + + console.log(`\nFound ${changedMdFiles.length} added/modified markdown files in docs/en/:`); + changedMdFiles.forEach(file => console.log(` - ${file}`)); + + // Write to environment file for next steps + const fs = require('fs'); + fs.writeFileSync(process.env.GITHUB_OUTPUT, + `any_changed=${changedMdFiles.length > 0 ? 'true' : 'false'}\n` + + `all_changed_files=${changedMdFiles.join(' ')}\n`, + { flag: 'a' } + ); + + return changedMdFiles; + - name: Create new branch for SEO updates + if: steps.changed-files.outputs.any_changed == 'true' run: | git config --local user.email "github-actions[bot]@users.noreply.github.com" git config --local user.name "github-actions[bot]" - # Checkout base branch first - git checkout ${{ github.event.pull_request.base.ref }} - - # Create new branch from base + # Create new branch from current base branch (which already has merged files) BRANCH_NAME="auto-docs-seo/${{ github.event.pull_request.number }}" git checkout -b $BRANCH_NAME echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV - - - name: Get changed markdown files from merged PR - id: changed-files - run: | - # Get the list of commits in the PR - PR_HEAD_SHA="${{ github.event.pull_request.head.sha }}" - PR_BASE_SHA="${{ github.event.pull_request.base.sha }}" - - echo "PR commits range: $PR_BASE_SHA..$PR_HEAD_SHA" - # Get files changed in the PR commits only (only Added and Modified, exclude Deleted) - FILES=$(git diff --name-only --diff-filter=AM $PR_BASE_SHA..$PR_HEAD_SHA | grep 'docs/en/.*\.md$' || true) - - echo "Files changed in the merged PR (added/modified only):" - echo "$FILES" + echo "✅ Created branch: $BRANCH_NAME" echo "" - - echo "changed_files<<EOF" >> $GITHUB_OUTPUT - echo "$FILES" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT - - if [ -z "$FILES" ]; then - echo "has_files=false" >> $GITHUB_OUTPUT - echo "No markdown files changed in docs/en/" - else - echo "has_files=true" >> $GITHUB_OUTPUT - - # 
Checkout the changed files from merge commit to get the merged content - echo "" - echo "Checking out changed files from merge commit..." - while IFS= read -r file; do - if [ -n "$file" ]; then - echo " Checking out: $file" - # Create directory if it doesn't exist - mkdir -p "$(dirname "$file")" - # Checkout the file from merge commit - if ! git show "${{ github.event.pull_request.merge_commit_sha }}:$file" > "$file" 2>err.log; then - echo " Warning: Could not checkout $file" - echo " Reason: $(cat err.log)" - fi - fi - done <<< "$FILES" - - echo "" - echo "Files now in working directory:" - git status --short - fi + echo "📝 Files to process for SEO descriptions:" + for file in ${{ steps.changed-files.outputs.all_changed_files }}; do + if [ -f "$file" ]; then + echo " ✓ $file" + else + echo " ✗ $file (not found)" + fi + done - name: Process changed files and add SEO descriptions - if: steps.changed-files.outputs.has_files == 'true' + if: steps.changed-files.outputs.any_changed == 'true' env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} IGNORED_FOLDERS: ${{ vars.DOCS_SEO_IGNORED_FOLDERS }} - CHANGED_FILES: ${{ steps.changed-files.outputs.changed_files }} + CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} run: | python3 .github/scripts/add_seo_descriptions.py - name: Commit and push changes - if: steps.changed-files.outputs.has_files == 'true' + if: steps.changed-files.outputs.any_changed == 'true' run: | git add -A docs/en/ @@ -115,9 +127,11 @@ jobs: echo "No changes to commit" echo "has_commits=false" >> $GITHUB_ENV else + BRANCH_NAME="auto-docs-seo/${{ github.event.pull_request.number }}" git commit -m "docs: Add SEO descriptions to modified documentation files" -m "Related to PR #${{ github.event.pull_request.number }}" - git push origin ${{ env.BRANCH_NAME }} + git push origin $BRANCH_NAME echo "has_commits=true" >> $GITHUB_ENV + echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV fi - name: Create Pull Request @@ -133,7 +147,7 @@ jobs: const 
skippedIgnored = parseInt(stats[3]) || 0; const prNumber = ${{ github.event.pull_request.number }}; const baseRef = '${{ github.event.pull_request.base.ref }}'; - const branchName = '${{ env.BRANCH_NAME }}'; + const branchName = `auto-docs-seo/${prNumber}`; if (processedCount > 0) { // Read the actually updated files list (not all changed files)