Browse Source

Refactor SEO workflow to use GitHub API for changed files

Replaces git diff logic with GitHub API to reliably detect added/modified markdown files in docs/en/ from merged PRs. Improves branch creation and file processing steps, ensuring only relevant files are handled for SEO updates.
pull/24004/head
SALİH ÖZKARA 2 months ago
parent
commit
94b2f5b042
  1. 124
      .github/workflows/auto-add-seo.yml

124
.github/workflows/auto-add-seo.yml

@ -23,7 +23,7 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.merge_commit_sha }}
ref: ${{ github.event.pull_request.base.ref }}
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
@ -36,78 +36,90 @@ jobs:
run: |
pip install openai
- name: Get changed markdown files from merged PR using GitHub API
id: changed-files
uses: actions/github-script@v7
with:
script: |
const prNumber = ${{ github.event.pull_request.number }};
// Get all files changed in the PR with pagination
const allFiles = [];
let page = 1;
let hasMore = true;
while (hasMore) {
const { data: files } = await github.rest.pulls.listFiles({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
per_page: 100,
page: page
});
allFiles.push(...files);
hasMore = files.length === 100;
page++;
}
console.log(`Total files changed in PR: ${allFiles.length}`);
// Filter for only added/modified markdown files in docs/en/
const changedMdFiles = allFiles
.filter(file =>
(file.status === 'added' || file.status === 'modified') &&
file.filename.startsWith('docs/en/') &&
file.filename.endsWith('.md')
)
.map(file => file.filename);
console.log(`\nFound ${changedMdFiles.length} added/modified markdown files in docs/en/:`);
changedMdFiles.forEach(file => console.log(` - ${file}`));
// Write to environment file for next steps
const fs = require('fs');
fs.writeFileSync(process.env.GITHUB_OUTPUT,
`any_changed=${changedMdFiles.length > 0 ? 'true' : 'false'}\n` +
`all_changed_files=${changedMdFiles.join(' ')}\n`,
{ flag: 'a' }
);
return changedMdFiles;
- name: Create new branch for SEO updates
if: steps.changed-files.outputs.any_changed == 'true'
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
# Checkout base branch first
git checkout ${{ github.event.pull_request.base.ref }}
# Create new branch from base
# Create new branch from current base branch (which already has merged files)
BRANCH_NAME="auto-docs-seo/${{ github.event.pull_request.number }}"
git checkout -b $BRANCH_NAME
echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV
- name: Get changed markdown files from merged PR
id: changed-files
run: |
# Get the list of commits in the PR
PR_HEAD_SHA="${{ github.event.pull_request.head.sha }}"
PR_BASE_SHA="${{ github.event.pull_request.base.sha }}"
echo "PR commits range: $PR_BASE_SHA..$PR_HEAD_SHA"
# Get files changed in the PR commits only (only Added and Modified, exclude Deleted)
FILES=$(git diff --name-only --diff-filter=AM $PR_BASE_SHA..$PR_HEAD_SHA | grep 'docs/en/.*\.md$' || true)
echo "Files changed in the merged PR (added/modified only):"
echo "$FILES"
echo "✅ Created branch: $BRANCH_NAME"
echo ""
echo "changed_files<<EOF" >> $GITHUB_OUTPUT
echo "$FILES" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
if [ -z "$FILES" ]; then
echo "has_files=false" >> $GITHUB_OUTPUT
echo "No markdown files changed in docs/en/"
else
echo "has_files=true" >> $GITHUB_OUTPUT
# Checkout the changed files from merge commit to get the merged content
echo ""
echo "Checking out changed files from merge commit..."
while IFS= read -r file; do
if [ -n "$file" ]; then
echo " Checking out: $file"
# Create directory if it doesn't exist
mkdir -p "$(dirname "$file")"
# Checkout the file from merge commit
if ! git show "${{ github.event.pull_request.merge_commit_sha }}:$file" > "$file" 2>err.log; then
echo " Warning: Could not checkout $file"
echo " Reason: $(cat err.log)"
fi
fi
done <<< "$FILES"
echo ""
echo "Files now in working directory:"
git status --short
fi
echo "📝 Files to process for SEO descriptions:"
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
if [ -f "$file" ]; then
echo " ✓ $file"
else
echo " ✗ $file (not found)"
fi
done
- name: Process changed files and add SEO descriptions
if: steps.changed-files.outputs.has_files == 'true'
if: steps.changed-files.outputs.any_changed == 'true'
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
IGNORED_FOLDERS: ${{ vars.DOCS_SEO_IGNORED_FOLDERS }}
CHANGED_FILES: ${{ steps.changed-files.outputs.changed_files }}
CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
run: |
python3 .github/scripts/add_seo_descriptions.py
- name: Commit and push changes
if: steps.changed-files.outputs.has_files == 'true'
if: steps.changed-files.outputs.any_changed == 'true'
run: |
git add -A docs/en/
@ -115,9 +127,11 @@ jobs:
echo "No changes to commit"
echo "has_commits=false" >> $GITHUB_ENV
else
BRANCH_NAME="auto-docs-seo/${{ github.event.pull_request.number }}"
git commit -m "docs: Add SEO descriptions to modified documentation files" -m "Related to PR #${{ github.event.pull_request.number }}"
git push origin ${{ env.BRANCH_NAME }}
git push origin $BRANCH_NAME
echo "has_commits=true" >> $GITHUB_ENV
echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV
fi
- name: Create Pull Request
@ -133,7 +147,7 @@ jobs:
const skippedIgnored = parseInt(stats[3]) || 0;
const prNumber = ${{ github.event.pull_request.number }};
const baseRef = '${{ github.event.pull_request.base.ref }}';
const branchName = '${{ env.BRANCH_NAME }}';
const branchName = `auto-docs-seo/${prNumber}`;
if (processedCount > 0) {
// Read the actually updated files list (not all changed files)

Loading…
Cancel
Save