mirror of https://github.com/abpframework/abp.git
3 changed files with 413 additions and 1 deletion
.github/scripts/add_seo_descriptions.py
@@ -0,0 +1,222 @@
import json
import os
import re
import sys

from openai import OpenAI

client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

def has_seo_description(content):
    """Check if the content already has an SEO block with a non-empty Description field."""
    # Match an SEO description block fenced with 3 or more backticks
    pattern = r'```+json\s*//\[doc-seo\]\s*(\{.*?\})\s*```+'
    match = re.search(pattern, content, flags=re.DOTALL)

    if not match:
        return False

    # Check that the Description field exists and is not empty
    try:
        seo_data = json.loads(match.group(1))
        return 'Description' in seo_data and seo_data['Description']
    except json.JSONDecodeError:
        return False

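# For reference, the block matched above sits at the top of a markdown file
# and has this shape (the same shape add_seo_description below produces;
# the description text here is a made-up example):
#
#   ```json
#   //[doc-seo]
#   {
#       "Description": "A short, social-media-friendly summary."
#   }
#   ```
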
def is_content_too_short(content):
    """Check if the content is shorter than 200 characters."""
    # Strip any SEO block (3+ backticks) first so the count reflects the real content
    clean_content = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '', content, flags=re.DOTALL)

    return len(clean_content.strip()) < 200


def get_content_preview(content, max_length=1000):
    """Get a preview of the content to send to OpenAI."""
    # Strip any existing SEO block (3+ backticks) first
    clean_content = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '', content, flags=re.DOTALL)

    return clean_content[:max_length].strip()

def generate_description(content, filename):
    """Generate an SEO description using OpenAI, with the system prompt from OpenAIService.cs."""
    try:
        preview = get_content_preview(content)

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """Create a short and engaging summary (1–2 sentences) for sharing this documentation link on Discord, LinkedIn, Reddit, Twitter and Facebook. Clearly describe what the page explains or teaches.
Highlight the value for developers using ABP Framework.
Be written in a friendly and professional tone.
Stay under 150 characters.
--> https://abp.io/docs/latest <--"""},
                {"role": "user", "content": f"""Generate a concise, informative meta description for this documentation page.

File: {filename}
Content Preview:
{preview}

Requirements:
- Maximum 150 characters

Generate only the description text, nothing else:"""}
            ],
            max_tokens=150,
            temperature=0.7
        )

        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"❌ Error generating description: {e}")
        # Fall back to a generic description derived from the filename
        return f"Learn about {os.path.splitext(filename)[0]} in ABP Framework documentation."

def add_seo_description(content, description):
    """Add or update the SEO description in the content."""
    # Escape special characters for JSON
    escaped_desc = description.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    # Check if an SEO block already exists (the \1 backreference requires the
    # closing fence to match the opening fence length)
    pattern = r'(```+)json\s*//\[doc-seo\]\s*(\{.*?\})\s*\1'
    match = re.search(pattern, content, flags=re.DOTALL)

    if match:
        # An SEO block exists: update its Description field
        backticks = match.group(1)
        json_str = match.group(2)

        try:
            # Parse the existing JSON and update the Description
            seo_data = json.loads(json_str)
            seo_data['Description'] = description
            # Convert back to formatted JSON
            updated_json = json.dumps(seo_data, indent=4, ensure_ascii=False)

            new_block = f'''{backticks}json
//[doc-seo]
{updated_json}
{backticks}'''

            # Replace the old block with the updated one. A callable replacement
            # is used so that backslashes in the JSON are not misread by re.sub
            # as group references.
            return re.sub(pattern, lambda _: new_block, content, count=1, flags=re.DOTALL)
        except json.JSONDecodeError:
            # The existing JSON is invalid; fall through and prepend a fresh block
            pass

    # No existing block (or the existing JSON was invalid): add a new block at the beginning
    seo_tag = f'''```json
//[doc-seo]
{{
    "Description": "{escaped_desc}"
}}
```

'''
    return seo_tag + content

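# A rough sketch of the two paths above, with hypothetical inputs:
#
#   add_seo_description('# Intro\n...', 'Short summary.')
#     -> prepends a fresh ```json //[doc-seo] block whose Description is
#        "Short summary." to the content
#
#   If the content already starts with a valid //[doc-seo] block, only its
#   Description field is rewritten; any other fields in the JSON are preserved.
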
def is_file_ignored(filepath, ignored_folders):
    """Check if the file is inside an ignored folder."""
    path_parts = filepath.split('/')
    return any(ignored in path_parts for ignored in ignored_folders)

def main():
    # Ignored folders from a GitHub variable (with default values)
    IGNORED_FOLDERS_STR = os.environ.get('IGNORED_FOLDERS', 'Blog-Posts,Community-Articles,_deleted,_resources')
    IGNORED_FOLDERS = [folder.strip() for folder in IGNORED_FOLDERS_STR.split(',') if folder.strip()]

    # Get the changed files from the command line or the environment
    if len(sys.argv) > 1:
        # Files passed as command-line arguments
        changed_files = sys.argv[1:]
    else:
        # Files from an environment variable (for GitHub Actions)
        changed_files_str = os.environ.get('CHANGED_FILES', '')
        changed_files = [f.strip() for f in changed_files_str.strip().split('\n') if f.strip()]

    processed_count = 0
    skipped_count = 0
    skipped_too_short = 0
    skipped_ignored = 0
    updated_files = []  # Track the files that were actually updated

    print("🤖 Processing changed markdown files...\n")
    print(f"🚫 Ignored folders: {', '.join(IGNORED_FOLDERS)}\n")

    for filepath in changed_files:
        if not filepath.endswith('.md'):
            continue

        print(f"📄 Processing: {filepath}")

        # Skip files in ignored folders
        if is_file_ignored(filepath, IGNORED_FOLDERS):
            print(" 🚫 Skipped (ignored folder)\n")
            skipped_ignored += 1
            skipped_count += 1
            continue

        try:
            # Read the file
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()

            # Skip if the content is too short (less than 200 characters)
            if is_content_too_short(content):
                print(" ⏭️ Skipped (content less than 200 characters)\n")
                skipped_too_short += 1
                skipped_count += 1
                continue

            # Skip if it already has an SEO description
            if has_seo_description(content):
                print(" ⏭️ Skipped (already has SEO description)\n")
                skipped_count += 1
                continue

            # Generate the description
            filename = os.path.basename(filepath)
            print(" 🤖 Generating description...")
            description = generate_description(content, filename)
            print(f" 💡 Generated: {description}")

            # Add the SEO tag and write the file back
            updated_content = add_seo_description(content, description)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(updated_content)

            print(" ✅ Updated successfully\n")
            processed_count += 1
            updated_files.append(filepath)

        except Exception as e:
            print(f" ❌ Error: {e}\n")

    print("\n📊 Summary:")
    print(f" ✅ Updated: {processed_count}")
    print(f" ⏭️ Skipped (total): {skipped_count}")
    print(f" ⏭️ Skipped (too short): {skipped_too_short}")
    print(f" 🚫 Skipped (ignored folder): {skipped_ignored}")

    # Save the counts and the updated-files list for the next workflow step
    with open('/tmp/seo_stats.txt', 'w') as f:
        f.write(f"{processed_count}\n{skipped_count}\n{skipped_too_short}\n{skipped_ignored}")

    with open('/tmp/seo_updated_files.txt', 'w') as f:
        f.write('\n'.join(updated_files))


if __name__ == '__main__':
    main()

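For a local dry run, the same script can be pointed at individual files on the command line (argv takes precedence over the CHANGED_FILES environment variable). A minimal sketch, assuming a hypothetical docs/en/Getting-Started.md and a valid API key:

    export OPENAI_API_KEY="sk-..."
    python3 .github/scripts/add_seo_descriptions.py docs/en/Getting-Started.md
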
@@ -0,0 +1,190 @@
name: Auto Add SEO Descriptions

on:
  pull_request:
    paths:
      - 'docs/en/**/*.md'
    branches:
      - 'rel-*'
      - 'dev'
    types: [closed]

jobs:
  add-seo-descriptions:
    if: |
      github.event.pull_request.merged == true &&
      !startsWith(github.event.pull_request.head.ref, 'auto-docs-seo/')
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.merge_commit_sha }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install openai

      - name: Create new branch for SEO updates
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"

          # Check out the base branch first
          git checkout ${{ github.event.pull_request.base.ref }}

          # Create a new branch from the base
          BRANCH_NAME="auto-docs-seo/${{ github.event.pull_request.number }}"
          git checkout -b $BRANCH_NAME
          echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV

      - name: Get changed markdown files from merged PR
        id: changed-files
        run: |
          # Files changed in the merged PR (only Added and Modified; Deleted is excluded)
          FILES=$(git diff --name-only --diff-filter=AM ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.merge_commit_sha }} | grep 'docs/en/.*\.md$' || true)

          echo "Files changed in the merged PR (added/modified only):"
          echo "$FILES"
          echo ""

          echo "changed_files<<EOF" >> $GITHUB_OUTPUT
          echo "$FILES" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT

          if [ -z "$FILES" ]; then
            echo "has_files=false" >> $GITHUB_OUTPUT
            echo "No markdown files changed in docs/en/"
          else
            echo "has_files=true" >> $GITHUB_OUTPUT

            # Check out the changed files from the merge commit to get the merged content
            echo ""
            echo "Checking out changed files from merge commit..."
            while IFS= read -r file; do
              if [ -n "$file" ]; then
                echo "  Checking out: $file"
                # Create the directory if it doesn't exist
                mkdir -p "$(dirname "$file")"
                # Check out the file from the merge commit
                if ! git show "${{ github.event.pull_request.merge_commit_sha }}:$file" > "$file" 2>err.log; then
                  echo "  Warning: Could not checkout $file"
                  echo "  Reason: $(cat err.log)"
                fi
              fi
            done <<< "$FILES"

            echo ""
            echo "Files now in working directory:"
            git status --short
          fi

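      # Note on the step above: the job sits on a fresh branch created from the
      # base ref, so `git show <merge_commit_sha>:<file> > <file>` is what brings
      # the merged content of each changed doc into the working tree before the
      # script runs.
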
      - name: Process changed files and add SEO descriptions
        if: steps.changed-files.outputs.has_files == 'true'
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          IGNORED_FOLDERS: ${{ vars.DOCS_SEO_IGNORED_FOLDERS }}
          CHANGED_FILES: ${{ steps.changed-files.outputs.changed_files }}
        run: |
          python3 .github/scripts/add_seo_descriptions.py

      - name: Commit and push changes
        if: steps.changed-files.outputs.has_files == 'true'
        run: |
          git add -A docs/en/

          if git diff --staged --quiet; then
            echo "No changes to commit"
            echo "has_commits=false" >> $GITHUB_ENV
          else
            git commit -m "docs: Add SEO descriptions to modified documentation files" -m "Related to PR #${{ github.event.pull_request.number }}"
            git push origin ${{ env.BRANCH_NAME }}
            echo "has_commits=true" >> $GITHUB_ENV
          fi

      - name: Create Pull Request
        if: env.has_commits == 'true'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const stats = fs.readFileSync('/tmp/seo_stats.txt', 'utf8').split('\n');
            const processedCount = parseInt(stats[0]) || 0;
            const skippedCount = parseInt(stats[1]) || 0;
            const skippedTooShort = parseInt(stats[2]) || 0;
            const skippedIgnored = parseInt(stats[3]) || 0;
            const prNumber = ${{ github.event.pull_request.number }};
            const baseRef = '${{ github.event.pull_request.base.ref }}';
            const branchName = '${{ env.BRANCH_NAME }}';

            if (processedCount > 0) {
              // Read the list of files that were actually updated (not all changed files)
              const updatedFilesStr = fs.readFileSync('/tmp/seo_updated_files.txt', 'utf8');
              const updatedFiles = updatedFilesStr.trim().split('\n').filter(f => f.trim());

              let prBody = '🤖 **Automated SEO Descriptions**\n\n';
              prBody += `This PR automatically adds SEO descriptions to documentation files that were modified in PR #${prNumber}.\n\n`;
              prBody += '## 📊 Summary\n';
              prBody += `- ✅ **Updated:** ${processedCount} file(s)\n`;
              prBody += `- ⏭️ **Skipped (total):** ${skippedCount} file(s)\n`;
              if (skippedTooShort > 0) {
                prBody += `  - ⏭️ Content < 200 chars: ${skippedTooShort} file(s)\n`;
              }
              if (skippedIgnored > 0) {
                prBody += `  - 🚫 Ignored folders: ${skippedIgnored} file(s)\n`;
              }
              prBody += '\n## 📝 Modified Files\n';
              prBody += updatedFiles.slice(0, 20).map(f => `- \`${f}\``).join('\n');
              if (updatedFiles.length > 20) {
                prBody += `\n- ... and ${updatedFiles.length - 20} more`;
              }
              prBody += '\n\n## 🔧 Details\n';
              prBody += `- **Related PR:** #${prNumber}\n\n`;
              prBody += 'These descriptions were automatically generated to improve SEO and search engine visibility. 🚀';

              const { data: pr } = await github.rest.pulls.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title: `docs: Add SEO descriptions (from PR ${prNumber})`,
                head: branchName,
                base: baseRef,
                body: prBody
              });

              console.log(`✅ Created PR: ${pr.html_url}`);

              // Add reviewers to the new PR (from a GitHub variable)
              const reviewersStr = '${{ vars.DOCS_SEO_REVIEWERS || '' }}';
              const reviewers = reviewersStr.split(',').map(r => r.trim()).filter(r => r);

              if (reviewers.length === 0) {
                console.log('⚠️ No reviewers specified in the DOCS_SEO_REVIEWERS variable.');
                return;
              }

              try {
                await github.rest.pulls.requestReviewers({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  pull_number: pr.number,
                  reviewers: reviewers,
                  team_reviewers: []
                });
                console.log(`✅ Added reviewers (${reviewers.join(', ')}) to PR ${pr.number}`);
              } catch (error) {
                console.log(`⚠️ Could not add reviewers: ${error.message}`);
              }
            }
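The workflow relies on repository configuration it does not create itself: the OPENAI_API_KEY secret and the DOCS_SEO_IGNORED_FOLDERS and DOCS_SEO_REVIEWERS variables. A minimal setup sketch with the GitHub CLI (all values are placeholders):

    gh secret set OPENAI_API_KEY --body "sk-..."
    gh variable set DOCS_SEO_IGNORED_FOLDERS --body "Blog-Posts,Community-Articles,_deleted,_resources"
    gh variable set DOCS_SEO_REVIEWERS --body "reviewer1,reviewer2"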