mirror of https://github.com/abpframework/abp.git
3 changed files with 413 additions and 1 deletion
.github/scripts/add_seo_descriptions.py
@@ -0,0 +1,222 @@
import json
import os
import re
import sys

from openai import OpenAI

client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

def has_seo_description(content):
    """Check if the content already has an SEO block with a non-empty Description field."""
    # Match an SEO description block fenced with 3 or more backticks
    pattern = r'```+json\s*//\[doc-seo\]\s*(\{.*?\})\s*```+'
    match = re.search(pattern, content, flags=re.DOTALL)

    if not match:
        return False

    # Check that the Description field exists and is not empty
    try:
        seo_data = json.loads(match.group(1))
        return 'Description' in seo_data and seo_data['Description']
    except json.JSONDecodeError:
        return False

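# For reference, the block matched above sits at the top of a markdown file
# and has this shape (the same shape add_seo_description below produces;
# the description text here is a made-up example):
#
#   ```json
#   //[doc-seo]
#   {
#       "Description": "A short, social-media-friendly summary."
#   }
#   ```
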
def is_content_too_short(content):
    """Check if the content is shorter than 200 characters."""
    # Strip any SEO block (3+ backticks) first so the count reflects the real content
    clean_content = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '', content, flags=re.DOTALL)

    return len(clean_content.strip()) < 200


def get_content_preview(content, max_length=1000):
    """Get a preview of the content to send to OpenAI."""
    # Strip any existing SEO block (3+ backticks) first
    clean_content = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '', content, flags=re.DOTALL)

    return clean_content[:max_length].strip()

def generate_description(content, filename):
    """Generate an SEO description using OpenAI, with the system prompt from OpenAIService.cs."""
    try:
        preview = get_content_preview(content)

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """Create a short and engaging summary (1–2 sentences) for sharing this documentation link on Discord, LinkedIn, Reddit, Twitter and Facebook. Clearly describe what the page explains or teaches.
Highlight the value for developers using ABP Framework.
Be written in a friendly and professional tone.
Stay under 150 characters.
--> https://abp.io/docs/latest <--"""},
                {"role": "user", "content": f"""Generate a concise, informative meta description for this documentation page.

File: {filename}
Content Preview:
{preview}

Requirements:
- Maximum 150 characters

Generate only the description text, nothing else:"""}
            ],
            max_tokens=150,
            temperature=0.7
        )

        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"❌ Error generating description: {e}")
        # Fall back to a generic description derived from the filename
        return f"Learn about {os.path.splitext(filename)[0]} in ABP Framework documentation."

def add_seo_description(content, description):
    """Add or update the SEO description in the content."""
    # Escape special characters for JSON
    escaped_desc = description.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    # Check if an SEO block already exists (the \1 backreference requires the
    # closing fence to match the opening fence length)
    pattern = r'(```+)json\s*//\[doc-seo\]\s*(\{.*?\})\s*\1'
    match = re.search(pattern, content, flags=re.DOTALL)

    if match:
        # An SEO block exists: update its Description field
        backticks = match.group(1)
        json_str = match.group(2)

        try:
            # Parse the existing JSON and update the Description
            seo_data = json.loads(json_str)
            seo_data['Description'] = description
            # Convert back to formatted JSON
            updated_json = json.dumps(seo_data, indent=4, ensure_ascii=False)

            new_block = f'''{backticks}json
//[doc-seo]
{updated_json}
{backticks}'''

            # Replace the old block with the updated one. A callable replacement
            # is used so that backslashes in the JSON are not misread by re.sub
            # as group references.
            return re.sub(pattern, lambda _: new_block, content, count=1, flags=re.DOTALL)
        except json.JSONDecodeError:
            # The existing JSON is invalid; fall through and prepend a fresh block
            pass

    # No existing block (or the existing JSON was invalid): add a new block at the beginning
    seo_tag = f'''```json
//[doc-seo]
{{
    "Description": "{escaped_desc}"
}}
```

'''
    return seo_tag + content

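# A rough sketch of the two paths above, with hypothetical inputs:
#
#   add_seo_description('# Intro\n...', 'Short summary.')
#     -> prepends a fresh ```json //[doc-seo] block whose Description is
#        "Short summary." to the content
#
#   If the content already starts with a valid //[doc-seo] block, only its
#   Description field is rewritten; any other fields in the JSON are preserved.
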
def is_file_ignored(filepath, ignored_folders):
    """Check if the file is inside an ignored folder."""
    path_parts = filepath.split('/')
    return any(ignored in path_parts for ignored in ignored_folders)

def main():
    # Ignored folders from a GitHub variable (with default values)
    IGNORED_FOLDERS_STR = os.environ.get('IGNORED_FOLDERS', 'Blog-Posts,Community-Articles,_deleted,_resources')
    IGNORED_FOLDERS = [folder.strip() for folder in IGNORED_FOLDERS_STR.split(',') if folder.strip()]

    # Get the changed files from the command line or the environment
    if len(sys.argv) > 1:
        # Files passed as command-line arguments
        changed_files = sys.argv[1:]
    else:
        # Files from an environment variable (for GitHub Actions)
        changed_files_str = os.environ.get('CHANGED_FILES', '')
        changed_files = [f.strip() for f in changed_files_str.strip().split('\n') if f.strip()]

    processed_count = 0
    skipped_count = 0
    skipped_too_short = 0
    skipped_ignored = 0
    updated_files = []  # Track the files that were actually updated

    print("🤖 Processing changed markdown files...\n")
    print(f"🚫 Ignored folders: {', '.join(IGNORED_FOLDERS)}\n")

    for filepath in changed_files:
        if not filepath.endswith('.md'):
            continue

        print(f"📄 Processing: {filepath}")

        # Skip files in ignored folders
        if is_file_ignored(filepath, IGNORED_FOLDERS):
            print(" 🚫 Skipped (ignored folder)\n")
            skipped_ignored += 1
            skipped_count += 1
            continue

        try:
            # Read the file
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()

            # Skip if the content is too short (less than 200 characters)
            if is_content_too_short(content):
                print(" ⏭️ Skipped (content less than 200 characters)\n")
                skipped_too_short += 1
                skipped_count += 1
                continue

            # Skip if it already has an SEO description
            if has_seo_description(content):
                print(" ⏭️ Skipped (already has SEO description)\n")
                skipped_count += 1
                continue

            # Generate the description
            filename = os.path.basename(filepath)
            print(" 🤖 Generating description...")
            description = generate_description(content, filename)
            print(f" 💡 Generated: {description}")

            # Add the SEO tag and write the file back
            updated_content = add_seo_description(content, description)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(updated_content)

            print(" ✅ Updated successfully\n")
            processed_count += 1
            updated_files.append(filepath)

        except Exception as e:
            print(f" ❌ Error: {e}\n")

    print("\n📊 Summary:")
    print(f" ✅ Updated: {processed_count}")
    print(f" ⏭️ Skipped (total): {skipped_count}")
    print(f" ⏭️ Skipped (too short): {skipped_too_short}")
    print(f" 🚫 Skipped (ignored folder): {skipped_ignored}")

    # Save the counts and the updated-files list for the next workflow step
    with open('/tmp/seo_stats.txt', 'w') as f:
        f.write(f"{processed_count}\n{skipped_count}\n{skipped_too_short}\n{skipped_ignored}")

    with open('/tmp/seo_updated_files.txt', 'w') as f:
        f.write('\n'.join(updated_files))


if __name__ == '__main__':
    main()

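For a local dry run, the same script can be pointed at individual files on the command line (argv takes precedence over the CHANGED_FILES environment variable). A minimal sketch, assuming a hypothetical docs/en/Getting-Started.md and a valid API key:

    export OPENAI_API_KEY="sk-..."
    python3 .github/scripts/add_seo_descriptions.py docs/en/Getting-Started.md
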
@@ -0,0 +1,190 @@
name: Auto Add SEO Descriptions

on:
  pull_request:
    paths:
      - 'docs/en/**/*.md'
    branches:
      - 'rel-*'
      - 'dev'
    types: [closed]

jobs:
  add-seo-descriptions:
    if: |
      github.event.pull_request.merged == true &&
      !startsWith(github.event.pull_request.head.ref, 'auto-docs-seo/')
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.merge_commit_sha }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install openai

      - name: Create new branch for SEO updates
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"

          # Check out the base branch first
          git checkout ${{ github.event.pull_request.base.ref }}

          # Create a new branch from the base
          BRANCH_NAME="auto-docs-seo/${{ github.event.pull_request.number }}"
          git checkout -b $BRANCH_NAME
          echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV

      - name: Get changed markdown files from merged PR
        id: changed-files
        run: |
          # Files changed in the merged PR (only Added and Modified; Deleted is excluded)
          FILES=$(git diff --name-only --diff-filter=AM ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.merge_commit_sha }} | grep 'docs/en/.*\.md$' || true)

          echo "Files changed in the merged PR (added/modified only):"
          echo "$FILES"
          echo ""

          echo "changed_files<<EOF" >> $GITHUB_OUTPUT
          echo "$FILES" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT

          if [ -z "$FILES" ]; then
            echo "has_files=false" >> $GITHUB_OUTPUT
            echo "No markdown files changed in docs/en/"
          else
            echo "has_files=true" >> $GITHUB_OUTPUT

            # Check out the changed files from the merge commit to get the merged content
            echo ""
            echo "Checking out changed files from merge commit..."
            while IFS= read -r file; do
              if [ -n "$file" ]; then
                echo "  Checking out: $file"
                # Create the directory if it doesn't exist
                mkdir -p "$(dirname "$file")"
                # Check out the file from the merge commit
                if ! git show "${{ github.event.pull_request.merge_commit_sha }}:$file" > "$file" 2>err.log; then
                  echo "  Warning: Could not checkout $file"
                  echo "  Reason: $(cat err.log)"
                fi
              fi
            done <<< "$FILES"

            echo ""
            echo "Files now in working directory:"
            git status --short
          fi

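      # Note on the step above: the job sits on a fresh branch created from the
      # base ref, so `git show <merge_commit_sha>:<file> > <file>` is what brings
      # the merged content of each changed doc into the working tree before the
      # script runs.
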
      - name: Process changed files and add SEO descriptions
        if: steps.changed-files.outputs.has_files == 'true'
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          IGNORED_FOLDERS: ${{ vars.DOCS_SEO_IGNORED_FOLDERS }}
          CHANGED_FILES: ${{ steps.changed-files.outputs.changed_files }}
        run: |
          python3 .github/scripts/add_seo_descriptions.py

      - name: Commit and push changes
        if: steps.changed-files.outputs.has_files == 'true'
        run: |
          git add -A docs/en/

          if git diff --staged --quiet; then
            echo "No changes to commit"
            echo "has_commits=false" >> $GITHUB_ENV
          else
            git commit -m "docs: Add SEO descriptions to modified documentation files" -m "Related to PR #${{ github.event.pull_request.number }}"
            git push origin ${{ env.BRANCH_NAME }}
            echo "has_commits=true" >> $GITHUB_ENV
          fi

      - name: Create Pull Request
        if: env.has_commits == 'true'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const stats = fs.readFileSync('/tmp/seo_stats.txt', 'utf8').split('\n');
            const processedCount = parseInt(stats[0]) || 0;
            const skippedCount = parseInt(stats[1]) || 0;
            const skippedTooShort = parseInt(stats[2]) || 0;
            const skippedIgnored = parseInt(stats[3]) || 0;
            const prNumber = ${{ github.event.pull_request.number }};
            const baseRef = '${{ github.event.pull_request.base.ref }}';
            const branchName = '${{ env.BRANCH_NAME }}';

            if (processedCount > 0) {
              // Read the list of files that were actually updated (not all changed files)
              const updatedFilesStr = fs.readFileSync('/tmp/seo_updated_files.txt', 'utf8');
              const updatedFiles = updatedFilesStr.trim().split('\n').filter(f => f.trim());

              let prBody = '🤖 **Automated SEO Descriptions**\n\n';
              prBody += `This PR automatically adds SEO descriptions to documentation files that were modified in PR #${prNumber}.\n\n`;
              prBody += '## 📊 Summary\n';
              prBody += `- ✅ **Updated:** ${processedCount} file(s)\n`;
              prBody += `- ⏭️ **Skipped (total):** ${skippedCount} file(s)\n`;
              if (skippedTooShort > 0) {
                prBody += `  - ⏭️ Content < 200 chars: ${skippedTooShort} file(s)\n`;
              }
              if (skippedIgnored > 0) {
                prBody += `  - 🚫 Ignored folders: ${skippedIgnored} file(s)\n`;
              }
              prBody += '\n## 📝 Modified Files\n';
              prBody += updatedFiles.slice(0, 20).map(f => `- \`${f}\``).join('\n');
              if (updatedFiles.length > 20) {
                prBody += `\n- ... and ${updatedFiles.length - 20} more`;
              }
              prBody += '\n\n## 🔧 Details\n';
              prBody += `- **Related PR:** #${prNumber}\n\n`;
              prBody += 'These descriptions were automatically generated to improve SEO and search engine visibility. 🚀';

              const { data: pr } = await github.rest.pulls.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title: `docs: Add SEO descriptions (from PR ${prNumber})`,
                head: branchName,
                base: baseRef,
                body: prBody
              });

              console.log(`✅ Created PR: ${pr.html_url}`);

              // Add reviewers to the new PR (from a GitHub variable)
              const reviewersStr = '${{ vars.DOCS_SEO_REVIEWERS || '' }}';
              const reviewers = reviewersStr.split(',').map(r => r.trim()).filter(r => r);

              if (reviewers.length === 0) {
                console.log('⚠️ No reviewers specified in the DOCS_SEO_REVIEWERS variable.');
                return;
              }

              try {
                await github.rest.pulls.requestReviewers({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  pull_number: pr.number,
                  reviewers: reviewers,
                  team_reviewers: []
                });
                console.log(`✅ Added reviewers (${reviewers.join(', ')}) to PR ${pr.number}`);
              } catch (error) {
                console.log(`⚠️ Could not add reviewers: ${error.message}`);
              }
            }
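The workflow relies on repository configuration it does not create itself: the OPENAI_API_KEY secret and the DOCS_SEO_IGNORED_FOLDERS and DOCS_SEO_REVIEWERS variables. A minimal setup sketch with the GitHub CLI (all values are placeholders):

    gh secret set OPENAI_API_KEY --body "sk-..."
    gh variable set DOCS_SEO_IGNORED_FOLDERS --body "Blog-Posts,Community-Articles,_deleted,_resources"
    gh variable set DOCS_SEO_REVIEWERS --body "reviewer1,reviewer2"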