Browse Source

Refactor TOC generation to use Markdig and remove HtmlAgilityPack

Replaces the previous HTML-based table of contents (TOC) extraction using HtmlAgilityPack with a Markdig-based approach. Introduces custom Markdig extensions and renderers to extract headings directly from markdown, updates the TOC service and interface, and removes the HtmlAgilityPack dependency from the project.
pull/23666/head
Ahmet Çelik 5 months ago
parent
commit
1eecff9ea7
  1. 1
      Directory.Packages.props
  2. 2
      modules/docs/src/Volo.Docs.Web/Markdown/MarkDigMarkdownConverter.cs
  3. 12
      modules/docs/src/Volo.Docs.Web/Pages/Documents/Project/Index.cshtml.cs
  4. 73
      modules/docs/src/Volo.Docs.Web/TableOfContents/CustomHeadingRenderer.cs
  5. 30
      modules/docs/src/Volo.Docs.Web/TableOfContents/HeadingExtractionExtension.cs
  6. 2
      modules/docs/src/Volo.Docs.Web/TableOfContents/ITocGeneratorService.cs
  7. 169
      modules/docs/src/Volo.Docs.Web/TableOfContents/TocGeneratorService.cs
  8. 1
      modules/docs/src/Volo.Docs.Web/Volo.Docs.Web.csproj

1
Directory.Packages.props

@ -29,7 +29,6 @@
<PackageVersion Include="Dapper" Version="2.1.66" />
<PackageVersion Include="Dapr.AspNetCore" Version="1.15.4" />
<PackageVersion Include="Dapr.Client" Version="1.15.4" />
<PackageVersion Include="HtmlAgilityPack" Version="1.12.2" />
<PackageVersion Include="MyCSharp.HttpUserAgentParser" Version="3.0.25" />
<PackageVersion Include="Devart.Data.Oracle.EFCore" Version="10.4.235.9" />
<PackageVersion Include="DistributedLock.Core" Version="1.0.8" />

2
modules/docs/src/Volo.Docs.Web/Markdown/MarkDigMarkdownConverter.cs

@ -1,5 +1,6 @@
using System.Text;
using Markdig;
using Markdig.Extensions.AutoIdentifiers;
using Volo.Abp.DependencyInjection;
using Volo.Docs.Markdown.Extensions;
@ -12,6 +13,7 @@ namespace Volo.Docs.Markdown
public MarkDigMarkdownConverter()
{
_markdownPipeline = new MarkdownPipelineBuilder()
.UseAutoIdentifiers(AutoIdentifierOptions.GitHub)
.UseAutoLinks()
.UseBootstrap()
.UseGridTables()

12
modules/docs/src/Volo.Docs.Web/Pages/Documents/Project/Index.cshtml.cs

@ -539,17 +539,15 @@ namespace Volo.Docs.Pages.Documents.Project
Document = await GetSpecificDocumentOrDefaultAsync(language);
DocumentLanguageCode = language;
DocumentNameWithExtension = Document.Name;
SetDocumentPageTitle();
await ConvertDocumentContentToHtmlAsync();
SetDocumentPageTitle();
if (Document != null && !string.IsNullOrEmpty(Document.Content))
{
var (toc, processedContent) = _tocGeneratorService.GenerateTocAndProcessHeadings(Document.Content);
Document.Content = processedContent;
TocHtml = toc;
TocHtml = _tocGeneratorService.GenerateToc(Document.Content);
}
await ConvertDocumentContentToHtmlAsync();
return true;
}
catch (DocumentNotFoundException e)

73
modules/docs/src/Volo.Docs.Web/TableOfContents/CustomHeadingRenderer.cs

@ -0,0 +1,73 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Markdig.Renderers;
using Markdig.Renderers.Html;
using Markdig.Syntax;
using Markdig.Syntax.Inlines;
namespace Volo.Docs.TableOfContents;
/// <summary>
/// HTML renderer for <see cref="HeadingBlock"/> that records every heading it renders
/// (level, plain text and id) in <see cref="HeadingExtractionExtension.Headings"/> and then
/// delegates the actual HTML output to a regular <see cref="HeadingRenderer"/>.
/// </summary>
public class CustomHeadingRenderer : MarkdownObjectRenderer<HtmlRenderer, HeadingBlock>
{
    private readonly HeadingExtractionExtension _extension;
    private readonly HeadingRenderer _originalRenderer;

    /// <summary>
    /// Creates the renderer.
    /// </summary>
    /// <param name="extension">Extension whose <c>Headings</c> list receives the collected entries.</param>
    /// <param name="originalRenderer">
    /// Renderer used to produce the heading HTML; when <c>null</c> a default
    /// <see cref="HeadingRenderer"/> is created.
    /// </param>
    public CustomHeadingRenderer(HeadingExtractionExtension extension, HeadingRenderer originalRenderer)
    {
        _extension = extension;
        _originalRenderer = originalRenderer ?? new HeadingRenderer();
    }

    /// <summary>
    /// Records the heading (level, plain text, id or empty string when the block has no id attribute)
    /// and then lets the wrapped renderer write the HTML.
    /// </summary>
    protected override void Write(HtmlRenderer renderer, HeadingBlock headingBlock)
    {
        var headingText = GetPlainText(headingBlock.Inline);
        var headingId = headingBlock.TryGetAttributes()?.Id ?? string.Empty;

        _extension.Headings.Add((headingBlock.Level, headingText, headingId));

        _originalRenderer.Write(renderer, headingBlock);
    }

    /// <summary>
    /// Flattens the inline tree of a heading into its visible text, in document order.
    /// </summary>
    /// <param name="container">Root inline container of the heading; may be <c>null</c>.</param>
    /// <returns>
    /// The concatenated text, or an empty string for a <c>null</c> container.
    /// The result is plain text and is NOT HTML-encoded; callers that embed it in markup must encode it.
    /// </returns>
    private static string GetPlainText(ContainerInline container)
    {
        if (container == null)
        {
            return string.Empty;
        }

        var builder = new StringBuilder();

        // Explicit stack instead of recursion; children are pushed in reverse so that
        // popping yields them left-to-right.
        var pending = new Stack<Inline>();
        PushChildrenReversed(pending, container);

        while (pending.Count > 0)
        {
            switch (pending.Pop())
            {
                case LiteralInline literal:
                    // ToString() on the slice avoids boxing it through Append(object).
                    builder.Append(literal.Content.ToString());
                    break;

                case CodeInline code:
                    builder.Append(code.Content);
                    break;

                case HtmlEntityInline entity:
                    // "&amp;" and friends are parsed into their own inline; without this
                    // case the character would silently vanish from the heading text.
                    builder.Append(entity.Transcoded.ToString());
                    break;

                case LineBreakInline:
                    // Multi-line (setext) headings: keep the words on both sides separated.
                    builder.Append(' ');
                    break;

                case ContainerInline childContainer:
                    // Emphasis, links, etc.: their text lives in the children.
                    PushChildrenReversed(pending, childContainer);
                    break;
            }
        }

        return builder.ToString();
    }

    /// <summary>
    /// Pushes the direct children of <paramref name="parent"/> onto <paramref name="stack"/>
    /// last-to-first, so the first child ends up on top.
    /// </summary>
    private static void PushChildrenReversed(Stack<Inline> stack, ContainerInline parent)
    {
        foreach (var child in parent.Reverse())
        {
            stack.Push(child);
        }
    }
}

30
modules/docs/src/Volo.Docs.Web/TableOfContents/HeadingExtractionExtension.cs

@ -0,0 +1,30 @@
using System.Collections.Generic;
using Markdig;
using Markdig.Renderers;
using Markdig.Renderers.Html;
namespace Volo.Docs.TableOfContents;
/// <summary>
/// Markdig extension that swaps the HTML heading renderer for a <see cref="CustomHeadingRenderer"/>,
/// which appends every rendered heading to <see cref="Headings"/>.
/// </summary>
public class HeadingExtractionExtension : IMarkdownExtension
{
    /// <summary>
    /// Headings collected so far, in the order they were rendered.
    /// This class never clears the list, so entries accumulate for as long as the instance is reused.
    /// </summary>
    public List<(int Level, string Text, string Id)> Headings { get; } = [];

    /// <summary>
    /// Pipeline-construction hook; intentionally empty because this extension only touches the renderer.
    /// </summary>
    public void Setup(MarkdownPipelineBuilder pipeline)
    {
    }

    /// <summary>
    /// Replaces the registered <see cref="HeadingRenderer"/> (if any) with a
    /// <see cref="CustomHeadingRenderer"/> that wraps it. Non-HTML renderers are left untouched.
    /// </summary>
    /// <param name="pipeline">The pipeline being set up (not used).</param>
    /// <param name="renderer">The renderer whose object renderers are adjusted.</param>
    public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer)
    {
        if (renderer is not HtmlRenderer)
        {
            return;
        }

        // Idempotency guard: on a repeated call the original HeadingRenderer is already gone,
        // so without this check another CustomHeadingRenderer (wrapping a fresh default renderer)
        // would be appended every time.
        if (renderer.ObjectRenderers.Contains<CustomHeadingRenderer>())
        {
            return;
        }

        var originalHeadingRenderer = renderer.ObjectRenderers.Find<HeadingRenderer>();
        if (originalHeadingRenderer != null)
        {
            renderer.ObjectRenderers.Remove(originalHeadingRenderer);
        }

        // A null original is fine: CustomHeadingRenderer falls back to a default HeadingRenderer.
        renderer.ObjectRenderers.Add(new CustomHeadingRenderer(this, originalHeadingRenderer));
    }
}

2
modules/docs/src/Volo.Docs.Web/TableOfContents/ITocGeneratorService.cs

@ -4,5 +4,5 @@ namespace Volo.Docs.TableOfContents;
public interface ITocGeneratorService : IApplicationService
{
(string TocHtml, string ProcessedContent) GenerateTocAndProcessHeadings(string content);
string GenerateToc(string markdownContent);
}

169
modules/docs/src/Volo.Docs.Web/TableOfContents/TocGeneratorService.cs

@ -2,157 +2,120 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Markdig;
using Markdig.Extensions.AutoIdentifiers;
using Volo.Abp.DependencyInjection;
using HtmlAgilityPack;
using Volo.Docs.Markdown;
namespace Volo.Docs.TableOfContents;
public class TocGeneratorService : ITocGeneratorService, ITransientDependency
{
private readonly HashSet<string> _generatedIds = [];
public record Heading(int Level, string Text, string Id);
public IMarkdownConverter _markdownConverter;
public (string TocHtml, string ProcessedContent) GenerateTocAndProcessHeadings(string content)
public TocGeneratorService(IMarkdownConverter markdownConverter)
{
if (content.IsNullOrWhiteSpace())
{
return (string.Empty, string.Empty);
}
_generatedIds.Clear();
var tocHeadings = new List<Heading>();
var doc = new HtmlDocument();
doc.LoadHtml(content);
_markdownConverter = markdownConverter;
}
var nodesWithId = doc.DocumentNode.SelectNodes("//*[@id]");
if (nodesWithId != null)
public string GenerateToc(string markdownContent)
{
if (markdownContent.IsNullOrWhiteSpace())
{
foreach (var node in nodesWithId)
{
_generatedIds.Add(node.Id);
}
return string.Empty;
}
var headingNodes = doc.DocumentNode.SelectNodes("//h1|//h2|//h3|//h4|//h5|//h6");
if (headingNodes != null)
{
foreach (var node in headingNodes)
{
var id = node.Id;
if (id.IsNullOrWhiteSpace())
{
id = GenerateUniqueId(node.InnerText.Trim());
node.SetAttributeValue("id", id);
}
var level = int.Parse(node.Name.Substring(1));
if (level == 2 || level == 3)
{
tocHeadings.Add(new Heading(level, node.InnerText.Trim(), id));
}
}
}
var headingExtractionExtension = new HeadingExtractionExtension();
var pipelineBuilder = new MarkdownPipelineBuilder()
.UseAutoIdentifiers(AutoIdentifierOptions.GitHub)
.UseAdvancedExtensions();
pipelineBuilder.Use(headingExtractionExtension);
var tocHtml = BuildTocHtml(tocHeadings);
var pipeline = pipelineBuilder.Build();
Markdig.Markdown.ToHtml(markdownContent, pipeline);
var processedContent = doc.DocumentNode.OuterHtml;
var headings = headingExtractionExtension.Headings
.Select(h => new Heading(h.Level, h.Text, h.Id))
.ToList();
return (tocHtml, processedContent);
return BuildTocHtml(headings);
}
private string GenerateUniqueId(string text)
private static string BuildTocHtml(List<Heading> headings)
{
if (text.IsNullOrWhiteSpace())
{
return $"section-{Guid.NewGuid().ToString("N")[..8]}";
}
var baseId = text.ToLowerInvariant();
baseId = Regex.Replace(baseId, @"[^a-z0-9]+", "-", RegexOptions.Compiled);
baseId = baseId.Trim('-');
if (baseId.IsNullOrWhiteSpace())
if (headings == null || headings.Count == 0)
{
return $"section-{Guid.NewGuid().ToString("N")[..8]}";
return string.Empty;
}
var finalId = baseId;
var counter = 1;
var relevantHeadings = headings
.Where(h => h.Level is 2 or 3)
.ToList();
while (!_generatedIds.Add(finalId))
if (relevantHeadings.Count == 0)
{
finalId = $"{baseId}-{++counter}";
relevantHeadings = headings
.Where(h => h.Level == 1)
.ToList();
}
return finalId;
}
private static string BuildTocHtml(List<Heading> headings)
{
if (headings == null || headings.Count == 0)
if (relevantHeadings.Count == 0)
{
return string.Empty;
}
const int H2Level = 2;
const int H3Level = 3;
var baseLevel = relevantHeadings.Min(h => h.Level);
var normalizedHeadings = relevantHeadings
.Select(h => h with { Level = h.Level - baseLevel + 1 })
.ToList();
var tocBuilder = new StringBuilder();
tocBuilder.Append("<ul class=\"nav nav-pills flex-column\">");
var levelStack = new Stack<int>();
levelStack.Push(0);
var currentLevel = 0;
var isFirstH2 = true;
foreach (var (index, heading) in headings.Select((h, i) => (i, h)))
for (var i = 0; i < normalizedHeadings.Count; i++)
{
var isLastItem = index == headings.Count - 1;
var nextHeading = isLastItem ? null : headings[index + 1];
var hasChildren = nextHeading?.Level == H3Level && heading.Level == H2Level;
var heading = normalizedHeadings[i];
var previousLevel = levelStack.Peek();
if (heading.Level < currentLevel)
{
tocBuilder.Append("</ul></li>");
}
else if (heading.Level == currentLevel && heading.Level == H2Level && !isFirstH2)
if (heading.Level < previousLevel)
{
tocBuilder.Append("</li>");
while (heading.Level < levelStack.Peek())
{
tocBuilder.Append("</li></ul>");
levelStack.Pop();
}
}
if (heading.Level == H2Level)
else if (heading.Level > previousLevel)
{
var liClass = hasChildren ? "nav-item toc-item-has-children" : "nav-item";
tocBuilder.Append($"<li class=\"{liClass}\"><a class=\"nav-link\" href=\"#{heading.Id}\">{heading.Text}</a>");
isFirstH2 = false;
tocBuilder.Append("<ul class=\"nav nav-pills flex-column\">");
levelStack.Push(heading.Level);
}
else if (heading.Level == H3Level)
else if (i > 0)
{
if (currentLevel < H3Level)
{
tocBuilder.Append("<ul class=\"nav nav-pills flex-column\">");
}
tocBuilder.Append($"<li class=\"nav-item\"><a class=\"nav-link\" href=\"#{heading.Id}\">{heading.Text}</a></li>");
tocBuilder.Append("</li>");
}
currentLevel = heading.Level;
}
var hasChildren = (i + 1 < normalizedHeadings.Count) &&
(normalizedHeadings[i + 1].Level > heading.Level);
if (currentLevel == H3Level)
{
tocBuilder.Append("</ul></li>");
var liClass = hasChildren ? "nav-item toc-item-has-children" : "nav-item";
tocBuilder.Append($"<li class=\"{liClass}\"><a class=\"nav-link\" href=\"#{heading.Id}\">{heading.Text}</a>");
}
else if (currentLevel == H2Level)
if (normalizedHeadings.Count > 0)
{
tocBuilder.Append("</li>");
}
tocBuilder.Append("</ul>");
while (levelStack.Count > 1)
{
tocBuilder.Append("</ul>");
levelStack.Pop();
}
return tocBuilder.ToString();
}
}

1
modules/docs/src/Volo.Docs.Web/Volo.Docs.Web.csproj

@ -21,7 +21,6 @@
<ProjectReference Include="..\..\..\..\framework\src\Volo.Abp.AspNetCore.Mvc.UI.Packages\Volo.Abp.AspNetCore.Mvc.UI.Packages.csproj" />
<ProjectReference Include="..\..\..\..\framework\src\Volo.Abp.AspNetCore.Mvc.UI.Theme.Shared\Volo.Abp.AspNetCore.Mvc.UI.Theme.Shared.csproj" />
<ProjectReference Include="..\Volo.Docs.Application.Contracts\Volo.Docs.Application.Contracts.csproj" />
<PackageReference Include="HtmlAgilityPack" />
<PackageReference Include="Markdig.Signed" />
<PackageReference Include="Scriban" />
<PackageReference Include="Microsoft.Extensions.FileProviders.Embedded" />

Loading…
Cancel
Save