diff --git a/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IHtmlToPdfRenderer.cs b/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IHtmlToPdfRenderer.cs
index c17908af51..ad1cc5105b 100644
--- a/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IHtmlToPdfRenderer.cs
+++ b/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IHtmlToPdfRenderer.cs
@@ -7,4 +7,13 @@ namespace Volo.Docs.Projects.Pdf;
 public interface IHtmlToPdfRenderer
 {
     Task RenderAsync(string title, string html, List documents);
+
+    /// <summary>
+    /// Merges the given PDF streams, in list order, into one PDF document.
+    /// </summary>
+    /// <param name="pdfFiles">PDF streams to merge.</param>
+    /// <param name="title">Title written to the merged document's info.</param>
+    /// <param name="disposeStreams">When true, each input stream is disposed once it has been processed.</param>
+    /// <returns>The merged PDF stream, positioned at its start.</returns>
+    Task MergePdfFilesAsync(List pdfFiles, string title, bool disposeStreams = true);
 }
\ No newline at end of file
diff --git a/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IText/ITextHtmlToPdfRenderer.cs b/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IText/ITextHtmlToPdfRenderer.cs
index 1ee6b679e3..0a78ceb4fe 100644
--- a/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IText/ITextHtmlToPdfRenderer.cs
+++ b/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/IText/ITextHtmlToPdfRenderer.cs
@@ -45,6 +45,81 @@ public class ITextHtmlToPdfRenderer : IHtmlToPdfRenderer ,ITransientDependency
         return Task.FromResult(pdfStream);
     }
 
+    /// <summary>
+    /// Merges the given PDF streams, in list order, into a single in-memory PDF.
+    /// </summary>
+    /// <param name="pdfFiles">PDF streams to merge.</param>
+    /// <param name="title">Title written to the merged document's info.</param>
+    /// <param name="disposeStreams">
+    /// When true, each input stream is disposed as soon as it has been processed;
+    /// when false, the input streams are left open for the caller.
+    /// </param>
+    /// <returns>The merged PDF, rewound to position 0.</returns>
+    /// <exception cref="InvalidOperationException">An input PDF could not be read or copied.</exception>
+    public virtual async Task MergePdfFilesAsync(List pdfFiles, string title, bool disposeStreams = true)
+    {
+        var mergedStream = new MemoryStream();
+
+        try
+        {
+            var mergedPdfWriter = new PdfWriter(mergedStream);
+            // Closing the merged document must leave the returned stream open.
+            mergedPdfWriter.SetCloseStream(false);
+
+            var mergedPdfDocument = new iText.Kernel.Pdf.PdfDocument(mergedPdfWriter);
+            mergedPdfDocument.GetDocumentInfo().SetTitle(title);
+
+            for (var fileIndex = 0; fileIndex < pdfFiles.Count; fileIndex++)
+            {
+                var pdfFile = pdfFiles[fileIndex];
+
+                try
+                {
+                    using var reader = new PdfReader(pdfFile);
+                    // Only let the reader close its source when this method owns the inputs.
+                    reader.SetCloseStream(disposeStreams);
+                    using var sourcePdf = new iText.Kernel.Pdf.PdfDocument(reader);
+
+                    var pageCount = sourcePdf.GetNumberOfPages();
+                    for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++)
+                    {
+                        var page = sourcePdf.GetPage(pageNumber);
+                        mergedPdfDocument.AddPage(page.CopyTo(mergedPdfDocument));
+                    }
+                }
+                catch (Exception ex)
+                {
+                    // Identify the input by position: interpolating the stream only prints its type name.
+                    throw new InvalidOperationException($"Error merging PDF file at index {fileIndex}: {ex.Message}", ex);
+                }
+                finally
+                {
+                    if (disposeStreams)
+                    {
+                        try
+                        {
+                            await pdfFile.DisposeAsync();
+                        }
+                        catch
+                        {
+                            // Best effort: a failed dispose must not mask the merge result or error.
+                        }
+                    }
+                }
+            }
+
+            mergedPdfDocument.Close();
+            mergedStream.Position = 0;
+            return mergedStream;
+        }
+        catch
+        {
+            // The half-written document is abandoned; release its backing stream before rethrowing.
+            await mergedStream.DisposeAsync();
+            throw;
+        }
+    }
+
     private void BuildPdfOutlines(PdfOutline parentOutline, List pdfDocumentNodes)
     {
         foreach (var pdfDocumentNode in pdfDocumentNodes)
diff --git a/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/ProjectPdfGenerator.cs b/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/ProjectPdfGenerator.cs
index 5abb380199..a61a2b2b8d 100644
--- a/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/ProjectPdfGenerator.cs
+++ b/modules/docs/src/Volo.Docs.Domain/Volo/Docs/Projects/Pdf/ProjectPdfGenerator.cs
@@ -5,10 +5,8 @@ using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using Microsoft.Extensions.Logging;
-using Microsoft.Extensions.Logging.Abstractions;
 using Microsoft.Extensions.Options;
 using Volo.Abp;
-using Volo.Abp.Content;
 using Volo.Abp.DependencyInjection;
 using Volo.Docs.Documents;
 using Volo.Docs.Documents.Rendering;
@@ -33,6 +31,11 @@ public class ProjectPdfGenerator : IProjectPdfGenerator, ITransientDependency
     protected Project Project { get; set; }
     protected List AllPdfDocuments { get; } = [];
 
+    /// <summary>
+    /// Maximum number of documents rendered into a single intermediate PDF before merging.
+    /// </summary>
+    protected int ChunkSize { get; set; } = 10;
+
     public ProjectPdfGenerator(
         IDocumentSourceFactory documentStoreFactory,
         IDocumentRepository documentRepository,
@@ -40,7 +43,8 @@ public class ProjectPdfGenerator : IProjectPdfGenerator, ITransientDependency
         IDocumentSectionRenderer documentSectionRenderer,
         IProjectPdfFileStore projectPdfFileStore,
         IHtmlToPdfRenderer htmlToPdfRenderer,
-        IDocumentToHtmlConverterFactory documentToHtmlConverterFactory)
+        IDocumentToHtmlConverterFactory documentToHtmlConverterFactory,
+        ILogger logger)
     {
         DocumentStoreFactory = documentStoreFactory;
         DocumentRepository = documentRepository;
@@ -49,7 +53,7 @@ public class ProjectPdfGenerator : IProjectPdfGenerator, ITransientDependency
         ProjectPdfFileStore = projectPdfFileStore;
         HtmlToPdfRenderer = htmlToPdfRenderer;
         DocumentToHtmlConverterFactory = documentToHtmlConverterFactory;
-        Logger = NullLogger.Instance;
+        Logger = logger;
     }
 
     public virtual async Task GenerateAsync(Project project, string version, string languageCode)
@@ -60,17 +64,100 @@ public class ProjectPdfGenerator : IProjectPdfGenerator, ITransientDependency
 
         var navigation = await GetNavigationAsync(project, version, languageCode);
         await SetAllPdfDocumentsAsync(navigation.Items, project, version, languageCode);
-        
-        var html = await BuildHtmlAsync();
+
         var title = Options.Value.CalculatePdfFileTitle?.Invoke(project) ?? project.Name;
-        var pdfStream = await HtmlToPdfRenderer.RenderAsync(title, html, AllPdfDocuments);
+        var tempStreams = new List();
+
+        try
+        {
+            var documentChunks = ChunkDocuments(AllPdfDocuments);
+            Logger.LogInformation("Documents split into {ChunkCount} chunks for processing", documentChunks.Count);
+
+            if (documentChunks.Count == 0)
+            {
+                // Nothing to render: merging zero files throws, so skip explicitly instead.
+                Logger.LogWarning("No documents found for project {ProjectName}; PDF generation skipped", project.Name);
+                return;
+            }
+
+            for (var index = 0; index < documentChunks.Count; index++)
+            {
+                var chunk = documentChunks[index];
+                Logger.LogInformation("Processing chunk {Index}/{Total}", index + 1, documentChunks.Count);
+
+                var chunkHtml = await BuildHtmlAsync(chunk);
+                var pdfStream = await HtmlToPdfRenderer.RenderAsync($"{title} - Part {index + 1}", chunkHtml, chunk);
+                tempStreams.Add(pdfStream);
+            }
+
+            using var mergedPdfStream = await MergePdfFilesAsync(tempStreams, title, disposeStreams: true);
+            await ProjectPdfFileStore.SetAsync(project, version, languageCode, mergedPdfStream);
+        }
+        finally
+        {
+            // Runs on success and on failure, so errors propagate to the caller instead of being swallowed.
+            // Streams the merge already disposed are disposed again here, which is harmless.
+            foreach (var tempStream in tempStreams)
+            {
+                try
+                {
+                    await tempStream.DisposeAsync();
+                }
+                catch
+                {
+                    // ignore any exceptions during disposal
+                }
+            }
+        }
+    }
+
+    /// <summary>
+    /// Flattens the document tree and splits it into consecutive groups of at most <see cref="ChunkSize"/> documents.
+    /// </summary>
+    protected virtual List> ChunkDocuments(List documents)
+    {
+        if (ChunkSize <= 0)
+        {
+            throw new InvalidOperationException($"{nameof(ChunkSize)} must be greater than zero, but was {ChunkSize}.");
+        }
+
+        // NOTE(review): flattened documents still carry their Children; if HTML conversion or outline
+        // building also walks Children, nested documents could be rendered more than once - confirm.
+        var flatDocuments = FlattenDocuments(documents);
 
-        await ProjectPdfFileStore.SetAsync(project, version, languageCode, pdfStream);
+        return flatDocuments
+            .Select((doc, index) => new { doc, index })
+            .GroupBy(x => x.index / ChunkSize)
+            .Select(g => g.Select(x => x.doc).ToList())
+            .ToList();
     }
-    
-    protected virtual async Task BuildHtmlAsync()
+
+    /// <summary>
+    /// Returns every document in the tree, each parent immediately followed by its descendants.
+    /// </summary>
+    protected virtual List FlattenDocuments(List documents)
     {
-        var htmlContent = await ConvertDocumentsToHtmlAsync(AllPdfDocuments);
+        var result = new List();
+
+        foreach (var document in documents)
+        {
+            result.Add(document);
+
+            if (document.HasChildren)
+            {
+                result.AddRange(FlattenDocuments(document.Children));
+            }
+        }
+
+        return result;
+    }
+
+    /// <summary>
+    /// Builds the HTML for the given documents rather than for all of <see cref="AllPdfDocuments"/>.
+    /// </summary>
+    protected virtual async Task BuildHtmlAsync(List pdfDocuments)
+    {
+        var htmlContent = await ConvertDocumentsToHtmlAsync(pdfDocuments);
 
         var htmlBuilder = new StringBuilder();
         htmlBuilder.Append(Options.Value.HtmlLayout);
@@ -103,6 +190,20 @@ public class ProjectPdfGenerator : IProjectPdfGenerator, ITransientDependency
         return contentBuilder.ToString();
     }
 
+    /// <summary>
+    /// Validates the input and delegates the merge to <see cref="HtmlToPdfRenderer"/>.
+    /// </summary>
+    /// <exception cref="ArgumentException"><paramref name="pdfFiles"/> is empty.</exception>
+    protected virtual async Task MergePdfFilesAsync(List pdfFiles, string title, bool disposeStreams = true)
+    {
+        if (pdfFiles.Count == 0)
+        {
+            throw new ArgumentException("No PDF files to merge", nameof(pdfFiles));
+        }
+
+        return await HtmlToPdfRenderer.MergePdfFilesAsync(pdfFiles, title, disposeStreams);
+    }
+
     protected virtual IDocumentToHtmlConverter GetDocumentToHtmlConverter(Project project, PdfDocument pdfDocument)
     {
         return DocumentToHtmlConverterFactory.Create(DocsDomainConsts.PdfDocumentToHtmlConverterPrefix +(pdfDocument.Document.Format ?? project.Format));