using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using Volo.Docs.Utils;

namespace Volo.Docs.HtmlConverting
{
    /// <summary>
    /// Regex-based helpers that post-process document HTML: rewriting relative image sources,
    /// renaming the language class of code blocks and wrapping images in links.
    /// </summary>
    /// <remarks>
    /// NOTE(review): in the reviewed copy of this file every angle-bracketed span had been removed
    /// (empty regex patterns, a list type without its type argument, two methods fused inside a
    /// string literal). Literals marked "reconstructed" below were rebuilt from the surrounding
    /// code and must be confirmed against version control before merging.
    /// </remarks>
    public static class HtmlNormalizer
    {
        /// <summary>
        /// Rewrites the <c>src</c> attribute of image tags so that non-external sources point below
        /// <paramref name="documentRawRootUrl"/> (and <paramref name="localDirectory"/>, when given).
        /// </summary>
        /// <param name="content">HTML to process; <c>null</c> is returned unchanged.</param>
        /// <param name="documentRawRootUrl">Root URL that rewritten sources are prefixed with.</param>
        /// <param name="localDirectory">Optional directory inserted between the root URL and the source.</param>
        /// <returns>The HTML with rewritten image sources, or <c>null</c> when <paramref name="content"/> is <c>null</c>.</returns>
        public static string ReplaceImageSources(string content, string documentRawRootUrl, string localDirectory)
        {
            if (content == null)
            {
                return null;
            }

            // Group 1: tag text before src, group 2: the src value, group 3: the rest of the tag.
            // NOTE(review): the opening of group 1 ("<img\s+[^>") is reconstructed - confirm.
            const string imageSourcePattern = @"(<img\s+[^>]*)src=""([^""]*)""([^>]*>)";

            return Regex.Replace(content, imageSourcePattern, match =>
            {
                var source = match.Groups[2].Value;

                // Sources that UrlHelper reports as external are left exactly as written.
                if (UrlHelper.IsExternalLink(source))
                {
                    return match.Value;
                }

                var newImageSource = documentRawRootUrl.EnsureEndsWith('/') +
                                     (localDirectory.IsNullOrEmpty() ? "" : localDirectory.TrimStart('/').EnsureEndsWith('/')) +
                                     source.TrimStart('/');

                return match.Groups[1].Value + " src=\"" + HttpUtility.HtmlEncode(newImageSource) + "\" " + match.Groups[3].Value;
            }, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline);
        }

        /// <summary>
        /// Replaces the language class of code blocks, matching <paramref name="currentLanguage"/>
        /// case-insensitively and literally.
        /// </summary>
        /// <param name="content">HTML to process; <c>null</c> is returned unchanged.</param>
        /// <param name="currentLanguage">Class value to look for.</param>
        /// <param name="newLanguage">Class value to write instead.</param>
        /// <returns>The HTML with the code-block class replaced.</returns>
        public static string ReplaceCodeBlocksLanguage(string content, string currentLanguage, string newLanguage)
        {
            if (content == null)
            {
                return null;
            }

            // NOTE(review): both literals were empty in the reviewed text and the language arguments
            // were unused; the "<code class=...>" shape is reconstructed - confirm.
            // Escaping keeps names such as "c++" or "c#" from being parsed as regex syntax.
            var pattern = "<code class=\"" + Regex.Escape(currentLanguage ?? string.Empty) + "\">";
            var replacement = "<code class=\"" + newLanguage + "\">";

            // A MatchEvaluator inserts the replacement verbatim, so a '$' in newLanguage is not
            // interpreted as a substitution token.
            return Regex.Replace(content, pattern, _ => replacement, RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Wraps an image with a tag that's clickable to open the image in a new browser tab.
        /// Images that already sit inside a link are left untouched.
        /// </summary>
        /// <param name="html">HTML to process.</param>
        /// <returns>The processed HTML, or the input unchanged if processing throws.</returns>
        public static string WrapImagesWithinAnchors(string html)
        {
            try
            {
                var ignoredIndicies = GetIgnoredImageIndicies(html);

                // Group 1 captures the src value, group 0 is the whole image tag.
                // NOTE(review): this pattern was empty in the reviewed text; reconstructed - confirm.
                return Regex.Replace(html, "<img.+?src=[\"'](.+?)[\"'].*?>", match =>
                {
                    if (ignoredIndicies != null && ignoredIndicies.Contains(match.Index))
                    {
                        return match.Value;
                    }

                    var link = match.Groups[1].Value;
                    var imgTag = match.Groups[0].Value;

                    // The title may contain a double quote (when the attribute was single-quoted),
                    // so encode it before placing it inside a double-quoted attribute.
                    var title = GetTitleFromTag(imgTag)?.Replace("\"", "&quot;");

                    // NOTE(review): the wrapper markup was missing in the reviewed text (only
                    // "{imgTag}" remained); reconstructed from the summary above - confirm attributes.
                    return $"<a target=\"_blank\" rel=\"noopener noreferrer\" title=\"{title}\" href=\"{link}\">{imgTag}</a>";
                });
            }
            catch
            {
                // Best effort: on any failure fall back to the unmodified HTML.
                return html;
            }
        }

        // Returns the start indexes of images that are already inside a link, or null when they
        // could not be determined.
        private static List<int> GetIgnoredImageIndicies(string html)
        {
            return GetIgnoredImageIndicies(FindImgTagsWithinAnchor(html));
        }

        // Converts each link match into the absolute index of the image tag it contains, so the
        // index can be compared with Match.Index of the image regex.
        private static List<int> GetIgnoredImageIndicies(MatchCollection ignoredImages)
        {
            if (ignoredImages == null)
            {
                return null;
            }

            var ignoredImageIndicies = new List<int>(ignoredImages.Count);

            for (var i = 0; i < ignoredImages.Count; i++)
            {
                var ignoredImage = ignoredImages[i];

                // NOTE(review): the search literal and the remainder of this method were cut off in
                // the reviewed text; reconstructed - confirm.
                var ignoredImgIndex = ignoredImage.Index + ignoredImage.Value.IndexOf("<img", StringComparison.Ordinal);
                ignoredImageIndicies.Add(ignoredImgIndex);
            }

            return ignoredImageIndicies;
        }

        // Finds links that contain an image; returns null if the search throws.
        private static MatchCollection FindImgTagsWithinAnchor(string html)
        {
            try
            {
                // Opening link tag, then any characters that do not start a closing link tag,
                // then an image tag.
                // NOTE(review): only the middle of this pattern survived in the reviewed text; the
                // start ("<a(?:\s+[^>") and the tail ("<img.*?>") are reconstructed - confirm.
                return Regex.Matches(html, @"<a(?:\s+[^>]+)?>(?:(?!<\s*/\s*a\s*>).)*<img.*?>");
            }
            catch
            {
                // ignored
                return null;
            }
        }

        // Returns the trimmed title attribute of the tag, falling back to its alt attribute;
        // null when neither is found.
        private static string GetTitleFromTag(string imgTag)
        {
            if (string.IsNullOrWhiteSpace(imgTag))
            {
                return null;
            }

            // The value runs up to the closing quote of the same kind as the opening one (\1), so an
            // apostrophe inside a double-quoted value no longer ends it early, and an empty title
            // no longer swallows the following attribute.
            var match = Regex.Match(imgTag, @"\stitle\s?\=\s?(\""|')((?:(?!\1).)+)\1", RegexOptions.Multiline);
            if (match.Success)
            {
                return match.Groups[2].Value.Trim();
            }

            match = Regex.Match(imgTag, @"\salt\s*\=\s*(\""|')((?:(?!\1).)*)\1", RegexOptions.Multiline);
            if (match.Success)
            {
                return match.Groups[2].Value.Trim();
            }

            return null;
        }
    }
}