using Alba.CsCss.Style;
using AngleSharp;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Rac.Tools
{
    // This class could be changed into an instance class,
    // with different drivers implemented behind it (StringParser/WebBrowserEmu/etc.).

    /// <summary>
    /// Helpers for extracting and rewriting the links found in HTML documents
    /// and CSS text.
    /// </summary>
    public class LinkProcessor
    {
        /// <summary>
        /// Link value that is never rewritten. In CSS text, replacing it would
        /// also hit every "scheme://" occurrence in the style sheet.
        /// </summary>
        private const string BareDoubleSlash = "//";

        private static readonly CssLoader CssLoader = new CssLoader();

        /// <summary>
        /// Returns the URIs that the CSS loader reports for the given CSS text.
        /// </summary>
        /// <param name="css">CSS source text.</param>
        /// <returns>The URIs in the order the loader yields them.</returns>
        public static string[] FromCss(string css)
        {
            return CssLoader.GetUris(css).ToArray();
        }

        /// <summary>
        /// Collects every link referenced by a parsed HTML document: URIs inside
        /// embedded <c>style</c> elements first, followed by the <c>src</c> and
        /// <c>href</c> attribute values of all elements in selector order.
        /// </summary>
        /// <param name="doc">The parsed document to inspect.</param>
        /// <returns>All links found; duplicates are not removed.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
        public static string[] ExtractLinks(IHtmlDocument doc)
        {
            if (doc == null)
            {
                throw new ArgumentNullException(nameof(doc));
            }

            // Extract URLs from embedded style sheets.
            var links = doc.QuerySelectorAll("style")
                .SelectMany(style => FromCss(style.TextContent))
                .ToList();

            // Extract URLs from the page itself. An element may carry both
            // attributes; both are reported (src first) because
            // ReplaceHtmlLinks rewrites both of them as well.
            foreach (var element in doc.QuerySelectorAll("*[src],*[href]"))
            {
                var src = element.Attributes["src"]?.Value;
                if (src != null)
                {
                    links.Add(src);
                }

                var href = element.Attributes["href"]?.Value;
                if (href != null)
                {
                    links.Add(href);
                }
            }

            return links.ToArray();
        }

        /// <summary>
        /// Rewrites every link found in the CSS text by passing it through
        /// <paramref name="func"/>.
        /// </summary>
        /// <param name="css">CSS source text.</param>
        /// <param name="func">Maps an original link to its replacement. It is
        /// called once per distinct link, longest link first.</param>
        /// <returns>The CSS text with all links substituted.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="func"/> is null.</exception>
        public static string ReplaceCssLinks(string css, Func<string, string> func)
        {
            if (func == null)
            {
                throw new ArgumentNullException(nameof(func));
            }

            // De-duplicate before sorting so the final order does not depend on
            // Distinct() preserving the order of its input.
            var links = FromCss(css)
                .Where(link => !string.IsNullOrWhiteSpace(link) && link != BareDoubleSlash)
                .Distinct()
                .OrderByDescending(link => link.Length)
                .ToArray();

            if (links.Length == 0)
            {
                return css;
            }

            // Compute each replacement exactly once, up front.
            var replacements = links.ToDictionary(link => link, func);

            // Substitute in a single pass. Chaining string.Replace calls would
            // rescan text that was already rewritten, so a short link such as
            // "a.png" would also be replaced inside the freshly written
            // replacement for "img/a.png". Listing the longest alternatives
            // first makes the regex prefer the longest link at each position.
            var pattern = string.Join("|", links.Select(Regex.Escape));
            return Regex.Replace(css, pattern, match => replacements[match.Value]);
        }

        /// <summary>
        /// Parses an HTML document from raw bytes, rewrites the links in its
        /// embedded style sheets and in its <c>src</c>/<c>href</c> attributes
        /// through <paramref name="func"/>, and returns the resulting markup.
        /// </summary>
        /// <param name="html">Raw bytes of the HTML document.</param>
        /// <param name="func">Maps an original link to its replacement.</param>
        /// <param name="encoding">Encoding used to decode <paramref name="html"/>.
        /// When null, the bytes are handed to the parser as a stream and, on
        /// return, this is set to the encoding named by the parsed document's
        /// character set.</param>
        /// <returns>The serialized HTML of the rewritten document.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="html"/> or
        /// <paramref name="func"/> is null.</exception>
        /// <exception cref="ArgumentException">The document's character set name
        /// is not an encoding known to <see cref="Encoding.GetEncoding(string)"/>.</exception>
        public static string ReplaceHtmlLinks(byte[] html, Func<string, string> func, ref Encoding encoding)
        {
            if (html == null)
            {
                throw new ArgumentNullException(nameof(html));
            }

            if (func == null)
            {
                throw new ArgumentNullException(nameof(func));
            }

            var doc = ParseHtml(html, encoding);

            foreach (var style in doc.QuerySelectorAll("style"))
            {
                style.TextContent = ReplaceCssLinks(style.TextContent, func);
            }

            RewriteAttribute(doc, "src", func);
            RewriteAttribute(doc, "href", func);

            if (encoding == null)
            {
                encoding = Encoding.GetEncoding(doc.CharacterSet);
            }

            return doc.ToHtml();
        }

        /// <summary>
        /// Parses the bytes as HTML, decoding them with <paramref name="encoding"/>
        /// when one is given and otherwise passing them to the parser as a stream.
        /// </summary>
        private static IHtmlDocument ParseHtml(byte[] html, Encoding encoding)
        {
            var parser = new HtmlParser();

            if (encoding == null)
            {
                using var stream = new MemoryStream(html);
                return parser.ParseDocument(stream);
            }

            return parser.ParseDocument(encoding.GetString(html));
        }

        /// <summary>
        /// Passes the named attribute of every element that has it through
        /// <paramref name="func"/>, leaving values equal to "//" untouched.
        /// </summary>
        private static void RewriteAttribute(IHtmlDocument doc, string attributeName, Func<string, string> func)
        {
            foreach (var element in doc.QuerySelectorAll("*[" + attributeName + "]"))
            {
                var attribute = element.Attributes[attributeName];
                if (attribute == null || attribute.Value == BareDoubleSlash)
                {
                    continue;
                }

                attribute.Value = func(attribute.Value);
            }
        }
    }
}