Browse Source

commit: import project

HOME 5 years ago
parent
commit
90b75b53ad

+ 6 - 0
NuGet.config

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?> 
+<configuration> 
+  <config> 
+    <add key="repositorypath" value="C:\NuGetLocalRepo" /> 
+  </config> 
+</configuration>

+ 50 - 0
RAC_RawArchiveCrawler.sln

@@ -0,0 +1,50 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.29728.190
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Rac.Core", "Rac.Core\Rac.Core.csproj", "{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Rac.Executer", "Rac.Executer\Rac.Executer.csproj", "{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "@", "@", "{41C2AC90-1762-4BC7-9D76-AD200A6AFFF0}"
+	ProjectSection(SolutionItems) = preProject
+		NuGet.config = NuGet.config
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Debug|x64.ActiveCfg = Debug|x64
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Debug|x64.Build.0 = Debug|x64
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Debug|x86.ActiveCfg = Debug|x64
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Debug|x86.Build.0 = Debug|x64
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Release|x64.ActiveCfg = Release|x64
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Release|x64.Build.0 = Release|x64
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Release|x86.ActiveCfg = Release|x86
+		{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}.Release|x86.Build.0 = Release|x86
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Debug|x64.ActiveCfg = Debug|x64
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Debug|x64.Build.0 = Debug|x64
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Debug|x86.ActiveCfg = Debug|x64
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Debug|x86.Build.0 = Debug|x64
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Release|x64.ActiveCfg = Release|x64
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Release|x64.Build.0 = Release|x64
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Release|x86.ActiveCfg = Release|x86
+		{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}.Release|x86.Build.0 = Release|x86
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {335B6D9E-24AD-47FC-B212-DD856C6C585C}
+	EndGlobalSection
+	GlobalSection(SubversionScc) = preSolution
+		Svn-Managed = True
+		Manager = AnkhSVN - Subversion Support for Visual Studio
+	EndGlobalSection
+EndGlobal

+ 49 - 0
Rac.Core/Common/BaseService.cs

@@ -0,0 +1,49 @@
+using System;
+using Rac.Models;
+
+namespace Rac.Common
+{
+    /// <summary>
+    /// Common base for the services in this project: defines the Start/Stop lifecycle
+    /// and publishes log records through the <see cref="Log"/> event.
+    /// </summary>
+    public abstract class BaseService
+    {
+        /// <summary>
+        /// Raised once for every log record. It is pre-seeded with a no-op handler,
+        /// so it can be invoked without a null check.
+        /// </summary>
+        public event EventHandler<LogEventArgs> Log = delegate { };
+
+        /// <summary>Starts the service.</summary>
+        public abstract void Start();
+
+        /// <summary>Stops the service.</summary>
+        public abstract void Stop();
+
+        /// <summary>Publishes <paramref name="message"/> at <see cref="LogLevel.Debug"/>.</summary>
+        protected void LogDebug(string message) => Publish(LogLevel.Debug, message);
+
+        /// <summary>Publishes <paramref name="message"/> at <see cref="LogLevel.Trace"/>.</summary>
+        protected void LogTrace(string message) => Publish(LogLevel.Trace, message);
+
+        /// <summary>Publishes <paramref name="message"/> at <see cref="LogLevel.Info"/>.</summary>
+        protected void LogInfo(string message) => Publish(LogLevel.Info, message);
+
+        /// <summary>Publishes <paramref name="message"/> at <see cref="LogLevel.Warning"/>.</summary>
+        protected void LogWarning(string message) => Publish(LogLevel.Warning, message);
+
+        /// <summary>Publishes <paramref name="message"/> at <see cref="LogLevel.Error"/>.</summary>
+        protected void LogError(string message) => Publish(LogLevel.Error, message);
+
+        /// <summary>Publishes <paramref name="message"/> at <see cref="LogLevel.Fatal"/>.</summary>
+        protected void LogFatal(string message) => Publish(LogLevel.Fatal, message);
+
+        /// <summary>
+        /// Raises <see cref="Log"/> with this instance as the sender.
+        /// Derived classes may override it to intercept records.
+        /// </summary>
+        /// <param name="e">The record to publish.</param>
+        protected virtual void OnMessage(LogEventArgs e) => Log(this, e);
+
+        // Single funnel for the level-specific helpers above.
+        private void Publish(LogLevel level, string message)
+        {
+            var record = new LogEventArgs { Level = level, Log = message };
+            OnMessage(record);
+        }
+    }
+}

+ 227 - 0
Rac.Core/Crawler.cs

@@ -0,0 +1,227 @@
+using Rac.Common;
+using Rac.Entities;
+using Rac.Models;
+using Rac.Tools;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading;
+
+namespace Rac
+{
+    //public enum CrawlTaskEventType
+    //{
+    //    Started,
+    //    Stopped,
+    //}
+
+    //public class CrawlTaskEventArgs : EventArgs
+    //{
+    //    public string Url { get; set; }
+    //    public CrawlTaskEventType Type { get; set; }
+    //}
+
+    //TODO: CHECK AFFECT OF `Error loading type Typespec 0x1b000044 from  due to Could not resolve typespec token 1b000044'
+
+    /// <summary>
+    /// Background crawler. Starting from the configured home URL it repeatedly pulls a batch of
+    /// not-yet-dumped URLs from the database, downloads them, stores each result as an
+    /// <see cref="ArchiveEntry"/> and queues the links discovered on the downloaded pages.
+    /// </summary>
+    public class Crawler : BaseService
+    {
+        private readonly DataAccess _db;
+        private Thread _runner;
+
+        // Written by the thread calling Stop() and read by the worker loop, hence volatile.
+        private volatile bool _isRunning;
+
+        /// <summary>True from <see cref="Start"/> until the worker thread has finished or was stopped.</summary>
+        public bool IsRunning
+        {
+            get => _isRunning;
+            private set => _isRunning = value;
+        }
+
+        /// <summary>Creates a crawler working against the given database file.</summary>
+        /// <param name="dbFilename">Path handed to <c>DataAccess</c>.</param>
+        public Crawler(string dbFilename)
+        {
+            _db = new DataAccess(dbFilename);
+        }
+
+        /// <summary>Starts the crawl on a background thread. A second call while running is ignored.</summary>
+        public override void Start()
+        {
+            // Two workers would pull the same pending URLs and race on the same rows.
+            if (_runner != null && _runner.IsAlive)
+            {
+                LogWarning("Already running.");
+                return;
+            }
+
+            LogInfo("Starting...");
+            IsRunning = true;
+            _runner = new Thread(RunInternal) { IsBackground = true };
+            _runner.Start();
+            LogInfo("Started.");
+        }
+
+        /// <summary>Asks the worker to stop after its current batch and waits for it to exit.</summary>
+        public override void Stop()
+        {
+            IsRunning = false;
+            // _runner is null when Stop() is called before Start().
+            _runner?.Join();
+            LogInfo("Stopped.");
+        }
+
+        /// <summary>
+        /// Thread entry point. Guarantees that <see cref="IsRunning"/> is reset and that a failure is
+        /// reported through the log instead of tearing the process down as an unhandled thread exception.
+        /// </summary>
+        private void RunInternal()
+        {
+            try
+            {
+                Crawl();
+            }
+            catch (Exception e)
+            {
+                LogFatal($"Crawler aborted: {e}");
+            }
+            finally
+            {
+                IsRunning = false;
+            }
+        }
+
+        /// <summary>Runs the crawl loop until no pending URL is left or a stop was requested.</summary>
+        private void Crawl()
+        {
+            LogDebug("Loading configs...");
+            var conf = new ConfigAdapter(_db.GetConfigs());
+            if (string.IsNullOrEmpty(conf.HomeUrl))
+            {
+                LogFatal($"The config <{nameof(conf.HomeUrl)}> is required! HALT");
+                return;
+            }
+
+            var excludeUrlPrefix = conf.UrlPrefixExclude;
+            // Parsed once; the value cannot change while the loop runs.
+            var parallel = conf.Parallel;
+
+            // Host names are case-insensitive, so configured hosts must match regardless of casing.
+            var hostsInclude = new HashSet<string>(conf.HostsInclude, StringComparer.OrdinalIgnoreCase);
+            var homeUri = new Uri(conf.HomeUrl);
+            var homeHost = homeUri.Host;
+
+            // Resolves the raw links found on pageUri, classifies them, records every classification
+            // in the database and returns the URLs that should be crawled.
+            string[] ProcessLinkFilters(Uri pageUri, params string[] links)
+            {
+                var linkUris = links.Select(p =>
+                {
+                    try
+                    {
+                        var uri = new Uri(pageUri, p);
+                        if (string.IsNullOrEmpty(uri.Fragment)) return uri;
+
+                        //remove hash
+                        var uos = uri.ToString();
+                        uos = uos.Substring(0, uos.IndexOf("#", StringComparison.Ordinal));
+                        return new Uri(uos);
+                    }
+                    catch (Exception e)
+                    {
+                        LogWarning($"{e.Message} {p}");
+                        return null;
+                    }
+                }).Where(p => null != p).Distinct().ToArray();
+
+                var lstExPfx = new List<Uri>();
+                var lstInHome = new List<Uri>();
+                var lstInHosts = new List<Uri>();
+                var lstExNon = new List<Uri>();
+
+                foreach (var linkUri in linkUris)
+                {
+                    var linkUrl = linkUri.ToString();
+                    if (excludeUrlPrefix.Any(p => linkUrl.StartsWith(p, StringComparison.Ordinal)))
+                    {
+                        lstExPfx.Add(linkUri);
+                        continue;
+                    }
+
+                    if (string.Equals(linkUri.Host, homeHost, StringComparison.OrdinalIgnoreCase))
+                    {
+                        lstInHome.Add(linkUri);
+                        continue;
+                    }
+
+                    if (hostsInclude.Contains(linkUri.Host))
+                    {
+                        lstInHosts.Add(linkUri);
+                        continue;
+                    }
+
+                    lstExNon.Add(linkUri);
+                }
+
+                var pageUrl = pageUri.ToString();
+                // NOTE(review): in Release builds this is called from several PLINQ workers at once;
+                // DataAccess is not visible here - confirm BulkAddResourceLink is safe for concurrent use.
+                void AddLinks(List<Uri> items, bool included, string remark)
+                {
+                    if (items.Any()) _db.BulkAddResourceLink(pageUrl, included, remark, items.Select(p => p.ToString()).ToArray());
+                }
+
+                AddLinks(lstExPfx, false, "Exclude by url prefix");
+                AddLinks(lstInHome, true, "Include by home host");
+                // These links ARE returned for crawling below, so they must be recorded as included.
+                AddLinks(lstInHosts, true, "Include by hosts include");
+                AddLinks(lstExNon, false, "Exclude by non matched rulers");
+
+                return lstInHome.Union(lstInHosts).Select(p => p.ToString()).ToArray();
+            }
+
+            _db.CreateArchiveEntryIfNotExist(conf.HomeUrl);
+
+            while (IsRunning)
+            {
+                var urls = _db.GetNonDumpedUrls(parallel);
+                LogInfo($"Gets {urls.Length} url from db.");
+                if (urls.Length == 0) break;
+
+                var hssExtractedLinks = new HashSet<string>();
+
+                // HashSet is not thread-safe and PageProce runs on several threads in Release builds.
+                void CollectLinks(string[] links)
+                {
+                    lock (hssExtractedLinks)
+                    {
+                        hssExtractedLinks.AddRange(links);
+                    }
+                }
+
+                // Downloads one URL and returns the archive entry describing the outcome.
+                ArchiveEntry PageProce(string url)
+                {
+                    ArchiveEntry entry;
+                    Response resp;
+                    Uri uri;
+
+                    try
+                    {
+                        // Inside the try so that one malformed stored URL is archived as an error
+                        // instead of aborting the whole batch.
+                        uri = new Uri(url);
+
+                        LogTrace($"GET {uri}");
+
+                        resp = Requester.GetHttp(url);
+
+                        entry = new ArchiveEntry
+                        {
+                            Url = url,
+                            StatusCode = (int)resp.StatusCode,
+                            StatusDescription = resp.StatusDescription,
+                            Content = resp.Body,
+                        };
+                    }
+                    catch (Exception e)
+                    {
+                        LogTrace($"GET {url} -- {e.Message}");
+
+                        // 544 is this archiver's own status for "the request itself failed".
+                        entry = new ArchiveEntry
+                        {
+                            Url = url,
+                            StatusCode = 544,
+                            StatusDescription = "ArchiverError",
+                            Headers = "content-type: text/plain; charset=utf-8",
+                            Content = Encoding.UTF8.GetBytes(e.ToString())
+                        };
+                        return entry;
+                    }
+
+                    LogTrace($"GET {url} -- {entry.StatusCode}|{resp.ContentType}|{entry.Content?.Length}");
+
+                    var listHeaders = new List<HttpHeader> { new HttpHeader("content-type", resp.ContentType) };
+                    listHeaders.AddRange(resp.GetServerTimeHeaders());
+
+                    if (resp.GetRedirect(out var redirectUrl))
+                    {
+                        listHeaders.Add(new HttpHeader("location", redirectUrl));
+                        CollectLinks(ProcessLinkFilters(uri, redirectUrl));
+                    }
+                    else
+                    {
+                        var linksOnPage = new HashSet<string>();
+
+                        if (resp.GetHtmlDocument(out var doc)) linksOnPage.AddRange(LinkProcessor.ExtractLinks(doc).Distinct());
+                        else if (resp.GetCss(out var css)) linksOnPage.AddRange(LinkProcessor.FromCss(css).Distinct());
+
+                        linksOnPage.Remove("//");
+
+                        CollectLinks(ProcessLinkFilters(uri, linksOnPage.ToArray()));
+                    }
+
+                    entry.Headers = listHeaders.ToStringLines();
+                    return entry;
+                }
+
+                var results = urls
+#if !DEBUG
+                    .AsParallel().WithDegreeOfParallelism(parallel)
+#endif
+                    .Select(PageProce)
+                    .ToArray();
+
+                LogInfo($"Saving extracted {hssExtractedLinks.Count} new link...");
+                var n = _db.BulkAddNewArchiveEntry(hssExtractedLinks);
+                LogInfo($"Saved new {n} link...");
+
+                LogInfo($"Updating {results.Length} entry...");
+                var u = _db.BulkUpdateArchiveEntry(results);
+                LogInfo($"Updated  {u} entry...");
+            }
+
+            IsRunning = false;
+            LogInfo("Finished!");
+        }
+    }
+}

+ 15 - 0
Rac.Core/Entities/ArchiveEntry.cs

@@ -0,0 +1,15 @@
+using System;
+
+namespace Rac.Entities
+{
+    /// <summary>
+    /// One archived resource: the URL that was requested and the outcome of the request.
+    /// </summary>
+    public class ArchiveEntry
+    {
+        /// <summary>URL of the archived resource.</summary>
+        public string Url { get; set; }
+
+        /// <summary>
+        /// Time of the last write. Not assigned by the crawler code in this commit view;
+        /// presumably maintained by the data layer - TODO confirm.
+        /// </summary>
+        public DateTime LastWriteTime { get; set; }
+
+        /// <summary>
+        /// HTTP status code of the response. The crawler stores 544 when the request itself failed,
+        /// and the offline server treats 0 as "not in archive".
+        /// </summary>
+        public int StatusCode { get; set; }
+
+        /// <summary>Status text accompanying <see cref="StatusCode"/>.</summary>
+        public string StatusDescription { get; set; }
+
+        /// <summary>Selected response headers serialised as text lines of the form <c>name: value</c>.</summary>
+        public string Headers { get; set; }
+
+        /// <summary>Raw response body.</summary>
+        public byte[] Content { get; set; }
+    }
+}

+ 8 - 0
Rac.Core/Entities/ConfigEntry.cs

@@ -0,0 +1,8 @@
+namespace Rac.Entities
+{
+    /// <summary>
+    /// A single configuration setting expressed as a key/value pair of strings.
+    /// </summary>
+    public class ConfigEntry
+    {
+        /// <summary>Name of the setting.</summary>
+        public string Key { get; set; }
+
+        /// <summary>Raw text value of the setting.</summary>
+        public string Value { get; set; }
+    }
+}

+ 10 - 0
Rac.Core/Entities/ResourceLinkEntry.cs

@@ -0,0 +1,10 @@
+namespace Rac.Entities
+{
+    /// <summary>
+    /// A link found on a resource together with the crawl decision taken for it.
+    /// </summary>
+    public class ResourceLinkEntry
+    {
+        /// <summary>The resource (page) the link belongs to.</summary>
+        public string Resource { get; set; }
+
+        /// <summary>The link itself.</summary>
+        public string Link { get; set; }
+
+        /// <summary>Whether the link was included in the crawl.</summary>
+        public bool Included { get; set; }
+
+        /// <summary>Free-text note about the decision.</summary>
+        public string Remark { get; set; }
+    }
+}

+ 22 - 0
Rac.Core/FileBrowseServer.cs

@@ -0,0 +1,22 @@
+using Rac.Common;
+using System;
+
+namespace Rac
+{
+    /// <summary>
+    /// Placeholder service: nothing is implemented yet, both lifecycle methods throw.
+    /// </summary>
+    public class FileBrowseServer : BaseService
+    {
+        /// <summary>Creates the placeholder; the argument is currently ignored.</summary>
+        /// <param name="db">Unused.</param>
+        public FileBrowseServer(string db)
+        {
+        }
+
+        /// <summary>Not implemented.</summary>
+        /// <exception cref="NotImplementedException">Always.</exception>
+        public override void Start() => throw new NotImplementedException();
+
+        /// <summary>Not implemented.</summary>
+        /// <exception cref="NotImplementedException">Always.</exception>
+        public override void Stop() => throw new NotImplementedException();
+    }
+}

+ 25 - 0
Rac.Core/Models/HttpHeader.cs

@@ -0,0 +1,25 @@
+namespace Rac.Models
+{
+    /// <summary>
+    /// A single HTTP header as a mutable name/value pair.
+    /// </summary>
+    internal class HttpHeader
+    {
+        /// <summary>Header name.</summary>
+        public string Name { get; set; }
+
+        /// <summary>Header value.</summary>
+        public string Value { get; set; }
+
+        /// <summary>Creates an empty header; set <see cref="Name"/> and <see cref="Value"/> afterwards.</summary>
+        public HttpHeader()
+        {
+        }
+
+        /// <summary>Creates a header with the given name and value.</summary>
+        public HttpHeader(string name, string value)
+        {
+            Name = name;
+            Value = value;
+        }
+
+        /// <summary>Formats the header as <c>Name: Value</c>.</summary>
+        public override string ToString() => $"{Name}: {Value}";
+    }
+}

+ 10 - 0
Rac.Core/Models/LogEventArgs.cs

@@ -0,0 +1,10 @@
+using System;
+
+namespace Rac.Models
+{
+    /// <summary>
+    /// Payload of a log event: the severity and the text of one log record.
+    /// </summary>
+    public class LogEventArgs : EventArgs
+    {
+        /// <summary>Severity of the record.</summary>
+        public LogLevel Level { get; set; }
+
+        /// <summary>Text of the record.</summary>
+        public string Log { get; set; }
+    }
+}

+ 12 - 0
Rac.Core/Models/LogLevel.cs

@@ -0,0 +1,12 @@
+namespace Rac.Models
+{
+    /// <summary>
+    /// Severity of a log record. The numeric values are spelled out so that they stay
+    /// stable if members are ever reordered.
+    /// </summary>
+    /// <remarks>
+    /// NOTE(review): Debug has a lower value than Trace here, which is the reverse of the
+    /// ordering used by many logging libraries - keep in mind when filtering by value.
+    /// </remarks>
+    public enum LogLevel
+    {
+        Debug = 0,
+        Trace = 1,
+        Info = 2,
+        Warning = 3,
+        Error = 4,
+        Fatal = 5,
+    }
+}

+ 76 - 0
Rac.Core/Models/Response.cs

@@ -0,0 +1,76 @@
+using AngleSharp.Html.Dom;
+using AngleSharp.Html.Parser;
+using Rac.Tools;
+using System.Collections.Generic;
+using System.IO;
+using System.Net;
+using System.Net.Mime;
+using System.Text;
+
+namespace Rac.Models
+{
+    /// <summary>
+    /// Result of an HTTP request: status, headers and raw body, plus helpers that interpret
+    /// the body according to the <c>content-type</c> header.
+    /// </summary>
+    internal class Response
+    {
+        /// <summary>HTTP status code of the response.</summary>
+        public HttpStatusCode StatusCode { get; set; }
+
+        /// <summary>Status text of the response.</summary>
+        public string StatusDescription { get; set; }
+
+        /// <summary>Response headers; may be null.</summary>
+        public WebHeaderCollection Headers { get; set; }
+
+        /// <summary>Raw response body; may be null.</summary>
+        public byte[] Body { get; set; }
+
+        /// <summary>Raw value of the <c>content-type</c> header, or null when absent.</summary>
+        public string ContentType => Headers?["content-type"];
+
+        /// <summary>
+        /// Parses the body as HTML when the response is declared as <c>text/html</c>.
+        /// </summary>
+        /// <param name="html">The parsed document, or null when the method returns false.</param>
+        /// <returns>True when the content type is text/html and a body is present.</returns>
+        public bool GetHtmlDocument(out IHtmlDocument html)
+        {
+            html = null;
+            if (false == HasMediaType("text/html", out _) || Body == null) return false;
+
+            using var stream = new MemoryStream(Body);
+            html = new HtmlParser().ParseDocument(stream);
+            return true;
+        }
+
+        /// <summary>
+        /// Returns the <c>last-modified</c> and <c>date</c> headers that are present and non-empty.
+        /// </summary>
+        public HttpHeader[] GetServerTimeHeaders()
+        {
+            var lst = new List<HttpHeader>(2);
+
+            var last = Headers?["last-modified"];
+            var date = Headers?["date"];
+
+            if (false == string.IsNullOrEmpty(last)) lst.Add(new HttpHeader("last-modified", last));
+            if (false == string.IsNullOrEmpty(date)) lst.Add(new HttpHeader("date", date));
+
+            return lst.ToArray();
+        }
+
+        /// <summary>
+        /// Decodes the body as text when the response is declared as <c>text/css</c>.
+        /// </summary>
+        /// <param name="css">The decoded style sheet, or null when the method returns false.</param>
+        /// <returns>True when the content type is text/css and a body is present.</returns>
+        public bool GetCss(out string css)
+        {
+            css = null;
+            if (false == HasMediaType("text/css", out var ct) || Body == null) return false;
+
+            Encoding enc;
+            try
+            {
+                enc = Encoding.GetEncoding(ct.CharSet ?? "utf-8");
+            }
+            catch (ArgumentException)
+            {
+                // Unknown charset label sent by the server: fall back to the default.
+                enc = Encoding.UTF8;
+            }
+
+            css = enc.GetString(Body);
+            return true;
+        }
+
+        /// <summary>
+        /// Reports whether the response is a redirect (301, 302, 303, 307 or 308) that carries a
+        /// <c>location</c> header.
+        /// </summary>
+        /// <param name="redirectUrl">The redirect target, or null when the method returns false.</param>
+        public bool GetRedirect(out string redirectUrl)
+        {
+            redirectUrl = null;
+
+            // 308 has no named member on older frameworks, hence the cast.
+            if (false == StatusCode.In(
+                    HttpStatusCode.TemporaryRedirect,
+                    HttpStatusCode.Redirect,
+                    HttpStatusCode.Moved,
+                    HttpStatusCode.SeeOther,
+                    (HttpStatusCode)308))
+            {
+                return false;
+            }
+
+            // A redirect status without a target cannot be followed; treat it as a normal response.
+            var location = Headers?["location"];
+            if (string.IsNullOrEmpty(location)) return false;
+
+            redirectUrl = location;
+            return true;
+        }
+
+        // True when the content-type header parses and its media type equals `mediaType`
+        // (compared case-insensitively). A missing or malformed header yields false instead of throwing.
+        private bool HasMediaType(string mediaType, out System.Net.Mime.ContentType parsed)
+        {
+            parsed = null;
+
+            var raw = ContentType;
+            if (string.IsNullOrWhiteSpace(raw)) return false;
+
+            try
+            {
+                parsed = new System.Net.Mime.ContentType(raw);
+            }
+            catch (FormatException)
+            {
+                return false;
+            }
+            catch (ArgumentException)
+            {
+                return false;
+            }
+
+            return string.Equals(parsed.MediaType, mediaType, StringComparison.OrdinalIgnoreCase);
+        }
+    }
+}

+ 198 - 0
Rac.Core/OfflineWebServer.cs

@@ -0,0 +1,198 @@
+using Rac.Common;
+using Rac.Tools;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Net;
+using System.Net.Mime;
+using System.Text;
+
+namespace Rac
+{
+    /// <summary>
+    /// Serves the archive over HTTP. Archived URLs are exposed under the virtual path
+    /// <c>/scheme/host/port/path?query#hash</c>, and links inside served HTML/CSS as well as
+    /// <c>location</c> headers are rewritten to that form.
+    /// </summary>
+    public class OfflineWebServer : BaseService
+    {
+        private readonly string _dbFilename;
+        private DataAccess _db;
+        private HttpListener _server;
+
+        private Uri _homeUrl;
+        private HashSet<string> _hostInclude;
+        private string[] _excludeUrlPrefixes;
+
+        private Encoding _defaultEncoding;
+
+        /// <summary>Creates a server for the given database file; nothing is opened until <see cref="Start"/>.</summary>
+        public OfflineWebServer(string dbFilename)
+        {
+            _dbFilename = dbFilename;
+        }
+
+        /// <summary>
+        /// Loads the configuration and starts listening on the configured port.
+        /// Logs a fatal record and returns when the database file or the home URL is missing.
+        /// </summary>
+        public override void Start()
+        {
+            if (false == File.Exists(_dbFilename))
+            {
+                LogFatal($"Can not find database file:{_dbFilename}");
+                return;
+            }
+            _db = new DataAccess(_dbFilename);
+            var conf = new ConfigAdapter(_db.GetConfigs());
+
+            // Without a home URL the root redirect cannot be built (new Uri(null) would throw).
+            if (string.IsNullOrEmpty(conf.HomeUrl))
+            {
+                LogFatal($"The config <{nameof(conf.HomeUrl)}> is required! HALT");
+                return;
+            }
+
+            var charset = conf.DefaultCharset;
+            _defaultEncoding = false == string.IsNullOrEmpty(charset) ? Encoding.GetEncoding(charset) : Encoding.UTF8;
+
+            _hostInclude = new HashSet<string>(conf.HostsInclude);
+            _excludeUrlPrefixes = conf.UrlPrefixExclude;
+
+            _homeUrl = new Uri(conf.HomeUrl);
+
+            var listener = new HttpListener();
+            listener.Prefixes.Add("http://*:" + conf.OwsPort + "/");
+            listener.Start();
+            _server = listener;
+
+            // The listener travels as async state so the callback never depends on the _server field,
+            // which Stop() may have cleared in the meantime.
+            listener.BeginGetContext(ProcessRequest, listener);
+            LogInfo($"OWS runing on " + (string.Join("|", listener.Prefixes)));
+        }
+
+        /// <summary>
+        /// Translates a URL into the server's virtual path form.
+        /// </summary>
+        /// <param name="uri">Base URI; also the URI that is translated when <paramref name="link"/> is empty.</param>
+        /// <param name="link">Optional link, resolved against <paramref name="uri"/>.</param>
+        /// <param name="includedOnly">When true, links outside the archived scope are returned unchanged.</param>
+        /// <returns>The virtual path, or <paramref name="link"/> unchanged when it is not translated.</returns>
+        private string UrlTrancode(Uri uri, string link = null, bool includedOnly = false)
+        {
+            //       http://host:port/path?query#hash
+            // -->> /http/host/port/path?query#hash
+            if (false == string.IsNullOrEmpty(link))
+            {
+                if (link.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase)) return link;
+
+                // A single malformed link must not fail the whole page; leave it as it is.
+                if (false == Uri.TryCreate(uri, link, out var resolved)) return link;
+                uri = resolved;
+            }
+
+            if (includedOnly)
+            {
+                var url = uri.ToString();
+                if (_excludeUrlPrefixes.Any(p => url.StartsWith(p, StringComparison.Ordinal))) return link;
+
+                var hos = uri.Host;
+                if (hos != _homeUrl.Host && false == _hostInclude.Contains(hos)) return link;
+            }
+
+            return $"/{uri.Scheme}/{uri.Host}/{uri.Port}{uri.PathAndQuery}{uri.Fragment}";
+        }
+
+        /// <summary>
+        /// Accept callback: completes the pending accept, serves the request and queues the next accept.
+        /// </summary>
+        private void ProcessRequest(IAsyncResult ar)
+        {
+            var listener = (HttpListener)ar.AsyncState;
+
+            HttpListenerContext ctx;
+            try
+            {
+                ctx = listener.EndGetContext(ar);
+            }
+            catch (Exception e) when (e is HttpListenerException || e is ObjectDisposedException)
+            {
+                // Stopping the listener completes the pending accept with an error.
+                // Only keep accepting if the listener is in fact still running.
+                AcceptNext(listener);
+                return;
+            }
+
+            try
+            {
+                LogTrace($"{ctx.Request.HttpMethod} {ctx.Request.RawUrl} From {ctx.Request.RemoteEndPoint}");
+                HandleRequest(ctx);
+            }
+            catch (Exception e)
+            {
+                // E.g. the client disconnected while an error page was being written.
+                LogError($"Error when serving {ctx.Request.RawUrl}: {e.Message}");
+            }
+            finally
+            {
+                try
+                {
+                    LogTrace($"{ctx.Request.HttpMethod} {ctx.Request.RawUrl} -- {ctx.Response.StatusCode}");
+                    ctx.Response.Close();
+                }
+                catch
+                {
+                    //Do nothing!
+                }
+
+                // Always re-arm, otherwise one failed request would stop the server from answering.
+                AcceptNext(listener);
+            }
+        }
+
+        // Queues the next accept unless the listener has been stopped.
+        private void AcceptNext(HttpListener listener)
+        {
+            try
+            {
+                if (listener.IsListening) listener.BeginGetContext(ProcessRequest, listener);
+            }
+            catch (Exception e) when (e is HttpListenerException || e is ObjectDisposedException || e is InvalidOperationException)
+            {
+                // The listener was stopped between the check and the call; nothing left to do.
+            }
+        }
+
+        // Routes one request: root redirect, format check, or archive lookup.
+        private void HandleRequest(HttpListenerContext ctx)
+        {
+            //turn scheme/host/port to virtual path
+
+            if (ctx.Request.Url.LocalPath == "/")
+            {
+                ctx.Response.Redirect(UrlTrancode(_homeUrl));
+                return;
+            }
+
+            var path = ctx.Request.Url.GetComponents(UriComponents.PathAndQuery | UriComponents.Fragment, UriFormat.SafeUnescaped);
+            var parts = path.Split('/');
+
+            //check request path format /scheme/host/port/...
+            if (parts.Length < 4)
+            {
+                WriteHtml(ctx, 400, "Bad Archive Request", "<h1>400 Bad Archive Request</h1>");
+                return;
+            }
+
+            try
+            {
+                ServeArchiveEntry(ctx, parts);
+            }
+            catch (Exception e)
+            {
+                // The exception text can contain request data, so it is HTML-encoded.
+                WriteHtml(ctx, 500, "Internal Server Error", $"<h1>Error</h1><pre>{WebUtility.HtmlEncode(e.ToString())}</pre>");
+            }
+        }
+
+        // Looks up the archived entry addressed by the virtual path parts and writes it to the response.
+        private void ServeArchiveEntry(HttpListenerContext ctx, string[] parts)
+        {
+            //decode to raw url pass to db
+            var decodedUrl = $"{parts[1]}://{parts[2]}:{parts[3]}/{string.Join("/", parts.Skip(4))}";
+            var decodedUri = new Uri(decodedUrl);
+            var archiveUrl = decodedUri.ToString();
+
+            var entry = _db.GetEntry(archiveUrl);
+            if (entry == null || entry.StatusCode == 0)
+            {
+                // archiveUrl comes from the request, so it is HTML-encoded before being echoed.
+                WriteHtml(ctx, 404, "ArchiveEntryNotFound", $"<h1>404 Not Found In Archive by {WebUtility.HtmlEncode(archiveUrl)}</h1>");
+                return;
+            }
+
+            var headers = HttpHeaderUtility.ParseStringLines(entry.Headers);
+
+            ctx.Response.StatusCode = entry.StatusCode;
+            ctx.Response.StatusDescription = entry.StatusDescription;
+
+            string contentType = null;
+            var contentEncoding = _defaultEncoding;
+
+            // replace all urls to /scheme/host/port/path?query#hash
+            //  in header location
+            //  in HTML(href/src/embedded css)
+            //  in CSS (url)
+
+            foreach (var header in headers)
+            {
+                if (header.Name == "location") header.Value = UrlTrancode(decodedUri, header.Value);
+                if (header.Name == "content-type" && false == string.IsNullOrWhiteSpace(header.Value))
+                {
+                    try
+                    {
+                        var ct = new ContentType(header.Value);
+                        contentType = ct.MediaType;
+                        if (null != ct.CharSet) contentEncoding = Encoding.GetEncoding(ct.CharSet);
+                    }
+                    catch (Exception e) when (e is FormatException || e is ArgumentException)
+                    {
+                        // Malformed header or unknown charset stored with the entry:
+                        // serve the bytes with whatever could be determined.
+                        LogWarning($"Bad content-type <{header.Value}> for {archiveUrl}: {e.Message}");
+                    }
+                }
+                ctx.Response.Headers.Set(header.Name, header.Value);
+            }
+
+            var output = entry.Content;
+
+            if (string.Equals(contentType, "text/html", StringComparison.OrdinalIgnoreCase))
+            {
+                var replaced = LinkProcessor.ReplaceHtmlLinks(entry.Content, p => UrlTrancode(decodedUri, p, true), ref contentEncoding);
+                output = contentEncoding.GetBytes(replaced);
+            }
+            else if (string.Equals(contentType, "text/css", StringComparison.OrdinalIgnoreCase))
+            {
+                var css = contentEncoding.GetString(entry.Content);
+                var replaced = LinkProcessor.ReplaceCssLinks(css, p => UrlTrancode(decodedUri, p, true));
+                output = contentEncoding.GetBytes(replaced);
+            }
+
+            try
+            {
+                ctx.Response.OutputStream.Write(output, 0, output.Length);
+            }
+            catch (Exception e)
+            {
+                LogError($"Error when writing output: {e.Message}");
+            }
+        }
+
+        // Writes a small UTF-8 HTML page with the given status line.
+        private static void WriteHtml(HttpListenerContext ctx, int statusCode, string statusDescription, string html)
+        {
+            ctx.Response.StatusCode = statusCode;
+            ctx.Response.StatusDescription = statusDescription;
+            ctx.Response.ContentType = "text/html; charset=utf-8";
+            var buffer = Encoding.UTF8.GetBytes(html);
+            ctx.Response.OutputStream.Write(buffer, 0, buffer.Length);
+        }
+
+        /// <summary>Stops listening and releases the listener. Safe to call when the server was never started.</summary>
+        public override void Stop()
+        {
+            var listener = _server;
+            _server = null;
+
+            // Start() creates a fresh listener each time, so the old one is closed rather than just stopped.
+            listener?.Close();
+        }
+    }
+}

+ 36 - 0
Rac.Core/Properties/AssemblyInfo.cs

@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Rac.Core")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("Microsoft")]
+[assembly: AssemblyProduct("Rac.Core")]
+[assembly: AssemblyCopyright("Copyright © Microsoft 2017")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from COM,
+// set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("1d3345dc-ee7e-48ec-ae35-3f6e941560dc")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]

+ 140 - 0
Rac.Core/Rac.Core.csproj

@@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="C:\NuGetLocalRepo\SQLite.Native.3.12.3\build\net45\SQLite.Native.props" Condition="Exists('C:\NuGetLocalRepo\SQLite.Native.3.12.3\build\net45\SQLite.Native.props')" />
+  <Import Project="C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props" Condition="Exists('C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props')" />
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{1D3345DC-EE7E-48EC-AE35-3F6E941560DC}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Rac</RootNamespace>
+    <AssemblyName>Rac.Core</AssemblyName>
+    <TargetFrameworkVersion>v4.7.2</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <NuGetPackageImportStamp>
+    </NuGetPackageImportStamp>
+    <BaseIntermediateOutputPath>Z:\U00ST\$(MSBuildProjectName)\obj\</BaseIntermediateOutputPath>
+    <BaseOutputPath>Z:\U00ST\$(MSBuildProjectName)\</BaseOutputPath>
+    <OutputPath>$(BaseOutputPath)\bin\$(Platform)\$(Configuration)</OutputPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
+    <DebugSymbols>true</DebugSymbols>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DebugType>full</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <LangVersion>8</LangVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <LangVersion>8</LangVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
+    <DebugSymbols>true</DebugSymbols>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DebugType>full</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <LangVersion>8</LangVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <LangVersion>8</LangVersion>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="Alba.CsCss, Version=1.0.1.0, Culture=neutral, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\Alba.CsCss.1.0.1.0\lib\net35\Alba.CsCss.dll</HintPath>
+      <Private>True</Private>
+    </Reference>
+    <Reference Include="AngleSharp, Version=0.13.0.0, Culture=neutral, PublicKeyToken=e83494dcdc6d31ea, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\AngleSharp.0.13.0\lib\net46\AngleSharp.dll</HintPath>
+    </Reference>
+    <Reference Include="Dapper, Version=1.50.2.0, Culture=neutral, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\Dapper.1.50.2\lib\net451\Dapper.dll</HintPath>
+      <Private>True</Private>
+    </Reference>
+    <Reference Include="Mono.Data.Sqlite, Version=2.0.0.0, Culture=neutral, PublicKeyToken=0738eb9f132ed756, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\Mono.Data.Sqlite.Portable.1.0.3.5\lib\net4\Mono.Data.Sqlite.dll</HintPath>
+      <Private>True</Private>
+    </Reference>
+    <Reference Include="Newtonsoft.Json, Version=10.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll</HintPath>
+      <Private>True</Private>
+    </Reference>
+    <Reference Include="System" />
+    <Reference Include="System.Configuration" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Data.Portable, Version=4.0.0.0, Culture=neutral, PublicKeyToken=59e704a76bc4613a, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\Mono.Data.Sqlite.Portable.1.0.3.5\lib\net4\System.Data.Portable.dll</HintPath>
+      <Private>True</Private>
+    </Reference>
+    <Reference Include="System.Runtime.CompilerServices.Unsafe, Version=4.0.4.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\System.Runtime.CompilerServices.Unsafe.4.5.0\lib\netstandard2.0\System.Runtime.CompilerServices.Unsafe.dll</HintPath>
+    </Reference>
+    <Reference Include="System.Text.Encoding.CodePages, Version=4.1.1.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\System.Text.Encoding.CodePages.4.5.0\lib\net461\System.Text.Encoding.CodePages.dll</HintPath>
+    </Reference>
+    <Reference Include="System.Transactions" />
+    <Reference Include="System.Transactions.Portable, Version=4.0.0.0, Culture=neutral, PublicKeyToken=59e704a76bc4613a, processorArchitecture=MSIL">
+      <HintPath>C:\NuGetLocalRepo\Mono.Data.Sqlite.Portable.1.0.3.5\lib\net4\System.Transactions.Portable.dll</HintPath>
+      <Private>True</Private>
+    </Reference>
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Crawler.cs" />
+    <Compile Include="FileBrowseServer.cs" />
+    <Compile Include="Models\HttpHeader.cs" />
+    <Compile Include="Tools\DataAccess.cs" />
+    <Compile Include="Entities\ArchiveEntry.cs" />
+    <Compile Include="Entities\ConfigEntry.cs" />
+    <Compile Include="Entities\ResourceLinkEntry.cs" />
+    <Compile Include="Tools\HttpHeaderUtility.cs" />
+    <Compile Include="Tools\LinkProcessor.cs" />
+    <Compile Include="Models\LogEventArgs.cs" />
+    <Compile Include="Models\LogLevel.cs" />
+    <Compile Include="OfflineWebServer.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Common\BaseService.cs" />
+    <Compile Include="Tools\ConfigAdapter.cs" />
+    <Compile Include="Tools\InternalUtility.cs" />
+    <Compile Include="Tools\Requester.cs" />
+    <Compile Include="Models\Response.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="app.config" />
+    <None Include="packages.config" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
+    <PropertyGroup>
+      <ErrorText>此项目引用这台计算机上缺少的 NuGet 程序包。使用 NuGet 程序包还原可下载这些程序包。有关详细信息,请参阅 http://go.microsoft.com/fwlink/?LinkID=322105。缺少的文件是 {0}。</ErrorText>
+    </PropertyGroup>
+    <Error Condition="!Exists('C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props')" Text="$([System.String]::Format('$(ErrorText)', 'C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props'))" />
+    <Error Condition="!Exists('C:\NuGetLocalRepo\SQLite.Native.3.12.3\build\net45\SQLite.Native.props')" Text="$([System.String]::Format('$(ErrorText)', 'C:\NuGetLocalRepo\SQLite.Native.3.12.3\build\net45\SQLite.Native.props'))" />
+  </Target>
+  <Import Project="C:\NuGetLocalRepo\Mono.Data.Sqlite.Portable.1.0.3.5\tools\Mono.Data.Sqlite.Portable.targets" Condition="Exists('C:\NuGetLocalRepo\Mono.Data.Sqlite.Portable.1.0.3.5\tools\Mono.Data.Sqlite.Portable.targets')" />
+  <Target Name="EnsureMonoDataSqlitePortableImported" BeforeTargets="BeforeBuild" Condition="'$(MonoDataSqlitePortableImported)' == ''">
+    <Error Condition="!Exists('C:\NuGetLocalRepo\Mono.Data.Sqlite.Portable.1.0.3.5\tools\Mono.Data.Sqlite.Portable.targets')" Text="This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them." />
+    <Error Condition="Exists('C:\NuGetLocalRepo\Mono.Data.Sqlite.Portable.1.0.3.5\tools\Mono.Data.Sqlite.Portable.targets')" Text="The build restored NuGet packages. Build the project again to include these packages in the build." />
+  </Target>
+</Project>

+ 51 - 0
Rac.Core/Tools/ConfigAdapter.cs

@@ -0,0 +1,51 @@
+using System;
+using System.Collections.Generic;
+using System.Configuration;
+using System.Runtime.CompilerServices;
+
+namespace Rac.Tools
+{
+    /// <summary>
+    /// Typed, read-only view over a string-to-string configuration dictionary.
+    /// </summary>
+    /// <remarks>
+    /// Each public property looks up the dictionary entry whose key equals the
+    /// property's own name; the name is supplied by <see cref="CallerMemberNameAttribute"/>.
+    /// </remarks>
+    internal class ConfigAdapter
+    {
+        // Multi-line values may use any of the common line-ending conventions.
+        private static readonly string[] LineSeparators = { "\r\n", "\r", "\n" };
+
+        private readonly Dictionary<string, string> _dicConf;
+
+        /// <summary>Wraps the given configuration dictionary.</summary>
+        /// <param name="dicConf">Configuration entries keyed by setting name.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="dicConf"/> is null.</exception>
+        public ConfigAdapter(Dictionary<string, string> dicConf)
+        {
+            // Fail at construction instead of with a NullReferenceException on first read.
+            _dicConf = dicConf ?? throw new ArgumentNullException(nameof(dicConf));
+        }
+
+        /// <summary>Returns the raw value stored under <paramref name="confKey"/>, or null when the key is absent.</summary>
+        private string GetValue([CallerMemberName] string confKey = "")
+        {
+            // Single lookup instead of ContainsKey followed by the indexer.
+            return _dicConf.TryGetValue(confKey, out var value)
+                ? value
+                : null;
+        }
+
+        /// <summary>Returns the value split into its non-empty lines; a missing or null value yields an empty array.</summary>
+        private string[] GetValues([CallerMemberName] string confKey = "")
+        {
+            // ReSharper disable once ExplicitCallerInfoArgument
+            return (GetValue(confKey) ?? "").Split(LineSeparators, StringSplitOptions.RemoveEmptyEntries);
+        }
+
+        /// <summary>Returns the value parsed as a 32-bit integer.</summary>
+        /// <exception cref="ConfigurationErrorsException">The value is missing, null or not an integer.</exception>
+        private int GetInt32Value([CallerMemberName] string confKey = "")
+        {
+            // ReSharper disable once ExplicitCallerInfoArgument
+            var value = GetValue(confKey);
+
+            if (int.TryParse(value, out var int32Value))
+            {
+                return int32Value;
+            }
+
+            throw new ConfigurationErrorsException($"Bad config {confKey}");
+        }
+
+        public string HomeUrl => GetValue();
+        public string[] HostsInclude => GetValues();
+        public string[] UrlPrefixExclude => GetValues();
+        public int Parallel => GetInt32Value();
+        public int OwsPort => GetInt32Value();
+
+        public string DefaultCharset => GetValue();
+    }
+}

+ 204 - 0
Rac.Core/Tools/DataAccess.cs

@@ -0,0 +1,204 @@
+using Dapper;
+using Mono.Data.Sqlite;
+using Rac.Entities;
+using System;
+using System.Collections.Generic;
+using System.Data;
+using System.Data.Common;
+using System.IO;
+using System.Linq;
+
+namespace Rac.Tools
+{
+    /// <summary>
+    /// SQLite-backed storage for the configuration entries, archived responses
+    /// and resource links of one project database file.
+    /// </summary>
+    /// <remarks>
+    /// Methods that accept an optional connection run on it when one is supplied
+    /// (and leave it open); otherwise they open and dispose their own connection.
+    /// </remarks>
+    internal class DataAccess
+    {
+        private readonly string _filename;
+
+        /// <summary>Binds to <paramref name="filename"/> and creates the schema when that file does not exist yet.</summary>
+        /// <param name="filename">Path of the SQLite database file.</param>
+        public DataAccess(string filename)
+        {
+            _filename = filename;
+
+            CreateDatabaseIfNotExist();
+        }
+
+        /// <summary>
+        /// Creates the Configs, Archives and ResourceLinks tables and seeds one
+        /// value-less Configs row per <see cref="ConfigAdapter"/> setting.
+        /// Does nothing when the database file already exists.
+        /// </summary>
+        private void CreateDatabaseIfNotExist()
+        {
+            // NOTE(review): existence of the file is the only initialization marker; if the
+            // batch below fails part-way, a later run presumably finds the file and skips
+            // schema creation - confirm whether that needs handling.
+            if (File.Exists(_filename)) return;
+
+            using (var conn = GetConnection())
+            {
+                conn.Execute(
+                    //Create table:Configs
+                    "CREATE TABLE Configs("
+                     + $"{nameof(ConfigEntry.Key)} VARCHAR(16) PRIMARY KEY,"
+                     + $"{nameof(ConfigEntry.Value)} VARCHAR(512)"
+                     + ");" +
+
+                     //Create table:Archives
+                     "CREATE TABLE Archives("
+                     + $"{nameof(ArchiveEntry.Url)} VARCHAR(512) PRIMARY KEY,"
+                     + $"{nameof(ArchiveEntry.LastWriteTime)} DATETIME,"
+                     + $"{nameof(ArchiveEntry.StatusCode)} INT,"
+                     + $"{nameof(ArchiveEntry.StatusDescription)} VARCHAR(128),"
+                     + $"{nameof(ArchiveEntry.Headers)} VARCHAR(512),"
+                     + $"{nameof(ArchiveEntry.Content)} BLOB"
+                     + ");" +
+
+                     //Create table:ResourceLinks
+                     "CREATE TABLE ResourceLinks("
+                     + $"{nameof(ResourceLinkEntry.Resource)} VARCHAR(512),"
+                     + $"{nameof(ResourceLinkEntry.Link)} VARCHAR(512),"
+                     + $"{nameof(ResourceLinkEntry.Included)} BOOLEAN,"
+                     + $"{nameof(ResourceLinkEntry.Remark)} VARCHAR(512),"
+                     + $"PRIMARY KEY ({nameof(ResourceLinkEntry.Resource)},{nameof(ResourceLinkEntry.Link)})"
+                     + ");"
+
+                     //Init Data
+                     + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HomeUrl)}');"
+                     + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HostsInclude)}');"
+                     + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.UrlPrefixExclude)}');"
+                     + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.OwsPort)}');"
+                     + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.Parallel)}');"
+                     + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.DefaultCharset)}');"
+                );
+            }
+        }
+
+        /// <summary>
+        /// Applies <paramref name="proc"/> to every item on one shared connection inside a
+        /// single transaction and returns the sum of the per-item results.
+        /// The transaction is committed only after every item has been processed.
+        /// </summary>
+        private int BulkOperation<T>(IEnumerable<T> items, Func<T, DbConnection, int> proc)
+        {
+            using (var conn = GetConnection())
+            using (var t = conn.BeginTransaction())
+            {
+                var count = items.Sum(p => proc(p, conn));
+
+                t.Commit();
+                return count;
+            }
+        }
+
+        /// <summary>Opens a new connection to the database file; the caller disposes it.</summary>
+        private DbConnection GetConnection()
+        {
+            var conn = new SqliteConnection("Data Source=" + _filename);
+            conn.Open();
+            return conn;
+        }
+
+        /// <summary>
+        /// Runs <paramref name="action"/> on <paramref name="conn"/> when the caller supplied one
+        /// (the connection stays open), otherwise on a freshly opened connection that is
+        /// disposed afterwards.
+        /// </summary>
+        private T WithConnection<T>(DbConnection conn, Func<IDbConnection, T> action)
+        {
+            if (null != conn)
+            {
+                return action(conn);
+            }
+
+            using (var owned = GetConnection())
+            {
+                return action(owned);
+            }
+        }
+
+        // --- common ---
+
+        /// <summary>Reads every row of the Configs table into a key/value dictionary.</summary>
+        public Dictionary<string, string> GetConfigs()
+        {
+            using (var conn = GetConnection())
+            {
+                return conn.Query<ConfigEntry>("select * from Configs")
+                           .ToDictionary(p => p.Key, p => p.Value);
+            }
+        }
+
+        // --- for crawler ---
+
+        /// <summary>Returns up to <paramref name="num"/> URLs of archive rows whose status code is still NULL.</summary>
+        public string[] GetNonDumpedUrls(int num)
+        {
+            using (var conn = GetConnection())
+            {
+                return conn.Query<ArchiveEntry>($"select {nameof(ArchiveEntry.Url)} from Archives WHERE {nameof(ArchiveEntry.StatusCode)} IS NULL LIMIT @num", new { num })
+                           .Select(p => p.Url).ToArray();
+            }
+        }
+
+        /// <summary>Inserts an archive row for <paramref name="url"/> unless one already exists.</summary>
+        /// <param name="url">URL used as the primary key of the row.</param>
+        /// <param name="conn">Optional connection to run on; a temporary one is opened when null.</param>
+        /// <returns>1 when a row was inserted, 0 when the URL was already present.</returns>
+        public int CreateArchiveEntryIfNotExist(string url, DbConnection conn = null)
+        {
+            // One atomic statement instead of SELECT-then-INSERT: two connections racing on the
+            // same URL can no longer hit a primary-key violation between check and insert.
+            return WithConnection(conn, con => con.Execute(
+                $"INSERT OR IGNORE INTO Archives ({nameof(ArchiveEntry.Url)},{nameof(ArchiveEntry.LastWriteTime)}) VALUES(@url,@now)",
+                new { url, now = DateTime.Now }));
+        }
+
+        /// <summary>
+        /// Stamps <paramref name="entryToUpdate"/> with the current local time and writes its
+        /// status, headers and content to the archive row with the same URL.
+        /// </summary>
+        /// <param name="entryToUpdate">Entry to persist; its LastWriteTime is overwritten.</param>
+        /// <param name="conn">Optional connection to run on; a temporary one is opened when null.</param>
+        /// <returns>The number of rows affected.</returns>
+        public int UpdateArchiveEntity(ArchiveEntry entryToUpdate, DbConnection conn = null)
+        {
+            entryToUpdate.LastWriteTime = DateTime.Now;
+
+            return WithConnection(conn, con => con.Execute(
+                "UPDATE Archives SET "
+                + $"{nameof(ArchiveEntry.LastWriteTime)}=@{nameof(ArchiveEntry.LastWriteTime)},"
+                + $"{nameof(ArchiveEntry.StatusCode)}=@{nameof(ArchiveEntry.StatusCode)},"
+                + $"{nameof(ArchiveEntry.StatusDescription)}=@{nameof(ArchiveEntry.StatusDescription)},"
+                + $"{nameof(ArchiveEntry.Headers)}=@{nameof(ArchiveEntry.Headers)},"
+                + $"{nameof(ArchiveEntry.Content)}=@{nameof(ArchiveEntry.Content)} " +
+                $"WHERE {nameof(ArchiveEntry.Url)}=@{nameof(ArchiveEntry.Url)}", entryToUpdate));
+        }
+
+        /// <summary>Inserts a resource link unless the (Resource, Link) pair already exists.</summary>
+        /// <param name="entry">Link to store.</param>
+        /// <param name="conn">Optional connection to run on; a temporary one is opened when null.</param>
+        /// <returns>1 when a row was inserted, 0 when the pair was already present.</returns>
+        public int AddPageLink(ResourceLinkEntry entry, DbConnection conn = null)
+        {
+            // Atomic insert-if-absent on the composite primary key (see CreateArchiveEntryIfNotExist).
+            return WithConnection(conn, con => con.Execute(
+                "INSERT OR IGNORE INTO ResourceLinks ("
+                + $"{nameof(ResourceLinkEntry.Resource)},"
+                + $"{nameof(ResourceLinkEntry.Link)},"
+                + $"{nameof(ResourceLinkEntry.Included)},"
+                + $"{nameof(ResourceLinkEntry.Remark)}" +
+                ") VALUES ("
+                + $"@{nameof(ResourceLinkEntry.Resource)},"
+                + $"@{nameof(ResourceLinkEntry.Link)},"
+                + $"@{nameof(ResourceLinkEntry.Included)},"
+                + $"@{nameof(ResourceLinkEntry.Remark)}" +
+                ")", entry));
+        }
+
+        /// <summary>Creates archive rows for all given URLs in one transaction; returns how many were new.</summary>
+        public int BulkAddNewArchiveEntry(IEnumerable<string> urls)
+        {
+            return BulkOperation(urls, CreateArchiveEntryIfNotExist);
+        }
+
+        /// <summary>Updates all given archive entries in one transaction; returns the total rows affected.</summary>
+        public int BulkUpdateArchiveEntry(IEnumerable<ArchiveEntry> items)
+        {
+            return BulkOperation(items, UpdateArchiveEntity);
+        }
+
+        /// <summary>
+        /// Stores one link row per entry of <paramref name="links"/>, all sharing the same
+        /// resource, inclusion flag and remark, in one transaction; returns how many were new.
+        /// </summary>
+        public int BulkAddResourceLink(string page, bool included, string remark, params string[] links)
+        {
+            return BulkOperation(links.Select(p => new ResourceLinkEntry { Resource = page, Link = p, Included = included, Remark = remark }), AddPageLink);
+        }
+
+        /// <summary>Stores all given link entries in one transaction; returns how many were new.</summary>
+        public int BulkAddResourceLink(IEnumerable<ResourceLinkEntry> items)
+        {
+            return BulkOperation(items, AddPageLink);
+        }
+
+        // --- for offline server ---
+
+        /// <summary>Returns the archive row stored for <paramref name="url"/>, or null when there is none.</summary>
+        public ArchiveEntry GetEntry(string url)
+        {
+            using (var conn = GetConnection())
+            {
+                return conn.Query<ArchiveEntry>($"SELECT * FROM Archives WHERE {nameof(ArchiveEntry.Url)}=@url", new { url }).FirstOrDefault();
+            }
+        }
+    }
+}

+ 37 - 0
Rac.Core/Tools/HttpHeaderUtility.cs

@@ -0,0 +1,37 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Rac.Models;
+
+namespace Rac.Tools
+{
+    /// <summary>
+    /// Converts between <see cref="HttpHeader"/> objects and their "Name: Value" text form.
+    /// </summary>
+    internal static class HttpHeaderUtility
+    {
+        /// <summary>
+        /// Splits one header line at its first colon. The name is everything before the
+        /// colon (untrimmed); the value is the trimmed remainder. A line without a colon
+        /// becomes a header with that line as name and an empty value.
+        /// </summary>
+        public static HttpHeader Parse(string line)
+        {
+            var colonAt = line.IndexOf(':');
+            if (colonAt < 0)
+            {
+                return new HttpHeader { Name = line, Value = "" };
+            }
+
+            var name = line.Substring(0, colonAt);
+            var value = line.Substring(colonAt + 1).Trim();
+            return new HttpHeader { Name = name, Value = value };
+        }
+
+        /// <summary>Parses every line with <see cref="Parse(string)"/>, preserving order.</summary>
+        public static HttpHeader[] Parse(string[] lines)
+            => lines.Select(headerLine => Parse(headerLine)).ToArray();
+
+        /// <summary>Parses a CRLF-separated block of header lines, skipping empty lines.</summary>
+        public static HttpHeader[] ParseStringLines(string lines)
+        {
+            var separated = lines.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
+            return Parse(separated);
+        }
+
+        /// <summary>Joins the string form of each header with CRLF.</summary>
+        public static string ToStringLines(this IEnumerable<HttpHeader> items)
+        {
+            var rendered = from header in items select header.ToString();
+            return string.Join("\r\n", rendered);
+        }
+    }
+}

+ 39 - 0
Rac.Core/Tools/InternalUtility.cs

@@ -0,0 +1,39 @@
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using AngleSharp;
+using AngleSharp.Html.Parser;
+
+namespace Rac.Tools
+{
+    /// <summary>Small extension and helper methods shared inside the assembly.</summary>
+    internal static class InternalUtility
+    {
+        /// <summary>Copies the rest of <paramref name="stream"/> into a new byte array.</summary>
+        public static byte[] ToBytes(this Stream stream)
+        {
+            using (var buffer = new MemoryStream())
+            {
+                stream.CopyTo(buffer);
+                return buffer.ToArray();
+            }
+        }
+
+        /// <summary>Returns true when <paramref name="value"/> equals one of <paramref name="checkIn"/>.</summary>
+        public static bool In<T>(this T value, params T[] checkIn) => checkIn.Contains(value);
+
+        /// <summary>
+        /// Parses <paramref name="body"/> as an HTML document and returns its serialized markup.
+        /// </summary>
+        /// <param name="body">Raw bytes of the document.</param>
+        /// <param name="charset">Receives the character set reported by the parsed document.</param>
+        public static string LoadHtml(byte[] body, out string charset)
+        {
+            using (var input = new MemoryStream(body))
+            {
+                var document = new HtmlParser().ParseDocument(input);
+                charset = document.CharacterSet;
+                return document.ToHtml();
+            }
+        }
+
+        /// <summary>Adds every element of <paramref name="toAdd"/> to <paramref name="hashSet"/>.</summary>
+        public static void AddRange<T>(this HashSet<T> hashSet, IEnumerable<T> toAdd)
+        {
+            foreach (var element in toAdd)
+            {
+                hashSet.Add(element);
+            }
+        }
+    }
+}

+ 75 - 0
Rac.Core/Tools/LinkProcessor.cs

@@ -0,0 +1,75 @@
+using Alba.CsCss.Style;
+using AngleSharp;
+using AngleSharp.Html.Dom;
+using AngleSharp.Html.Parser;
+using System;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace Rac.Tools
+{
+    // This class could be turned into an instance class so that different
+    //  drivers (StringParser/WebBrowserEmu/etc.) can be implemented.
+
+    /// <summary>
+    /// Extracts and rewrites the links found in CSS text and HTML documents.
+    /// </summary>
+    public class LinkProcessor
+    {
+        private static readonly CssLoader CssLoader = new CssLoader();
+
+        /// <summary>Returns the URIs that the CSS loader finds in <paramref name="css"/>.</summary>
+        public static string[] FromCss(string css)
+        {
+            return CssLoader.GetUris(css).ToArray();
+        }
+
+        /// <summary>
+        /// Collects the links of a document: first the URIs inside every &lt;style&gt; element,
+        /// then the <c>src</c> and <c>href</c> attribute values of every element carrying them.
+        /// </summary>
+        public static string[] ExtractLinks(IHtmlDocument doc)
+        {
+            //extract urls from embedded style
+            var links = doc.QuerySelectorAll("style")
+                .SelectMany(style => FromCss(style.TextContent))
+                .ToList();
+
+            //extract urls from page
+            // An element may carry both attributes; report both, matching ReplaceHtmlLinks,
+            // which rewrites src and href independently.
+            links.AddRange(doc.QuerySelectorAll("*[src],*[href]")
+                .SelectMany(e => new[] { e.Attributes["src"]?.Value, e.Attributes["href"]?.Value })
+                .Where(link => link != null));
+
+            return links.ToArray();
+        }
+
+        /// <summary>
+        /// Replaces every distinct link found in <paramref name="css"/> with the result of
+        /// <paramref name="func"/>, except the bare "//" link, which is left untouched.
+        /// </summary>
+        public static string ReplaceCssLinks(string css, Func<string, string> func)
+        {
+            // Longest links first, so a short link that is a substring of a longer one
+            // does not rewrite part of the longer one before it is handled.
+            var links = FromCss(css).OrderByDescending(p => p.Length).Distinct().ToArray();
+            foreach (var link in links)
+            {
+                if (link != "//")
+                {
+                    css = css.Replace(link, func(link));
+                }
+            }
+
+            return css;
+        }
+
+        /// <summary>
+        /// Parses <paramref name="html"/>, rewrites the links in its &lt;style&gt; elements and
+        /// in all <c>src</c> / <c>href</c> attributes through <paramref name="func"/>, and
+        /// returns the serialized document.
+        /// </summary>
+        /// <param name="html">Raw bytes of the document.</param>
+        /// <param name="func">Maps an original link to its replacement.</param>
+        /// <param name="encoding">
+        /// Encoding used to decode <paramref name="html"/>. When null, the parser reads the
+        /// bytes directly and this parameter is set from the document's character set on return.
+        /// </param>
+        public static string ReplaceHtmlLinks(byte[] html, Func<string, string> func, ref Encoding encoding)
+        {
+            IHtmlDocument doc;
+            if (null == encoding)
+            {
+                using (var stream = new MemoryStream(html))
+                {
+                    doc = new HtmlParser().ParseDocument(stream);
+                }
+            }
+            else
+            {
+                var source = encoding.GetString(html);
+                doc = new HtmlParser().ParseDocument(source);
+            }
+
+            foreach (var style in doc.QuerySelectorAll("style"))
+            {
+                style.TextContent = ReplaceCssLinks(style.TextContent, func);
+            }
+
+            ReplaceAttributeLinks(doc, "src", func);
+            ReplaceAttributeLinks(doc, "href", func);
+
+            if (null == encoding)
+            {
+                encoding = Encoding.GetEncoding(doc.CharacterSet);
+            }
+
+            return doc.ToHtml();
+        }
+
+        /// <summary>Rewrites the named attribute on every element that has it, skipping the bare "//" value.</summary>
+        private static void ReplaceAttributeLinks(IHtmlDocument doc, string attributeName, Func<string, string> func)
+        {
+            foreach (var element in doc.QuerySelectorAll($"*[{attributeName}]"))
+            {
+                var attribute = element.Attributes[attributeName];
+                if (attribute != null && attribute.Value != "//")
+                {
+                    attribute.Value = func(attribute.Value);
+                }
+            }
+        }
+    }
+}

+ 43 - 0
Rac.Core/Tools/Requester.cs

@@ -0,0 +1,43 @@
+using Rac.Models;
+using System.Net;
+
+namespace Rac.Tools
+{
+    // This class could be turned into an instance class so that different
+    //  drivers (WebRequest/WinNet/Sock/etc., e.g. via a proxy) can be implemented.
+
+    /// <summary>Performs HTTP requests and captures the raw response.</summary>
+    internal static class Requester
+    {
+        /// <summary>
+        /// Requests <paramref name="url"/> without following redirects and returns the
+        /// status, headers and body of whatever the server answered.
+        /// </summary>
+        /// <param name="url">Absolute HTTP(S) URL to fetch.</param>
+        /// <returns>The captured response, including responses reported as protocol errors.</returns>
+        /// <exception cref="WebException">
+        /// The request failed for a reason other than a protocol error.
+        /// </exception>
+        public static Response GetHttp(string url)
+        {
+            var req = WebRequest.CreateHttp(url);
+            req.UserAgent = "RawArchiveCrawler";
+
+            req.AllowAutoRedirect = false; //record redirections
+
+            try
+            {
+                // Dispose the response so its underlying connection is released.
+                using (var resp = (HttpWebResponse)req.GetResponse())
+                {
+                    return Capture(resp);
+                }
+            }
+            catch (WebException e) when (e.Status == WebExceptionStatus.ProtocolError)
+            {
+                // A protocol error still carries a server response; record it like any other.
+                using (var resp = (HttpWebResponse)e.Response)
+                {
+                    return Capture(resp);
+                }
+            }
+        }
+
+        /// <summary>Copies status, headers and the fully read body out of <paramref name="resp"/>.</summary>
+        private static Response Capture(HttpWebResponse resp)
+        {
+            using (var body = resp.GetResponseStream())
+            {
+                return new Response
+                {
+                    StatusCode = resp.StatusCode,
+                    StatusDescription = resp.StatusDescription,
+                    Headers = resp.Headers,
+                    Body = body.ToBytes()
+                };
+            }
+        }
+    }
+}

+ 11 - 0
Rac.Core/app.config

@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="utf-8"?>
+<configuration>
+  <runtime>
+    <assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
+      <dependentAssembly>
+        <assemblyIdentity name="System.Text.Encoding.CodePages" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+        <bindingRedirect oldVersion="0.0.0.0-4.1.1.0" newVersion="4.1.1.0" />
+      </dependentAssembly>
+    </assemblyBinding>
+  </runtime>
+</configuration>

+ 12 - 0
Rac.Core/packages.config

@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="utf-8"?>
+<packages>
+  <package id="Alba.CsCss" version="1.0.1.0" targetFramework="net452" />
+  <package id="AngleSharp" version="0.13.0" targetFramework="net472" />
+  <package id="Dapper" version="1.50.2" targetFramework="net452" />
+  <package id="ILRepack" version="2.0.13" targetFramework="net452" />
+  <package id="Mono.Data.Sqlite.Portable" version="1.0.3.5" targetFramework="net452" />
+  <package id="Newtonsoft.Json" version="10.0.3" targetFramework="net452" />
+  <package id="SQLite.Native" version="3.12.3" targetFramework="net452" />
+  <package id="System.Runtime.CompilerServices.Unsafe" version="4.5.0" targetFramework="net472" />
+  <package id="System.Text.Encoding.CodePages" version="4.5.0" targetFramework="net472" />
+</packages>

+ 14 - 0
Rac.Executer/App.config

@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<configuration>
+    <startup> 
+        <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.7.2" />
+    </startup>
+  <runtime>
+    <assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
+      <dependentAssembly>
+        <assemblyIdentity name="System.Text.Encoding.CodePages" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+        <bindingRedirect oldVersion="0.0.0.0-4.1.1.0" newVersion="4.1.1.0" />
+      </dependentAssembly>
+    </assemblyBinding>
+  </runtime>
+</configuration>

+ 96 - 0
Rac.Executer/Program.cs

@@ -0,0 +1,96 @@
+using System;
+using System.IO;
+
+namespace Rac
+{
+    /// <summary>
+    /// Console entry point: selects the crawler, offline server or browser mode from the
+    /// command line and runs it until ENTER is pressed.
+    /// </summary>
+    internal class Program
+    {
+        //TODO: Folder browser
+        /// <summary>
+        /// Parses <c>[mode] [database]</c>. With no arguments the crawler runs on
+        /// "project.db3"; an unknown mode prints the usage text.
+        /// </summary>
+        private static void Main(string[] args)
+        {
+            var db = "project.db3";
+
+            void PrintUsage()
+            {
+                Console.WriteLine("args: [mode] [args]");
+                Console.WriteLine("      crawler [project.db3] * default mode");
+                Console.WriteLine("      offline [project.db3]");
+                Console.WriteLine("      browser [project.db3]");
+            }
+
+            if (args.Length == 0)
+            {
+                RunCrawler(db);
+            }
+            else
+            {
+                if (args.Length > 1) db = args[1];
+
+                // Invariant lower-casing: a culture-sensitive ToLower() would turn the 'I' of
+                // "OFFLINE" into a dotless i under Turkish cultures and miss the case label.
+                switch (args[0].ToLowerInvariant())
+                {
+                    case "crawler":
+                        RunCrawler(db);
+                        break;
+
+                    case "offline":
+                        RunOffline(db);
+                        break;
+
+                    case "browser":
+                        RunBrowser(db);
+                        break;
+
+                    default:
+                        PrintUsage();
+                        break;
+                }
+            }
+
+            Console.Write("Finished, Press ENTER to exit...");
+            Console.ReadLine();
+        }
+
+
+        /// <summary>
+        /// Runs the crawler on <paramref name="db"/>, resolved against the application base
+        /// directory, until ENTER is pressed; stops it if it is still running.
+        /// </summary>
+        private static void RunCrawler(string db)
+        {
+            // NOTE(review): only this mode combines db with the base directory; the offline
+            // and browser modes pass it through unchanged - confirm that is intended.
+            var inst = new Crawler(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, db));
+            inst.Log += (sender, ea) => Console.WriteLine($"RAC -- {ea.Level} -- {ea.Log}");
+
+            Console.WriteLine("Press ENTER to stop.");
+            inst.Start();
+
+            Console.ReadLine();
+            if (inst.IsRunning)
+            {
+                Console.WriteLine("Stopping...");
+                inst.Stop();
+                Console.WriteLine();
+            }
+        }
+
+        /// <summary>Runs the offline web server on <paramref name="db"/> until ENTER is pressed.</summary>
+        private static void RunOffline(string db)
+        {
+            var inst = new OfflineWebServer(db);
+            inst.Log += (sender, ea) => Console.WriteLine($"OWS -- {ea.Level} -- {ea.Log}");
+
+            Console.WriteLine("Press ENTER to stop.");
+            inst.Start();
+
+            Console.ReadLine();
+            inst.Stop();
+        }
+
+        /// <summary>Runs the file browse server on <paramref name="db"/> until ENTER is pressed.</summary>
+        private static void RunBrowser(string db)
+        {
+            var inst = new FileBrowseServer(db);
+            inst.Log += (sender, ea) => Console.WriteLine($"OWS -- {ea.Level} -- {ea.Log}");
+
+            Console.WriteLine("Press ENTER to stop.");
+            inst.Start();
+
+            Console.ReadLine();
+            inst.Stop();
+        }
+
+    }
+}

+ 36 - 0
Rac.Executer/Properties/AssemblyInfo.cs

@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Rac.Executer")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("Microsoft")]
+[assembly: AssemblyProduct("Rac.Executer")]
+[assembly: AssemblyCopyright("Copyright © Microsoft 2017")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from COM,
+// set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("15cefd6a-a0e8-4543-ae7a-67a780eb02a8")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version
+//      Build Number
+//      Revision
+//
+// You can specify all the values, or you can default the Build and Revision
+// numbers by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]

+ 97 - 0
Rac.Executer/Rac.Executer.csproj

@@ -0,0 +1,97 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props" Condition="Exists('C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props')" />
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{15CEFD6A-A0E8-4543-AE7A-67A780EB02A8}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <RootNamespace>Rac</RootNamespace>
+    <AssemblyName>RawArchiveCrawler</AssemblyName>
+    <TargetFrameworkVersion>v4.7.2</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
+    <NuGetPackageImportStamp>
+    </NuGetPackageImportStamp>
+    <BaseIntermediateOutputPath>Z:\U00ST\$(MSBuildProjectName)\obj\</BaseIntermediateOutputPath>
+    <BaseOutputPath>Z:\U00ST\$(MSBuildProjectName)\</BaseOutputPath>
+    <OutputPath>$(BaseOutputPath)\bin\$(Platform)\$(Configuration)</OutputPath>
+  </PropertyGroup>
+  <PropertyGroup>
+    <RunPostBuildEvent>OnOutputUpdated</RunPostBuildEvent>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
+    <DebugSymbols>true</DebugSymbols>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DebugType>full</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
+    <DebugSymbols>true</DebugSymbols>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DebugType>full</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Program.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="App.config" />
+    <None Include="packages.config" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Rac.Core\Rac.Core.csproj">
+      <Project>{1d3345dc-ee7e-48ec-ae35-3f6e941560dc}</Project>
+      <Name>Rac.Core</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <PropertyGroup>
+    <PostBuildEvent>if $(ConfigurationName) == Release if not exist $(TargetDir)Packed md $(TargetDir)Packed
+if $(ConfigurationName) == Release $(ILRepack) /ndebug /out:$(TargetDir)Packed\$(TargetFileName) $(TargetPath) Rac.Core.dll Dapper.dll Mono.Data.Sqlite.dll AngleSharp.dll Alba.CsCss.dll System.Text.Encoding.CodePages.dll System.Runtime.CompilerServices.Unsafe.dll
+if $(ConfigurationName) == Release copy $(TargetDir)sqlite3.dll $(TargetDir)Packed\sqlite3.dll
+if $(ConfigurationName) == Release if exist $(TargetDir)Packed\$(TargetFileName).config del $(TargetDir)Packed\$(TargetFileName).config</PostBuildEvent>
+  </PropertyGroup>
+  <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
+    <PropertyGroup>
+      <ErrorText>此项目引用这台计算机上缺少的 NuGet 程序包。使用 NuGet 程序包还原可下载这些程序包。有关详细信息,请参阅 http://go.microsoft.com/fwlink/?LinkID=322105。缺少的文件是 {0}。</ErrorText>
+    </PropertyGroup>
+    <Error Condition="!Exists('C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props')" Text="$([System.String]::Format('$(ErrorText)', 'C:\NuGetLocalRepo\ILRepack.2.0.13\build\ILRepack.props'))" />
+  </Target>
+</Project>

+ 4 - 0
Rac.Executer/packages.config

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<packages>
+  <package id="ILRepack" version="2.0.13" targetFramework="net452" />
+</packages>