using Rac.Common;
using Rac.Tools;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Mime;
using System.Text;

namespace Rac
{
    /// <summary>
    /// HTTP front end for an archive database. Archived URLs are addressed through a
    /// virtual path of the form <c>/scheme/host/port/path?query#hash</c>; links found in
    /// served HTML/CSS and in <c>location</c> headers are rewritten to that form.
    /// </summary>
    public class OfflineWebServer : BaseService
    {
        private readonly string _dbFilePath;
        private DataAccess _db;
        private HttpListener _server;
        private Uri _homeUrl;
        private HashSet<string> _hostInclude;
        private string[] _excludeUrlPrefixes;
        private Encoding _defaultEncoding;

        /// <summary>Creates a server over the archive database at <paramref name="dbFilePath"/>.</summary>
        /// <param name="dbFilePath">Path of the database file; checked for existence in <see cref="Start"/>.</param>
        public OfflineWebServer(string dbFilePath)
        {
            _dbFilePath = dbFilePath;
        }

        /// <summary>
        /// Opens the database, reads its configuration and starts listening on
        /// <c>http://*:{OwsPort}/</c>. Logs a fatal message and returns without starting
        /// when the database file does not exist.
        /// </summary>
        public override void Start()
        {
            if (false == File.Exists(_dbFilePath))
            {
                LogFatal($"Can not find database file:{_dbFilePath}");
                return;
            }

            _db = new DataAccess(_dbFilePath);
            var conf = new ConfigAdapter(_db.GetConfigs());

            _defaultEncoding = null != conf.DefaultCharset
                ? Encoding.GetEncoding(conf.DefaultCharset)
                : Encoding.UTF8;

            // Uri.Host is always lower-cased, so configured hosts are matched case-insensitively.
            _hostInclude = new HashSet<string>(
                conf.HostsInclude ?? Enumerable.Empty<string>(),
                StringComparer.OrdinalIgnoreCase);
            _excludeUrlPrefixes = conf.UrlPrefixExclude ?? new string[0];
            _homeUrl = new Uri(conf.HomeUrl);

            _server = new HttpListener();
            _server.Prefixes.Add("http://*:" + conf.OwsPort + "/");
            _server.Start();
            _server.BeginGetContext(ProcessRequest, null);

            LogInfo($"OWS runing on " + (string.Join("|", _server.Prefixes)));
        }

        /// <summary>
        /// Converts a URL to its virtual archive path:
        /// <c>http://host:port/path?query#hash</c> becomes <c>/http/host/port/path?query#hash</c>.
        /// </summary>
        /// <param name="uri">Base URL; used as-is when <paramref name="link"/> is null or empty.</param>
        /// <param name="link">Optional link (absolute or relative to <paramref name="uri"/>) to convert.</param>
        /// <param name="includedOnly">
        /// When true, links that match an excluded URL prefix, or whose host is neither the home
        /// host nor an included host, are returned unchanged.
        /// </param>
        /// <returns>
        /// The virtual path, or <paramref name="link"/> unchanged for <c>mailto:</c> links, links
        /// that cannot be resolved against <paramref name="uri"/>, and links filtered out by
        /// <paramref name="includedOnly"/>.
        /// </returns>
        private string UrlTranscode(Uri uri, string link = null, bool includedOnly = false)
        {
            if (false == string.IsNullOrEmpty(link))
            {
                if (link.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase))
                {
                    return link;
                }

                // A single malformed link must not fail the whole page: leave it untouched.
                Uri resolved;
                if (false == Uri.TryCreate(uri, link, out resolved))
                {
                    return link;
                }

                uri = resolved;
            }

            if (includedOnly)
            {
                var url = uri.ToString();
                if (_excludeUrlPrefixes.Any(p => url.StartsWith(p, StringComparison.Ordinal)))
                {
                    return link;
                }

                var host = uri.Host;
                if (false == string.Equals(host, _homeUrl.Host, StringComparison.OrdinalIgnoreCase)
                    && false == _hostInclude.Contains(host))
                {
                    return link;
                }
            }

            return $"/{uri.Scheme}/{uri.Host}/{uri.Port}{uri.PathAndQuery}{uri.Fragment}";
        }

        /// <summary>
        /// <see cref="HttpListener.BeginGetContext"/> callback. Handles one request, closes the
        /// response and then asks the listener for the next request, so requests are served
        /// one at a time. The next accept is issued even when handling fails.
        /// </summary>
        /// <param name="ar">Async result handed to <see cref="HttpListener.EndGetContext"/>.</param>
        private void ProcessRequest(IAsyncResult ar)
        {
            HttpListenerContext ctx;
            try
            {
                ctx = _server.EndGetContext(ar);
            }
            catch (Exception e) when (e is ObjectDisposedException || e is HttpListenerException)
            {
                // Raised when the listener is stopped/closed while an accept is pending.
                LogTrace($"Stopped waiting for a request: {e.Message}");
                AcceptNext();
                return;
            }

            try
            {
                LogTrace($"{ctx.Request.HttpMethod} {ctx.Request.RawUrl} From {ctx.Request.RemoteEndPoint}");
                HandleRequest(ctx);
            }
            catch (Exception e)
            {
                // Typically the client went away or the response was already started.
                LogError($"Error when processing request: {e.Message}");
            }
            finally
            {
                try
                {
                    LogTrace($"{ctx.Request.HttpMethod} {ctx.Request.RawUrl} -- {ctx.Response.StatusCode}");
                    ctx.Response.Close();
                }
                catch
                {
                    // Best effort: nothing useful can be done if closing the response fails.
                }

                AcceptNext();
            }
        }

        /// <summary>Requests the next context if the listener is still running.</summary>
        private void AcceptNext()
        {
            try
            {
                if (_server.IsListening)
                {
                    _server.BeginGetContext(ProcessRequest, null);
                }
            }
            catch (Exception e) when (e is InvalidOperationException || e is HttpListenerException)
            {
                // The listener was stopped between the check and the call.
                LogTrace($"Listener no longer accepting requests: {e.Message}");
            }
        }

        /// <summary>
        /// Routes a request: <c>/</c> redirects to the home URL, paths with fewer than the
        /// <c>/scheme/host/port</c> segments get a 400, everything else is looked up in the archive.
        /// </summary>
        private void HandleRequest(HttpListenerContext ctx)
        {
            if (ctx.Request.Url.LocalPath == "/")
            {
                ctx.Response.Redirect(UrlTranscode(_homeUrl));
                return;
            }

            var path = ctx.Request.Url.GetComponents(
                UriComponents.PathAndQuery | UriComponents.Fragment,
                UriFormat.SafeUnescaped);
            var parts = path.Split('/');

            // Expected request path format: /scheme/host/port/...
            if (parts.Length < 4)
            {
                WriteHtmlPage(ctx.Response, 400, "Bad Archive Request", "<h1>400 Bad Archive Request</h1>");
                return;
            }

            try
            {
                ServeArchivedEntry(ctx, parts);
            }
            catch (Exception e)
            {
                LogError($"Error when serving {ctx.Request.RawUrl}: {e.Message}");
                WriteHtmlPage(
                    ctx.Response,
                    500,
                    "Internal Server Error",
                    $"<h1>Error</h1><pre>{WebUtility.HtmlEncode(e.ToString())}</pre>");
            }
        }

        /// <summary>
        /// Rebuilds the archived URL from the virtual path segments, loads the entry from the
        /// database and writes it to the response, rewriting links in the <c>location</c> header
        /// and in HTML/CSS bodies. Writes a 404 page when no usable entry exists.
        /// </summary>
        /// <param name="ctx">Context of the request being served.</param>
        /// <param name="parts">Request path split on <c>/</c>: empty, scheme, host, port, then the rest.</param>
        private void ServeArchivedEntry(HttpListenerContext ctx, string[] parts)
        {
            var decodedUrl = $"{parts[1]}://{parts[2]}:{parts[3]}/{string.Join("/", parts.Skip(4))}";
            var decodedUri = new Uri(decodedUrl);
            var archiveUrl = decodedUri.ToString();

            var entry = _db.GetEntry(archiveUrl);
            if (entry == null || entry.StatusCode == 0)
            {
                // The URL comes from the request, so it is encoded before being echoed into HTML.
                WriteHtmlPage(
                    ctx.Response,
                    404,
                    "ArchiveEntryNotFound",
                    $"<h1>404 Not Found In Archive by {WebUtility.HtmlEncode(archiveUrl)}</h1>");
                return;
            }

            var headers = HttpHeaderUtility.ParseStringLines(entry.Headers);
            ctx.Response.StatusCode = entry.StatusCode;
            ctx.Response.StatusDescription = entry.StatusDescription;

            string contentType = null;
            var contentEncoding = _defaultEncoding;

            foreach (var header in headers)
            {
                // These describe the framing of the archived response, not of the (possibly
                // rewritten) body sent here; the length is set from the actual output below.
                if (NameIs(header.Name, "content-length") || NameIs(header.Name, "transfer-encoding"))
                {
                    continue;
                }

                if (NameIs(header.Name, "location"))
                {
                    header.Value = UrlTranscode(decodedUri, header.Value);
                }

                if (NameIs(header.Name, "content-type"))
                {
                    try
                    {
                        var ct = new ContentType(header.Value);
                        contentType = ct.MediaType;
                        if (null != ct.CharSet)
                        {
                            contentEncoding = Encoding.GetEncoding(ct.CharSet);
                        }
                    }
                    catch (Exception e) when (e is FormatException || e is ArgumentException)
                    {
                        // Unparsable value or unknown charset: keep the default encoding.
                        LogTrace($"Ignoring unusable content-type '{header.Value}': {e.Message}");
                    }
                }

                try
                {
                    ctx.Response.Headers.Set(header.Name, header.Value);
                }
                catch (ArgumentException e)
                {
                    // Restricted or malformed headers are rejected by the header collection.
                    LogTrace($"Skipping header '{header.Name}': {e.Message}");
                }
            }

            var output = entry.Content;
            if (NameIs(contentType, "text/html"))
            {
                var replaced = LinkProcessor.ReplaceHtmlLinks(
                    entry.Content,
                    p => UrlTranscode(decodedUri, p, true),
                    ref contentEncoding);
                output = contentEncoding.GetBytes(replaced);
            }
            else if (NameIs(contentType, "text/css"))
            {
                var css = contentEncoding.GetString(entry.Content);
                var replaced = LinkProcessor.ReplaceCssLinks(css, p => UrlTranscode(decodedUri, p, true));
                output = contentEncoding.GetBytes(replaced);
            }

            try
            {
                ctx.Response.ContentLength64 = output.Length;
                ctx.Response.OutputStream.Write(output, 0, output.Length);
            }
            catch (Exception e)
            {
                LogError($"Error when writing output: {e.Message}");
            }
        }

        /// <summary>Writes a small UTF-8 HTML page with the given status to the response.</summary>
        /// <param name="response">Response to write to.</param>
        /// <param name="statusCode">HTTP status code to send.</param>
        /// <param name="statusDescription">HTTP status text to send.</param>
        /// <param name="bodyHtml">Markup placed inside the body; callers must encode any untrusted text.</param>
        private static void WriteHtmlPage(
            HttpListenerResponse response,
            int statusCode,
            string statusDescription,
            string bodyHtml)
        {
            response.StatusCode = statusCode;
            response.StatusDescription = statusDescription;
            response.ContentType = "text/html; charset=utf-8";

            var buffer = Encoding.UTF8.GetBytes($"<html><body>{bodyHtml}</body></html>");
            response.OutputStream.Write(buffer, 0, buffer.Length);
        }

        /// <summary>Case-insensitive ordinal comparison for header names and media types.</summary>
        private static bool NameIs(string name, string expected)
        {
            return string.Equals(name, expected, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>Stops the listener; does nothing when <see cref="Start"/> never created one.</summary>
        public override void Stop()
        {
            _server?.Stop();
        }
    }
}