using Dapper; using Mono.Data.Sqlite; using Rac.Entities; using System; using System.Collections.Generic; using System.Data; using System.Data.Common; using System.IO; using System.Linq; namespace Rac.Tools { internal class DataAccess { private readonly string _dbFilePath; public DataAccess(string dbFilePath) { _dbFilePath = dbFilePath; CreateDatabaseIfNotExist(); } private void CreateDatabaseIfNotExist() { if (File.Exists(_dbFilePath)) return; using (var conn = GetConnection()) { conn.Execute( //Create table:Config "CREATE TABLE Configs(" + $"{nameof(ConfigEntry.Key)} VARCHAR(16) PRIMARY KEY," + $"{nameof(ConfigEntry.Value)} VARCHAR(512)" + ");" + //Create table:Archive "CREATE TABLE Archives(" + $"{nameof(ArchiveEntry.Url)} VARCHAR(512) PRIMARY KEY," + $"{nameof(ArchiveEntry.LastWriteTime)} DATETIME," + $"{nameof(ArchiveEntry.StatusCode)} INT," + $"{nameof(ArchiveEntry.StatusDescription)} VARCHAR(128)," + $"{nameof(ArchiveEntry.Headers)} VARCHAR(512)," + $"{nameof(ArchiveEntry.Content)} BLOB" + ");" + //Create table:PageLinks "CREATE TABLE ResourceLinks(" + $"{nameof(ResourceLinkEntry.Resource)} VARCHAR(512)," + $"{nameof(ResourceLinkEntry.Link)} VARCHAR(512)," + $"{nameof(ResourceLinkEntry.Included)} BOOLEAN," + $"{nameof(ResourceLinkEntry.Remark)} VARCHAR(512)," + $"PRIMARY KEY ({nameof(ResourceLinkEntry.Resource)},{nameof(ResourceLinkEntry.Link)})" + ");" //Init Data + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HomeUrl)}');" + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HostsInclude)}');" + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.UrlPrefixExclude)}');" + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.OwsPort)}');" + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.Parallel)}');" + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.DefaultCharset)}');" + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.ExtractWebRoot)}');" ); } } private int BulkOperation(IEnumerable items, Func proc) { var count = 0; using (var conn = GetConnection()) { using (var t = conn.BeginTransaction()) { count += items.Sum(p => proc(p, conn)); t.Commit(); } } return count; } private DbConnection GetConnection() { var conn = new SqliteConnection("Data Source=" + _dbFilePath); conn.Open(); return conn; } // --- common --- public Dictionary GetConfigs() { using (var conn = GetConnection()) return conn.Query("select * from Configs") .ToDictionary(p => p.Key, p => p.Value); } // --- for crawler --- public string[] GetNonDumpedUrls(int num) { using (var conn = GetConnection()) return conn.Query($"select {nameof(ArchiveEntry.Url)} from Archives WHERE {nameof(ArchiveEntry.StatusCode)} IS NULL LIMIT @num", new { num }) .Select(p => p.Url).ToArray(); } public int CreateArchiveEntryIfNotExist(string url, DbConnection conn = null) { int Run(IDbConnection con) => 1 != con.ExecuteScalar($"SELECT 1 FROM Archives WHERE {nameof(ArchiveEntry.Url)}=@url", new { url }) ? con.Execute($"INSERT INTO Archives ({nameof(ArchiveEntry.Url)},{nameof(ArchiveEntry.LastWriteTime)}) VALUES(@url,@now)", new { url, now = DateTime.Now }) : 0; if (null != conn) return Run(conn); using (conn = GetConnection()) return Run(conn); } public int UpdateArchiveEntity(ArchiveEntry entryToUpdate, DbConnection conn = null) { entryToUpdate.LastWriteTime = DateTime.Now; int Run(IDbConnection con) => con.Execute( "UPDATE Archives SET " + $"{nameof(ArchiveEntry.LastWriteTime)}=@{nameof(ArchiveEntry.LastWriteTime)}," + $"{nameof(ArchiveEntry.StatusCode)}=@{nameof(ArchiveEntry.StatusCode)}," + $"{nameof(ArchiveEntry.StatusDescription)}=@{nameof(ArchiveEntry.StatusDescription)}," + $"{nameof(ArchiveEntry.Headers)}=@{nameof(ArchiveEntry.Headers)}," + $"{nameof(ArchiveEntry.Content)}=@{nameof(ArchiveEntry.Content)} " + $"WHERE {nameof(ArchiveEntry.Url)}=@{nameof(ArchiveEntry.Url)}", entryToUpdate); if (null != conn) return Run(conn); using (conn = GetConnection()) return Run(conn); } public int AddPageLink(ResourceLinkEntry entry, DbConnection conn = null) { int Run(IDbConnection con) { if (1 != con.ExecuteScalar( "SELECT 1 FROM ResourceLinks WHERE " + $"{nameof(ResourceLinkEntry.Resource)}=@{nameof(ResourceLinkEntry.Resource)} " + $"AND {nameof(ResourceLinkEntry.Link)}=@{nameof(ResourceLinkEntry.Link)}" , entry)) return con.Execute( "INSERT INTO ResourceLinks (" + $"{nameof(ResourceLinkEntry.Resource)}," + $"{nameof(ResourceLinkEntry.Link)}," + $"{nameof(ResourceLinkEntry.Included)}," + $"{nameof(ResourceLinkEntry.Remark)}" + ") VALUES (" + $"@{nameof(ResourceLinkEntry.Resource)}," + $"@{nameof(ResourceLinkEntry.Link)}," + $"@{nameof(ResourceLinkEntry.Included)}," + $"@{nameof(ResourceLinkEntry.Remark)}" + ")", entry); return 0; } if (null != conn) return Run(conn); using (conn = GetConnection()) return Run(conn); } public int BulkAddNewArchiveEntry(IEnumerable urls) { return BulkOperation(urls, CreateArchiveEntryIfNotExist); } public int BulkUpdateArchiveEntry(IEnumerable items) { return BulkOperation(items, UpdateArchiveEntity); } public int BulkAddResourceLink(string page, bool included, string remark, params string[] links) { return BulkOperation(links.Select(p => new ResourceLinkEntry { Resource = page, Link = p, Included = included, Remark = remark }), AddPageLink); } public int BulkAddResourceLink(IEnumerable items) { return BulkOperation(items, AddPageLink); } public string GetReferer(string url) { using (var conn = GetConnection()) { return conn.Query($"SELECT Resource FROM ResourceLinks WHERE {nameof(ResourceLinkEntry.Link)}=@url", new { url }).FirstOrDefault(); } } // --- for offline server & extractor --- public ArchiveEntry GetEntry(string url) { using (var conn = GetConnection()) { return conn.Query($"SELECT * FROM Archives WHERE {nameof(ArchiveEntry.Url)}=@url", new { url }).FirstOrDefault(); } } // --- for extractor --- public string[] GetAllUrl() { using (var conn = GetConnection()) { return conn.Query($"SELECT {nameof(ArchiveEntry.Url)} FROM Archives").ToArray(); } } } }