123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- using Dapper;
- using Mono.Data.Sqlite;
- using Rac.Entities;
- using System;
- using System.Collections.Generic;
- using System.Data;
- using System.Data.Common;
- using System.IO;
- using System.Linq;
- namespace Rac.Tools
- {
- internal class DataAccess
- {
- private readonly string _dbFilePath;
- public DataAccess(string dbFilePath)
- {
- _dbFilePath = dbFilePath;
- CreateDatabaseIfNotExist();
- }
- private void CreateDatabaseIfNotExist()
- {
- if (File.Exists(_dbFilePath)) return;
- using (var conn = GetConnection())
- {
- conn.Execute(
- //Create table:Config
- "CREATE TABLE Configs("
- + $"{nameof(ConfigEntry.Key)} VARCHAR(16) PRIMARY KEY,"
- + $"{nameof(ConfigEntry.Value)} VARCHAR(512)"
- + ");" +
- //Create table:Archive
- "CREATE TABLE Archives("
- + $"{nameof(ArchiveEntry.Url)} VARCHAR(512) PRIMARY KEY,"
- + $"{nameof(ArchiveEntry.LastWriteTime)} DATETIME,"
- + $"{nameof(ArchiveEntry.StatusCode)} INT,"
- + $"{nameof(ArchiveEntry.StatusDescription)} VARCHAR(128),"
- + $"{nameof(ArchiveEntry.Headers)} VARCHAR(512),"
- + $"{nameof(ArchiveEntry.Content)} BLOB"
- + ");" +
- //Create table:PageLinks
- "CREATE TABLE ResourceLinks("
- + $"{nameof(ResourceLinkEntry.Resource)} VARCHAR(512),"
- + $"{nameof(ResourceLinkEntry.Link)} VARCHAR(512),"
- + $"{nameof(ResourceLinkEntry.Included)} BOOLEAN,"
- + $"{nameof(ResourceLinkEntry.Remark)} VARCHAR(512),"
- + $"PRIMARY KEY ({nameof(ResourceLinkEntry.Resource)},{nameof(ResourceLinkEntry.Link)})"
- + ");"
- //Init Data
- + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HomeUrl)}');"
- + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HostsInclude)}');"
- + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.UrlPrefixExclude)}');"
- + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.OwsPort)}');"
- + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.Parallel)}');"
- + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.DefaultCharset)}');"
- + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.ExtractWebRoot)}');"
- );
- }
- }
- private int BulkOperation<T>(IEnumerable<T> items, Func<T, DbConnection, int> proc)
- {
- var count = 0;
- using (var conn = GetConnection())
- {
- using (var t = conn.BeginTransaction())
- {
- count += items.Sum(p => proc(p, conn));
- t.Commit();
- }
- }
- return count;
- }
- private DbConnection GetConnection()
- {
- var conn = new SqliteConnection("Data Source=" + _dbFilePath);
- conn.Open();
- return conn;
- }
- // --- common ---
- public Dictionary<string, string> GetConfigs()
- {
- using (var conn = GetConnection())
- return conn.Query<ConfigEntry>("select * from Configs")
- .ToDictionary(p => p.Key, p => p.Value);
- }
- // --- for crawler ---
- public string[] GetNonDumpedUrls(int num)
- {
- using (var conn = GetConnection())
- return conn.Query<ArchiveEntry>($"select {nameof(ArchiveEntry.Url)} from Archives WHERE {nameof(ArchiveEntry.StatusCode)} IS NULL LIMIT @num", new { num })
- .Select(p => p.Url).ToArray();
- }
- public int CreateArchiveEntryIfNotExist(string url, DbConnection conn = null)
- {
- int Run(IDbConnection con) => 1 != con.ExecuteScalar<int>($"SELECT 1 FROM Archives WHERE {nameof(ArchiveEntry.Url)}=@url", new { url })
- ? con.Execute($"INSERT INTO Archives ({nameof(ArchiveEntry.Url)},{nameof(ArchiveEntry.LastWriteTime)}) VALUES(@url,@now)", new { url, now = DateTime.Now })
- : 0;
- if (null != conn)
- return Run(conn);
- using (conn = GetConnection())
- return Run(conn);
- }
- public int UpdateArchiveEntity(ArchiveEntry entryToUpdate, DbConnection conn = null)
- {
- entryToUpdate.LastWriteTime = DateTime.Now;
- int Run(IDbConnection con) => con.Execute(
- "UPDATE Archives SET "
- + $"{nameof(ArchiveEntry.LastWriteTime)}=@{nameof(ArchiveEntry.LastWriteTime)},"
- + $"{nameof(ArchiveEntry.StatusCode)}=@{nameof(ArchiveEntry.StatusCode)},"
- + $"{nameof(ArchiveEntry.StatusDescription)}=@{nameof(ArchiveEntry.StatusDescription)},"
- + $"{nameof(ArchiveEntry.Headers)}=@{nameof(ArchiveEntry.Headers)},"
- + $"{nameof(ArchiveEntry.Content)}=@{nameof(ArchiveEntry.Content)} " +
- $"WHERE {nameof(ArchiveEntry.Url)}=@{nameof(ArchiveEntry.Url)}", entryToUpdate);
- if (null != conn)
- return Run(conn);
- using (conn = GetConnection())
- return Run(conn);
- }
- public int AddPageLink(ResourceLinkEntry entry, DbConnection conn = null)
- {
- int Run(IDbConnection con)
- {
- if (1 != con.ExecuteScalar<int>(
- "SELECT 1 FROM ResourceLinks WHERE "
- + $"{nameof(ResourceLinkEntry.Resource)}=@{nameof(ResourceLinkEntry.Resource)} "
- + $"AND {nameof(ResourceLinkEntry.Link)}=@{nameof(ResourceLinkEntry.Link)}"
- , entry))
- return con.Execute(
- "INSERT INTO ResourceLinks ("
- + $"{nameof(ResourceLinkEntry.Resource)},"
- + $"{nameof(ResourceLinkEntry.Link)},"
- + $"{nameof(ResourceLinkEntry.Included)},"
- + $"{nameof(ResourceLinkEntry.Remark)}" +
- ") VALUES ("
- + $"@{nameof(ResourceLinkEntry.Resource)},"
- + $"@{nameof(ResourceLinkEntry.Link)},"
- + $"@{nameof(ResourceLinkEntry.Included)},"
- + $"@{nameof(ResourceLinkEntry.Remark)}" +
- ")", entry);
- return 0;
- }
- if (null != conn)
- return Run(conn);
- using (conn = GetConnection())
- return Run(conn);
- }
- public int BulkAddNewArchiveEntry(IEnumerable<string> urls)
- {
- return BulkOperation(urls, CreateArchiveEntryIfNotExist);
- }
- public int BulkUpdateArchiveEntry(IEnumerable<ArchiveEntry> items)
- {
- return BulkOperation(items, UpdateArchiveEntity);
- }
- public int BulkAddResourceLink(string page, bool included, string remark, params string[] links)
- {
- return BulkOperation(links.Select(p => new ResourceLinkEntry { Resource = page, Link = p, Included = included, Remark = remark }), AddPageLink);
- }
- public int BulkAddResourceLink(IEnumerable<ResourceLinkEntry> items)
- {
- return BulkOperation(items, AddPageLink);
- }
- public string GetReferer(string url)
- {
- using (var conn = GetConnection())
- {
- return conn.Query<string>($"SELECT Resource FROM ResourceLinks WHERE {nameof(ResourceLinkEntry.Link)}=@url", new { url }).FirstOrDefault();
- }
- }
- // --- for offline server & extractor ---
- public ArchiveEntry GetEntry(string url)
- {
- using (var conn = GetConnection())
- {
- return conn.Query<ArchiveEntry>($"SELECT * FROM Archives WHERE {nameof(ArchiveEntry.Url)}=@url", new { url }).FirstOrDefault();
- }
- }
- // --- for extractor ---
- public string[] GetAllUrl()
- {
- using (var conn = GetConnection())
- {
- return conn.Query<string>($"SELECT {nameof(ArchiveEntry.Url)} FROM Archives").ToArray();
- }
- }
- }
- }
|