DataAccess.cs 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. using Dapper;
  2. using Mono.Data.Sqlite;
  3. using Rac.Entities;
  4. using System;
  5. using System.Collections.Generic;
  6. using System.Data;
  7. using System.Data.Common;
  8. using System.IO;
  9. using System.Linq;
  10. namespace Rac.Tools
  11. {
  12. internal class DataAccess
  13. {
  14. private readonly string _dbFilePath;
  15. public DataAccess(string dbFilePath)
  16. {
  17. _dbFilePath = dbFilePath;
  18. CreateDatabaseIfNotExist();
  19. }
  20. private void CreateDatabaseIfNotExist()
  21. {
  22. if (File.Exists(_dbFilePath)) return;
  23. using (var conn = GetConnection())
  24. {
  25. conn.Execute(
  26. //Create table:Config
  27. "CREATE TABLE Configs("
  28. + $"{nameof(ConfigEntry.Key)} VARCHAR(16) PRIMARY KEY,"
  29. + $"{nameof(ConfigEntry.Value)} VARCHAR(512)"
  30. + ");" +
  31. //Create table:Archive
  32. "CREATE TABLE Archives("
  33. + $"{nameof(ArchiveEntry.Url)} VARCHAR(512) PRIMARY KEY,"
  34. + $"{nameof(ArchiveEntry.LastWriteTime)} DATETIME,"
  35. + $"{nameof(ArchiveEntry.StatusCode)} INT,"
  36. + $"{nameof(ArchiveEntry.StatusDescription)} VARCHAR(128),"
  37. + $"{nameof(ArchiveEntry.Headers)} VARCHAR(512),"
  38. + $"{nameof(ArchiveEntry.Content)} BLOB"
  39. + ");" +
  40. //Create table:PageLinks
  41. "CREATE TABLE ResourceLinks("
  42. + $"{nameof(ResourceLinkEntry.Resource)} VARCHAR(512),"
  43. + $"{nameof(ResourceLinkEntry.Link)} VARCHAR(512),"
  44. + $"{nameof(ResourceLinkEntry.Included)} BOOLEAN,"
  45. + $"{nameof(ResourceLinkEntry.Remark)} VARCHAR(512),"
  46. + $"PRIMARY KEY ({nameof(ResourceLinkEntry.Resource)},{nameof(ResourceLinkEntry.Link)})"
  47. + ");"
  48. //Init Data
  49. + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HomeUrl)}');"
  50. + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.HostsInclude)}');"
  51. + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.UrlPrefixExclude)}');"
  52. + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.OwsPort)}');"
  53. + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.Parallel)}');"
  54. + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.DefaultCharset)}');"
  55. + $"INSERT INTO Configs ({nameof(ConfigEntry.Key)}) VALUES ('{nameof(ConfigAdapter.ExtractWebRoot)}');"
  56. );
  57. }
  58. }
  59. private int BulkOperation<T>(IEnumerable<T> items, Func<T, DbConnection, int> proc)
  60. {
  61. var count = 0;
  62. using (var conn = GetConnection())
  63. {
  64. using (var t = conn.BeginTransaction())
  65. {
  66. count += items.Sum(p => proc(p, conn));
  67. t.Commit();
  68. }
  69. }
  70. return count;
  71. }
  72. private DbConnection GetConnection()
  73. {
  74. var conn = new SqliteConnection("Data Source=" + _dbFilePath);
  75. conn.Open();
  76. return conn;
  77. }
  78. // --- common ---
  79. public Dictionary<string, string> GetConfigs()
  80. {
  81. using (var conn = GetConnection())
  82. return conn.Query<ConfigEntry>("select * from Configs")
  83. .ToDictionary(p => p.Key, p => p.Value);
  84. }
  85. // --- for crawler ---
  86. public string[] GetNonDumpedUrls(int num)
  87. {
  88. using (var conn = GetConnection())
  89. return conn.Query<ArchiveEntry>($"select {nameof(ArchiveEntry.Url)} from Archives WHERE {nameof(ArchiveEntry.StatusCode)} IS NULL LIMIT @num", new { num })
  90. .Select(p => p.Url).ToArray();
  91. }
  92. public int CreateArchiveEntryIfNotExist(string url, DbConnection conn = null)
  93. {
  94. int Run(IDbConnection con) => 1 != con.ExecuteScalar<int>($"SELECT 1 FROM Archives WHERE {nameof(ArchiveEntry.Url)}=@url", new { url })
  95. ? con.Execute($"INSERT INTO Archives ({nameof(ArchiveEntry.Url)},{nameof(ArchiveEntry.LastWriteTime)}) VALUES(@url,@now)", new { url, now = DateTime.Now })
  96. : 0;
  97. if (null != conn)
  98. return Run(conn);
  99. using (conn = GetConnection())
  100. return Run(conn);
  101. }
  102. public int UpdateArchiveEntity(ArchiveEntry entryToUpdate, DbConnection conn = null)
  103. {
  104. entryToUpdate.LastWriteTime = DateTime.Now;
  105. int Run(IDbConnection con) => con.Execute(
  106. "UPDATE Archives SET "
  107. + $"{nameof(ArchiveEntry.LastWriteTime)}=@{nameof(ArchiveEntry.LastWriteTime)},"
  108. + $"{nameof(ArchiveEntry.StatusCode)}=@{nameof(ArchiveEntry.StatusCode)},"
  109. + $"{nameof(ArchiveEntry.StatusDescription)}=@{nameof(ArchiveEntry.StatusDescription)},"
  110. + $"{nameof(ArchiveEntry.Headers)}=@{nameof(ArchiveEntry.Headers)},"
  111. + $"{nameof(ArchiveEntry.Content)}=@{nameof(ArchiveEntry.Content)} " +
  112. $"WHERE {nameof(ArchiveEntry.Url)}=@{nameof(ArchiveEntry.Url)}", entryToUpdate);
  113. if (null != conn)
  114. return Run(conn);
  115. using (conn = GetConnection())
  116. return Run(conn);
  117. }
  118. public int AddPageLink(ResourceLinkEntry entry, DbConnection conn = null)
  119. {
  120. int Run(IDbConnection con)
  121. {
  122. if (1 != con.ExecuteScalar<int>(
  123. "SELECT 1 FROM ResourceLinks WHERE "
  124. + $"{nameof(ResourceLinkEntry.Resource)}=@{nameof(ResourceLinkEntry.Resource)} "
  125. + $"AND {nameof(ResourceLinkEntry.Link)}=@{nameof(ResourceLinkEntry.Link)}"
  126. , entry))
  127. return con.Execute(
  128. "INSERT INTO ResourceLinks ("
  129. + $"{nameof(ResourceLinkEntry.Resource)},"
  130. + $"{nameof(ResourceLinkEntry.Link)},"
  131. + $"{nameof(ResourceLinkEntry.Included)},"
  132. + $"{nameof(ResourceLinkEntry.Remark)}" +
  133. ") VALUES ("
  134. + $"@{nameof(ResourceLinkEntry.Resource)},"
  135. + $"@{nameof(ResourceLinkEntry.Link)},"
  136. + $"@{nameof(ResourceLinkEntry.Included)},"
  137. + $"@{nameof(ResourceLinkEntry.Remark)}" +
  138. ")", entry);
  139. return 0;
  140. }
  141. if (null != conn)
  142. return Run(conn);
  143. using (conn = GetConnection())
  144. return Run(conn);
  145. }
  146. public int BulkAddNewArchiveEntry(IEnumerable<string> urls)
  147. {
  148. return BulkOperation(urls, CreateArchiveEntryIfNotExist);
  149. }
  150. public int BulkUpdateArchiveEntry(IEnumerable<ArchiveEntry> items)
  151. {
  152. return BulkOperation(items, UpdateArchiveEntity);
  153. }
  154. public int BulkAddResourceLink(string page, bool included, string remark, params string[] links)
  155. {
  156. return BulkOperation(links.Select(p => new ResourceLinkEntry { Resource = page, Link = p, Included = included, Remark = remark }), AddPageLink);
  157. }
  158. public int BulkAddResourceLink(IEnumerable<ResourceLinkEntry> items)
  159. {
  160. return BulkOperation(items, AddPageLink);
  161. }
  162. public string GetReferer(string url)
  163. {
  164. using (var conn = GetConnection())
  165. {
  166. return conn.Query<string>($"SELECT Resource FROM ResourceLinks WHERE {nameof(ResourceLinkEntry.Link)}=@url", new { url }).FirstOrDefault();
  167. }
  168. }
  169. // --- for offline server & extractor ---
  170. public ArchiveEntry GetEntry(string url)
  171. {
  172. using (var conn = GetConnection())
  173. {
  174. return conn.Query<ArchiveEntry>($"SELECT * FROM Archives WHERE {nameof(ArchiveEntry.Url)}=@url", new { url }).FirstOrDefault();
  175. }
  176. }
  177. // --- for extractor ---
  178. public string[] GetAllUrl()
  179. {
  180. using (var conn = GetConnection())
  181. {
  182. return conn.Query<string>($"SELECT {nameof(ArchiveEntry.Url)} FROM Archives").ToArray();
  183. }
  184. }
  185. }
  186. }