// FileReader.cs (6.3 KB)
  1. using SharpCompress.Archives;
  namespace WarcViewerBlazorWinForm.Backend.IO.Archiving
  {
      /// <summary>
      /// Reads WARC data either straight from a <c>.warc</c> file on disk or from an entry
      /// inside an archive opened through <see cref="ArchiveFactory"/>.
      /// </summary>
      /// <remarks>
      /// Call <see cref="LoadFileAsync"/> first. Afterwards <see cref="IsDirectRead"/> tells which
      /// family of methods applies: the <c>*Directly</c> methods for a plain <c>.warc</c> file, or the
      /// <c>*Entry*</c> methods for an archive. Disposing the reader disposes the opened archive.
      /// </remarks>
      internal class FileReader : IDisposable
      {
          private const string WarcExtension = ".warc";
          private const string XzExtension = ".xz";

          private string? _inputFilePath;
          private IArchive? _archive;
          private IArchiveEntry[]? _archiveEntries;

          /// <summary>
          /// <c>true</c> when the loaded file has the <c>.warc</c> extension and is read directly,
          /// <c>false</c> when it was opened as an archive, <c>null</c> while no file is loaded.
          /// </summary>
          public bool? IsDirectRead { get; private set; }

          /// <summary>
          /// Keys of the entries of the loaded archive; <c>null</c> when no archive is loaded.
          /// </summary>
          public ReadOnlyMemory<string>? PackagedEntryNames { get; set; }

          /// <summary>
          /// Loads <paramref name="inputFilePath"/>. A <c>.warc</c> file is only remembered for direct
          /// reading; any other file is opened as an archive and its entries are enumerated.
          /// </summary>
          /// <param name="inputFilePath">Path of the file to load.</param>
          /// <exception cref="FileNotFoundException">The file does not exist.</exception>
          public async Task LoadFileAsync(string inputFilePath)
          {
              if (!File.Exists(inputFilePath))
              {
                  throw new FileNotFoundException("file not found", inputFilePath);
              }

              await Task.Run(() =>
              {
                  // Drop whatever a previous load left behind so that a reused reader neither
                  // leaks the old archive nor exposes its stale entries.
                  ReleaseArchive();
                  _inputFilePath = null;
                  IsDirectRead = null;
                  PackagedEntryNames = null;

                  // File extensions are identifiers, not linguistic text: compare ordinally.
                  var isDirect = Path.GetExtension(inputFilePath)
                      .Equals(WarcExtension, StringComparison.OrdinalIgnoreCase);

                  if (!isDirect)
                  {
                      var archive = ArchiveFactory.Open(inputFilePath);
                      try
                      {
                          var entries = archive.Entries.ToArray();
                          var names = entries.Select(p => p.Key).ToArray();

                          _archive = archive;
                          _archiveEntries = entries;
                          PackagedEntryNames = names;
                      }
                      catch
                      {
                          // Enumerating entries failed: do not keep a half-initialised archive open.
                          archive.Dispose();
                          throw;
                      }
                  }

                  // Publish the "loaded" state only once everything above succeeded.
                  _inputFilePath = inputFilePath;
                  IsDirectRead = isDirect;
              });
          }

          /// <summary>Opens the loaded <c>.warc</c> file for reading.</summary>
          /// <exception cref="InvalidOperationException">
          /// No file is loaded, or the loaded file is an archive.
          /// </exception>
          public Stream OpenReadDirectly()
          {
              if (_inputFilePath == null) throw new InvalidOperationException("not load file yet");
              if (IsDirectRead == false) throw new InvalidOperationException($"archived file, should use {nameof(OpenEntryRead)} method");
              return File.OpenRead(_inputFilePath);
          }

          /// <summary>Opens the archive entry at <paramref name="archiveEntryIndex"/> for reading.</summary>
          /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
          /// <exception cref="NotSupportedException">The entry extension is neither <c>.warc</c> nor <c>.xz</c>.</exception>
          public Stream OpenEntryRead(int archiveEntryIndex)
          {
              var entries = RequireEntries(nameof(OpenReadDirectly));
              return ReadEntryInternal(entries[archiveEntryIndex]);
          }

          /// <summary>
          /// Opens the archive entry whose key equals <paramref name="archiveEntryKey"/>
          /// (ordinal, case-insensitive) for reading.
          /// </summary>
          /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
          /// <exception cref="FileNotFoundException">No entry has that key.</exception>
          /// <exception cref="NotSupportedException">The entry extension is neither <c>.warc</c> nor <c>.xz</c>.</exception>
          public Stream OpenEntryRead(string archiveEntryKey)
          {
              var entries = RequireEntries(nameof(OpenReadDirectly));
              return ReadEntryInternal(FindEntry(entries, archiveEntryKey));
          }

          /// <summary>
          /// Returns the loaded entries, or throws the "use the direct method" error that names
          /// <paramref name="directMethodName"/> when no archive is loaded.
          /// </summary>
          private IArchiveEntry[] RequireEntries(string directMethodName) =>
              _archiveEntries
              ?? throw new InvalidOperationException($"direct read warc file, should use {directMethodName} method");

          /// <summary>Finds an entry by key (ordinal, case-insensitive) or throws <see cref="FileNotFoundException"/>.</summary>
          private static IArchiveEntry FindEntry(IArchiveEntry[] entries, string archiveEntryKey) =>
              Array.Find(entries, e => string.Equals(e.Key, archiveEntryKey, StringComparison.OrdinalIgnoreCase))
              ?? throw new FileNotFoundException("entry not found", archiveEntryKey);

          /// <summary>
          /// Opens <paramref name="entry"/> according to its extension: <c>.warc</c> entries are
          /// returned as-is, <c>.xz</c> entries are passed through <c>XzRwOps.ReadXzStream</c>.
          /// </summary>
          private static Stream ReadEntryInternal(IArchiveEntry entry)
          {
              // A missing key yields no extension and falls through to NotSupportedException
              // instead of a NullReferenceException.
              var extension = Path.GetExtension(entry.Key) ?? string.Empty;

              if (extension.Equals(WarcExtension, StringComparison.OrdinalIgnoreCase))
              {
                  return entry.OpenEntryStream();
              }

              if (extension.Equals(XzExtension, StringComparison.OrdinalIgnoreCase))
              {
                  var raw = entry.OpenEntryStream();
                  try
                  {
                      return XzRwOps.ReadXzStream(raw);
                  }
                  catch
                  {
                      // The wrapper was never created, so nobody else would close the raw stream.
                      raw.Dispose();
                      throw;
                  }
              }

              throw new NotSupportedException("not supported inner compressed extension");
          }

          /// <summary>Disposes the opened archive, if any. Safe to call more than once.</summary>
          public void Dispose() => ReleaseArchive();

          /// <summary>Disposes the current archive and forgets its entries.</summary>
          private void ReleaseArchive()
          {
              var archive = _archive;
              _archive = null;
              _archiveEntries = null;
              archive?.Dispose();
          }

          /// <summary>Returns the size in bytes of the loaded <c>.warc</c> file.</summary>
          /// <exception cref="InvalidOperationException">
          /// No file is loaded, or the loaded file is an archive.
          /// </exception>
          public long GetLengthDirectly()
          {
              if (_inputFilePath == null) throw new InvalidOperationException("not load file yet");
              if (IsDirectRead == false) throw new InvalidOperationException($"archived file, should use {nameof(GetEntryLength)} method");
              return new FileInfo(_inputFilePath).Length;
          }

          /// <summary>
          /// Returns the length of the entry at <paramref name="archiveEntryIndex"/>, measured by
          /// opening it and calling <c>GetLengthStupid</c> on the resulting stream.
          /// </summary>
          /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
          public long GetEntryLength(int archiveEntryIndex)
          {
              var entries = RequireEntries(nameof(GetLengthDirectly));

              // The stream exists only to be measured; close it before returning.
              using var stream = ReadEntryInternal(entries[archiveEntryIndex]);
              return stream.GetLengthStupid();
          }

          /// <summary>
          /// Returns the length of the entry whose key equals <paramref name="archiveEntryKey"/>
          /// (ordinal, case-insensitive), measured by opening it and calling <c>GetLengthStupid</c>.
          /// </summary>
          /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
          /// <exception cref="FileNotFoundException">No entry has that key.</exception>
          public long GetEntryLength(string archiveEntryKey)
          {
              var entries = RequireEntries(nameof(GetLengthDirectly));

              // The stream exists only to be measured; close it before returning.
              using var stream = ReadEntryInternal(FindEntry(entries, archiveEntryKey));
              return stream.GetLengthStupid();
          }

          /// <summary>
          /// Loads the file described by <paramref name="fd"/> with a temporary reader and returns
          /// either the file length or the length of the requested archive entry.
          /// </summary>
          /// <exception cref="ArgumentNullException">An entry read was requested without an entry key.</exception>
          public static async Task<long> GetLengthAsync(FileDescriptor fd)
          {
              using var reader = new FileReader();
              await reader.LoadFileAsync(fd.FilePath);

              if (fd.IsReadDirectly) return reader.GetLengthDirectly();
              if (fd.ArchiveEntryKey == null) throw new ArgumentNullException(nameof(fd.ArchiveEntryKey));
              return reader.GetEntryLength(fd.ArchiveEntryKey);
          }

          /// <summary>
          /// Opens the data described by <paramref name="fd"/> for reading. For an archive entry the
          /// returned stream owns the underlying reader and disposes it together with the entry stream.
          /// </summary>
          /// <exception cref="ArgumentNullException">An entry read was requested without an entry key.</exception>
          public static async Task<Stream> OpenReadStreamAsync(FileDescriptor fd)
          {
              var reader = new FileReader();
              try
              {
                  await reader.LoadFileAsync(fd.FilePath);

                  if (fd.IsReadDirectly)
                  {
                      // In direct mode the reader holds no archive, so the returned file stream
                      // is independent of it and nothing is left to dispose.
                      return reader.OpenReadDirectly();
                  }

                  if (fd.ArchiveEntryKey == null) throw new ArgumentNullException(nameof(fd.ArchiveEntryKey));
                  return new DisposeStream(reader.OpenEntryRead(fd.ArchiveEntryKey), reader);
              }
              catch
              {
                  // Ownership was never handed to a DisposeStream: close the archive here.
                  reader.Dispose();
                  throw;
              }
          }

          /// <summary>
          /// Stream wrapper that forwards every operation to an inner stream and, when disposed,
          /// disposes both the inner stream and an additional owner object.
          /// </summary>
          private sealed class DisposeStream(Stream underlyingStream, IDisposable disposable) : Stream
          {
              private Stream? _inner = underlyingStream;
              private IDisposable? _owner = disposable;

              /// <summary>The inner stream, or <see cref="ObjectDisposedException"/> once disposed.</summary>
              private Stream Inner => _inner ?? throw new ObjectDisposedException(nameof(DisposeStream));

              protected override void Dispose(bool disposing)
              {
                  try
                  {
                      if (disposing)
                      {
                          var inner = _inner;
                          var owner = _owner;
                          _inner = null;
                          _owner = null;

                          try
                          {
                              inner?.Dispose();
                          }
                          finally
                          {
                              // Release the owner even when closing the inner stream fails.
                              owner?.Dispose();
                          }
                      }
                  }
                  finally
                  {
                      base.Dispose(disposing);
                  }
              }

              public override void Flush() => Inner.Flush();
              public override int Read(byte[] buffer, int offset, int count) => Inner.Read(buffer, offset, count);
              public override long Seek(long offset, SeekOrigin origin) => Inner.Seek(offset, origin);
              public override void SetLength(long value) => Inner.SetLength(value);
              public override void Write(byte[] buffer, int offset, int count) => Inner.Write(buffer, offset, count);

              // Capability queries must not throw on a closed stream; they report false instead.
              public override bool CanRead => _inner?.CanRead ?? false;
              public override bool CanSeek => _inner?.CanSeek ?? false;
              public override bool CanWrite => _inner?.CanWrite ?? false;

              public override long Length => Inner.Length;

              public override long Position
              {
                  get => Inner.Position;
                  set => Inner.Position = value;
              }
          }
      }
  }