123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- using SharpCompress.Archives;
namespace WarcViewerBlazorWinForm.Backend.IO.Archiving
{
    /// <summary>
    /// Opens a WARC source that is either a plain <c>.warc</c> file on disk ("direct read")
    /// or an archive (opened through SharpCompress) whose entries are <c>.warc</c> or <c>.xz</c> files.
    /// </summary>
    /// <remarks>
    /// Call <see cref="LoadFileAsync"/> first. For a direct-read file use
    /// <see cref="OpenReadDirectly"/> / <see cref="GetLengthDirectly"/>; for an archive use the
    /// <c>OpenEntryRead</c> / <c>GetEntryLength</c> overloads. Disposing the reader disposes the archive.
    /// </remarks>
    internal class FileReader : IDisposable
    {
        private const string WarcExtension = ".warc";
        private const string XzExtension = ".xz";

        private string? _inputFilePath;
        private IArchive? _archive;
        private IArchiveEntry[]? _archiveEntries;

        /// <summary>
        /// <c>true</c> when the loaded file has the <c>.warc</c> extension, <c>false</c> when it was
        /// opened as an archive, <c>null</c> before any file has been loaded.
        /// </summary>
        public bool? IsDirectRead { get; private set; }

        /// <summary>
        /// Keys of the archive entries, in archive order; <c>null</c> when no archive is loaded.
        /// </summary>
        public ReadOnlyMemory<string>? PackagedEntryNames { get; set; }

        /// <summary>
        /// Loads <paramref name="inputFilePath"/> and decides, from its extension, whether it is read
        /// directly or opened as an archive. The work runs on a thread-pool thread.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to load.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public async Task LoadFileAsync(string inputFilePath)
        {
            if (!File.Exists(inputFilePath))
            {
                throw new FileNotFoundException("file not found", inputFilePath);
            }

            await Task.Run(() =>
            {
                // Release whatever a previous load left behind so a reused reader neither leaks
                // the old archive nor exposes its stale entries.
                _archive?.Dispose();
                _archive = null;
                _archiveEntries = null;
                PackagedEntryNames = null;

                _inputFilePath = inputFilePath;

                // File extensions are identifiers, not linguistic text: compare ordinally.
                IsDirectRead = Path.GetExtension(inputFilePath)
                    .Equals(WarcExtension, StringComparison.OrdinalIgnoreCase);
                if (IsDirectRead == true)
                {
                    return;
                }

                _archive = ArchiveFactory.Open(inputFilePath);
                _archiveEntries = _archive.Entries.ToArray();
                PackagedEntryNames = _archiveEntries.Select(p => p.Key).ToArray();
            });
        }

        /// <summary>Opens the loaded <c>.warc</c> file itself for reading.</summary>
        /// <returns>A read stream over the file; the caller owns it.</returns>
        /// <exception cref="InvalidOperationException">
        /// No file has been loaded, or the loaded file is an archive.
        /// </exception>
        public Stream OpenReadDirectly()
        {
            if (_inputFilePath == null)
            {
                throw new InvalidOperationException("not load file yet");
            }

            if (IsDirectRead == false)
            {
                throw new InvalidOperationException($"archived file, should use {nameof(OpenEntryRead)} method");
            }

            return File.OpenRead(_inputFilePath);
        }

        /// <summary>Opens the archive entry at <paramref name="archiveEntryIndex"/> for reading.</summary>
        /// <param name="archiveEntryIndex">Zero-based index into the archive's entries.</param>
        /// <returns>A stream over the entry's content; the caller owns it.</returns>
        /// <exception cref="InvalidOperationException">No archive entries are loaded.</exception>
        /// <exception cref="NotSupportedException">The entry's extension is neither <c>.warc</c> nor <c>.xz</c>.</exception>
        public Stream OpenEntryRead(int archiveEntryIndex)
        {
            var entries = RequireEntries(nameof(OpenReadDirectly));
            return ReadEntryInternal(entries[archiveEntryIndex]);
        }

        /// <summary>
        /// Opens the archive entry whose key equals <paramref name="archiveEntryKey"/>
        /// (ordinal, case-insensitive) for reading.
        /// </summary>
        /// <param name="archiveEntryKey">Key of the entry inside the archive.</param>
        /// <returns>A stream over the entry's content; the caller owns it.</returns>
        /// <exception cref="InvalidOperationException">No archive entries are loaded.</exception>
        /// <exception cref="FileNotFoundException">No entry has that key.</exception>
        /// <exception cref="NotSupportedException">The entry's extension is neither <c>.warc</c> nor <c>.xz</c>.</exception>
        public Stream OpenEntryRead(string archiveEntryKey)
        {
            return ReadEntryInternal(FindEntry(archiveEntryKey, nameof(OpenReadDirectly)));
        }

        /// <summary>Returns the loaded entries, or throws pointing the caller at the direct-read method.</summary>
        private IArchiveEntry[] RequireEntries(string directAlternative)
        {
            return _archiveEntries
                ?? throw new InvalidOperationException($"direct read warc file, should use {directAlternative} method");
        }

        /// <summary>Looks an entry up by key (ordinal, case-insensitive); throws when it is missing.</summary>
        private IArchiveEntry FindEntry(string archiveEntryKey, string directAlternative)
        {
            var entries = RequireEntries(directAlternative);
            var match = entries.FirstOrDefault(
                p => string.Equals(p.Key, archiveEntryKey, StringComparison.OrdinalIgnoreCase));
            return match ?? throw new FileNotFoundException("entry not found", archiveEntryKey);
        }

        /// <summary>
        /// Opens an entry's content, routing <c>.xz</c> entries through <c>XzRwOps.ReadXzStream</c>.
        /// </summary>
        /// <exception cref="NotSupportedException">The entry's extension is neither <c>.warc</c> nor <c>.xz</c>.</exception>
        private static Stream ReadEntryInternal(IArchiveEntry entry)
        {
            // A missing key yields no extension and falls through to NotSupportedException
            // instead of a NullReferenceException.
            var extension = Path.GetExtension(entry.Key) ?? string.Empty;

            if (extension.Equals(WarcExtension, StringComparison.OrdinalIgnoreCase))
            {
                return entry.OpenEntryStream();
            }

            if (extension.Equals(XzExtension, StringComparison.OrdinalIgnoreCase))
            {
                var raw = entry.OpenEntryStream();
                try
                {
                    return XzRwOps.ReadXzStream(raw);
                }
                catch
                {
                    // The helper failed before handing a stream back; nobody else can close this one.
                    raw.Dispose();
                    throw;
                }
            }

            throw new NotSupportedException("not supported inner compressed extension");
        }

        /// <summary>Disposes the opened archive, if any. Safe to call more than once.</summary>
        public void Dispose()
        {
            _archive?.Dispose();
            _archive = null;
        }

        /// <summary>Returns the on-disk size, in bytes, of the loaded <c>.warc</c> file.</summary>
        /// <exception cref="InvalidOperationException">
        /// No file has been loaded, or the loaded file is an archive.
        /// </exception>
        public long GetLengthDirectly()
        {
            if (_inputFilePath == null)
            {
                throw new InvalidOperationException("not load file yet");
            }

            if (IsDirectRead == false)
            {
                throw new InvalidOperationException($"archived file, should use {nameof(GetEntryLength)} method");
            }

            return new FileInfo(_inputFilePath).Length;
        }

        /// <summary>Returns the content length of the archive entry at <paramref name="archiveEntryIndex"/>.</summary>
        /// <param name="archiveEntryIndex">Zero-based index into the archive's entries.</param>
        /// <exception cref="InvalidOperationException">No archive entries are loaded.</exception>
        /// <exception cref="NotSupportedException">The entry's extension is neither <c>.warc</c> nor <c>.xz</c>.</exception>
        public long GetEntryLength(int archiveEntryIndex)
        {
            var entries = RequireEntries(nameof(GetLengthDirectly));
            return MeasureEntry(entries[archiveEntryIndex]);
        }

        /// <summary>
        /// Returns the content length of the archive entry whose key equals
        /// <paramref name="archiveEntryKey"/> (ordinal, case-insensitive).
        /// </summary>
        /// <param name="archiveEntryKey">Key of the entry inside the archive.</param>
        /// <exception cref="InvalidOperationException">No archive entries are loaded.</exception>
        /// <exception cref="FileNotFoundException">No entry has that key.</exception>
        /// <exception cref="NotSupportedException">The entry's extension is neither <c>.warc</c> nor <c>.xz</c>.</exception>
        public long GetEntryLength(string archiveEntryKey)
        {
            return MeasureEntry(FindEntry(archiveEntryKey, nameof(GetLengthDirectly)));
        }

        /// <summary>Opens an entry, measures it with <c>GetLengthStupid</c>, and closes the stream again.</summary>
        private static long MeasureEntry(IArchiveEntry entry)
        {
            // The stream exists only to be measured, so it must not outlive this call.
            // NOTE(review): GetLengthStupid is defined elsewhere; presumably it reads the stream to count bytes.
            using var stream = ReadEntryInternal(entry);
            return stream.GetLengthStupid();
        }

        /// <summary>
        /// Loads the file described by <paramref name="fd"/> in a temporary reader and returns the
        /// length of the file itself or of the named archive entry.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// The descriptor is not direct-read and carries no archive entry key.
        /// </exception>
        public static async Task<long> GetLengthAsync(FileDescriptor fd)
        {
            using var reader = new FileReader();
            await reader.LoadFileAsync(fd.FilePath);

            if (fd.IsReadDirectly)
            {
                return reader.GetLengthDirectly();
            }

            if (fd.ArchiveEntryKey == null)
            {
                throw new ArgumentNullException(nameof(fd.ArchiveEntryKey));
            }

            return reader.GetEntryLength(fd.ArchiveEntryKey);
        }

        /// <summary>
        /// Loads the file described by <paramref name="fd"/> and returns a read stream over the file
        /// itself or over the named archive entry. For an archive entry, disposing the returned
        /// stream also disposes the reader that keeps the archive open.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// The descriptor is not direct-read and carries no archive entry key.
        /// </exception>
        public static async Task<Stream> OpenReadStreamAsync(FileDescriptor fd)
        {
            var reader = new FileReader();
            try
            {
                await reader.LoadFileAsync(fd.FilePath);

                if (fd.IsReadDirectly)
                {
                    // Direct reads never open an archive, so the reader holds nothing to release.
                    return reader.OpenReadDirectly();
                }

                if (fd.ArchiveEntryKey == null)
                {
                    throw new ArgumentNullException(nameof(fd.ArchiveEntryKey));
                }

                // Ownership of the reader passes to the returned stream.
                return new DisposeStream(reader.OpenEntryRead(fd.ArchiveEntryKey), reader);
            }
            catch
            {
                // No stream was handed out, so nobody else will ever close the archive.
                reader.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Stream decorator that forwards every operation to an inner stream and, when disposed,
        /// disposes both the inner stream and one additional owner object.
        /// </summary>
        private sealed class DisposeStream(Stream underlyingStream, IDisposable disposable) : Stream
        {
            private Stream? _inner = underlyingStream;
            private IDisposable? _owner = disposable;

            /// <summary>The inner stream; throws once this wrapper has been disposed.</summary>
            private Stream Inner => _inner ?? throw new ObjectDisposedException(nameof(DisposeStream));

            protected override void Dispose(bool disposing)
            {
                if (disposing)
                {
                    var inner = _inner;
                    var owner = _owner;
                    _inner = null;
                    _owner = null;

                    try
                    {
                        inner?.Dispose();
                    }
                    finally
                    {
                        // Release the owner even if closing the inner stream throws.
                        owner?.Dispose();
                    }
                }

                base.Dispose(disposing);
            }

            public override void Flush() => Inner.Flush();

            public override int Read(byte[] buffer, int offset, int count) => Inner.Read(buffer, offset, count);

            public override long Seek(long offset, SeekOrigin origin) => Inner.Seek(offset, origin);

            public override void SetLength(long value) => Inner.SetLength(value);

            public override void Write(byte[] buffer, int offset, int count) => Inner.Write(buffer, offset, count);

            // Capability queries report false after disposal rather than throwing.
            public override bool CanRead => _inner?.CanRead ?? false;

            public override bool CanSeek => _inner?.CanSeek ?? false;

            public override bool CanWrite => _inner?.CanWrite ?? false;

            public override long Length => Inner.Length;

            public override long Position
            {
                get => Inner.Position;
                set => Inner.Position = value;
            }
        }
    }
}
|