using SharpCompress.Archives;

namespace WarcViewerBlazorWinForm.Backend.IO.Archiving
{
    /// <summary>
    /// Opens a WARC source that is either a plain <c>.warc</c> file (read directly from disk)
    /// or an archive opened through SharpCompress whose entries are <c>.warc</c> / <c>.xz</c> files.
    /// </summary>
    /// <remarks>
    /// Call <see cref="LoadFileAsync"/> first; <see cref="IsDirectRead"/> then tells which family of
    /// methods applies (<c>*Directly</c> for plain files, <c>*Entry*</c> for archives).
    /// </remarks>
    internal class FileReader : IDisposable
    {
        private const string EXT_WARC = ".warc";
        private const string EXT_XZ = ".xz";

        private string? _inputFilePath;
        private IArchive? _archive;
        private IArchiveEntry[]? _archiveEntries;

        /// <summary>
        /// <c>true</c> when the loaded file has the <c>.warc</c> extension and is read directly,
        /// <c>false</c> when it was opened as an archive, <c>null</c> before any file is loaded.
        /// </summary>
        public bool? IsDirectRead { get; private set; }

        /// <summary>
        /// Keys of all entries of the loaded archive, in archive order;
        /// <c>null</c> when nothing is loaded or the file is read directly.
        /// </summary>
        public ReadOnlyMemory<string>? PackagedEntryNames { get; set; }

        /// <summary>
        /// Loads <paramref name="inputFilePath"/>. Files with a <c>.warc</c> extension are marked for
        /// direct reading; anything else is opened as an archive and its entries are enumerated.
        /// The work runs on a thread-pool thread via <see cref="Task.Run(Action)"/>.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to load.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public async Task LoadFileAsync(string inputFilePath)
        {
            if (!File.Exists(inputFilePath))
            {
                throw new FileNotFoundException("file not found", inputFilePath);
            }

            await Task.Run(() =>
            {
                // Release whatever a previous load left behind so a reload neither leaks the old
                // archive nor exposes stale entries when the new file is a plain .warc.
                _archive?.Dispose();
                _archive = null;
                _archiveEntries = null;
                PackagedEntryNames = null;

                _inputFilePath = inputFilePath;

                // File extensions are identifiers, not linguistic text: compare ordinally.
                IsDirectRead = Path.GetExtension(inputFilePath)
                    .Equals(EXT_WARC, StringComparison.OrdinalIgnoreCase);
                if (IsDirectRead == true)
                {
                    return;
                }

                _archive = ArchiveFactory.Open(inputFilePath);
                _archiveEntries = _archive.Entries.ToArray();
                PackagedEntryNames = _archiveEntries.Select(p => p.Key).ToArray();
            });
        }

        /// <summary>Opens the loaded plain <c>.warc</c> file for reading.</summary>
        /// <returns>A read stream over the file; the caller owns and must dispose it.</returns>
        /// <exception cref="InvalidOperationException">
        /// No file has been loaded, or the loaded file is an archive.
        /// </exception>
        public Stream OpenReadDirectly()
        {
            if (_inputFilePath == null)
            {
                throw new InvalidOperationException("not load file yet");
            }

            if (IsDirectRead == false)
            {
                throw new InvalidOperationException($"archived file, should use {nameof(OpenEntryRead)} method");
            }

            return File.OpenRead(_inputFilePath);
        }

        /// <summary>Opens the archive entry at <paramref name="archiveEntryIndex"/> for reading.</summary>
        /// <param name="archiveEntryIndex">Zero-based index into the archive's entries.</param>
        /// <returns>A stream over the entry content; the caller owns and must dispose it.</returns>
        /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
        /// <exception cref="NotSupportedException">The entry extension is neither .warc nor .xz.</exception>
        public Stream OpenEntryRead(int archiveEntryIndex)
        {
            var entries = RequireEntries(nameof(OpenReadDirectly));
            return ReadEntryInternal(entries[archiveEntryIndex]);
        }

        /// <summary>Opens the archive entry whose key equals <paramref name="archiveEntryKey"/> (ordinal, case-insensitive).</summary>
        /// <param name="archiveEntryKey">Key of the entry to open.</param>
        /// <returns>A stream over the entry content; the caller owns and must dispose it.</returns>
        /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
        /// <exception cref="FileNotFoundException">No entry has that key.</exception>
        /// <exception cref="NotSupportedException">The entry extension is neither .warc nor .xz.</exception>
        public Stream OpenEntryRead(string archiveEntryKey)
        {
            var entries = RequireEntries(nameof(OpenReadDirectly));
            return ReadEntryInternal(FindEntry(entries, archiveEntryKey));
        }

        /// <summary>Returns the loaded entries, or throws pointing the caller at the direct-read alternative.</summary>
        private IArchiveEntry[] RequireEntries(string directAlternative)
        {
            return _archiveEntries
                ?? throw new InvalidOperationException($"direct read warc file, should use {directAlternative} method");
        }

        /// <summary>Finds the first entry whose key matches <paramref name="archiveEntryKey"/>, ignoring case.</summary>
        private static IArchiveEntry FindEntry(IArchiveEntry[] entries, string archiveEntryKey)
        {
            var entry = entries.FirstOrDefault(
                p => string.Equals(p.Key, archiveEntryKey, StringComparison.OrdinalIgnoreCase));

            return entry ?? throw new FileNotFoundException("entry not found", archiveEntryKey);
        }

        /// <summary>
        /// Opens an entry according to its extension: <c>.warc</c> entries are returned as-is,
        /// <c>.xz</c> entries are passed through <c>XzRwOps.ReadXzStream</c>.
        /// </summary>
        private static Stream ReadEntryInternal(IArchiveEntry entry)
        {
            // string.Equals tolerates a null extension, so an entry without a usable key ends up
            // in the NotSupportedException branch below.
            var ext = Path.GetExtension(entry.Key);

            if (string.Equals(ext, EXT_WARC, StringComparison.OrdinalIgnoreCase))
            {
                return entry.OpenEntryStream();
            }

            if (string.Equals(ext, EXT_XZ, StringComparison.OrdinalIgnoreCase))
            {
                var raw = entry.OpenEntryStream();
                try
                {
                    return XzRwOps.ReadXzStream(raw);
                }
                catch
                {
                    // Do not leak the entry stream when the XZ wrapper cannot be created.
                    raw.Dispose();
                    throw;
                }
            }

            throw new NotSupportedException("not supported inner compressed extension");
        }

        /// <summary>Disposes the underlying archive, if one is open. Safe to call more than once.</summary>
        public void Dispose()
        {
            _archive?.Dispose();
            _archive = null;
        }

        /// <summary>Returns the on-disk size, in bytes, of the loaded plain <c>.warc</c> file.</summary>
        /// <exception cref="InvalidOperationException">
        /// No file has been loaded, or the loaded file is an archive.
        /// </exception>
        public long GetLengthDirectly()
        {
            if (_inputFilePath == null)
            {
                throw new InvalidOperationException("not load file yet");
            }

            if (IsDirectRead == false)
            {
                throw new InvalidOperationException($"archived file, should use {nameof(GetEntryLength)} method");
            }

            return new FileInfo(_inputFilePath).Length;
        }

        /// <summary>Returns the length of the readable content of the entry at <paramref name="archiveEntryIndex"/>.</summary>
        /// <param name="archiveEntryIndex">Zero-based index into the archive's entries.</param>
        /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
        /// <exception cref="NotSupportedException">The entry extension is neither .warc nor .xz.</exception>
        public long GetEntryLength(int archiveEntryIndex)
        {
            var entries = RequireEntries(nameof(GetLengthDirectly));
            return MeasureEntry(entries[archiveEntryIndex]);
        }

        /// <summary>Returns the length of the readable content of the entry whose key equals <paramref name="archiveEntryKey"/>.</summary>
        /// <param name="archiveEntryKey">Key of the entry (ordinal, case-insensitive match).</param>
        /// <exception cref="InvalidOperationException">No archive is loaded.</exception>
        /// <exception cref="FileNotFoundException">No entry has that key.</exception>
        /// <exception cref="NotSupportedException">The entry extension is neither .warc nor .xz.</exception>
        public long GetEntryLength(string archiveEntryKey)
        {
            var entries = RequireEntries(nameof(GetLengthDirectly));
            return MeasureEntry(FindEntry(entries, archiveEntryKey));
        }

        /// <summary>Opens the entry, measures it with <c>GetLengthStupid</c> and closes it again.</summary>
        private static long MeasureEntry(IArchiveEntry entry)
        {
            // NOTE(review): GetLengthStupid is defined elsewhere; presumably it consumes the stream
            // to count bytes - confirm. Either way the stream is only needed for the measurement,
            // so it is disposed here instead of being leaked.
            using var stream = ReadEntryInternal(entry);
            return stream.GetLengthStupid();
        }

        /// <summary>
        /// Loads the file described by <paramref name="fd"/> with a temporary reader and returns the
        /// length of the plain file or of the selected archive entry.
        /// </summary>
        /// <exception cref="ArgumentNullException">An archive read was requested without an entry key.</exception>
        public static async Task<long> GetLengthAsync(FileDescriptor fd)
        {
            using var ar = new FileReader();
            await ar.LoadFileAsync(fd.FilePath);

            if (fd.IsReadDirectly)
            {
                return ar.GetLengthDirectly();
            }

            if (fd.ArchiveEntryKey == null)
            {
                throw new ArgumentNullException(nameof(fd.ArchiveEntryKey));
            }

            return ar.GetEntryLength(fd.ArchiveEntryKey);
        }

        /// <summary>
        /// Loads the file described by <paramref name="fd"/> and returns a read stream over it.
        /// For archive entries, the returned stream also owns the reader: disposing the stream
        /// disposes the archive.
        /// </summary>
        /// <exception cref="ArgumentNullException">An archive read was requested without an entry key.</exception>
        public static async Task<Stream> OpenReadStreamAsync(FileDescriptor fd)
        {
            var ar = new FileReader();
            try
            {
                await ar.LoadFileAsync(fd.FilePath);

                if (fd.IsReadDirectly)
                {
                    // The returned file stream does not depend on the reader, and a reader in
                    // direct mode holds no archive, so nothing is left to release here.
                    return ar.OpenReadDirectly();
                }

                if (fd.ArchiveEntryKey == null)
                {
                    throw new ArgumentNullException(nameof(fd.ArchiveEntryKey));
                }

                return new DisposeStream(ar.OpenEntryRead(fd.ArchiveEntryKey), ar);
            }
            catch
            {
                // Ownership was never handed to a DisposeStream: release the archive ourselves.
                ar.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Stream wrapper that forwards every operation to an inner stream and, when disposed,
        /// disposes both the inner stream and an additional owner object.
        /// </summary>
        private class DisposeStream(Stream underlyingStream, IDisposable disposable) : Stream
        {
            private Stream? _inner = underlyingStream;
            private IDisposable? _owner = disposable;

            /// <summary>The wrapped stream; throws once this wrapper has been disposed.</summary>
            private Stream Inner => _inner ?? throw new ObjectDisposedException(nameof(DisposeStream));

            protected override void Dispose(bool disposing)
            {
                try
                {
                    if (disposing)
                    {
                        var inner = _inner;
                        var owner = _owner;
                        _inner = null;
                        _owner = null;

                        try
                        {
                            inner?.Dispose();
                        }
                        finally
                        {
                            // Release the owner even if closing the inner stream failed.
                            owner?.Dispose();
                        }
                    }
                }
                finally
                {
                    base.Dispose(disposing);
                }
            }

            public override void Flush() => Inner.Flush();

            public override int Read(byte[] buffer, int offset, int count) => Inner.Read(buffer, offset, count);

            public override long Seek(long offset, SeekOrigin origin) => Inner.Seek(offset, origin);

            public override void SetLength(long value) => Inner.SetLength(value);

            public override void Write(byte[] buffer, int offset, int count) => Inner.Write(buffer, offset, count);

            // Capability probes report false after disposal instead of throwing.
            public override bool CanRead => _inner?.CanRead ?? false;

            public override bool CanSeek => _inner?.CanSeek ?? false;

            public override bool CanWrite => _inner?.CanWrite ?? false;

            public override long Length => Inner.Length;

            public override long Position
            {
                get => Inner.Position;
                set => Inner.Position = value;
            }
        }
    }
}