using System.IO.Compression;
using UglyToad.PdfPig;

namespace ImageConvertService.Biz;

using Models;

/// <summary>
/// Extracts the images embedded in a PDF document and returns them as archive entries.
/// </summary>
public class PdfImageReader
{
    /// <summary>
    /// Leading byte sequences used to recognise the payload format of raw image data.
    /// </summary>
    private static class Signatures
    {
        // NOTE(review): only this one zlib header is recognised; streams that start with a
        // different zlib header are treated as opaque data. Confirm whether that is intended.
        public static readonly byte[] Zlib = { 0x78, 0x9C };

        public static readonly byte[] Jpg = { 0xFF, 0xD8, 0xFF };
    }

    /// <summary>
    /// Reads every image of every page of the given PDF.
    /// </summary>
    /// <remarks>
    /// For each image the first strategy that succeeds is used:
    /// <list type="number">
    /// <item><description><c>TryGetPng</c> succeeds: the PNG bytes are stored with a <c>.png</c> extension.</description></item>
    /// <item><description><c>TryGetBytesAsMemory</c> succeeds: those bytes are stored with a <c>.bin</c> extension.</description></item>
    /// <item><description>Otherwise <c>RawBytes</c> is taken, inflated when it starts with the zlib signature,
    /// and stored as <c>.jpg</c> when the result starts with the JPEG signature, else as <c>.bin</c>.</description></item>
    /// </list>
    /// Entries are named <c>P{page:0000}_{image:000}</c> (both 1-based) plus the extension, and are
    /// returned ordered by page and then by image position within the page.
    /// </remarks>
    /// <param name="inputPdfBytes">The complete PDF file content.</param>
    /// <param name="password">Optional password handed to the PDF parser; ignored when <c>null</c>.</param>
    /// <param name="fileTime">Optional file time information copied onto every produced entry.</param>
    /// <returns>One entry per image found in the document.</returns>
    public ArchiveEntry[] ReadImages(byte[] inputPdfBytes, string? password = null, ArchiveEntryFileTimeTuple? fileTime = null)
    {
        var parsingOptions = new ParsingOptions();
        if (password != null)
        {
            parsingOptions.Password = password;
        }

        using var document = PdfDocument.Open(inputPdfBytes, parsingOptions);
        var pages = document.GetPages().ToArray();

        // One slot per page: every worker writes only to its own slot, so no lock is needed and
        // the final order does not depend on thread scheduling.
        var entriesByPage = new List<ArchiveEntry>[pages.Length];

        // NOTE(review): pages of one document are processed concurrently, as before; this assumes
        // PdfPig tolerates concurrent GetImages calls on the same document - confirm.
        Parallel.ForEach(pages, (page, _, pageIndex) =>
        {
            var images = page.GetImages().ToArray();
            var pageEntries = new List<ArchiveEntry>(images.Length);

            for (var imageIndex = 0; imageIndex < images.Length; imageIndex++)
            {
                var pdfImage = images[imageIndex];
                var outputFileName = $"P{pageIndex + 1:0000}_{imageIndex + 1:000}";

                byte[] content;
                string extName;

                if (pdfImage.TryGetPng(out var pngBytes))
                {
                    content = pngBytes;
                    extName = ".png";
                }
                else if (pdfImage.TryGetBytesAsMemory(out var imageMemory))
                {
                    content = imageMemory.ToArray();
                    extName = ".bin";
                }
                else
                {
                    (content, extName) = DecodeRawImage(pdfImage.RawBytes.ToArray());
                }

                pageEntries.Add(new ArchiveEntry
                {
                    PathAndName = outputFileName + extName,
                    FileTimeTuple = fileTime,
                    Content = content
                });
            }

            entriesByPage[pageIndex] = pageEntries;
        });

        // Flatten in page order. Every slot has been assigned here because Parallel.ForEach
        // either completed for all pages or threw before this point.
        var result = new List<ArchiveEntry>(pages.Length);
        foreach (var pageEntries in entriesByPage)
        {
            result.AddRange(pageEntries);
        }

        return result.ToArray();
    }

    /// <summary>
    /// Inflates <paramref name="rawBytes"/> when it starts with the zlib signature and picks a file
    /// extension based on the signature of the resulting data.
    /// </summary>
    /// <param name="rawBytes">The raw image bytes taken from the PDF.</param>
    /// <returns>The (possibly inflated) content and either <c>.jpg</c> or <c>.bin</c>.</returns>
    private static (byte[] Content, string Extension) DecodeRawImage(byte[] rawBytes)
    {
        var content = rawBytes;

        if (HasSignature(content, Signatures.Zlib))
        {
            using var input = new MemoryStream(content);
            using var inflater = new ZLibStream(input, CompressionMode.Decompress);
            using var output = new MemoryStream();
            inflater.CopyTo(output);
            content = output.ToArray();
        }

        var extension = HasSignature(content, Signatures.Jpg) ? ".jpg" : ".bin";
        return (content, extension);
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="data"/> is strictly longer than
    /// <paramref name="signature"/> and begins with it.
    /// </summary>
    private static bool HasSignature(ReadOnlySpan<byte> data, ReadOnlySpan<byte> signature) =>
        data.Length > signature.Length && data.StartsWith(signature);
}