123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- using System.IO.Compression;
- using UglyToad.PdfPig;
- namespace ImageConvertService.Biz;
- using Models;
/// <summary>
/// Extracts the images embedded in a PDF document and wraps each one in an <see cref="ArchiveEntry"/>.
/// </summary>
public class PdfImageReader
{
    /// <summary>Leading byte sequences used to recognise raw image payloads.</summary>
    private static class Signatures
    {
        // Only this exact two-byte zlib header is recognised; payloads starting with
        // any other byte pair are not treated as zlib-compressed.
        public static readonly byte[] Zlib = { 0x78, 0x9C };

        // First three bytes expected at the start of a JPEG payload.
        public static readonly byte[] Jpg = { 0xFF, 0xD8, 0xFF };
    }

    /// <summary>
    /// Reads every image of every page in the given PDF and returns one archive entry per image.
    /// </summary>
    /// <param name="inputPdfBytes">The complete PDF file content.</param>
    /// <param name="password">Optional password handed to the PDF parser; ignored when <c>null</c>.</param>
    /// <param name="fileTime">Optional time information copied verbatim onto every produced entry.</param>
    /// <returns>
    /// One entry per image, ordered by page and then by image position on the page. Entries are named
    /// <c>P{page:0000}_{image:000}</c> (both 1-based) with the extension <c>.png</c>, <c>.jpg</c> or <c>.bin</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputPdfBytes"/> is <c>null</c>.</exception>
    public ArchiveEntry[] ReadImages(byte[] inputPdfBytes, string? password = null, ArchiveEntryFileTimeTuple? fileTime = null)
    {
        ArgumentNullException.ThrowIfNull(inputPdfBytes);

        var parsingOptions = new ParsingOptions();
        if (password is not null)
        {
            parsingOptions.Password = password;
        }

        using var document = PdfDocument.Open(inputPdfBytes, parsingOptions);
        var pages = document.GetPages().ToArray();

        // One slot per page. Every parallel iteration writes only to its own slot, so no lock is
        // required and the final order is independent of thread scheduling.
        var entriesByPage = new ArchiveEntry[pages.Length][];

        // NOTE(review): pages are processed concurrently, as before — confirm that PdfPig supports
        // concurrent image extraction from pages of the same document.
        Parallel.ForEach(pages, (page, _, pageIndex) =>
        {
            var images = page.GetImages().ToArray();
            var pageEntries = new ArchiveEntry[images.Length];

            for (var imageIndex = 0; imageIndex < images.Length; imageIndex++)
            {
                var pdfImage = images[imageIndex];
                var baseName = $"P{pageIndex + 1:0000}_{imageIndex + 1:000}";

                if (pdfImage.TryGetPng(out var pngBytes))
                {
                    // Preferred: the library could render the image as PNG.
                    pageEntries[imageIndex] = CreateEntry(baseName + ".png", pngBytes, fileTime);
                }
                else if (pdfImage.TryGetBytesAsMemory(out var imageMemory))
                {
                    // Bytes were obtained but their format is not inspected here, hence ".bin".
                    pageEntries[imageIndex] = CreateEntry(baseName + ".bin", imageMemory.ToArray(), fileTime);
                }
                else
                {
                    // Last resort: take the raw stream bytes and sniff them ourselves.
                    var (content, extension) = DecodeRawBytes(pdfImage.RawBytes.ToArray());
                    pageEntries[imageIndex] = CreateEntry(baseName + extension, content, fileTime);
                }
            }

            entriesByPage[pageIndex] = pageEntries;
        });

        return entriesByPage.SelectMany(pageEntries => pageEntries).ToArray();
    }

    /// <summary>Builds an archive entry from a name, its content and the shared file time.</summary>
    private static ArchiveEntry CreateEntry(string pathAndName, byte[] content, ArchiveEntryFileTimeTuple? fileTime) =>
        new ArchiveEntry
        {
            PathAndName = pathAndName,
            FileTimeTuple = fileTime,
            Content = content
        };

    /// <summary>
    /// Inflates the payload when it starts with the known zlib header, then picks the file
    /// extension: <c>.jpg</c> when the (possibly inflated) data starts with the JPEG signature,
    /// <c>.bin</c> otherwise.
    /// </summary>
    private static (byte[] Content, string Extension) DecodeRawBytes(byte[] rawBytes)
    {
        var content = HasSignature(rawBytes, Signatures.Zlib) ? Inflate(rawBytes) : rawBytes;
        var extension = HasSignature(content, Signatures.Jpg) ? ".jpg" : ".bin";
        return (content, extension);
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="data"/> is strictly longer than
    /// <paramref name="signature"/> and begins with it.
    /// </summary>
    private static bool HasSignature(ReadOnlySpan<byte> data, ReadOnlySpan<byte> signature) =>
        data.Length > signature.Length && data.StartsWith(signature);

    /// <summary>Decompresses a zlib-wrapped buffer fully into a new array.</summary>
    private static byte[] Inflate(byte[] compressed)
    {
        using var input = new MemoryStream(compressed);
        using var zlib = new ZLibStream(input, CompressionMode.Decompress);
        using var output = new MemoryStream();
        zlib.CopyTo(output);
        return output.ToArray();
    }
}
|