PdfImageReader.cs 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. using System.IO.Compression;
  2. using UglyToad.PdfPig;
  3. namespace ImageConvertService.Biz;
  4. using Models;
  /// <summary>
  /// Extracts the images found on the pages of a PDF document and returns them as archive entries.
  /// </summary>
  public class PdfImageReader
  {
      /// <summary>Leading byte sequences compared against the start of a raw image payload.</summary>
      private static class Signatures
      {
          /// <summary>Prefix that triggers zlib decompression of the payload.</summary>
          public static readonly byte[] Zlib = { 0x78, 0x9C };

          /// <summary>Prefix that makes the payload be stored with the ".jpg" extension.</summary>
          public static readonly byte[] Jpg = { 0xFF, 0xD8, 0xFF };
      }

      /// <summary>
      /// Opens the PDF and produces one <see cref="ArchiveEntry"/> per image on each page.
      /// </summary>
      /// <param name="inputPdfBytes">Bytes of the PDF document to open.</param>
      /// <param name="password">Password assigned to the parsing options; left unset when <c>null</c>.</param>
      /// <param name="fileTime">Value copied verbatim into <c>FileTimeTuple</c> of every produced entry.</param>
      /// <returns>
      /// Entries named <c>P{page:0000}_{image:000}</c> plus an extension: ".png" when the image can be
      /// exported as PNG, ".bin" when only its decoded bytes are available, otherwise ".jpg" or ".bin"
      /// depending on the raw payload. Entries are ordered by page, then by image position on the page.
      /// </returns>
      public ArchiveEntry[] ReadImages(byte[] inputPdfBytes, string? password = null, ArchiveEntryFileTimeTuple? fileTime = null)
      {
          var parsingOptions = new ParsingOptions();
          if (password != null)
          {
              parsingOptions.Password = password;
          }

          using var document = PdfDocument.Open(inputPdfBytes, parsingOptions);
          var pages = document.GetPages().ToArray();

          // Each worker writes only to the slot of its own page, so no lock is needed and the
          // flattened result keeps page/image order regardless of how the work is scheduled.
          var entriesByPage = new ArchiveEntry[pages.Length][];

          // NOTE(review): pages of one open document are read concurrently here — confirm that
          // the PDF library supports concurrent GetImages calls.
          Parallel.ForEach(pages, (page, _, pageIndex) =>
          {
              var images = page.GetImages().ToArray();
              var pageEntries = new ArchiveEntry[images.Length];

              for (var imageIndex = 0; imageIndex < images.Length; imageIndex++)
              {
                  var pdfImage = images[imageIndex];
                  byte[] content;
                  string extension;

                  if (pdfImage.TryGetPng(out var pngBytes))
                  {
                      content = pngBytes;
                      extension = ".png";
                  }
                  else if (pdfImage.TryGetBytesAsMemory(out var decodedMemory))
                  {
                      content = decodedMemory.ToArray();
                      extension = ".bin";
                  }
                  else
                  {
                      // Last resort: take the raw bytes and sniff their format.
                      (content, extension) = DecodeRawImage(pdfImage.RawBytes.ToArray());
                  }

                  pageEntries[imageIndex] = new ArchiveEntry
                  {
                      PathAndName = $"P{pageIndex + 1:0000}_{imageIndex + 1:000}" + extension,
                      FileTimeTuple = fileTime,
                      Content = content
                  };
              }

              entriesByPage[pageIndex] = pageEntries;
          });

          return entriesByPage.SelectMany(pageEntries => pageEntries).ToArray();
      }

      /// <summary>
      /// Decompresses a raw payload when it starts with the zlib signature, then picks a file
      /// extension based on whether the resulting bytes start with the JPEG signature.
      /// </summary>
      /// <param name="rawBytes">Raw image bytes.</param>
      /// <returns>The (possibly decompressed) bytes and either ".jpg" or ".bin".</returns>
      private static (byte[] Content, string Extension) DecodeRawImage(byte[] rawBytes)
      {
          var payload = rawBytes;
          ReadOnlySpan<byte> head = payload;

          // Decompress zlib-wrapped data before looking for the JPEG signature.
          if (head.Length > Signatures.Zlib.Length && head.StartsWith(Signatures.Zlib))
          {
              using var compressed = new MemoryStream(payload);
              using var inflater = new ZLibStream(compressed, CompressionMode.Decompress);
              using var inflated = new MemoryStream();
              inflater.CopyTo(inflated);
              payload = inflated.ToArray();
              head = payload;
          }

          var isJpeg = head.Length > Signatures.Jpg.Length && head.StartsWith(Signatures.Jpg);
          return (payload, isJpeg ? ".jpg" : ".bin");
      }
  }