Ver código fonte

PDF extractor: add password support and zlib decompression

HOME 7 meses atrás
pai
commit
67770b9b35
1 arquivo alterado com 31 adições e 6 exclusões
  1. 31 6
      PdfExtractor/Program.cs

+ 31 - 6
PdfExtractor/Program.cs

@@ -1,4 +1,6 @@
-using System.Diagnostics;
+using System;
+using System.Diagnostics;
+using System.IO.Compression;
 using UglyToad.PdfPig;
 
 if (args.Length != 2)
@@ -24,17 +26,23 @@ if (!Directory.Exists(outputDir))
 
 Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.Idle;
 
-
 var fullInputDir = Path.GetFullPath(inputDir);
 var fullOutputDir = Path.GetFullPath(outputDir);
 
 var filePathsToProcess = Directory.GetFiles(fullInputDir);
 
 var signatureJpg = new byte[] { 0xFF, 0xD8, 0xFF };
+var signatureZlib = new byte[] { 0x78, 0x9C };
 
 void ProcessFunc(string inputFilePath)
 {
-    var outputFileDir = Path.Combine(fullOutputDir, Path.GetFileNameWithoutExtension(inputFilePath));
+    var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(inputFilePath);
+
+    var folderName = fileNameWithoutExtension.EndsWith("PDF", StringComparison.InvariantCultureIgnoreCase)
+        ? fileNameWithoutExtension
+        : fileNameWithoutExtension + " PDF";
+
+    var outputFileDir = Path.Combine(fullOutputDir, folderName);
 
     if (Directory.Exists(outputFileDir) == false)
     {
@@ -43,7 +51,14 @@ void ProcessFunc(string inputFilePath)
     }
 
     var pdfBytes = File.ReadAllBytes(inputFilePath);
-    using var document = PdfDocument.Open(pdfBytes);
+    var parsingOptions = new ParsingOptions();
+
+    if (fileNameWithoutExtension.Contains("--pw-"))
+    {
+        parsingOptions.Password = fileNameWithoutExtension.Split("--pw-").Last();
+    }
+
+    using var document = PdfDocument.Open(pdfBytes, parsingOptions);
     var pages = document.GetPages().ToArray();
     for (var pageIndex = 0; pageIndex < pages.Length; pageIndex++)
     {
@@ -81,9 +96,21 @@ void ProcessFunc(string inputFilePath)
             else
             {
                 bytes = image.RawBytes.ToArray();
+
                 var span = (ReadOnlySpan<byte>)bytes;
                 var extName = ".bin";
 
+                // Decompress zlib-wrapped data before checking the image signature
+                if (span.Length > signatureZlib.Length && span.StartsWith(signatureZlib))
+                {
+                    using var inMs = new MemoryStream(bytes);
+                    using var decStream = new ZLibStream(inMs, CompressionMode.Decompress);
+                    using var outMs = new MemoryStream();
+                    decStream.CopyTo(outMs);
+                    bytes = outMs.ToArray();
+                    span = (ReadOnlySpan<byte>)bytes;
+                }
+
                 if (span.Length > signatureJpg.Length && span.StartsWith(signatureJpg))
                 {
                     extName = ".jpg";
@@ -113,5 +140,3 @@ Parallel.ForEach(filePathsToProcess, ProcessFunc);
 //foreach (var s in filePathsToProcess) ProcessFunc(s);
 
 return 0;
-
-