123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
- using System;
- using System.Diagnostics;
- using System.IO.Compression;
- using UglyToad.PdfPig;
// ---- Command-line validation -------------------------------------------
// Exactly two arguments are expected: the folder holding the input files
// and the folder that receives the extracted images. Any validation
// failure prints a message and exits with code 1.
if (args.Length != 2)
{
    Console.WriteLine("Err: 2 args required, <inputDir> <outputDir>");
    return 1;
}

string sourceArg = args[0];
string targetArg = args[1];

if (Directory.Exists(sourceArg) is false)
{
    Console.WriteLine("Err: inputDir dose not exist");
    return 1;
}

if (Directory.Exists(targetArg) is false)
{
    Console.WriteLine("Err: outputDir dose not exist");
    return 1;
}

// Lower this process to idle priority before the extraction work starts.
var currentProcess = Process.GetCurrentProcess();
currentProcess.PriorityClass = ProcessPriorityClass.Idle;

// Work with absolute paths from here on; every file directly inside the
// input folder is queued for processing.
var fullInputDir = Path.GetFullPath(sourceArg);
var fullOutputDir = Path.GetFullPath(targetArg);
var filePathsToProcess = Directory.GetFiles(fullInputDir);

// Leading byte sequences used by ProcessFunc to recognise JPEG data and
// zlib-wrapped streams in raw image bytes.
byte[] signatureJpg = { 0xFF, 0xD8, 0xFF };
byte[] signatureZlib = { 0x78, 0x9C };
// Extracts every image from one PDF file into its own sub-folder of
// fullOutputDir.
//
// inputFilePath: path of the PDF to read. If the file name (without
// extension) contains "--pw-", the text after the last occurrence is used
// as the document password.
//
// Output folder: the file name without extension, with " PDF" appended
// unless the name already ends in "PDF" (case-insensitive).
// Output files:  P<page, 4 digits>_<image, 3 digits> plus an extension:
//   .png  when the image can be converted to PNG,
//   .bin  when only the bytes from TryGetBytes are available,
//   .jpg / .bin for the raw-bytes fallback, depending on the JPEG signature.
// Files that already exist are never overwritten; a SKIP line is logged.
void ProcessFunc(string inputFilePath)
{
    // Writes data to path unless the file already exists, logging the outcome.
    void SaveIfMissing(string path, byte[] data)
    {
        if (File.Exists(path))
        {
            Console.WriteLine($"SKIP {path}");
            return;
        }

        File.WriteAllBytes(path, data);
        Console.WriteLine($"SAVE {path}");
    }

    // Returns the zlib-decompressed payload when data starts with the zlib
    // signature; otherwise returns data unchanged.
    byte[] InflateIfZlib(byte[] data)
    {
        var span = (ReadOnlySpan<byte>)data;
        if (!(span.Length > signatureZlib.Length && span.StartsWith(signatureZlib)))
        {
            return data;
        }

        using var compressed = new MemoryStream(data);
        using var inflater = new ZLibStream(compressed, CompressionMode.Decompress);
        using var inflated = new MemoryStream();
        inflater.CopyTo(inflated);
        return inflated.ToArray();
    }

    // True when data is longer than, and starts with, the JPEG signature.
    bool LooksLikeJpeg(byte[] data)
    {
        var span = (ReadOnlySpan<byte>)data;
        return span.Length > signatureJpg.Length && span.StartsWith(signatureJpg);
    }

    var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(inputFilePath);
    var folderName = fileNameWithoutExtension.EndsWith("PDF", StringComparison.InvariantCultureIgnoreCase)
        ? fileNameWithoutExtension
        : fileNameWithoutExtension + " PDF";
    var outputFileDir = Path.Combine(fullOutputDir, folderName);

    if (!Directory.Exists(outputFileDir))
    {
        // Logged without extra locking, like every other log line in this function.
        Console.WriteLine("Create dir: " + outputFileDir);
        Directory.CreateDirectory(outputFileDir);
    }

    var pdfBytes = File.ReadAllBytes(inputFilePath);

    var parsingOptions = new ParsingOptions();
    if (fileNameWithoutExtension.Contains("--pw-"))
    {
        // Password convention: "<name>--pw-<password>.pdf".
        parsingOptions.Password = fileNameWithoutExtension.Split("--pw-").Last();
    }

    using var document = PdfDocument.Open(pdfBytes, parsingOptions);
    var pages = document.GetPages().ToArray();

    for (var pageIndex = 0; pageIndex < pages.Length; pageIndex++)
    {
        var images = pages[pageIndex].GetImages().ToArray();

        for (var imageIndex = 0; imageIndex < images.Length; imageIndex++)
        {
            var image = images[imageIndex];
            // Page and image numbers are 1-based in the file name.
            var outputFileName = $"P{pageIndex + 1:0000}_{imageIndex + 1:000}";

            if (image.TryGetPng(out var pngBytes))
            {
                SaveIfMissing(Path.Combine(outputFileDir, outputFileName + ".png"), pngBytes);
            }
            else if (image.TryGetBytes(out var irlBytes))
            {
                // Fix: write the bytes returned by TryGetBytes. The previous code
                // wrote the out value of the failed TryGetPng call instead, so the
                // .bin file never contained this image's data.
                SaveIfMissing(Path.Combine(outputFileDir, outputFileName + ".bin"), irlBytes.ToArray());
            }
            else
            {
                // Last resort: take the raw stream bytes, undo a zlib wrapper if
                // one is present, and name the file by its detected signature.
                var payload = InflateIfZlib(image.RawBytes.ToArray());
                var extName = LooksLikeJpeg(payload) ? ".jpg" : ".bin";
                SaveIfMissing(Path.Combine(outputFileDir, outputFileName + extName), payload);
            }
        }
    }
}
// Process all queued files in parallel, one ProcessFunc call per file.
// For debugging, replace the parallel loop with a sequential one:
//   foreach (var s in filePathsToProcess) ProcessFunc(s);
Parallel.ForEach(filePathsToProcess, filePath => ProcessFunc(filePath));

// Exit code 0: all files were processed.
return 0;
|