소스 검색

ADD: pdf extractor

HOME 7 달 전
부모
커밋
e748caf436
6개의 변경된 파일, 154개의 추가 및 3개의 삭제
  1. 14 0
      PdfExtractor/PdfExtractor.csproj
  2. 117 0
      PdfExtractor/Program.cs
  3. 8 0
      PdfExtractor/Properties/launchSettings.json
  4. 9 2
      StrangeTools.sln
  5. 5 0
      WebpDirConv/Program.cs
  6. 1 1
      WebpDirConv/Properties/launchSettings.json

+ 14 - 0
PdfExtractor/PdfExtractor.csproj

@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <!-- Build settings: an executable for net8.0 with implicit global usings
+       and nullable reference types switched on. -->
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <!-- NuGet dependencies. PdfPig is presumably the package that supplies the
+       UglyToad.PdfPig namespace imported by Program.cs. -->
+  <ItemGroup>
+    <PackageReference Include="PdfPig" Version="0.1.8" />
+  </ItemGroup>
+
+</Project>

+ 117 - 0
PdfExtractor/Program.cs

@@ -0,0 +1,117 @@
+using System.Diagnostics;
+using UglyToad.PdfPig;
+
+// Command line: <inputDir> <outputDir>. Both directories must already exist;
+// any validation failure is reported on the console and ends the program with exit code 1.
+if (args.Length != 2)
+{
+    Console.WriteLine("Err: 2 args required, <inputDir> <outputDir>");
+    return 1;
+}
+
+var inputDir = args[0];
+var outputDir = args[1];
+
+if (!Directory.Exists(inputDir))
+{
+    Console.WriteLine("Err: inputDir does not exist");
+    return 1;
+}
+
+if (!Directory.Exists(outputDir))
+{
+    Console.WriteLine("Err: outputDir does not exist");
+    return 1;
+}
+
+// Drop this process to idle priority. Process is IDisposable, so release the
+// handle as soon as the priority has been set.
+using (var currentProcess = Process.GetCurrentProcess())
+{
+    currentProcess.PriorityClass = ProcessPriorityClass.Idle;
+}
+
+// Work with absolute paths from here on.
+var fullInputDir = Path.GetFullPath(inputDir);
+var fullOutputDir = Path.GetFullPath(outputDir);
+
+// Every file directly inside the input directory is handed to ProcessFunc.
+var filePathsToProcess = Directory.GetFiles(fullInputDir);
+
+// Leading bytes used to recognise raw image data that is a JPEG stream.
+var signatureJpg = new byte[] { 0xFF, 0xD8, 0xFF };
+
+// Extracts every image embedded in the PDF at inputFilePath into
+// <fullOutputDir>/<input file name without extension>/ (created when missing).
+//
+// Output files are named P<page>_<image>, both 1-based and zero padded. The
+// extension depends on how the image data was obtained:
+//   .png - image.TryGetPng succeeded
+//   .bin - image.TryGetBytes succeeded
+//   .jpg - raw image bytes that start with the JPEG signature
+//   .bin - any other raw image bytes
+// A file that already exists is left untouched and reported as SKIP.
+void ProcessFunc(string inputFilePath)
+{
+    // Writes data to path unless a file is already there, and logs which of the two happened.
+    static void SaveIfMissing(string path, byte[] data)
+    {
+        if (File.Exists(path))
+        {
+            Console.WriteLine($"SKIP {path}");
+            return;
+        }
+
+        File.WriteAllBytes(path, data);
+        Console.WriteLine($"SAVE {path}");
+    }
+
+    var outputFileDir = Path.Combine(fullOutputDir, Path.GetFileNameWithoutExtension(inputFilePath));
+
+    if (!Directory.Exists(outputFileDir))
+    {
+        lock (Console.Out) Console.WriteLine("Create dir: " + outputFileDir);
+        Directory.CreateDirectory(outputFileDir);
+    }
+
+    var pdfBytes = File.ReadAllBytes(inputFilePath);
+    using var document = PdfDocument.Open(pdfBytes);
+    var pages = document.GetPages().ToArray();
+    for (var pageIndex = 0; pageIndex < pages.Length; pageIndex++)
+    {
+        var images = pages[pageIndex].GetImages().ToArray();
+        for (var imageIndex = 0; imageIndex < images.Length; imageIndex++)
+        {
+            var image = images[imageIndex];
+            var outputFileName = $"P{pageIndex + 1:0000}_{imageIndex + 1:000}";
+
+            if (image.TryGetPng(out var pngBytes))
+            {
+                SaveIfMissing(Path.Combine(outputFileDir, outputFileName + ".png"), pngBytes);
+            }
+            else if (image.TryGetBytes(out var irlBytes))
+            {
+                // Write the bytes TryGetBytes produced. The previous code wrote the
+                // out value of the failed TryGetPng call here instead.
+                SaveIfMissing(Path.Combine(outputFileDir, outputFileName + ".bin"), irlBytes.ToArray());
+            }
+            else
+            {
+                // Fall back to the raw bytes; label them .jpg only when they
+                // begin with the JPEG signature and contain more than just that.
+                var rawBytes = image.RawBytes.ToArray();
+                ReadOnlySpan<byte> header = rawBytes;
+                var isJpeg = header.Length > signatureJpg.Length && header.StartsWith(signatureJpg);
+                var extName = isJpeg ? ".jpg" : ".bin";
+
+                SaveIfMissing(Path.Combine(outputFileDir, outputFileName + extName), rawBytes);
+            }
+        }
+    }
+}
+
+// Run ProcessFunc once per input file, in parallel.
+Parallel.ForEach(filePathsToProcess, path => ProcessFunc(path));
+
+// Debugging aid: swap the call above for a plain sequential loop, e.g.
+//   foreach (var s in filePathsToProcess) ProcessFunc(s);
+
+return 0;
+
+

+ 8 - 0
PdfExtractor/Properties/launchSettings.json

@@ -0,0 +1,8 @@
+{
+  "profiles": {
+    "PdfExtractor": {
+      "commandName": "Project",
+      "commandLineArgs": "Z:\\PTW_PROC\\@PDF_R Z:\\PTW_PROC\\@PDF_EXTRACT"
+    }
+  }
+}

+ 9 - 2
StrangeTools.sln

@@ -60,7 +60,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CompServ.Tests", "CompressS
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CompServ.Worker", "CompressService\CompServ.Worker\CompServ.Worker.csproj", "{2F7ED95A-58ED-43CF-A934-D591C822C5EC}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Older", "Older", "{84ACDDE9-A7B5-4528-A5BA-7BF76B0E6BAF}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "@Older", "@Older", "{84ACDDE9-A7B5-4528-A5BA-7BF76B0E6BAF}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "PictureMover", "PictureMover\PictureMover.csproj", "{C94C8E6E-1051-49AD-A539-E0BC8D365C59}"
 EndProject
@@ -84,7 +84,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SniTamperProxy", "SniTamper
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CreateTimePrefixRen", "CreateTimePrefixRen\CreateTimePrefixRen.csproj", "{FB7B086B-EAB5-4694-88F6-7BD794224532}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WebpDirConv", "WebpDirConv\WebpDirConv.csproj", "{9AC94CF3-80A1-4558-BDA9-A6D99C8F643A}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebpDirConv", "WebpDirConv\WebpDirConv.csproj", "{9AC94CF3-80A1-4558-BDA9-A6D99C8F643A}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PdfExtractor", "PdfExtractor\PdfExtractor.csproj", "{54F47038-F132-4F2E-BEED-302B5835CBF8}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -232,6 +234,10 @@ Global
 		{9AC94CF3-80A1-4558-BDA9-A6D99C8F643A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9AC94CF3-80A1-4558-BDA9-A6D99C8F643A}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{9AC94CF3-80A1-4558-BDA9-A6D99C8F643A}.Release|Any CPU.Build.0 = Release|Any CPU
+		{54F47038-F132-4F2E-BEED-302B5835CBF8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{54F47038-F132-4F2E-BEED-302B5835CBF8}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{54F47038-F132-4F2E-BEED-302B5835CBF8}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{54F47038-F132-4F2E-BEED-302B5835CBF8}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@@ -274,6 +280,7 @@ Global
 		{8E775D0B-CB5E-4921-81F0-D236F60072FC} = {0CAFC020-EA0B-4DB1-9782-645970A53BA5}
 		{FB7B086B-EAB5-4694-88F6-7BD794224532} = {3120ADE6-C606-42F1-9AA8-B7F1A8933CD7}
 		{9AC94CF3-80A1-4558-BDA9-A6D99C8F643A} = {3120ADE6-C606-42F1-9AA8-B7F1A8933CD7}
+		{54F47038-F132-4F2E-BEED-302B5835CBF8} = {3120ADE6-C606-42F1-9AA8-B7F1A8933CD7}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {017A8C58-F476-47E7-9CBE-077A98A76AB4}

+ 5 - 0
WebpDirConv/Program.cs

@@ -64,6 +64,11 @@ void ProcessFunc(string inputFilePath)
     pWebp.WaitForExit();
 
     lock (Console.Out) Console.WriteLine($"PROCESS {pWebp.ExitCode} {outputFilePath}");
+
+    if (pWebp.ExitCode != 0)
+    {
+        int bp = 0;
+    }
 }
 
 Parallel.ForEach(filePathsToProcess, ProcessFunc);

+ 1 - 1
WebpDirConv/Properties/launchSettings.json

@@ -2,7 +2,7 @@
   "profiles": {
     "WebpDirConv": {
       "commandName": "Project",
-      "commandLineArgs": "F:\\var\\tmp\\de_s Z:\\var\\tmp\\de_w" 
+      "commandLineArgs": "Z:\\PTW_PROC\\@PDF_EXTRACT Z:\\PTW_PROC\\@PDF_EXTRACT_WEBP"
     }
   }
 }