feat: implement cbz writing and pdf reading
This commit is contained in:
		
							parent
							
								
									034f0b142c
								
							
						
					
					
						commit
						e8287d0c6e
					
				
					 10 changed files with 644 additions and 57 deletions
				
			
		
							
								
								
									
										156
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										156
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							| 
						 | 
					@ -40,6 +40,15 @@ version = "0.5.0"
 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
 | 
					checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "android_system_properties"
 | 
				
			||||||
 | 
					version = "0.1.5"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "libc",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "anstream"
 | 
					name = "anstream"
 | 
				
			||||||
version = "0.6.15"
 | 
					version = "0.6.15"
 | 
				
			||||||
| 
						 | 
					@ -232,6 +241,7 @@ dependencies = [
 | 
				
			||||||
 "image",
 | 
					 "image",
 | 
				
			||||||
 "indicatif",
 | 
					 "indicatif",
 | 
				
			||||||
 "log",
 | 
					 "log",
 | 
				
			||||||
 | 
					 "lopdf",
 | 
				
			||||||
 "pdf-writer",
 | 
					 "pdf-writer",
 | 
				
			||||||
 "pretty_env_logger",
 | 
					 "pretty_env_logger",
 | 
				
			||||||
 "rayon",
 | 
					 "rayon",
 | 
				
			||||||
| 
						 | 
					@ -268,6 +278,17 @@ version = "1.0.0"
 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 | 
					checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "chrono"
 | 
				
			||||||
 | 
					version = "0.4.42"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "iana-time-zone",
 | 
				
			||||||
 | 
					 "num-traits",
 | 
				
			||||||
 | 
					 "windows-link",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "cipher"
 | 
					name = "cipher"
 | 
				
			||||||
version = "0.4.4"
 | 
					version = "0.4.4"
 | 
				
			||||||
| 
						 | 
					@ -349,6 +370,12 @@ version = "0.3.1"
 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
 | 
					checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "core-foundation-sys"
 | 
				
			||||||
 | 
					version = "0.8.7"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "cpufeatures"
 | 
					name = "cpufeatures"
 | 
				
			||||||
version = "0.2.14"
 | 
					version = "0.2.14"
 | 
				
			||||||
| 
						 | 
					@ -484,6 +511,15 @@ version = "1.0.0"
 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
 | 
					checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "encoding_rs"
 | 
				
			||||||
 | 
					version = "0.8.35"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "cfg-if",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "env_logger"
 | 
					name = "env_logger"
 | 
				
			||||||
version = "0.10.2"
 | 
					version = "0.10.2"
 | 
				
			||||||
| 
						 | 
					@ -676,6 +712,30 @@ version = "2.1.0"
 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
 | 
					checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "iana-time-zone"
 | 
				
			||||||
 | 
					version = "0.1.64"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "android_system_properties",
 | 
				
			||||||
 | 
					 "core-foundation-sys",
 | 
				
			||||||
 | 
					 "iana-time-zone-haiku",
 | 
				
			||||||
 | 
					 "js-sys",
 | 
				
			||||||
 | 
					 "log",
 | 
				
			||||||
 | 
					 "wasm-bindgen",
 | 
				
			||||||
 | 
					 "windows-core",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "iana-time-zone-haiku"
 | 
				
			||||||
 | 
					version = "0.1.2"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "cc",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "image"
 | 
					name = "image"
 | 
				
			||||||
version = "0.25.8"
 | 
					version = "0.25.8"
 | 
				
			||||||
| 
						 | 
					@ -847,6 +907,12 @@ dependencies = [
 | 
				
			||||||
 "zlib-rs",
 | 
					 "zlib-rs",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "linked-hash-map"
 | 
				
			||||||
 | 
					version = "0.5.6"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "linux-raw-sys"
 | 
					name = "linux-raw-sys"
 | 
				
			||||||
version = "0.4.14"
 | 
					version = "0.4.14"
 | 
				
			||||||
| 
						 | 
					@ -884,6 +950,25 @@ dependencies = [
 | 
				
			||||||
 "imgref",
 | 
					 "imgref",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "lopdf"
 | 
				
			||||||
 | 
					version = "0.32.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "e775e4ee264e8a87d50a9efef7b67b4aa988cf94e75630859875fc347e6c872b"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "chrono",
 | 
				
			||||||
 | 
					 "encoding_rs",
 | 
				
			||||||
 | 
					 "flate2",
 | 
				
			||||||
 | 
					 "itoa",
 | 
				
			||||||
 | 
					 "linked-hash-map",
 | 
				
			||||||
 | 
					 "log",
 | 
				
			||||||
 | 
					 "md5",
 | 
				
			||||||
 | 
					 "nom",
 | 
				
			||||||
 | 
					 "rayon",
 | 
				
			||||||
 | 
					 "time",
 | 
				
			||||||
 | 
					 "weezl",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "lzma-rust2"
 | 
					name = "lzma-rust2"
 | 
				
			||||||
version = "0.13.0"
 | 
					version = "0.13.0"
 | 
				
			||||||
| 
						 | 
					@ -904,6 +989,12 @@ dependencies = [
 | 
				
			||||||
 "rayon",
 | 
					 "rayon",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "md5"
 | 
				
			||||||
 | 
					version = "0.7.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "memchr"
 | 
					name = "memchr"
 | 
				
			||||||
version = "2.7.4"
 | 
					version = "2.7.4"
 | 
				
			||||||
| 
						 | 
					@ -1613,10 +1704,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
 | 
					checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
 | 
				
			||||||
dependencies = [
 | 
					dependencies = [
 | 
				
			||||||
 "deranged",
 | 
					 "deranged",
 | 
				
			||||||
 | 
					 "itoa",
 | 
				
			||||||
 "num-conv",
 | 
					 "num-conv",
 | 
				
			||||||
 "powerfmt",
 | 
					 "powerfmt",
 | 
				
			||||||
 "serde",
 | 
					 "serde",
 | 
				
			||||||
 "time-core",
 | 
					 "time-core",
 | 
				
			||||||
 | 
					 "time-macros",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
| 
						 | 
					@ -1625,6 +1718,16 @@ version = "0.1.6"
 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
 | 
					checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "time-macros"
 | 
				
			||||||
 | 
					version = "0.2.24"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "num-conv",
 | 
				
			||||||
 | 
					 "time-core",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "toml"
 | 
					name = "toml"
 | 
				
			||||||
version = "0.8.19"
 | 
					version = "0.8.19"
 | 
				
			||||||
| 
						 | 
					@ -1826,12 +1929,65 @@ dependencies = [
 | 
				
			||||||
 "windows-sys 0.59.0",
 | 
					 "windows-sys 0.59.0",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "windows-core"
 | 
				
			||||||
 | 
					version = "0.62.2"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "windows-implement",
 | 
				
			||||||
 | 
					 "windows-interface",
 | 
				
			||||||
 | 
					 "windows-link",
 | 
				
			||||||
 | 
					 "windows-result",
 | 
				
			||||||
 | 
					 "windows-strings",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "windows-implement"
 | 
				
			||||||
 | 
					version = "0.60.2"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "proc-macro2",
 | 
				
			||||||
 | 
					 "quote",
 | 
				
			||||||
 | 
					 "syn",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "windows-interface"
 | 
				
			||||||
 | 
					version = "0.59.3"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "proc-macro2",
 | 
				
			||||||
 | 
					 "quote",
 | 
				
			||||||
 | 
					 "syn",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "windows-link"
 | 
					name = "windows-link"
 | 
				
			||||||
version = "0.2.1"
 | 
					version = "0.2.1"
 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 | 
					checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "windows-result"
 | 
				
			||||||
 | 
					version = "0.4.1"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "windows-link",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "windows-strings"
 | 
				
			||||||
 | 
					version = "0.5.1"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "windows-link",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "windows-sys"
 | 
					name = "windows-sys"
 | 
				
			||||||
version = "0.52.0"
 | 
					version = "0.52.0"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,6 +16,7 @@ rayon = "1.10.0"
 | 
				
			||||||
tabled = "0.20.0"
 | 
					tabled = "0.20.0"
 | 
				
			||||||
walkdir = "2.5.0"
 | 
					walkdir = "2.5.0"
 | 
				
			||||||
zip = "6.0.0"
 | 
					zip = "6.0.0"
 | 
				
			||||||
 | 
					lopdf = "0.32.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[dev-dependencies]
 | 
					[dev-dependencies]
 | 
				
			||||||
tempfile = "3.12.0"
 | 
					tempfile = "3.12.0"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
use std::ffi::OsStr;
 | 
					use std::ffi::OsStr;
 | 
				
			||||||
use std::fs::File;
 | 
					use std::fs::File;
 | 
				
			||||||
use std::io::Read;
 | 
					use std::io::{Read, Write};
 | 
				
			||||||
use std::path::Path;
 | 
					use std::path::Path;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use anyhow::Result;
 | 
					use anyhow::Result;
 | 
				
			||||||
| 
						 | 
					@ -9,7 +9,7 @@ use zip::ZipArchive;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::model::{Document, ImagePage};
 | 
					use crate::model::{Document, ImagePage};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use super::FormatReader;
 | 
					use super::{FormatReader, FormatWriter};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub struct CbzReader;
 | 
					pub struct CbzReader;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -51,3 +51,40 @@ impl FormatReader for CbzReader {
 | 
				
			||||||
        Ok(Document::new(pages))
 | 
					        Ok(Document::new(pages))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub struct CbzWriter;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl FormatWriter for CbzWriter {
 | 
				
			||||||
 | 
					    fn write(&self, doc: &Document, output: &Path) -> Result<()> {
 | 
				
			||||||
 | 
					        use zip::write::SimpleFileOptions;
 | 
				
			||||||
 | 
					        let file = File::create(output)?;
 | 
				
			||||||
 | 
					        let mut zip = zip::ZipWriter::new(file);
 | 
				
			||||||
 | 
					        let options = SimpleFileOptions::default();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (idx, page) in doc.pages.iter().enumerate() {
 | 
				
			||||||
 | 
					            let mut name = page.name.clone();
 | 
				
			||||||
 | 
					            if Path::new(&name).extension().and_then(OsStr::to_str) != Some("jpg") {
 | 
				
			||||||
 | 
					                name = format!("{:03}.jpg", idx + 1);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            zip.start_file(&name, options)?;
 | 
				
			||||||
 | 
					            if let Some(dct) = &page.jpeg_dct {
 | 
				
			||||||
 | 
					                zip.write_all(dct)?;
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                // Encode to JPEG
 | 
				
			||||||
 | 
					                let rgb = page.image.to_rgb8();
 | 
				
			||||||
 | 
					                let (w, h) = (rgb.width(), rgb.height());
 | 
				
			||||||
 | 
					                let mut cursor = std::io::Cursor::new(Vec::new());
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    let mut enc =
 | 
				
			||||||
 | 
					                        image::codecs::jpeg::JpegEncoder::new_with_quality(&mut cursor, 85);
 | 
				
			||||||
 | 
					                    enc.encode(&rgb.into_raw(), w, h, image::ColorType::Rgb8.into())?;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                let data = cursor.into_inner();
 | 
				
			||||||
 | 
					                zip.write_all(&data)?;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        zip.finish()?;
 | 
				
			||||||
 | 
					        Ok(())
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,6 +8,9 @@ use crate::model::Document;
 | 
				
			||||||
pub mod cbz;
 | 
					pub mod cbz;
 | 
				
			||||||
pub mod pdf;
 | 
					pub mod pdf;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use cbz::{CbzReader, CbzWriter};
 | 
				
			||||||
 | 
					use pdf::{PdfReader, PdfWriter};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 | 
					#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 | 
				
			||||||
pub enum FormatId {
 | 
					pub enum FormatId {
 | 
				
			||||||
    Cbz,
 | 
					    Cbz,
 | 
				
			||||||
| 
						 | 
					@ -17,18 +20,12 @@ pub enum FormatId {
 | 
				
			||||||
impl FormatId {
 | 
					impl FormatId {
 | 
				
			||||||
    #[allow(dead_code)]
 | 
					    #[allow(dead_code)]
 | 
				
			||||||
    pub fn can_read(self) -> bool {
 | 
					    pub fn can_read(self) -> bool {
 | 
				
			||||||
        match self {
 | 
					        get_reader(self).is_some()
 | 
				
			||||||
            FormatId::Cbz => true,
 | 
					 | 
				
			||||||
            FormatId::Pdf => false, // planned but not implemented yet
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #[allow(dead_code)]
 | 
					    #[allow(dead_code)]
 | 
				
			||||||
    pub fn can_write(self) -> bool {
 | 
					    pub fn can_write(self) -> bool {
 | 
				
			||||||
        match self {
 | 
					        get_writer(self).is_some()
 | 
				
			||||||
            FormatId::Pdf => true,
 | 
					 | 
				
			||||||
            FormatId::Cbz => false, // planned but not implemented yet
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn detect_from_path(path: &Path) -> Option<FormatId> {
 | 
					    pub fn detect_from_path(path: &Path) -> Option<FormatId> {
 | 
				
			||||||
| 
						 | 
					@ -47,3 +44,18 @@ pub trait FormatReader: Send + Sync {
 | 
				
			||||||
pub trait FormatWriter: Send + Sync {
 | 
					pub trait FormatWriter: Send + Sync {
 | 
				
			||||||
    fn write(&self, doc: &Document, output: &Path) -> Result<()>;
 | 
					    fn write(&self, doc: &Document, output: &Path) -> Result<()>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Registry/factory functions to centralize available readers & writers
 | 
				
			||||||
 | 
					pub fn get_reader(format: FormatId) -> Option<Box<dyn FormatReader>> {
 | 
				
			||||||
 | 
					    match format {
 | 
				
			||||||
 | 
					        FormatId::Cbz => Some(Box::new(CbzReader)),
 | 
				
			||||||
 | 
					        FormatId::Pdf => Some(Box::new(PdfReader)),
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn get_writer(format: FormatId) -> Option<Box<dyn FormatWriter>> {
 | 
				
			||||||
 | 
					    match format {
 | 
				
			||||||
 | 
					        FormatId::Pdf => Some(Box::new(PdfWriter)),
 | 
				
			||||||
 | 
					        FormatId::Cbz => Some(Box::new(CbzWriter)),
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,12 +1,13 @@
 | 
				
			||||||
use anyhow::Result;
 | 
					use anyhow::Result;
 | 
				
			||||||
use image::{codecs::jpeg::JpegEncoder, ColorType};
 | 
					use image::codecs::jpeg::JpegEncoder;
 | 
				
			||||||
 | 
					use image::ColorType;
 | 
				
			||||||
use pdf_writer::{Content, Filter, Finish, Name, Pdf, Rect, Ref};
 | 
					use pdf_writer::{Content, Filter, Finish, Name, Pdf, Rect, Ref};
 | 
				
			||||||
use std::io::Cursor;
 | 
					use std::io::Cursor;
 | 
				
			||||||
use std::path::Path;
 | 
					use std::path::Path;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::model::Document;
 | 
					use crate::model::{Document, ImagePage};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use super::FormatWriter;
 | 
					use super::{FormatReader, FormatWriter};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub struct PdfWriter;
 | 
					pub struct PdfWriter;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -77,3 +78,194 @@ impl FormatWriter for PdfWriter {
 | 
				
			||||||
        Ok(())
 | 
					        Ok(())
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub struct PdfReader;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl FormatReader for PdfReader {
 | 
				
			||||||
 | 
					    fn read(&self, input: &Path) -> Result<Document> {
 | 
				
			||||||
 | 
					        use lopdf::{Document as LoDocument, Object};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let doc = LoDocument::load(input)?;
 | 
				
			||||||
 | 
					        let pages_map = doc.get_pages();
 | 
				
			||||||
 | 
					        let mut image_pages: Vec<ImagePage> = Vec::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (idx, (_page_num, page_id)) in pages_map.iter().enumerate() {
 | 
				
			||||||
 | 
					            // Fetch page object
 | 
				
			||||||
 | 
					            let page_obj = doc.get_object(*page_id)?;
 | 
				
			||||||
 | 
					            let page_dict = match page_obj.as_dict() {
 | 
				
			||||||
 | 
					                Ok(d) => d,
 | 
				
			||||||
 | 
					                Err(_) => continue,
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // Resolve Resources dictionary (can be a reference or inline dict)
 | 
				
			||||||
 | 
					            let (mut xobjects_opt, mut content_refs): (Option<lopdf::Dictionary>, Vec<Vec<u8>>) =
 | 
				
			||||||
 | 
					                (None, Vec::new());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if let Ok(obj) = page_dict.get(b"Resources") {
 | 
				
			||||||
 | 
					                match obj {
 | 
				
			||||||
 | 
					                    Object::Reference(id) => {
 | 
				
			||||||
 | 
					                        if let Ok(Object::Dictionary(d)) = doc.get_object(*id) {
 | 
				
			||||||
 | 
					                            // Extract XObject dict if present
 | 
				
			||||||
 | 
					                            if let Ok(Object::Reference(xid)) = d.get(b"XObject") {
 | 
				
			||||||
 | 
					                                if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) {
 | 
				
			||||||
 | 
					                                    xobjects_opt = Some(xd.clone());
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            } else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") {
 | 
				
			||||||
 | 
					                                xobjects_opt = Some(xd.clone());
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    Object::Dictionary(d) => {
 | 
				
			||||||
 | 
					                        if let Ok(Object::Reference(xid)) = d.get(b"XObject") {
 | 
				
			||||||
 | 
					                            if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) {
 | 
				
			||||||
 | 
					                                xobjects_opt = Some(xd.clone());
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        } else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") {
 | 
				
			||||||
 | 
					                            xobjects_opt = Some(xd.clone());
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ => {}
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // Try to track which XObjects are used by parsing Content streams for /Name Do
 | 
				
			||||||
 | 
					            if let Ok(contents_obj) = page_dict.get(b"Contents") {
 | 
				
			||||||
 | 
					                match contents_obj {
 | 
				
			||||||
 | 
					                    Object::Reference(cid) => {
 | 
				
			||||||
 | 
					                        if let Ok(Object::Stream(stream)) = doc.get_object(*cid) {
 | 
				
			||||||
 | 
					                            content_refs.extend(extract_xobject_names(&stream.content));
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    Object::Array(arr) => {
 | 
				
			||||||
 | 
					                        for o in arr {
 | 
				
			||||||
 | 
					                            if let Object::Reference(cid) = o {
 | 
				
			||||||
 | 
					                                if let Ok(Object::Stream(stream)) = doc.get_object(*cid) {
 | 
				
			||||||
 | 
					                                    content_refs.extend(extract_xobject_names(&stream.content));
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    Object::Stream(stream) => {
 | 
				
			||||||
 | 
					                        content_refs.extend(extract_xobject_names(&stream.content));
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ => {}
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // If we have XObjects, pick the first image (prefer one referenced in content)
 | 
				
			||||||
 | 
					            if let Some(xobjects) = xobjects_opt {
 | 
				
			||||||
 | 
					                // Build ordered keys: first those referenced in content, then the rest
 | 
				
			||||||
 | 
					                let mut keys: Vec<Vec<u8>> = xobjects.iter().map(|(k, _)| k.clone()).collect();
 | 
				
			||||||
 | 
					                // Stable sort by whether referenced first
 | 
				
			||||||
 | 
					                keys.sort_by_key(|k| {
 | 
				
			||||||
 | 
					                    let name = if k.starts_with(b"/") {
 | 
				
			||||||
 | 
					                        k[1..].to_vec()
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        k.clone()
 | 
				
			||||||
 | 
					                    };
 | 
				
			||||||
 | 
					                    match content_refs.iter().position(|r| *r == name) {
 | 
				
			||||||
 | 
					                        Some(pos) => pos as i32,
 | 
				
			||||||
 | 
					                        None => i32::MAX,
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                for key in keys {
 | 
				
			||||||
 | 
					                    if let Ok(&Object::Reference(obj_id)) = xobjects.get(&key) {
 | 
				
			||||||
 | 
					                        if let Ok(Object::Stream(stream)) = doc.get_object(obj_id) {
 | 
				
			||||||
 | 
					                            let dict = &stream.dict;
 | 
				
			||||||
 | 
					                            let is_image = matches!(dict.get(b"Subtype"), Ok(Object::Name(n)) if n == b"Image");
 | 
				
			||||||
 | 
					                            if !is_image {
 | 
				
			||||||
 | 
					                                continue;
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                            let is_dct = match dict.get(b"Filter") {
 | 
				
			||||||
 | 
					                                Ok(Object::Name(n)) => n == b"DCTDecode",
 | 
				
			||||||
 | 
					                                Ok(Object::Array(arr)) => arr
 | 
				
			||||||
 | 
					                                    .iter()
 | 
				
			||||||
 | 
					                                    .any(|o| matches!(o, Object::Name(n) if n == b"DCTDecode")),
 | 
				
			||||||
 | 
					                                _ => false,
 | 
				
			||||||
 | 
					                            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                            let data = stream.content.clone();
 | 
				
			||||||
 | 
					                            if is_dct {
 | 
				
			||||||
 | 
					                                if let Ok(img) = image::load_from_memory(&data) {
 | 
				
			||||||
 | 
					                                    let name = format!("{:03}.jpg", idx + 1);
 | 
				
			||||||
 | 
					                                    image_pages.push(ImagePage {
 | 
				
			||||||
 | 
					                                        name,
 | 
				
			||||||
 | 
					                                        image: img,
 | 
				
			||||||
 | 
					                                        jpeg_dct: Some(data),
 | 
				
			||||||
 | 
					                                    });
 | 
				
			||||||
 | 
					                                    break;
 | 
				
			||||||
 | 
					                                } else {
 | 
				
			||||||
 | 
					                                    // If JPEG parsing failed, skip
 | 
				
			||||||
 | 
					                                    continue;
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            } else if let Ok(img) = image::load_from_memory(&data) {
 | 
				
			||||||
 | 
					                                // Fallback: try to decode arbitrary image stream
 | 
				
			||||||
 | 
					                                let name = format!("{:03}.jpg", idx + 1);
 | 
				
			||||||
 | 
					                                image_pages.push(ImagePage {
 | 
				
			||||||
 | 
					                                    name,
 | 
				
			||||||
 | 
					                                    image: img,
 | 
				
			||||||
 | 
					                                    jpeg_dct: None,
 | 
				
			||||||
 | 
					                                });
 | 
				
			||||||
 | 
					                                break;
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        Ok(Document::new(image_pages))
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Collects the names of the XObjects painted by a content stream, in order of appearance.
					///
					/// The stream is split into PDF tokens, and every name operand (`/Name`) that is
					/// immediately followed by the `Do` operator is recorded without its leading slash.
					///
					/// Tokens end at PDF white-space *or* at a PDF delimiter, so compact streams such as
					/// `cm/Im0 Do` (no space before the slash) are handled. Literal strings `( ... )` and
					/// `%` comments are skipped, so text that merely looks like `/X Do` is not reported.
					///
					/// This is still a lightweight scanner rather than a full parser: inline image data is
					/// not skipped, and `#xx` escapes inside names are returned as written.
					///
					/// Returns one entry per `Do` invocation; a name painted twice appears twice.
					fn extract_xobject_names(content: &[u8]) -> Vec<Vec<u8>> {
					    // PDF white-space characters: NUL, HT, LF, FF, CR and SP.
					    fn is_whitespace(b: u8) -> bool {
					        matches!(b, 0 | b'\t' | b'\n' | 0x0C | b'\r' | b' ')
					    }
					
					    // PDF delimiter characters; each one terminates the token in front of it.
					    fn is_delimiter(b: u8) -> bool {
					        matches!(
					            b,
					            b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%'
					        )
					    }
					
					    // Index just past the run of regular characters that starts at `from`.
					    fn token_end(s: &[u8], from: usize) -> usize {
					        s[from..]
					            .iter()
					            .position(|&b| is_whitespace(b) || is_delimiter(b))
					            .map_or(s.len(), |len| from + len)
					    }
					
					    let mut names = Vec::new();
					    // Name operand seen directly before the token currently being read, if any.
					    let mut pending: Option<&[u8]> = None;
					    let mut i = 0;
					
					    while i < content.len() {
					        match content[i] {
					            b if is_whitespace(b) => i += 1,
					            b'%' => {
					                // A comment runs to the end of the line and counts as white-space.
					                while i < content.len() && !matches!(content[i], b'\n' | b'\r') {
					                    i += 1;
					                }
					            }
					            b'(' => {
					                // Literal string: honour nested parentheses and backslash escapes.
					                pending = None;
					                let mut depth = 0usize;
					                while i < content.len() {
					                    match content[i] {
					                        b'\\' => i += 1, // also step over the escaped byte
					                        b'(' => depth += 1,
					                        b')' => depth -= 1,
					                        _ => {}
					                    }
					                    i += 1;
					                    if depth == 0 {
					                        break;
					                    }
					                }
					            }
					            b'/' => {
					                let end = token_end(content, i + 1);
					                pending = Some(&content[i + 1..end]);
					                i = end;
					            }
					            b if is_delimiter(b) => {
					                // Any other delimiter (array, dictionary, hex string) breaks the pairing.
					                pending = None;
					                i += 1;
					            }
					            _ => {
					                let end = token_end(content, i);
					                // Only a name *directly* before this token can be its operand.
					                let operand = pending.take();
					                if &content[i..end] == b"Do" {
					                    if let Some(name) = operand {
					                        names.push(name.to_vec());
					                    }
					                }
					                i = end;
					            }
					        }
					    }
					
					    names
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										24
									
								
								src/job.rs
									
										
									
									
									
								
							
							
						
						
									
										24
									
								
								src/job.rs
									
										
									
									
									
								
							| 
						 | 
					@ -5,9 +5,7 @@ use anyhow::Result;
 | 
				
			||||||
use indicatif::{ProgressBar, ProgressStyle};
 | 
					use indicatif::{ProgressBar, ProgressStyle};
 | 
				
			||||||
use rayon::prelude::*;
 | 
					use rayon::prelude::*;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::formats::cbz::CbzReader;
 | 
					use crate::formats::{get_reader, get_writer, FormatId};
 | 
				
			||||||
use crate::formats::pdf::PdfWriter;
 | 
					 | 
				
			||||||
use crate::formats::{FormatId, FormatReader, FormatWriter};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug, Clone)]
 | 
					#[derive(Debug, Clone)]
 | 
				
			||||||
pub struct Job {
 | 
					pub struct Job {
 | 
				
			||||||
| 
						 | 
					@ -34,22 +32,6 @@ impl Job {
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn get_reader(format: FormatId) -> Box<dyn FormatReader> {
 | 
					 | 
				
			||||||
    match format {
 | 
					 | 
				
			||||||
        FormatId::Cbz => Box::new(CbzReader),
 | 
					 | 
				
			||||||
        // Placeholder for future formats
 | 
					 | 
				
			||||||
        FormatId::Pdf => unimplemented!("Reading PDF not implemented"),
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn get_writer(format: FormatId) -> Box<dyn FormatWriter> {
 | 
					 | 
				
			||||||
    match format {
 | 
					 | 
				
			||||||
        FormatId::Pdf => Box::new(PdfWriter),
 | 
					 | 
				
			||||||
        // Placeholder for future formats
 | 
					 | 
				
			||||||
        FormatId::Cbz => unimplemented!("Writing CBZ not implemented"),
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn process_jobs(jobs: Vec<Job>) -> Result<()> {
 | 
					pub fn process_jobs(jobs: Vec<Job>) -> Result<()> {
 | 
				
			||||||
    let pb = ProgressBar::new(jobs.len() as u64);
 | 
					    let pb = ProgressBar::new(jobs.len() as u64);
 | 
				
			||||||
    pb.enable_steady_tick(Duration::from_millis(300));
 | 
					    pb.enable_steady_tick(Duration::from_millis(300));
 | 
				
			||||||
| 
						 | 
					@ -59,8 +41,8 @@ pub fn process_jobs(jobs: Vec<Job>) -> Result<()> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    jobs.par_iter().for_each(|job| {
 | 
					    jobs.par_iter().for_each(|job| {
 | 
				
			||||||
        // Build the pipeline for each job
 | 
					        // Build the pipeline for each job
 | 
				
			||||||
        let reader = get_reader(job.from);
 | 
					        let reader = get_reader(job.from).expect("No reader registered for selected input format");
 | 
				
			||||||
        let writer = get_writer(job.to);
 | 
					        let writer = get_writer(job.to).expect("No writer registered for selected output format");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let doc = reader.read(&job.input_path).expect("Failed to read input");
 | 
					        let doc = reader.read(&job.input_path).expect("Failed to read input");
 | 
				
			||||||
        writer
 | 
					        writer
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										90
									
								
								src/main.rs
									
										
									
									
									
								
							
							
						
						
									
										90
									
								
								src/main.rs
									
										
									
									
									
								
							| 
						 | 
					@ -11,6 +11,21 @@ use walkdir::WalkDir;
 | 
				
			||||||
use cbz2pdf::formats::FormatId;
 | 
					use cbz2pdf::formats::FormatId;
 | 
				
			||||||
use cbz2pdf::job::{process_jobs, Job};
 | 
					use cbz2pdf::job::{process_jobs, Job};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(clap::ValueEnum, Clone, Debug)]
 | 
				
			||||||
 | 
					enum CliFormat {
 | 
				
			||||||
 | 
					    Cbz,
 | 
				
			||||||
 | 
					    Pdf,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl From<CliFormat> for FormatId {
 | 
				
			||||||
 | 
					    fn from(value: CliFormat) -> Self {
 | 
				
			||||||
 | 
					        match value {
 | 
				
			||||||
 | 
					            CliFormat::Cbz => FormatId::Cbz,
 | 
				
			||||||
 | 
					            CliFormat::Pdf => FormatId::Pdf,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Parser)]
 | 
					#[derive(Parser)]
 | 
				
			||||||
#[command()]
 | 
					#[command()]
 | 
				
			||||||
struct Cli {
 | 
					struct Cli {
 | 
				
			||||||
| 
						 | 
					@ -18,7 +33,7 @@ struct Cli {
 | 
				
			||||||
        short = 'i',
 | 
					        short = 'i',
 | 
				
			||||||
        long = "input",
 | 
					        long = "input",
 | 
				
			||||||
        value_hint = ValueHint::FilePath,
 | 
					        value_hint = ValueHint::FilePath,
 | 
				
			||||||
        help = "Path to CBZ file or directory containing CBZ files"
 | 
					        help = "Path to input file or directory"
 | 
				
			||||||
    )]
 | 
					    )]
 | 
				
			||||||
    input_path: String,
 | 
					    input_path: String,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -27,10 +42,20 @@ struct Cli {
 | 
				
			||||||
        long = "output-directory",
 | 
					        long = "output-directory",
 | 
				
			||||||
        default_value = ".",
 | 
					        default_value = ".",
 | 
				
			||||||
        value_hint = ValueHint::FilePath,
 | 
					        value_hint = ValueHint::FilePath,
 | 
				
			||||||
        help = "Output directory for PDF files"
 | 
					        help = "Output directory for converted files"
 | 
				
			||||||
    )]
 | 
					    )]
 | 
				
			||||||
    output_dir: String,
 | 
					    output_dir: String,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #[arg(
 | 
				
			||||||
 | 
					        long = "from",
 | 
				
			||||||
 | 
					        value_enum,
 | 
				
			||||||
 | 
					        help = "Input format. If omitted, auto-detect from file extension"
 | 
				
			||||||
 | 
					    )]
 | 
				
			||||||
 | 
					    from: Option<CliFormat>,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #[arg(long = "to", value_enum, default_value = "pdf", help = "Output format")]
 | 
				
			||||||
 | 
					    to: CliFormat,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #[arg(short = 'p', long, help = "Ask for confirmation before doing anything")]
 | 
					    #[arg(short = 'p', long, help = "Ask for confirmation before doing anything")]
 | 
				
			||||||
    interactive: bool,
 | 
					    interactive: bool,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -43,29 +68,51 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 | 
				
			||||||
    let input_path = Path::new(&cli.input_path);
 | 
					    let input_path = Path::new(&cli.input_path);
 | 
				
			||||||
    let output_dir = Path::new(&cli.output_dir);
 | 
					    let output_dir = Path::new(&cli.output_dir);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let from_opt: Option<FormatId> = cli.from.map(Into::into);
 | 
				
			||||||
 | 
					    let to_fmt: FormatId = cli.to.into();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Validate target capability early
 | 
				
			||||||
 | 
					    if !to_fmt.can_write() {
 | 
				
			||||||
 | 
					        eprintln!("Selected output format is not supported for writing: {to_fmt:?}");
 | 
				
			||||||
 | 
					        std::process::exit(1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut jobs: Vec<Job> = Vec::new();
 | 
					    let mut jobs: Vec<Job> = Vec::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if input_path.is_file() {
 | 
					    if input_path.is_file() {
 | 
				
			||||||
        if let Some(FormatId::Cbz) = FormatId::detect_from_path(input_path) {
 | 
					        let detected = FormatId::detect_from_path(input_path);
 | 
				
			||||||
 | 
					        let from_fmt = from_opt.or(detected).unwrap_or_else(|| {
 | 
				
			||||||
 | 
					            eprintln!(
 | 
				
			||||||
 | 
					                "Could not detect input format from file extension and no --from was provided"
 | 
				
			||||||
 | 
					            );
 | 
				
			||||||
 | 
					            std::process::exit(1);
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if !from_fmt.can_read() {
 | 
				
			||||||
 | 
					            eprintln!("Selected/Detected input format is not supported for reading: {from_fmt:?}");
 | 
				
			||||||
 | 
					            std::process::exit(1);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        jobs.push(Job::new(
 | 
					        jobs.push(Job::new(
 | 
				
			||||||
            input_path.to_path_buf(),
 | 
					            input_path.to_path_buf(),
 | 
				
			||||||
            output_dir.to_path_buf(),
 | 
					            output_dir.to_path_buf(),
 | 
				
			||||||
                FormatId::Cbz,
 | 
					            from_fmt,
 | 
				
			||||||
                FormatId::Pdf,
 | 
					            to_fmt,
 | 
				
			||||||
        ));
 | 
					        ));
 | 
				
			||||||
        } else {
 | 
					 | 
				
			||||||
            eprintln!("Unsupported input file format");
 | 
					 | 
				
			||||||
            std::process::exit(1);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    } else if input_path.is_dir() {
 | 
					    } else if input_path.is_dir() {
 | 
				
			||||||
        jobs.extend(walk_directory(input_path, output_dir));
 | 
					        jobs.extend(walk_directory(input_path, output_dir, from_opt, to_fmt));
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        eprintln!(
 | 
					        eprintln!(
 | 
				
			||||||
            "Invalid input path. Please provide a CBZ file or a directory containing CBZ files."
 | 
					            "Invalid input path. Please provide a valid file or a directory containing supported files."
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        std::process::exit(1);
 | 
					        std::process::exit(1);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if jobs.is_empty() {
 | 
				
			||||||
 | 
					        eprintln!("No matching inputs found to process.");
 | 
				
			||||||
 | 
					        std::process::exit(1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    jobs.sort_by_key(|j| j.input_path.clone().into_os_string().into_string());
 | 
					    jobs.sort_by_key(|j| j.input_path.clone().into_os_string().into_string());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let proceed = if cli.interactive {
 | 
					    let proceed = if cli.interactive {
 | 
				
			||||||
| 
						 | 
					@ -105,23 +152,36 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 | 
				
			||||||
    Ok(())
 | 
					    Ok(())
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn walk_directory(directory: &Path, output_dir: &Path) -> Vec<Job> {
 | 
					fn walk_directory(
 | 
				
			||||||
 | 
					    directory: &Path,
 | 
				
			||||||
 | 
					    output_dir: &Path,
 | 
				
			||||||
 | 
					    from_opt: Option<FormatId>,
 | 
				
			||||||
 | 
					    to_fmt: FormatId,
 | 
				
			||||||
 | 
					) -> Vec<Job> {
 | 
				
			||||||
    debug!("Walking {directory:?}");
 | 
					    debug!("Walking {directory:?}");
 | 
				
			||||||
    let mut jobs = Vec::new();
 | 
					    let mut jobs = Vec::new();
 | 
				
			||||||
    for entry in WalkDir::new(directory) {
 | 
					    for entry in WalkDir::new(directory) {
 | 
				
			||||||
        let entry = entry.unwrap();
 | 
					        let entry = entry.unwrap();
 | 
				
			||||||
        let path = entry.path();
 | 
					        let path = entry.path();
 | 
				
			||||||
        if path.is_file() {
 | 
					        if path.is_file() {
 | 
				
			||||||
            if let Some(FormatId::Cbz) = FormatId::detect_from_path(path) {
 | 
					            let detected = FormatId::detect_from_path(path);
 | 
				
			||||||
 | 
					            let from_fmt_opt = match from_opt {
 | 
				
			||||||
 | 
					                Some(fixed) => detected.filter(|d| *d == fixed),
 | 
				
			||||||
 | 
					                None => detected,
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if let Some(from_fmt) = from_fmt_opt {
 | 
				
			||||||
 | 
					                if from_fmt.can_read() && to_fmt.can_write() {
 | 
				
			||||||
                    jobs.push(Job::new(
 | 
					                    jobs.push(Job::new(
 | 
				
			||||||
                        path.to_path_buf(),
 | 
					                        path.to_path_buf(),
 | 
				
			||||||
                        output_dir.to_path_buf(),
 | 
					                        output_dir.to_path_buf(),
 | 
				
			||||||
                    FormatId::Cbz,
 | 
					                        from_fmt,
 | 
				
			||||||
                    FormatId::Pdf,
 | 
					                        to_fmt,
 | 
				
			||||||
                    ));
 | 
					                    ));
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    jobs
 | 
					    jobs
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										96
									
								
								tests/cbz_writer_tests.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								tests/cbz_writer_tests.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,96 @@
 | 
				
			||||||
 | 
					use std::fs::File;
 | 
				
			||||||
 | 
					use std::io::Read;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use cbz2pdf::formats::cbz::CbzWriter;
 | 
				
			||||||
 | 
					use cbz2pdf::formats::FormatWriter;
 | 
				
			||||||
 | 
					use cbz2pdf::model::{Document, ImagePage};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Encodes a 1x1 RGB image as JPEG (quality 80) and returns the encoded bytes
					/// together with the image obtained by decoding those same bytes.
					fn make_tiny_jpeg() -> (Vec<u8>, image::DynamicImage) {
					    let pixels = image::DynamicImage::new_rgb8(1, 1).to_rgb8();
					
					    let mut jpeg_bytes = Vec::new();
					    {
					        // Scope the cursor so its mutable borrow of `jpeg_bytes` ends before decoding.
					        let mut sink = std::io::Cursor::new(&mut jpeg_bytes);
					        image::codecs::jpeg::JpegEncoder::new_with_quality(&mut sink, 80)
					            .encode(&pixels, 1, 1, image::ColorType::Rgb8.into())
					            .unwrap();
					    }
					
					    let round_tripped = image::load_from_memory(&jpeg_bytes).unwrap();
					    (jpeg_bytes, round_tripped)
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[test]
					fn cbz_writer_preserves_dct_and_renames_non_jpg() {
					    // A page that carries original JPEG bytes but whose name has a non-jpg extension.
					    let (jpeg_dct, decoded) = make_tiny_jpeg();
					    let doc = Document::new(vec![ImagePage {
					        name: String::from("cover.png"),
					        image: decoded,
					        jpeg_dct: Some(jpeg_dct.clone()),
					    }]);
					
					    let temp_dir = tempfile::tempdir().expect("create temp dir");
					    let cbz_path = temp_dir.path().join("out.cbz");
					    CbzWriter.write(&doc, &cbz_path).expect("write cbz");
					
					    // Re-open the archive and look for an entry called 001.jpg holding the same bytes.
					    let mut archive = zip::ZipArchive::new(File::open(&cbz_path).unwrap()).unwrap();
					
					    let mut matching_entries = 0usize;
					    for index in 0..archive.len() {
					        let mut entry = archive.by_index(index).unwrap();
					        let entry_path = entry.enclosed_name().unwrap().to_owned();
					        if entry_path.file_name().unwrap() != "001.jpg" {
					            continue;
					        }
					
					        let mut stored = Vec::new();
					        entry.read_to_end(&mut stored).unwrap();
					        assert_eq!(
					            stored, jpeg_dct,
					            "writer should preserve original JPEG DCT bytes"
					        );
					        matching_entries += 1;
					    }
					    assert!(matching_entries > 0, "001.jpg not found in zip");
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[test]
					fn cbz_writer_keeps_jpg_name() {
					    // A page whose name already ends in .jpg must come out under that same name.
					    let (jpeg_dct, decoded) = make_tiny_jpeg();
					    let doc = Document::new(vec![ImagePage {
					        name: String::from("page01.jpg"),
					        image: decoded,
					        jpeg_dct: Some(jpeg_dct),
					    }]);
					
					    let temp_dir = tempfile::tempdir().expect("create temp dir");
					    let cbz_path = temp_dir.path().join("out.cbz");
					    CbzWriter.write(&doc, &cbz_path).expect("write cbz");
					
					    let mut archive = zip::ZipArchive::new(File::open(&cbz_path).unwrap()).unwrap();
					
					    // Gather the final path component of every entry, in archive order.
					    let entry_names: Vec<String> = (0..archive.len())
					        .map(|index| {
					            let entry = archive.by_index(index).unwrap();
					            let base = entry
					                .enclosed_name()
					                .unwrap()
					                .file_name()
					                .unwrap()
					                .to_owned();
					            base.to_string_lossy().to_string()
					        })
					        .collect();
					
					    assert_eq!(
					        entry_names,
					        vec!["page01.jpg"],
					        "existing .jpg name should be kept"
					    );
					}
 | 
				
			||||||
| 
						 | 
					@ -34,7 +34,7 @@ fn job_new_sets_output_extension() {
 | 
				
			||||||
#[test]
 | 
					#[test]
 | 
				
			||||||
fn format_capabilities_consistent() {
 | 
					fn format_capabilities_consistent() {
 | 
				
			||||||
    assert!(FormatId::Cbz.can_read());
 | 
					    assert!(FormatId::Cbz.can_read());
 | 
				
			||||||
    assert!(!FormatId::Cbz.can_write());
 | 
					    assert!(FormatId::Cbz.can_write());
 | 
				
			||||||
    assert!(FormatId::Pdf.can_write());
 | 
					    assert!(FormatId::Pdf.can_write());
 | 
				
			||||||
    assert!(!FormatId::Pdf.can_read());
 | 
					    assert!(FormatId::Pdf.can_read());
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										51
									
								
								tests/pdf_reader_tests.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								tests/pdf_reader_tests.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,51 @@
 | 
				
			||||||
 | 
					use cbz2pdf::formats::pdf::{PdfReader, PdfWriter};
 | 
				
			||||||
 | 
					use cbz2pdf::formats::{FormatReader, FormatWriter};
 | 
				
			||||||
 | 
					use cbz2pdf::model::{Document, ImagePage};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Builds a `w` x `h` image filled with the colour `rgb` and encodes it as JPEG
					/// (quality 85). Returns the encoded bytes and the original, un-encoded image.
					fn make_small_jpeg(w: u32, h: u32, rgb: [u8; 3]) -> (Vec<u8>, image::DynamicImage) {
					    let mut canvas = image::ImageBuffer::<image::Rgb<u8>, _>::new(w, h);
					    canvas
					        .pixels_mut()
					        .for_each(|pixel| *pixel = image::Rgb(rgb));
					    let dynimg = image::DynamicImage::ImageRgb8(canvas);
					
					    let mut jpeg_bytes = Vec::new();
					    {
					        // Scope the cursor so its mutable borrow of `jpeg_bytes` ends before returning.
					        let mut sink = std::io::Cursor::new(&mut jpeg_bytes);
					        let mut encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut sink, 85);
					        let rgb8 = dynimg.to_rgb8();
					        encoder
					            .encode(&rgb8, w, h, image::ColorType::Rgb8.into())
					            .unwrap();
					    }
					
					    (jpeg_bytes, dynimg)
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[test]
					fn pdf_reader_extracts_jpeg_xobject_and_preserves_dct() {
					    // Write a single JPEG-backed page to a PDF file.
					    let (jpeg_dct, dynimg) = make_small_jpeg(3, 2, [10, 20, 30]);
					    let doc = Document::new(vec![ImagePage {
					        name: "p1.jpg".into(),
					        image: dynimg.clone(),
					        jpeg_dct: Some(jpeg_dct.clone()),
					    }]);
					
					    let temp_dir = tempfile::tempdir().expect("tmpdir");
					    let pdf_path = temp_dir.path().join("in.pdf");
					    PdfWriter.write(&doc, &pdf_path).expect("write pdf");
					
					    // Read the file back and compare against what was written.
					    let round_trip = PdfReader.read(&pdf_path).expect("read pdf");
					    assert_eq!(round_trip.pages.len(), 1, "should have one page extracted");
					
					    let first = &round_trip.pages[0];
					    assert_eq!(first.image.width(), dynimg.width());
					    assert_eq!(first.image.height(), dynimg.height());
					
					    assert!(
					        first.jpeg_dct.is_some(),
					        "should preserve DCT for JPEG images"
					    );
					    let recovered = first.jpeg_dct.as_ref().unwrap();
					    assert_eq!(recovered, &jpeg_dct, "JPEG bytes should match");
					}
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue