diff --git a/Cargo.lock b/Cargo.lock index 82159c3..2628ea5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,15 +37,6 @@ dependencies = [ "equator", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anstream" version = "0.6.21" @@ -143,7 +134,7 @@ dependencies = [ "anyhow", "arrayvec", "log", - "nom 7.1.3", + "nom", "num-rational", "v_frame", ] @@ -184,15 +175,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "block-padding" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" -dependencies = [ - "generic-array", -] - [[package]] name = "built" version = "0.7.7" @@ -232,15 +214,6 @@ dependencies = [ "libbz2-rs-sys", ] -[[package]] -name = "cbc" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" -dependencies = [ - "cipher", -] - [[package]] name = "cbz2pdf" version = "0.1.0" @@ -251,7 +224,6 @@ dependencies = [ "image", "indicatif", "log", - "lopdf", "pdf-writer", "pretty_env_logger", "rayon", @@ -289,17 +261,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" -[[package]] -name = "chrono" -version = "0.4.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" -dependencies = [ - "iana-time-zone", - "num-traits", - "windows-link", -] - [[package]] name = "cipher" version = "0.4.4" @@ -381,12 +342,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - [[package]] name = "cpufeatures" version = "0.2.17" @@ -510,15 +465,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "ecb" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7" -dependencies = [ - "cipher", -] - [[package]] name = "either" version = "1.15.0" @@ -531,15 +477,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" -[[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" -dependencies = [ - "cfg-if", -] - [[package]] name = "env_logger" version = "0.10.2" @@ -664,9 +601,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "generic-array" -version = "0.14.8" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dc8f7d2ded5f9209535e4b3fd4d39c002f30902ff5ce9f64e2c33d549576500" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -749,30 +686,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" -[[package]] -name = "iana-time-zone" -version = "0.1.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "image" version = "0.25.8" @@ -842,7 +755,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ - "block-padding", "generic-array", ] @@ -889,47 +801,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" -[[package]] -name = "jiff" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" -dependencies = [ - "jiff-static", - "jiff-tzdb-platform", - "log", - "portable-atomic", - "portable-atomic-util", - "serde", - "windows-sys 0.59.0", -] - -[[package]] -name = "jiff-static" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "jiff-tzdb" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" - -[[package]] -name = "jiff-tzdb-platform" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" -dependencies = [ - "jiff-tzdb", -] - [[package]] name = "jobserver" version = "0.1.34" @@ -1008,38 +879,6 @@ dependencies = [ "imgref", ] -[[package]] -name = "lopdf" -version = "0.38.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7184fdea2bc3cd272a1acec4030c321a8f9875e877b3f92a53f2f6033fdc289" -dependencies = [ - "aes", - "bitflags", - "cbc", - "chrono", - "ecb", - "encoding_rs", - "flate2", - "getrandom 0.3.3", - "indexmap", - "itoa", - "jiff", - "log", - "md-5", - "nom 8.0.0", - "nom_locate", - "rand 0.9.2", - "rangemap", - "rayon", - "sha2", - "stringprep", - "thiserror 2.0.17", - "time", - "ttf-parser", - "weezl", -] - [[package]] name = "lzma-rust2" version = "0.13.0" @@ -1060,16 +899,6 @@ dependencies = [ "rayon", ] -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - [[package]] name = "memchr" version = "2.7.6" @@ -1118,26 +947,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nom" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" -dependencies = [ - "memchr", -] - -[[package]] -name = "nom_locate" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d" -dependencies = [ - "bytecount", - "memchr", - "nom 8.0.0", -] - [[package]] name = "noop_proc_macro" version = "0.3.0" @@ -1276,15 +1085,6 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" -[[package]] -name = "portable-atomic-util" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" -dependencies = [ - "portable-atomic", -] - [[package]] name = "powerfmt" version = "0.2.0" @@ -1412,18 +1212,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" -dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_chacha", + "rand_core", ] [[package]] @@ -1433,17 +1223,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core 0.9.3", + "rand_core", ] [[package]] @@ -1455,21 +1235,6 @@ dependencies = [ "getrandom 0.2.16", ] -[[package]] -name = "rand_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" -dependencies = [ - "getrandom 0.3.3", -] - -[[package]] -name = "rangemap" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93e7e49bb0bf967717f7bd674458b3d6b0c5f48ec7e3038166026a69fc22223" - [[package]] name = "rav1e" version = "0.7.1" @@ -1496,11 +1261,11 @@ dependencies = [ "once_cell", "paste", "profiling", - "rand 0.8.5", - "rand_chacha 0.3.1", + "rand", + "rand_chacha", "simd_helpers", "system-deps", - "thiserror 1.0.69", + "thiserror", "v_frame", "wasm-bindgen", ] @@ -1702,17 +1467,6 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" -[[package]] -name = "stringprep" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - [[package]] name = "strsim" version = "0.11.1" @@ -1816,16 +1570,7 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl 1.0.69", -] - -[[package]] -name = "thiserror" -version = "2.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" -dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl", ] [[package]] @@ -1839,17 +1584,6 @@ dependencies = [ "syn", ] -[[package]] -name = "thiserror-impl" -version = "2.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tiff" version = "0.10.3" @@ -1871,12 +1605,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", - "itoa", "num-conv", "powerfmt", "serde", "time-core", - "time-macros", ] [[package]] @@ -1885,31 +1617,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" -[[package]] -name = "time-macros" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" -dependencies = [ - "num-conv", - "time-core", -] - -[[package]] -name = "tinyvec" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "toml" version = "0.8.23" @@ -1944,45 +1651,18 @@ dependencies = [ "winnow", ] -[[package]] -name = "ttf-parser" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2df906b07856748fa3f6e0ad0cbaa047052d4a7dd609e231c4f72cee8c36f31" - [[package]] name = "typenum" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - [[package]] name = "unicode-ident" version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-properties" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" - [[package]] name = "unicode-width" version = "0.2.2" @@ -2142,65 +1822,12 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index bbebc9e..599d6a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,6 @@ rayon = "1.10.0" tabled = "0.20.0" walkdir = "2.5.0" zip = "6.0.0" -lopdf = "0.38.0" [dev-dependencies] tempfile = "3.12.0" diff --git a/src/formats/cbz.rs b/src/formats/cbz.rs index 8319496..886ab43 100644 --- a/src/formats/cbz.rs +++ b/src/formats/cbz.rs @@ -1,6 +1,6 @@ use std::ffi::OsStr; use std::fs::File; -use std::io::{Read, Write}; +use std::io::Read; use std::path::Path; use anyhow::Result; @@ -9,7 +9,7 @@ use zip::ZipArchive; use crate::model::{Document, ImagePage}; -use super::{FormatReader, FormatWriter}; +use super::FormatReader; pub struct CbzReader; @@ -51,40 +51,3 @@ impl FormatReader for CbzReader { Ok(Document::new(pages)) } } - -pub struct CbzWriter; - -impl FormatWriter for CbzWriter { - fn write(&self, doc: &Document, output: &Path) -> Result<()> { - use zip::write::SimpleFileOptions; - let file = File::create(output)?; - let mut zip = zip::ZipWriter::new(file); - let options = SimpleFileOptions::default(); - - for (idx, page) in doc.pages.iter().enumerate() { - let mut name = page.name.clone(); - if Path::new(&name).extension().and_then(OsStr::to_str) != Some("jpg") { - name = format!("{:03}.jpg", idx + 1); - } - zip.start_file(&name, options)?; - if let Some(dct) = &page.jpeg_dct { - zip.write_all(dct)?; - } else { - // Encode to JPEG - let rgb = page.image.to_rgb8(); - let (w, h) = (rgb.width(), rgb.height()); - let mut cursor = std::io::Cursor::new(Vec::new()); - { - let mut enc = - image::codecs::jpeg::JpegEncoder::new_with_quality(&mut cursor, 85); - enc.encode(&rgb.into_raw(), w, h, image::ColorType::Rgb8.into())?; - } - let data = cursor.into_inner(); - zip.write_all(&data)?; - } - } - - zip.finish()?; - Ok(()) - } -} diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 0bac2ce..0ad3ec0 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -8,9 +8,6 @@ use crate::model::Document; pub mod cbz; pub mod pdf; -use cbz::{CbzReader, CbzWriter}; -use pdf::{PdfReader, PdfWriter}; - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum FormatId { Cbz, @@ -20,12 +17,18 @@ pub enum FormatId { impl FormatId { #[allow(dead_code)] pub fn can_read(self) -> bool { - get_reader(self).is_some() + match self { + FormatId::Cbz => true, + FormatId::Pdf => false, // planned but not implemented yet + } } #[allow(dead_code)] pub fn can_write(self) -> bool { - get_writer(self).is_some() + match self { + FormatId::Pdf => true, + FormatId::Cbz => false, // planned but not implemented yet + } } pub fn detect_from_path(path: &Path) -> Option { @@ -44,17 +47,3 @@ pub trait FormatReader: Send + Sync { pub trait FormatWriter: Send + Sync { fn write(&self, doc: &Document, output: &Path) -> Result<()>; } - -pub fn get_reader(format: FormatId) -> Option> { - match format { - FormatId::Cbz => Some(Box::new(CbzReader)), - FormatId::Pdf => Some(Box::new(PdfReader)), - } -} - -pub fn get_writer(format: FormatId) -> Option> { - match format { - FormatId::Pdf => Some(Box::new(PdfWriter)), - FormatId::Cbz => Some(Box::new(CbzWriter)), - } -} diff --git a/src/formats/pdf.rs b/src/formats/pdf.rs index 929d9cb..2da0469 100644 --- a/src/formats/pdf.rs +++ b/src/formats/pdf.rs @@ -1,13 +1,12 @@ use anyhow::Result; -use image::codecs::jpeg::JpegEncoder; -use image::ColorType; +use image::{codecs::jpeg::JpegEncoder, ColorType}; use pdf_writer::{Content, Filter, Finish, Name, Pdf, Rect, Ref}; use std::io::Cursor; use std::path::Path; -use crate::model::{Document, ImagePage}; +use crate::model::Document; -use super::{FormatReader, FormatWriter}; +use super::FormatWriter; pub struct PdfWriter; @@ -78,194 +77,3 @@ impl FormatWriter for PdfWriter { Ok(()) } } - -pub struct PdfReader; - -impl FormatReader for PdfReader { - fn read(&self, input: &Path) -> Result { - use lopdf::{Document as LoDocument, Object}; - - let doc = LoDocument::load(input)?; - let pages_map = doc.get_pages(); - let mut image_pages: Vec = Vec::new(); - - for (idx, (_page_num, page_id)) in pages_map.iter().enumerate() { - // Fetch page object - let page_obj = doc.get_object(*page_id)?; - let page_dict = match page_obj.as_dict() { - Ok(d) => d, - Err(_) => continue, - }; - - // Resolve Resources dictionary (can be a reference or inline dict) - let (mut xobjects_opt, mut content_refs): (Option, Vec>) = - (None, Vec::new()); - - if let Ok(obj) = page_dict.get(b"Resources") { - match obj { - Object::Reference(id) => { - if let Ok(Object::Dictionary(d)) = doc.get_object(*id) { - // Extract XObject dict if present - if let Ok(Object::Reference(xid)) = d.get(b"XObject") { - if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) { - xobjects_opt = Some(xd.clone()); - } - } else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") { - xobjects_opt = Some(xd.clone()); - } - } - } - Object::Dictionary(d) => { - if let Ok(Object::Reference(xid)) = d.get(b"XObject") { - if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) { - xobjects_opt = Some(xd.clone()); - } - } else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") { - xobjects_opt = Some(xd.clone()); - } - } - _ => {} - } - } - - // Try to track which XObjects are used by parsing Content streams for /Name Do - if let Ok(contents_obj) = page_dict.get(b"Contents") { - match contents_obj { - Object::Reference(cid) => { - if let Ok(Object::Stream(stream)) = doc.get_object(*cid) { - content_refs.extend(extract_xobject_names(&stream.content)); - } - } - Object::Array(arr) => { - for o in arr { - if let Object::Reference(cid) = o { - if let Ok(Object::Stream(stream)) = doc.get_object(*cid) { - content_refs.extend(extract_xobject_names(&stream.content)); - } - } - } - } - Object::Stream(stream) => { - content_refs.extend(extract_xobject_names(&stream.content)); - } - _ => {} - } - } - - // If we have XObjects, pick the first image (prefer one referenced in content) - if let Some(xobjects) = xobjects_opt { - // Build ordered keys: first those referenced in content, then the rest - let mut keys: Vec> = xobjects.iter().map(|(k, _)| k.clone()).collect(); - // Stable sort by whether referenced first - keys.sort_by_key(|k| { - let name = if k.starts_with(b"/") { - k[1..].to_vec() - } else { - k.clone() - }; - match content_refs.iter().position(|r| *r == name) { - Some(pos) => pos as i32, - None => i32::MAX, - } - }); - - for key in keys { - if let Ok(&Object::Reference(obj_id)) = xobjects.get(&key) { - if let Ok(Object::Stream(stream)) = doc.get_object(obj_id) { - let dict = &stream.dict; - let is_image = matches!(dict.get(b"Subtype"), Ok(Object::Name(n)) if n == b"Image"); - if !is_image { - continue; - } - - let is_dct = match dict.get(b"Filter") { - Ok(Object::Name(n)) => n == b"DCTDecode", - Ok(Object::Array(arr)) => arr - .iter() - .any(|o| matches!(o, Object::Name(n) if n == b"DCTDecode")), - _ => false, - }; - - let data = stream.content.clone(); - if is_dct { - if let Ok(img) = image::load_from_memory(&data) { - let name = format!("{:03}.jpg", idx + 1); - image_pages.push(ImagePage { - name, - image: img, - jpeg_dct: Some(data), - }); - break; - } else { - // If JPEG parsing failed, skip - continue; - } - } else if let Ok(img) = image::load_from_memory(&data) { - // Fallback: try to decode arbitrary image stream - let name = format!("{:03}.jpg", idx + 1); - image_pages.push(ImagePage { - name, - image: img, - jpeg_dct: None, - }); - break; - } - } - } - } - } - } - - Ok(Document::new(image_pages)) - } -} - -// Helper to extract XObject names used in a content stream by scanning for "/Name Do" -fn extract_xobject_names(content: &[u8]) -> Vec> { - // This is a naive scanner but often sufficient: tokens separated by whitespace, looking for "/name Do" - let mut names = Vec::new(); - let s = content; - let mut i = 0; - while i < s.len() { - // skip whitespace - while i < s.len() && s[i].is_ascii_whitespace() { - i += 1; - } - if i >= s.len() { - break; - } - if s[i] == b'/' { - // read name - let start = i + 1; - i += 1; - while i < s.len() && !s[i].is_ascii_whitespace() { - i += 1; - } - let name = s[start..i].to_vec(); - // skip whitespace - while i < s.len() && s[i].is_ascii_whitespace() { - i += 1; - } - // check for Do operator after possible inline graphics state - // We will just check if next token is Do - let mut j = i; - while j < s.len() && s[j].is_ascii_whitespace() { - j += 1; - } - let op_start = j; - while j < s.len() && (s[j] as char).is_ascii_alphabetic() { - j += 1; - } - if &s[op_start..j] == b"Do" { - names.push(name); - } - i = j; - } else { - // skip token - while i < s.len() && !s[i].is_ascii_whitespace() { - i += 1; - } - } - } - names -} diff --git a/src/job.rs b/src/job.rs index 7f2043d..473ba1a 100644 --- a/src/job.rs +++ b/src/job.rs @@ -5,7 +5,9 @@ use anyhow::Result; use indicatif::{ProgressBar, ProgressStyle}; use rayon::prelude::*; -use crate::formats::{get_reader, get_writer, FormatId}; +use crate::formats::cbz::CbzReader; +use crate::formats::pdf::PdfWriter; +use crate::formats::{FormatId, FormatReader, FormatWriter}; #[derive(Debug, Clone)] pub struct Job { @@ -32,6 +34,22 @@ impl Job { } } +fn get_reader(format: FormatId) -> Box { + match format { + FormatId::Cbz => Box::new(CbzReader), + // Placeholder for future formats + FormatId::Pdf => unimplemented!("Reading PDF not implemented"), + } +} + +fn get_writer(format: FormatId) -> Box { + match format { + FormatId::Pdf => Box::new(PdfWriter), + // Placeholder for future formats + FormatId::Cbz => unimplemented!("Writing CBZ not implemented"), + } +} + pub fn process_jobs(jobs: Vec) -> Result<()> { let pb = ProgressBar::new(jobs.len() as u64); pb.enable_steady_tick(Duration::from_millis(300)); @@ -41,8 +59,8 @@ pub fn process_jobs(jobs: Vec) -> Result<()> { jobs.par_iter().for_each(|job| { // Build the pipeline for each job - let reader = get_reader(job.from).expect("No reader registered for selected input format"); - let writer = get_writer(job.to).expect("No writer registered for selected output format"); + let reader = get_reader(job.from); + let writer = get_writer(job.to); let doc = reader.read(&job.input_path).expect("Failed to read input"); writer diff --git a/src/main.rs b/src/main.rs index 7ac9262..7b902b8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,21 +11,6 @@ use walkdir::WalkDir; use cbz2pdf::formats::FormatId; use cbz2pdf::job::{process_jobs, Job}; -#[derive(clap::ValueEnum, Clone, Debug)] -enum CliFormat { - Cbz, - Pdf, -} - -impl From for FormatId { - fn from(value: CliFormat) -> Self { - match value { - CliFormat::Cbz => FormatId::Cbz, - CliFormat::Pdf => FormatId::Pdf, - } - } -} - #[derive(Parser)] #[command()] struct Cli { @@ -33,7 +18,7 @@ struct Cli { short = 'i', long = "input", value_hint = ValueHint::FilePath, - help = "Path to input file or directory" + help = "Path to CBZ file or directory containing CBZ files" )] input_path: String, @@ -42,20 +27,10 @@ struct Cli { long = "output-directory", default_value = ".", value_hint = ValueHint::FilePath, - help = "Output directory for converted files" + help = "Output directory for PDF files" )] output_dir: String, - #[arg( - long = "from", - value_enum, - help = "Input format. If omitted, auto-detect from file extension" - )] - from: Option, - - #[arg(long = "to", value_enum, default_value = "pdf", help = "Output format")] - to: CliFormat, - #[arg(short = 'p', long, help = "Ask for confirmation before doing anything")] interactive: bool, } @@ -68,51 +43,29 @@ fn main() -> Result<(), Box> { let input_path = Path::new(&cli.input_path); let output_dir = Path::new(&cli.output_dir); - let from_opt: Option = cli.from.map(Into::into); - let to_fmt: FormatId = cli.to.into(); - - // Validate target capability early - if !to_fmt.can_write() { - eprintln!("Selected output format is not supported for writing: {to_fmt:?}"); - std::process::exit(1); - } - let mut jobs: Vec = Vec::new(); if input_path.is_file() { - let detected = FormatId::detect_from_path(input_path); - let from_fmt = from_opt.or(detected).unwrap_or_else(|| { - eprintln!( - "Could not detect input format from file extension and no --from was provided" - ); - std::process::exit(1); - }); - - if !from_fmt.can_read() { - eprintln!("Selected/Detected input format is not supported for reading: {from_fmt:?}"); + if let Some(FormatId::Cbz) = FormatId::detect_from_path(input_path) { + jobs.push(Job::new( + input_path.to_path_buf(), + output_dir.to_path_buf(), + FormatId::Cbz, + FormatId::Pdf, + )); + } else { + eprintln!("Unsupported input file format"); std::process::exit(1); } - - jobs.push(Job::new( - input_path.to_path_buf(), - output_dir.to_path_buf(), - from_fmt, - to_fmt, - )); } else if input_path.is_dir() { - jobs.extend(walk_directory(input_path, output_dir, from_opt, to_fmt)); + jobs.extend(walk_directory(input_path, output_dir)); } else { eprintln!( - "Invalid input path. Please provide a valid file or a directory containing supported files." + "Invalid input path. Please provide a CBZ file or a directory containing CBZ files." ); std::process::exit(1); } - if jobs.is_empty() { - eprintln!("No matching inputs found to process."); - std::process::exit(1); - } - jobs.sort_by_key(|j| j.input_path.clone().into_os_string().into_string()); let proceed = if cli.interactive { @@ -152,33 +105,20 @@ fn main() -> Result<(), Box> { Ok(()) } -fn walk_directory( - directory: &Path, - output_dir: &Path, - from_opt: Option, - to_fmt: FormatId, -) -> Vec { +fn walk_directory(directory: &Path, output_dir: &Path) -> Vec { debug!("Walking {directory:?}"); let mut jobs = Vec::new(); for entry in WalkDir::new(directory) { let entry = entry.unwrap(); let path = entry.path(); if path.is_file() { - let detected = FormatId::detect_from_path(path); - let from_fmt_opt = match from_opt { - Some(fixed) => detected.filter(|d| *d == fixed), - None => detected, - }; - - if let Some(from_fmt) = from_fmt_opt { - if from_fmt.can_read() && to_fmt.can_write() { - jobs.push(Job::new( - path.to_path_buf(), - output_dir.to_path_buf(), - from_fmt, - to_fmt, - )); - } + if let Some(FormatId::Cbz) = FormatId::detect_from_path(path) { + jobs.push(Job::new( + path.to_path_buf(), + output_dir.to_path_buf(), + FormatId::Cbz, + FormatId::Pdf, + )); } } } diff --git a/tests/cbz_writer_tests.rs b/tests/cbz_writer_tests.rs deleted file mode 100644 index 2fbd54e..0000000 --- a/tests/cbz_writer_tests.rs +++ /dev/null @@ -1,96 +0,0 @@ -use std::fs::File; -use std::io::Read; - -use cbz2pdf::formats::cbz::CbzWriter; -use cbz2pdf::formats::FormatWriter; -use cbz2pdf::model::{Document, ImagePage}; - -fn make_tiny_jpeg() -> (Vec, image::DynamicImage) { - let img = image::DynamicImage::new_rgb8(1, 1).to_rgb8(); - let mut buf = Vec::new(); - { - let mut cursor = std::io::Cursor::new(&mut buf); - let mut enc = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut cursor, 80); - enc.encode(&img, 1, 1, image::ColorType::Rgb8.into()) - .unwrap(); - } - let decoded = image::load_from_memory(&buf).unwrap(); - (buf, decoded) -} - -#[test] -fn cbz_writer_preserves_dct_and_renames_non_jpg() { - // Prepare a page with original JPEG DCT data but a non-jpg name. - let (jpeg_dct, decoded) = make_tiny_jpeg(); - let page = ImagePage { - name: "cover.png".to_string(), - image: decoded, - jpeg_dct: Some(jpeg_dct.clone()), - }; - let doc = Document::new(vec![page]); - - let temp_dir = tempfile::tempdir().expect("create temp dir"); - let cbz_path = temp_dir.path().join("out.cbz"); - - let writer = CbzWriter; - writer.write(&doc, &cbz_path).expect("write cbz"); - - // Open the CBZ and verify it contains 001.jpg with the exact JPEG data. - let f = File::open(&cbz_path).unwrap(); - let mut zip = zip::ZipArchive::new(f).unwrap(); - - // There should be exactly one file named 001.jpg - let mut found = false; - for i in 0..zip.len() { - let mut file = zip.by_index(i).unwrap(); - let name = file.enclosed_name().unwrap().to_owned(); - if name.file_name().unwrap() == "001.jpg" { - let mut data = Vec::new(); - file.read_to_end(&mut data).unwrap(); - assert_eq!( - data, jpeg_dct, - "writer should preserve original JPEG DCT bytes" - ); - found = true; - } - } - assert!(found, "001.jpg not found in zip"); -} - -#[test] -fn cbz_writer_keeps_jpg_name() { - // If the page already has a .jpg name, the writer should keep it. - let (jpeg_dct, decoded) = make_tiny_jpeg(); - let page = ImagePage { - name: "page01.jpg".to_string(), - image: decoded, - jpeg_dct: Some(jpeg_dct), - }; - let doc = Document::new(vec![page]); - - let temp_dir = tempfile::tempdir().expect("create temp dir"); - let cbz_path = temp_dir.path().join("out.cbz"); - - let writer = CbzWriter; - writer.write(&doc, &cbz_path).expect("write cbz"); - - let f = File::open(&cbz_path).unwrap(); - let mut zip = zip::ZipArchive::new(f).unwrap(); - - let mut names = Vec::new(); - for i in 0..zip.len() { - let file = zip.by_index(i).unwrap(); - let name = file - .enclosed_name() - .unwrap() - .file_name() - .unwrap() - .to_owned(); - names.push(name.to_string_lossy().to_string()); - } - assert_eq!( - names, - vec!["page01.jpg"], - "existing .jpg name should be kept" - ); -} diff --git a/tests/job_and_format_tests.rs b/tests/job_and_format_tests.rs index 55c9742..41b5e6d 100644 --- a/tests/job_and_format_tests.rs +++ b/tests/job_and_format_tests.rs @@ -34,7 +34,7 @@ fn job_new_sets_output_extension() { #[test] fn format_capabilities_consistent() { assert!(FormatId::Cbz.can_read()); - assert!(FormatId::Cbz.can_write()); + assert!(!FormatId::Cbz.can_write()); assert!(FormatId::Pdf.can_write()); - assert!(FormatId::Pdf.can_read()); + assert!(!FormatId::Pdf.can_read()); } diff --git a/tests/pdf_reader_tests.rs b/tests/pdf_reader_tests.rs deleted file mode 100644 index 32be07f..0000000 --- a/tests/pdf_reader_tests.rs +++ /dev/null @@ -1,51 +0,0 @@ -use cbz2pdf::formats::pdf::{PdfReader, PdfWriter}; -use cbz2pdf::formats::{FormatReader, FormatWriter}; -use cbz2pdf::model::{Document, ImagePage}; - -fn make_small_jpeg(w: u32, h: u32, rgb: [u8; 3]) -> (Vec, image::DynamicImage) { - let mut img = image::ImageBuffer::, _>::new(w, h); - for p in img.pixels_mut() { - *p = image::Rgb(rgb); - } - let dynimg = image::DynamicImage::ImageRgb8(img); - - let mut buf = Vec::new(); - { - let mut cursor = std::io::Cursor::new(&mut buf); - let mut enc = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut cursor, 85); - let rgb8 = dynimg.to_rgb8(); - enc.encode(&rgb8, w, h, image::ColorType::Rgb8.into()) - .unwrap(); - } - (buf, dynimg) -} - -#[test] -fn pdf_reader_extracts_jpeg_xobject_and_preserves_dct() { - // Build a PDF with one JPEG-backed page - let (jpeg_dct, dynimg) = make_small_jpeg(3, 2, [10, 20, 30]); - let page = ImagePage { - name: "p1.jpg".into(), - image: dynimg.clone(), - jpeg_dct: Some(jpeg_dct.clone()), - }; - let doc = Document::new(vec![page]); - - let temp_dir = tempfile::tempdir().expect("tmpdir"); - let pdf_path = temp_dir.path().join("in.pdf"); - - PdfWriter.write(&doc, &pdf_path).expect("write pdf"); - - // Read back with PdfReader - let out = PdfReader.read(&pdf_path).expect("read pdf"); - assert_eq!(out.pages.len(), 1, "should have one page extracted"); - let p = &out.pages[0]; - assert_eq!(p.image.width(), dynimg.width()); - assert_eq!(p.image.height(), dynimg.height()); - assert!(p.jpeg_dct.is_some(), "should preserve DCT for JPEG images"); - assert_eq!( - p.jpeg_dct.as_ref().unwrap(), - &jpeg_dct, - "JPEG bytes should match" - ); -}