Compare commits

..

3 commits

Author SHA1 Message Date
Renovate
acb1e37b6f chore(deps): lock file maintenance
All checks were successful
Build and test / Clippy (pull_request) Successful in 1m4s
Build and test / Tests (pull_request) Successful in 1m12s
Build and test / Build AMD64 (pull_request) Successful in 1m11s
Checking yaml / Run yamllint (pull_request) Successful in 8s
Checking Renovate configuration / validate (pull_request) Successful in 1m24s
Build and test / Generate Documentation (pull_request) Successful in 52s
2025-10-13 18:00:22 +11:00
Renovate
4b69124358 fix(deps): update rust crate lopdf to 0.38.0
All checks were successful
Build and test / Clippy (pull_request) Successful in 42s
Checking Renovate configuration / validate (pull_request) Successful in 1m1s
Checking yaml / Run yamllint (pull_request) Successful in 3s
Build and test / Tests (pull_request) Successful in 1m4s
Build and test / Build AMD64 (pull_request) Successful in 1m3s
Build and test / Generate Documentation (pull_request) Successful in 1m4s
2025-10-13 17:30:12 +11:00
b35ccbe271
feat: implement cbz writing and pdf reading
All checks were successful
Build and test / Clippy (pull_request) Successful in 44s
Build and test / Tests (pull_request) Successful in 48s
Checking yaml / Run yamllint (pull_request) Successful in 5s
Checking Renovate configuration / validate (pull_request) Successful in 1m4s
Build and test / Build AMD64 (pull_request) Successful in 49s
Build and test / Generate Documentation (pull_request) Successful in 59s
2025-10-13 16:57:14 +11:00
10 changed files with 870 additions and 67 deletions

393
Cargo.lock generated
View file

@ -37,6 +37,15 @@ dependencies = [
"equator",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.21"
@ -134,7 +143,7 @@ dependencies = [
"anyhow",
"arrayvec",
"log",
"nom",
"nom 7.1.3",
"num-rational",
"v_frame",
]
@ -175,6 +184,15 @@ dependencies = [
"generic-array",
]
[[package]]
name = "block-padding"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93"
dependencies = [
"generic-array",
]
[[package]]
name = "built"
version = "0.7.7"
@ -214,6 +232,15 @@ dependencies = [
"libbz2-rs-sys",
]
[[package]]
name = "cbc"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6"
dependencies = [
"cipher",
]
[[package]]
name = "cbz2pdf"
version = "0.1.0"
@ -224,6 +251,7 @@ dependencies = [
"image",
"indicatif",
"log",
"lopdf",
"pdf-writer",
"pretty_env_logger",
"rayon",
@ -261,6 +289,17 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]]
name = "chrono"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
dependencies = [
"iana-time-zone",
"num-traits",
"windows-link",
]
[[package]]
name = "cipher"
version = "0.4.4"
@ -342,6 +381,12 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "cpufeatures"
version = "0.2.17"
@ -465,6 +510,15 @@ dependencies = [
"subtle",
]
[[package]]
name = "ecb"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7"
dependencies = [
"cipher",
]
[[package]]
name = "either"
version = "1.15.0"
@ -477,6 +531,15 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "encoding_rs"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
[[package]]
name = "env_logger"
version = "0.10.2"
@ -601,9 +664,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "generic-array"
version = "0.14.7"
version = "0.14.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
checksum = "1dc8f7d2ded5f9209535e4b3fd4d39c002f30902ff5ce9f64e2c33d549576500"
dependencies = [
"typenum",
"version_check",
@ -686,6 +749,30 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
[[package]]
name = "iana-time-zone"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "image"
version = "0.25.8"
@ -755,6 +842,7 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
dependencies = [
"block-padding",
"generic-array",
]
@ -801,6 +889,47 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jiff"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
dependencies = [
"jiff-static",
"jiff-tzdb-platform",
"log",
"portable-atomic",
"portable-atomic-util",
"serde",
"windows-sys 0.59.0",
]
[[package]]
name = "jiff-static"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "jiff-tzdb"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524"
[[package]]
name = "jiff-tzdb-platform"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8"
dependencies = [
"jiff-tzdb",
]
[[package]]
name = "jobserver"
version = "0.1.34"
@ -879,6 +1008,38 @@ dependencies = [
"imgref",
]
[[package]]
name = "lopdf"
version = "0.38.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7184fdea2bc3cd272a1acec4030c321a8f9875e877b3f92a53f2f6033fdc289"
dependencies = [
"aes",
"bitflags",
"cbc",
"chrono",
"ecb",
"encoding_rs",
"flate2",
"getrandom 0.3.3",
"indexmap",
"itoa",
"jiff",
"log",
"md-5",
"nom 8.0.0",
"nom_locate",
"rand 0.9.2",
"rangemap",
"rayon",
"sha2",
"stringprep",
"thiserror 2.0.17",
"time",
"ttf-parser",
"weezl",
]
[[package]]
name = "lzma-rust2"
version = "0.13.0"
@ -899,6 +1060,16 @@ dependencies = [
"rayon",
]
[[package]]
name = "md-5"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if",
"digest",
]
[[package]]
name = "memchr"
version = "2.7.6"
@ -947,6 +1118,26 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "nom"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [
"memchr",
]
[[package]]
name = "nom_locate"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d"
dependencies = [
"bytecount",
"memchr",
"nom 8.0.0",
]
[[package]]
name = "noop_proc_macro"
version = "0.3.0"
@ -1085,6 +1276,15 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
[[package]]
name = "portable-atomic-util"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
dependencies = [
"portable-atomic",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
@ -1212,8 +1412,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_chacha 0.3.1",
"rand_core 0.6.4",
]
[[package]]
name = "rand"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.3",
]
[[package]]
@ -1223,7 +1433,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core 0.9.3",
]
[[package]]
@ -1235,6 +1455,21 @@ dependencies = [
"getrandom 0.2.16",
]
[[package]]
name = "rand_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [
"getrandom 0.3.3",
]
[[package]]
name = "rangemap"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93e7e49bb0bf967717f7bd674458b3d6b0c5f48ec7e3038166026a69fc22223"
[[package]]
name = "rav1e"
version = "0.7.1"
@ -1261,11 +1496,11 @@ dependencies = [
"once_cell",
"paste",
"profiling",
"rand",
"rand_chacha",
"rand 0.8.5",
"rand_chacha 0.3.1",
"simd_helpers",
"system-deps",
"thiserror",
"thiserror 1.0.69",
"v_frame",
"wasm-bindgen",
]
@ -1467,6 +1702,17 @@ version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "stringprep"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
dependencies = [
"unicode-bidi",
"unicode-normalization",
"unicode-properties",
]
[[package]]
name = "strsim"
version = "0.11.1"
@ -1570,7 +1816,16 @@ version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl",
"thiserror-impl 1.0.69",
]
[[package]]
name = "thiserror"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
dependencies = [
"thiserror-impl 2.0.17",
]
[[package]]
@ -1584,6 +1839,17 @@ dependencies = [
"syn",
]
[[package]]
name = "thiserror-impl"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tiff"
version = "0.10.3"
@ -1605,10 +1871,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
@ -1617,6 +1885,31 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
[[package]]
name = "time-macros"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinyvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "toml"
version = "0.8.23"
@ -1651,18 +1944,45 @@ dependencies = [
"winnow",
]
[[package]]
name = "ttf-parser"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2df906b07856748fa3f6e0ad0cbaa047052d4a7dd609e231c4f72cee8c36f31"
[[package]]
name = "typenum"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "unicode-bidi"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
[[package]]
name = "unicode-ident"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
[[package]]
name = "unicode-normalization"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-properties"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0"
[[package]]
name = "unicode-width"
version = "0.2.2"
@ -1822,12 +2142,65 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "windows-core"
version = "0.62.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-result"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.59.0"

View file

@ -16,6 +16,7 @@ rayon = "1.10.0"
tabled = "0.20.0"
walkdir = "2.5.0"
zip = "6.0.0"
lopdf = "0.38.0"
[dev-dependencies]
tempfile = "3.12.0"

View file

@ -1,6 +1,6 @@
use std::ffi::OsStr;
use std::fs::File;
use std::io::Read;
use std::io::{Read, Write};
use std::path::Path;
use anyhow::Result;
@ -9,7 +9,7 @@ use zip::ZipArchive;
use crate::model::{Document, ImagePage};
use super::FormatReader;
use super::{FormatReader, FormatWriter};
pub struct CbzReader;
@ -51,3 +51,40 @@ impl FormatReader for CbzReader {
Ok(Document::new(pages))
}
}
/// Writes a [`Document`] as a CBZ (zip) archive in which every page is stored as a JPEG entry.
pub struct CbzWriter;

impl FormatWriter for CbzWriter {
    /// Creates the archive at `output` and adds one entry per page, in document order.
    ///
    /// A page keeps its own name only when that name has the exact extension `jpg`;
    /// otherwise the entry is named after its 1-based position (`001.jpg`, `002.jpg`, ...).
    /// Pages that carry original JPEG bytes (`jpeg_dct`) are written verbatim; all other
    /// pages are converted to RGB8 and encoded as JPEG at quality 85.
    ///
    /// # Errors
    /// Propagates failures from creating the file, from the zip writer and from the
    /// JPEG encoder.
    fn write(&self, doc: &Document, output: &Path) -> Result<()> {
        use zip::write::SimpleFileOptions;

        let mut archive = zip::ZipWriter::new(File::create(output)?);
        let entry_options = SimpleFileOptions::default();

        for (index, page) in doc.pages.iter().enumerate() {
            let has_jpg_extension =
                Path::new(&page.name).extension().and_then(OsStr::to_str) == Some("jpg");
            let entry_name = if has_jpg_extension {
                page.name.clone()
            } else {
                format!("{:03}.jpg", index + 1)
            };
            archive.start_file(&entry_name, entry_options)?;

            match &page.jpeg_dct {
                // Original JPEG bytes are available: store them untouched.
                Some(raw_jpeg) => archive.write_all(raw_jpeg)?,
                // No original bytes: encode the decoded image to JPEG in memory first.
                None => {
                    let pixels = page.image.to_rgb8();
                    let (width, height) = (pixels.width(), pixels.height());
                    let mut encoded = std::io::Cursor::new(Vec::new());
                    {
                        let mut encoder =
                            image::codecs::jpeg::JpegEncoder::new_with_quality(&mut encoded, 85);
                        encoder.encode(
                            &pixels.into_raw(),
                            width,
                            height,
                            image::ColorType::Rgb8.into(),
                        )?;
                    }
                    archive.write_all(&encoded.into_inner())?;
                }
            }
        }

        archive.finish()?;
        Ok(())
    }
}

View file

@ -8,6 +8,9 @@ use crate::model::Document;
pub mod cbz;
pub mod pdf;
use cbz::{CbzReader, CbzWriter};
use pdf::{PdfReader, PdfWriter};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatId {
Cbz,
@ -17,18 +20,12 @@ pub enum FormatId {
impl FormatId {
#[allow(dead_code)]
pub fn can_read(self) -> bool {
match self {
FormatId::Cbz => true,
FormatId::Pdf => false, // planned but not implemented yet
}
get_reader(self).is_some()
}
#[allow(dead_code)]
pub fn can_write(self) -> bool {
match self {
FormatId::Pdf => true,
FormatId::Cbz => false, // planned but not implemented yet
}
get_writer(self).is_some()
}
pub fn detect_from_path(path: &Path) -> Option<FormatId> {
@ -47,3 +44,17 @@ pub trait FormatReader: Send + Sync {
pub trait FormatWriter: Send + Sync {
fn write(&self, doc: &Document, output: &Path) -> Result<()>;
}
/// Returns the reader implementation registered for `format`.
///
/// Both variants matched here have a reader, so the result is currently always `Some`.
pub fn get_reader(format: FormatId) -> Option<Box<dyn FormatReader>> {
    let reader: Box<dyn FormatReader> = match format {
        FormatId::Cbz => Box::new(CbzReader),
        FormatId::Pdf => Box::new(PdfReader),
    };
    Some(reader)
}
/// Returns the writer implementation registered for `format`.
///
/// Both variants matched here have a writer, so the result is currently always `Some`.
pub fn get_writer(format: FormatId) -> Option<Box<dyn FormatWriter>> {
    let writer: Box<dyn FormatWriter> = match format {
        FormatId::Pdf => Box::new(PdfWriter),
        FormatId::Cbz => Box::new(CbzWriter),
    };
    Some(writer)
}

View file

@ -1,12 +1,13 @@
use anyhow::Result;
use image::{codecs::jpeg::JpegEncoder, ColorType};
use image::codecs::jpeg::JpegEncoder;
use image::ColorType;
use pdf_writer::{Content, Filter, Finish, Name, Pdf, Rect, Ref};
use std::io::Cursor;
use std::path::Path;
use crate::model::Document;
use crate::model::{Document, ImagePage};
use super::FormatWriter;
use super::{FormatReader, FormatWriter};
pub struct PdfWriter;
@ -77,3 +78,194 @@ impl FormatWriter for PdfWriter {
Ok(())
}
}
/// Reads a PDF back into a [`Document`] by extracting one image XObject per page.
pub struct PdfReader;
impl FormatReader for PdfReader {
/// Loads the PDF at `input` with lopdf and collects the page images it can decode.
///
/// Pages are visited in the iteration order of `get_pages()`. For each page at most one
/// `ImagePage` is produced: the first image XObject that `image::load_from_memory` accepts,
/// trying XObjects named by a `Do` operator in the page's content stream(s) before the rest.
/// Pages without such an image contribute nothing, so the returned document may contain
/// fewer pages than the PDF.
///
/// # Errors
/// Failures from `LoDocument::load` and from looking up a page object are propagated.
/// Problems while resolving resources, contents or individual XObjects are skipped silently.
fn read(&self, input: &Path) -> Result<Document> {
use lopdf::{Document as LoDocument, Object};
let doc = LoDocument::load(input)?;
let pages_map = doc.get_pages();
let mut image_pages: Vec<ImagePage> = Vec::new();
// `idx` is the position in the iteration and is only used to build the output page name.
for (idx, (_page_num, page_id)) in pages_map.iter().enumerate() {
// Fetch page object
let page_obj = doc.get_object(*page_id)?;
// A page object that is not a dictionary is skipped rather than treated as an error.
let page_dict = match page_obj.as_dict() {
Ok(d) => d,
Err(_) => continue,
};
// Resolve Resources dictionary (can be a reference or inline dict)
// NOTE(review): only the page's own `Resources` entry is read here; resources inherited
// from a parent node do not appear to be consulted — confirm whether that matters.
let (mut xobjects_opt, mut content_refs): (Option<lopdf::Dictionary>, Vec<Vec<u8>>) =
(None, Vec::new());
if let Ok(obj) = page_dict.get(b"Resources") {
match obj {
Object::Reference(id) => {
if let Ok(Object::Dictionary(d)) = doc.get_object(*id) {
// Extract XObject dict if present
// The XObject entry itself may again be a reference or an inline dictionary.
if let Ok(Object::Reference(xid)) = d.get(b"XObject") {
if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) {
xobjects_opt = Some(xd.clone());
}
} else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") {
xobjects_opt = Some(xd.clone());
}
}
}
Object::Dictionary(d) => {
// Same XObject lookup as above, for a Resources dictionary stored inline.
if let Ok(Object::Reference(xid)) = d.get(b"XObject") {
if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) {
xobjects_opt = Some(xd.clone());
}
} else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") {
xobjects_opt = Some(xd.clone());
}
}
_ => {}
}
}
// Try to track which XObjects are used by parsing Content streams for /Name Do
// `Contents` is handled as a single stream reference, an array of stream references,
// or a stream object; anything else is ignored.
// NOTE(review): `stream.content` is scanned as stored; if the stream is filter-encoded
// the scan will presumably find nothing — confirm against lopdf's behaviour.
if let Ok(contents_obj) = page_dict.get(b"Contents") {
match contents_obj {
Object::Reference(cid) => {
if let Ok(Object::Stream(stream)) = doc.get_object(*cid) {
content_refs.extend(extract_xobject_names(&stream.content));
}
}
Object::Array(arr) => {
for o in arr {
if let Object::Reference(cid) = o {
if let Ok(Object::Stream(stream)) = doc.get_object(*cid) {
content_refs.extend(extract_xobject_names(&stream.content));
}
}
}
}
Object::Stream(stream) => {
content_refs.extend(extract_xobject_names(&stream.content));
}
_ => {}
}
}
// If we have XObjects, pick the first image (prefer one referenced in content)
if let Some(xobjects) = xobjects_opt {
// Build ordered keys: first those referenced in content, then the rest
let mut keys: Vec<Vec<u8>> = xobjects.iter().map(|(k, _)| k.clone()).collect();
// Stable sort by whether referenced first
// Keys found in `content_refs` sort by their first position there; all others share
// the key `i32::MAX` and therefore keep their relative order after the referenced ones.
keys.sort_by_key(|k| {
// Compare without a leading '/', since `content_refs` holds names without it.
let name = if k.starts_with(b"/") {
k[1..].to_vec()
} else {
k.clone()
};
match content_refs.iter().position(|r| *r == name) {
Some(pos) => pos as i32,
None => i32::MAX,
}
});
for key in keys {
// Only XObject entries that are references to stream objects are considered.
if let Ok(&Object::Reference(obj_id)) = xobjects.get(&key) {
if let Ok(Object::Stream(stream)) = doc.get_object(obj_id) {
let dict = &stream.dict;
let is_image = matches!(dict.get(b"Subtype"), Ok(Object::Name(n)) if n == b"Image");
if !is_image {
continue;
}
// `Filter` may be a single name or an array of names; DCTDecode anywhere counts.
let is_dct = match dict.get(b"Filter") {
Ok(Object::Name(n)) => n == b"DCTDecode",
Ok(Object::Array(arr)) => arr
.iter()
.any(|o| matches!(o, Object::Name(n) if n == b"DCTDecode")),
_ => false,
};
let data = stream.content.clone();
if is_dct {
// Keep the stored JPEG bytes alongside the decoded image so a writer can reuse them.
if let Ok(img) = image::load_from_memory(&data) {
let name = format!("{:03}.jpg", idx + 1);
image_pages.push(ImagePage {
name,
image: img,
jpeg_dct: Some(data),
});
// One image per page: stop at the first success.
break;
} else {
// If JPEG parsing failed, skip
continue;
}
} else if let Ok(img) = image::load_from_memory(&data) {
// Fallback: try to decode arbitrary image stream
let name = format!("{:03}.jpg", idx + 1);
image_pages.push(ImagePage {
name,
image: img,
jpeg_dct: None,
});
break;
}
}
}
}
}
}
Ok(Document::new(image_pages))
}
}
/// Scans a PDF content stream for `/Name Do` sequences and returns the names, without the
/// leading `/`, in order of appearance.
///
/// This is a deliberately naive scanner: tokens are split on ASCII whitespace only, and a
/// name counts as used when the token right after it is exactly the `Do` operator. It does
/// not interpret string literals, comments, inline image data or `#xx` name escapes.
///
/// The operator must end at a token boundary (end of input, whitespace or a PDF delimiter),
/// so a token such as `Do1` is not mistaken for `Do`.
fn extract_xobject_names(content: &[u8]) -> Vec<Vec<u8>> {
    /// True when `byte` terminates an operator token: end of input, ASCII whitespace or
    /// one of the PDF delimiter characters.
    fn ends_token(byte: Option<&u8>) -> bool {
        match byte {
            None => true,
            Some(b) => b.is_ascii_whitespace() || b"()<>[]{}/%".contains(b),
        }
    }

    let mut names = Vec::new();
    let mut i = 0;
    while i < content.len() {
        if content[i].is_ascii_whitespace() {
            i += 1;
            continue;
        }
        if content[i] != b'/' {
            // Not a name: skip the whole token.
            while i < content.len() && !content[i].is_ascii_whitespace() {
                i += 1;
            }
            continue;
        }

        // Read the name that follows the '/'.
        let name_start = i + 1;
        i = name_start;
        while i < content.len() && !content[i].is_ascii_whitespace() {
            i += 1;
        }
        let name = &content[name_start..i];

        // Skip the whitespace between the name and the next token.
        while i < content.len() && content[i].is_ascii_whitespace() {
            i += 1;
        }

        // Read the alphabetic run that may be the operator. If the next token is not
        // alphabetic (e.g. a number or another name), `i` stays put and the main loop
        // handles that token normally.
        let op_start = i;
        while i < content.len() && content[i].is_ascii_alphabetic() {
            i += 1;
        }
        if &content[op_start..i] == b"Do" && ends_token(content.get(i)) {
            names.push(name.to_vec());
        }
    }
    names
}

View file

@ -5,9 +5,7 @@ use anyhow::Result;
use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use crate::formats::cbz::CbzReader;
use crate::formats::pdf::PdfWriter;
use crate::formats::{FormatId, FormatReader, FormatWriter};
use crate::formats::{get_reader, get_writer, FormatId};
#[derive(Debug, Clone)]
pub struct Job {
@ -34,22 +32,6 @@ impl Job {
}
}
fn get_reader(format: FormatId) -> Box<dyn FormatReader> {
match format {
FormatId::Cbz => Box::new(CbzReader),
// Placeholder for future formats
FormatId::Pdf => unimplemented!("Reading PDF not implemented"),
}
}
fn get_writer(format: FormatId) -> Box<dyn FormatWriter> {
match format {
FormatId::Pdf => Box::new(PdfWriter),
// Placeholder for future formats
FormatId::Cbz => unimplemented!("Writing CBZ not implemented"),
}
}
pub fn process_jobs(jobs: Vec<Job>) -> Result<()> {
let pb = ProgressBar::new(jobs.len() as u64);
pb.enable_steady_tick(Duration::from_millis(300));
@ -59,8 +41,8 @@ pub fn process_jobs(jobs: Vec<Job>) -> Result<()> {
jobs.par_iter().for_each(|job| {
// Build the pipeline for each job
let reader = get_reader(job.from);
let writer = get_writer(job.to);
let reader = get_reader(job.from).expect("No reader registered for selected input format");
let writer = get_writer(job.to).expect("No writer registered for selected output format");
let doc = reader.read(&job.input_path).expect("Failed to read input");
writer

View file

@ -11,6 +11,21 @@ use walkdir::WalkDir;
use cbz2pdf::formats::FormatId;
use cbz2pdf::job::{process_jobs, Job};
// Formats the user can name on the command line; used as the value type of the
// `--from` and `--to` options and converted into `FormatId` before any work is done.
#[derive(clap::ValueEnum, Clone, Debug)]
enum CliFormat {
// Comic book zip archive.
Cbz,
// PDF document.
Pdf,
}
impl From<CliFormat> for FormatId {
    /// Maps a command-line format choice onto the matching library format identifier.
    fn from(cli_format: CliFormat) -> Self {
        match cli_format {
            CliFormat::Cbz => Self::Cbz,
            CliFormat::Pdf => Self::Pdf,
        }
    }
}
#[derive(Parser)]
#[command()]
struct Cli {
@ -18,7 +33,7 @@ struct Cli {
short = 'i',
long = "input",
value_hint = ValueHint::FilePath,
help = "Path to CBZ file or directory containing CBZ files"
help = "Path to input file or directory"
)]
input_path: String,
@ -27,10 +42,20 @@ struct Cli {
long = "output-directory",
default_value = ".",
value_hint = ValueHint::FilePath,
help = "Output directory for PDF files"
help = "Output directory for converted files"
)]
output_dir: String,
#[arg(
long = "from",
value_enum,
help = "Input format. If omitted, auto-detect from file extension"
)]
from: Option<CliFormat>,
#[arg(long = "to", value_enum, default_value = "pdf", help = "Output format")]
to: CliFormat,
#[arg(short = 'p', long, help = "Ask for confirmation before doing anything")]
interactive: bool,
}
@ -43,29 +68,51 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let input_path = Path::new(&cli.input_path);
let output_dir = Path::new(&cli.output_dir);
let from_opt: Option<FormatId> = cli.from.map(Into::into);
let to_fmt: FormatId = cli.to.into();
// Validate target capability early
if !to_fmt.can_write() {
eprintln!("Selected output format is not supported for writing: {to_fmt:?}");
std::process::exit(1);
}
let mut jobs: Vec<Job> = Vec::new();
if input_path.is_file() {
if let Some(FormatId::Cbz) = FormatId::detect_from_path(input_path) {
jobs.push(Job::new(
input_path.to_path_buf(),
output_dir.to_path_buf(),
FormatId::Cbz,
FormatId::Pdf,
));
} else {
eprintln!("Unsupported input file format");
let detected = FormatId::detect_from_path(input_path);
let from_fmt = from_opt.or(detected).unwrap_or_else(|| {
eprintln!(
"Could not detect input format from file extension and no --from was provided"
);
std::process::exit(1);
});
if !from_fmt.can_read() {
eprintln!("Selected/Detected input format is not supported for reading: {from_fmt:?}");
std::process::exit(1);
}
jobs.push(Job::new(
input_path.to_path_buf(),
output_dir.to_path_buf(),
from_fmt,
to_fmt,
));
} else if input_path.is_dir() {
jobs.extend(walk_directory(input_path, output_dir));
jobs.extend(walk_directory(input_path, output_dir, from_opt, to_fmt));
} else {
eprintln!(
"Invalid input path. Please provide a CBZ file or a directory containing CBZ files."
"Invalid input path. Please provide a valid file or a directory containing supported files."
);
std::process::exit(1);
}
if jobs.is_empty() {
eprintln!("No matching inputs found to process.");
std::process::exit(1);
}
jobs.sort_by_key(|j| j.input_path.clone().into_os_string().into_string());
let proceed = if cli.interactive {
@ -105,20 +152,33 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
fn walk_directory(directory: &Path, output_dir: &Path) -> Vec<Job> {
fn walk_directory(
directory: &Path,
output_dir: &Path,
from_opt: Option<FormatId>,
to_fmt: FormatId,
) -> Vec<Job> {
debug!("Walking {directory:?}");
let mut jobs = Vec::new();
for entry in WalkDir::new(directory) {
let entry = entry.unwrap();
let path = entry.path();
if path.is_file() {
if let Some(FormatId::Cbz) = FormatId::detect_from_path(path) {
jobs.push(Job::new(
path.to_path_buf(),
output_dir.to_path_buf(),
FormatId::Cbz,
FormatId::Pdf,
));
let detected = FormatId::detect_from_path(path);
let from_fmt_opt = match from_opt {
Some(fixed) => detected.filter(|d| *d == fixed),
None => detected,
};
if let Some(from_fmt) = from_fmt_opt {
if from_fmt.can_read() && to_fmt.can_write() {
jobs.push(Job::new(
path.to_path_buf(),
output_dir.to_path_buf(),
from_fmt,
to_fmt,
));
}
}
}
}

96
tests/cbz_writer_tests.rs Normal file
View file

@ -0,0 +1,96 @@
use std::fs::File;
use std::io::Read;
use cbz2pdf::formats::cbz::CbzWriter;
use cbz2pdf::formats::FormatWriter;
use cbz2pdf::model::{Document, ImagePage};
/// Encodes a 1x1 RGB image as JPEG (quality 80) and returns the encoded bytes together
/// with the image obtained by decoding those bytes again.
fn make_tiny_jpeg() -> (Vec<u8>, image::DynamicImage) {
    let pixels = image::DynamicImage::new_rgb8(1, 1).to_rgb8();
    let mut jpeg_bytes = Vec::new();
    {
        // Scope the cursor so its mutable borrow of `jpeg_bytes` ends before decoding.
        let mut sink = std::io::Cursor::new(&mut jpeg_bytes);
        image::codecs::jpeg::JpegEncoder::new_with_quality(&mut sink, 80)
            .encode(&pixels, 1, 1, image::ColorType::Rgb8.into())
            .unwrap();
    }
    let round_tripped = image::load_from_memory(&jpeg_bytes).unwrap();
    (jpeg_bytes, round_tripped)
}
#[test]
fn cbz_writer_preserves_dct_and_renames_non_jpg() {
    // A page that carries original JPEG bytes but whose name does not end in .jpg.
    let (original_jpeg, decoded_image) = make_tiny_jpeg();
    let doc = Document::new(vec![ImagePage {
        name: "cover.png".to_string(),
        image: decoded_image,
        jpeg_dct: Some(original_jpeg.clone()),
    }]);

    let temp_dir = tempfile::tempdir().expect("create temp dir");
    let cbz_path = temp_dir.path().join("out.cbz");
    CbzWriter.write(&doc, &cbz_path).expect("write cbz");

    // The archive must contain 001.jpg holding exactly the original JPEG bytes.
    let mut archive = zip::ZipArchive::new(File::open(&cbz_path).unwrap()).unwrap();
    let mut found = false;
    for index in 0..archive.len() {
        let mut entry = archive.by_index(index).unwrap();
        let entry_path = entry.enclosed_name().unwrap().to_owned();
        if entry_path.file_name().unwrap() != "001.jpg" {
            continue;
        }
        let mut stored = Vec::new();
        entry.read_to_end(&mut stored).unwrap();
        assert_eq!(
            stored, original_jpeg,
            "writer should preserve original JPEG DCT bytes"
        );
        found = true;
    }
    assert!(found, "001.jpg not found in zip");
}
#[test]
fn cbz_writer_keeps_jpg_name() {
    // A page whose name already ends in .jpg must be stored under that same name.
    let (jpeg_bytes, decoded_image) = make_tiny_jpeg();
    let doc = Document::new(vec![ImagePage {
        name: "page01.jpg".to_string(),
        image: decoded_image,
        jpeg_dct: Some(jpeg_bytes),
    }]);

    let temp_dir = tempfile::tempdir().expect("create temp dir");
    let cbz_path = temp_dir.path().join("out.cbz");
    CbzWriter.write(&doc, &cbz_path).expect("write cbz");

    // Collect the file-name component of every archive entry.
    let mut archive = zip::ZipArchive::new(File::open(&cbz_path).unwrap()).unwrap();
    let mut entry_names = Vec::with_capacity(archive.len());
    for index in 0..archive.len() {
        let entry = archive.by_index(index).unwrap();
        let file_name = entry
            .enclosed_name()
            .unwrap()
            .file_name()
            .unwrap()
            .to_owned();
        entry_names.push(file_name.to_string_lossy().to_string());
    }

    assert_eq!(
        entry_names,
        vec!["page01.jpg"],
        "existing .jpg name should be kept"
    );
}

View file

@ -34,7 +34,7 @@ fn job_new_sets_output_extension() {
#[test]
fn format_capabilities_consistent() {
assert!(FormatId::Cbz.can_read());
assert!(!FormatId::Cbz.can_write());
assert!(FormatId::Cbz.can_write());
assert!(FormatId::Pdf.can_write());
assert!(!FormatId::Pdf.can_read());
assert!(FormatId::Pdf.can_read());
}

51
tests/pdf_reader_tests.rs Normal file
View file

@ -0,0 +1,51 @@
use cbz2pdf::formats::pdf::{PdfReader, PdfWriter};
use cbz2pdf::formats::{FormatReader, FormatWriter};
use cbz2pdf::model::{Document, ImagePage};
/// Builds a `w` x `h` image filled with the colour `rgb` and returns its JPEG encoding
/// (quality 85) together with the un-encoded source image.
fn make_small_jpeg(w: u32, h: u32, rgb: [u8; 3]) -> (Vec<u8>, image::DynamicImage) {
    let solid = image::ImageBuffer::from_pixel(w, h, image::Rgb(rgb));
    let source = image::DynamicImage::ImageRgb8(solid);

    let mut jpeg_bytes = Vec::new();
    {
        // Scope the cursor so its mutable borrow of `jpeg_bytes` ends before returning it.
        let mut sink = std::io::Cursor::new(&mut jpeg_bytes);
        let pixels = source.to_rgb8();
        image::codecs::jpeg::JpegEncoder::new_with_quality(&mut sink, 85)
            .encode(&pixels, w, h, image::ColorType::Rgb8.into())
            .unwrap();
    }
    (jpeg_bytes, source)
}
#[test]
fn pdf_reader_extracts_jpeg_xobject_and_preserves_dct() {
    // Write a single JPEG-backed page to a PDF.
    let (source_jpeg, source_image) = make_small_jpeg(3, 2, [10, 20, 30]);
    let doc = Document::new(vec![ImagePage {
        name: "p1.jpg".into(),
        image: source_image.clone(),
        jpeg_dct: Some(source_jpeg.clone()),
    }]);

    let temp_dir = tempfile::tempdir().expect("tmpdir");
    let pdf_path = temp_dir.path().join("in.pdf");
    PdfWriter.write(&doc, &pdf_path).expect("write pdf");

    // Reading it back must yield the same dimensions and the identical JPEG bytes.
    let round_trip = PdfReader.read(&pdf_path).expect("read pdf");
    assert_eq!(round_trip.pages.len(), 1, "should have one page extracted");

    let extracted = &round_trip.pages[0];
    assert_eq!(extracted.image.width(), source_image.width());
    assert_eq!(extracted.image.height(), source_image.height());
    assert!(
        extracted.jpeg_dct.is_some(),
        "should preserve DCT for JPEG images"
    );
    assert_eq!(
        extracted.jpeg_dct.as_ref().unwrap(),
        &source_jpeg,
        "JPEG bytes should match"
    );
}