From 6379e8a56b2eaac256cf859dcf61fd49158e8936 Mon Sep 17 00:00:00 2001 From: Marc Plano-Lesay Date: Sun, 26 Oct 2025 19:14:22 +1100 Subject: [PATCH] feat: support cbr reading --- .gitignore | 1 + Cargo.lock | 52 ++++++++++++++++++++++++ Cargo.toml | 3 +- src/formats/cbr.rs | 75 +++++++++++++++++++++++++++++++++++ src/formats/cbx.rs | 30 ++++++++++++++ src/formats/cbz.rs | 28 +++++-------- src/formats/mod.rs | 9 +++++ src/job.rs | 1 + tests/cbr_reader_tests.rs | 23 +++++++++++ tests/job_and_format_tests.rs | 14 ++++++- 10 files changed, 216 insertions(+), 20 deletions(-) create mode 100644 src/formats/cbr.rs create mode 100644 src/formats/cbx.rs create mode 100644 tests/cbr_reader_tests.rs diff --git a/.gitignore b/.gitignore index 3f4b997..f950e3f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.DS_Store .direnv/ .idea/ /target diff --git a/Cargo.lock b/Cargo.lock index 5022923..52f0115 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -265,6 +265,7 @@ dependencies = [ "rayon", "tabled", "tempfile", + "unrar", "walkdir", "zip", ] @@ -1991,6 +1992,29 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" +[[package]] +name = "unrar" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ec61343a630d2b50d13216dea5125e157d3fc180a7d3f447d22fe146b648fc" +dependencies = [ + "bitflags", + "regex", + "unrar_sys", + "widestring", +] + +[[package]] +name = "unrar_sys" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b77675b883cfbe6bf41e6b7a5cd6008e0a83ba497de3d96e41a064bbeead765" +dependencies = [ + "cc", + "libc", + "winapi", +] + [[package]] name = "utf8parse" version = "0.2.2" @@ -2125,6 +2149,28 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" 
+[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -2134,6 +2180,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.62.2" diff --git a/Cargo.toml b/Cargo.toml index bbebc9e..6faf9b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ tabled = "0.20.0" walkdir = "2.5.0" zip = "6.0.0" lopdf = "0.38.0" +unrar = "0.5.8" +tempfile = "3.12.0" [dev-dependencies] -tempfile = "3.12.0" diff --git a/src/formats/cbr.rs b/src/formats/cbr.rs new file mode 100644 index 0000000..9650d7f --- /dev/null +++ b/src/formats/cbr.rs @@ -0,0 +1,75 @@ +use std::ffi::OsStr; +use std::fs; +use std::io::Read; +use std::path::{Path, PathBuf}; + +use anyhow::{anyhow, Result}; +use walkdir::WalkDir; + +use crate::model::Document; + +use super::{CbxReader, FormatReader}; + +pub struct CbrReader; + +impl CbxReader for CbrReader { + fn extract_images(&self, input: &Path) -> Result)>> { + let tempdir = tempfile::tempdir()?; + let dest = tempdir.path().to_path_buf(); + { + use std::env; + use unrar::Archive; + + let cwd = env::current_dir()?; + let 
input_str = input.to_string_lossy().to_string(); + env::set_current_dir(&dest)?; + let mut archive = Archive::new(&input_str) + .open_for_processing() + .map_err(|e| anyhow!("Failed to open RAR for processing: {e}"))?; + + loop { + match archive.read_header() { + Ok(Some(header)) => { + archive = if header.entry().is_file() { + header + .extract() + .map_err(|e| anyhow!("Failed to extract entry: {e}"))? + } else { + header + .skip() + .map_err(|e| anyhow!("Failed to skip entry: {e}"))? + }; + } + Ok(None) => break, + Err(e) => { + let _ = env::set_current_dir(cwd); + return Err(anyhow!("Failed to read RAR header: {e}")); + } + } + } + env::set_current_dir(cwd)?; + } + + let mut files: Vec<(String, Vec)> = Vec::new(); + for entry in WalkDir::new(&dest).into_iter().filter_map(Result::ok) { + let path: PathBuf = entry.path().to_path_buf(); + if path.is_file() && path.extension() == Some(OsStr::new("jpg")) { + let mut data = Vec::new(); + fs::File::open(&path)?.read_to_end(&mut data)?; + let name = path + .file_name() + .and_then(OsStr::to_str) + .map(|s| s.to_string()) + .unwrap_or_else(|| path.display().to_string()); + files.push((name, data)); + } + } + Ok(files) + } +} + +impl FormatReader for CbrReader { + fn read(&self, input: &Path) -> Result { + self.read_cbx(input) + } +} diff --git a/src/formats/cbx.rs b/src/formats/cbx.rs new file mode 100644 index 0000000..5457bef --- /dev/null +++ b/src/formats/cbx.rs @@ -0,0 +1,30 @@ +use anyhow::Result; +use std::path::Path; + +use crate::model::Document; + +// Shared reader logic for CBx (CBZ/CBR) formats +pub trait CbxReader: Send + Sync { + // Implementors should return a list of (file_name, jpeg_bytes) + fn extract_images(&self, input: &Path) -> Result)>>; + + // Build a Document from extracted JPEG bytes + fn read_cbx(&self, input: &Path) -> Result { + let files = self.extract_images(input)?; + let mut pages: Vec = Vec::new(); + { + use rayon::prelude::*; + files + .par_iter() + .map(|(name, data)| 
crate::model::ImagePage { + name: name.clone(), + image: image::load_from_memory(data).expect("Failed to decode image"), + jpeg_dct: Some(data.clone()), + }) + .collect_into_vec(&mut pages); + + pages.par_sort_by_key(|p| p.name.clone()); + } + Ok(Document::new(pages)) + } +} diff --git a/src/formats/cbz.rs b/src/formats/cbz.rs index 8319496..fa52f3a 100644 --- a/src/formats/cbz.rs +++ b/src/formats/cbz.rs @@ -4,17 +4,16 @@ use std::io::{Read, Write}; use std::path::Path; use anyhow::Result; -use rayon::prelude::*; use zip::ZipArchive; -use crate::model::{Document, ImagePage}; +use crate::model::Document; -use super::{FormatReader, FormatWriter}; +use super::{CbxReader, FormatReader, FormatWriter}; pub struct CbzReader; -impl FormatReader for CbzReader { - fn read(&self, input: &Path) -> Result { +impl CbxReader for CbzReader { + fn extract_images(&self, input: &Path) -> Result)>> { let mut zip = ZipArchive::new(File::open(input)?)?; let mut files: Vec<(String, Vec)> = Vec::new(); for i in 0..zip.len() { @@ -35,20 +34,13 @@ impl FormatReader for CbzReader { )); } } + Ok(files) + } +} - let mut pages: Vec = Vec::new(); - files - .par_iter() - .map(|(name, data)| ImagePage { - name: name.clone(), - image: image::load_from_memory(data).expect("Failed to decode image"), - jpeg_dct: Some(data.clone()), - }) - .collect_into_vec(&mut pages); - - pages.par_sort_by_key(|p| p.name.clone()); - - Ok(Document::new(pages)) +impl FormatReader for CbzReader { + fn read(&self, input: &Path) -> Result { + self.read_cbx(input) } } diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 0bac2ce..69c4cdd 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -5,9 +5,14 @@ use anyhow::Result; use crate::model::Document; +pub mod cbr; +pub mod cbx; pub mod cbz; pub mod pdf; +pub use cbx::CbxReader; + +use cbr::CbrReader; use cbz::{CbzReader, CbzWriter}; use pdf::{PdfReader, PdfWriter}; @@ -15,6 +20,7 @@ use pdf::{PdfReader, PdfWriter}; pub enum FormatId { Cbz, Pdf, + Cbr, } impl 
FormatId { @@ -32,6 +38,7 @@ impl FormatId { match path.extension().and_then(OsStr::to_str) { Some("cbz") => Some(FormatId::Cbz), Some("pdf") => Some(FormatId::Pdf), + Some("cbr") => Some(FormatId::Cbr), _ => None, } } @@ -49,6 +56,7 @@ pub fn get_reader(format: FormatId) -> Option> { match format { FormatId::Cbz => Some(Box::new(CbzReader)), FormatId::Pdf => Some(Box::new(PdfReader)), + FormatId::Cbr => Some(Box::new(CbrReader)), } } @@ -56,5 +64,6 @@ pub fn get_writer(format: FormatId) -> Option> { match format { FormatId::Pdf => Some(Box::new(PdfWriter)), FormatId::Cbz => Some(Box::new(CbzWriter)), + FormatId::Cbr => None, } } diff --git a/src/job.rs b/src/job.rs index 7f2043d..49db051 100644 --- a/src/job.rs +++ b/src/job.rs @@ -21,6 +21,7 @@ impl Job { match to { FormatId::Pdf => output_path.set_extension("pdf"), FormatId::Cbz => output_path.set_extension("cbz"), + FormatId::Cbr => output_path.set_extension("cbr"), }; Self { diff --git a/tests/cbr_reader_tests.rs b/tests/cbr_reader_tests.rs new file mode 100644 index 0000000..c7289d9 --- /dev/null +++ b/tests/cbr_reader_tests.rs @@ -0,0 +1,23 @@ +use std::fs::File; +use std::io::Write; + +use cbz2pdf::formats::cbr::CbrReader; +use cbz2pdf::formats::FormatReader; + +// We cannot reliably create a RAR archive in tests (tools cannot create .rar). +// Instead, verify that the reader fails gracefully (returns an error) when given an invalid .cbr +// file. 
+#[test] +fn cbr_reader_errors_on_invalid_archive() { + let temp_dir = tempfile::tempdir().expect("create temp dir"); + let cbr_path = temp_dir.path().join("invalid.cbr"); + + // Write some junk that is definitely not a RAR archive + let mut f = File::create(&cbr_path).expect("create cbr"); + f.write_all(b"this is not a rar archive").unwrap(); + + let reader = CbrReader; + let res = reader.read(&cbr_path); + + assert!(res.is_err(), "CbrReader should error on invalid archives"); +} diff --git a/tests/job_and_format_tests.rs b/tests/job_and_format_tests.rs index 55c9742..7f572fc 100644 --- a/tests/job_and_format_tests.rs +++ b/tests/job_and_format_tests.rs @@ -7,8 +7,10 @@ use cbz2pdf::job::Job; fn detect_from_path_recognizes_extensions() { let cbz = PathBuf::from("/tmp/book.cbz"); let pdf = PathBuf::from("/tmp/book.pdf"); + let cbr = PathBuf::from("/tmp/book.cbr"); assert_eq!(FormatId::detect_from_path(&cbz), Some(FormatId::Cbz)); assert_eq!(FormatId::detect_from_path(&pdf), Some(FormatId::Pdf)); + assert_eq!(FormatId::detect_from_path(&cbr), Some(FormatId::Cbr)); assert_eq!( FormatId::detect_from_path(&PathBuf::from("/tmp/book.txt")), None @@ -24,11 +26,19 @@ fn job_new_sets_output_extension() { let job2 = Job::new( PathBuf::from("/tmp/book.pdf"), - outdir, + outdir.clone(), FormatId::Pdf, FormatId::Cbz, ); assert!(job2.output_path.ends_with("book.cbz")); + + let job3 = Job::new( + PathBuf::from("/tmp/book.cbz"), + outdir, + FormatId::Cbz, + FormatId::Cbr, + ); + assert!(job3.output_path.ends_with("book.cbr")); } #[test] @@ -37,4 +47,6 @@ fn format_capabilities_consistent() { assert!(FormatId::Cbz.can_write()); assert!(FormatId::Pdf.can_write()); assert!(FormatId::Pdf.can_read()); + assert!(FormatId::Cbr.can_read()); + assert!(!FormatId::Cbr.can_write()); }