Compare commits

...

3 commits

Author SHA1 Message Date
Renovate
acb1e37b6f chore(deps): lock file maintenance
All checks were successful
Build and test / Clippy (pull_request) Successful in 1m4s
Build and test / Tests (pull_request) Successful in 1m12s
Build and test / Build AMD64 (pull_request) Successful in 1m11s
Checking yaml / Run yamllint (pull_request) Successful in 8s
Checking Renovate configuration / validate (pull_request) Successful in 1m24s
Build and test / Generate Documentation (pull_request) Successful in 52s
2025-10-13 18:00:22 +11:00
Renovate
4b69124358 fix(deps): update rust crate lopdf to 0.38.0
All checks were successful
Build and test / Clippy (pull_request) Successful in 42s
Checking Renovate configuration / validate (pull_request) Successful in 1m1s
Checking yaml / Run yamllint (pull_request) Successful in 3s
Build and test / Tests (pull_request) Successful in 1m4s
Build and test / Build AMD64 (pull_request) Successful in 1m3s
Build and test / Generate Documentation (pull_request) Successful in 1m4s
2025-10-13 17:30:12 +11:00
b35ccbe271
feat: implement cbz writing and pdf reading
All checks were successful
Build and test / Clippy (pull_request) Successful in 44s
Build and test / Tests (pull_request) Successful in 48s
Checking yaml / Run yamllint (pull_request) Successful in 5s
Checking Renovate configuration / validate (pull_request) Successful in 1m4s
Build and test / Build AMD64 (pull_request) Successful in 49s
Build and test / Generate Documentation (pull_request) Successful in 59s
2025-10-13 16:57:14 +11:00
11 changed files with 1201 additions and 340 deletions

985
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@ rayon = "1.10.0"
tabled = "0.20.0"
walkdir = "2.5.0"
zip = "6.0.0"
lopdf = "0.38.0"
[dev-dependencies]
tempfile = "3.12.0"

12
flake.lock generated
View file

@ -57,11 +57,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1755615617,
"narHash": "sha256-HMwfAJBdrr8wXAkbGhtcby1zGFvs+StOp19xNsbqdOg=",
"lastModified": 1760038930,
"narHash": "sha256-Oncbh0UmHjSlxO7ErQDM3KM0A5/Znfofj2BSzlHLeVw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "20075955deac2583bb12f07151c2df830ef346b4",
"rev": "0b4defa2584313f3b781240b29d61f6f9f7e0df3",
"type": "github"
},
"original": {
@ -80,11 +80,11 @@
]
},
"locked": {
"lastModified": 1755960406,
"narHash": "sha256-RF7j6C1TmSTK9tYWO6CdEMtg6XZaUKcvZwOCD2SICZs=",
"lastModified": 1759523803,
"narHash": "sha256-PTod9NG+i3XbbnBKMl/e5uHDBYpwIWivQ3gOWSEuIEM=",
"owner": "cachix",
"repo": "git-hooks.nix",
"rev": "e891a93b193fcaf2fc8012d890dc7f0befe86ec2",
"rev": "cfc9f7bb163ad8542029d303e599c0f7eee09835",
"type": "github"
},
"original": {

View file

@ -1,6 +1,6 @@
use std::ffi::OsStr;
use std::fs::File;
use std::io::Read;
use std::io::{Read, Write};
use std::path::Path;
use anyhow::Result;
@ -9,7 +9,7 @@ use zip::ZipArchive;
use crate::model::{Document, ImagePage};
use super::FormatReader;
use super::{FormatReader, FormatWriter};
pub struct CbzReader;
@ -51,3 +51,40 @@ impl FormatReader for CbzReader {
Ok(Document::new(pages))
}
}
/// Writes a [`Document`] out as a CBZ archive (a zip of page images).
pub struct CbzWriter;

/// JPEG quality used when a page carries no original DCT stream and must be
/// re-encoded from its decoded pixels.
const CBZ_JPEG_QUALITY: u8 = 85;

impl FormatWriter for CbzWriter {
    /// Write every page of `doc` into a zip archive at `output`.
    ///
    /// Pages that still carry their original JPEG (DCT) bytes are stored
    /// verbatim, avoiding a lossy re-encode. Pages without original JPEG
    /// data are re-encoded to JPEG at [`CBZ_JPEG_QUALITY`].
    ///
    /// Page names that do not already have a JPEG extension (`.jpg` or
    /// `.jpeg`, case-insensitive) are replaced with a zero-padded sequential
    /// name (`001.jpg`, `002.jpg`, ...) so the entry name always matches the
    /// JPEG payload and entries sort in page order.
    fn write(&self, doc: &Document, output: &Path) -> Result<()> {
        use zip::write::SimpleFileOptions;

        let file = File::create(output)?;
        let mut zip = zip::ZipWriter::new(file);
        let options = SimpleFileOptions::default();

        for (idx, page) in doc.pages.iter().enumerate() {
            // Keep the page's own name only if it already looks like a JPEG;
            // otherwise assign a sequential, sortable name. The check is
            // case-insensitive and accepts both `.jpg` and `.jpeg` so that
            // correctly named pages are not renamed unnecessarily.
            let has_jpeg_ext = Path::new(&page.name)
                .extension()
                .and_then(OsStr::to_str)
                .is_some_and(|ext| {
                    ext.eq_ignore_ascii_case("jpg") || ext.eq_ignore_ascii_case("jpeg")
                });
            let name = if has_jpeg_ext {
                page.name.clone()
            } else {
                format!("{:03}.jpg", idx + 1)
            };

            zip.start_file(&name, options)?;
            if let Some(dct) = &page.jpeg_dct {
                // Original JPEG bytes are available: store them losslessly.
                zip.write_all(dct)?;
            } else {
                // No original JPEG stream: encode the decoded image to JPEG.
                let rgb = page.image.to_rgb8();
                let (w, h) = (rgb.width(), rgb.height());
                let mut cursor = std::io::Cursor::new(Vec::new());
                {
                    let mut enc = image::codecs::jpeg::JpegEncoder::new_with_quality(
                        &mut cursor,
                        CBZ_JPEG_QUALITY,
                    );
                    enc.encode(&rgb.into_raw(), w, h, image::ColorType::Rgb8.into())?;
                }
                zip.write_all(&cursor.into_inner())?;
            }
        }
        zip.finish()?;
        Ok(())
    }
}

View file

@ -8,6 +8,9 @@ use crate::model::Document;
pub mod cbz;
pub mod pdf;
use cbz::{CbzReader, CbzWriter};
use pdf::{PdfReader, PdfWriter};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatId {
Cbz,
@ -17,18 +20,12 @@ pub enum FormatId {
impl FormatId {
#[allow(dead_code)]
pub fn can_read(self) -> bool {
match self {
FormatId::Cbz => true,
FormatId::Pdf => false, // planned but not implemented yet
}
get_reader(self).is_some()
}
#[allow(dead_code)]
pub fn can_write(self) -> bool {
match self {
FormatId::Pdf => true,
FormatId::Cbz => false, // planned but not implemented yet
}
get_writer(self).is_some()
}
pub fn detect_from_path(path: &Path) -> Option<FormatId> {
@ -47,3 +44,17 @@ pub trait FormatReader: Send + Sync {
pub trait FormatWriter: Send + Sync {
fn write(&self, doc: &Document, output: &Path) -> Result<()>;
}
/// Look up the reader implementation registered for `format`.
///
/// Returns `None` when no reader exists for the format; `FormatId::can_read`
/// is derived from this registry.
pub fn get_reader(format: FormatId) -> Option<Box<dyn FormatReader>> {
    let reader: Box<dyn FormatReader> = match format {
        FormatId::Cbz => Box::new(CbzReader),
        FormatId::Pdf => Box::new(PdfReader),
    };
    Some(reader)
}
/// Look up the writer implementation registered for `format`.
///
/// Returns `None` when no writer exists for the format; `FormatId::can_write`
/// is derived from this registry.
pub fn get_writer(format: FormatId) -> Option<Box<dyn FormatWriter>> {
    let writer: Box<dyn FormatWriter> = match format {
        FormatId::Cbz => Box::new(CbzWriter),
        FormatId::Pdf => Box::new(PdfWriter),
    };
    Some(writer)
}

View file

@ -1,12 +1,13 @@
use anyhow::Result;
use image::{codecs::jpeg::JpegEncoder, ColorType};
use image::codecs::jpeg::JpegEncoder;
use image::ColorType;
use pdf_writer::{Content, Filter, Finish, Name, Pdf, Rect, Ref};
use std::io::Cursor;
use std::path::Path;
use crate::model::Document;
use crate::model::{Document, ImagePage};
use super::FormatWriter;
use super::{FormatReader, FormatWriter};
pub struct PdfWriter;
@ -77,3 +78,194 @@ impl FormatWriter for PdfWriter {
Ok(())
}
}
/// Reads image pages back out of a PDF by extracting image XObjects.
pub struct PdfReader;

impl FormatReader for PdfReader {
    /// Load `input` with lopdf and extract at most one raster image per PDF page.
    ///
    /// For each page the reader resolves the `Resources` -> `XObject`
    /// dictionary (either may be a reference or inline), prefers the XObject
    /// actually invoked by the page's content stream(s) (`/Name Do`), and
    /// takes the first image stream that decodes. DCTDecode (JPEG) streams
    /// keep their raw bytes in `jpeg_dct` so a later writer can store them
    /// losslessly. Pages with no usable image are silently skipped, so the
    /// returned document may contain fewer pages than the PDF.
    fn read(&self, input: &Path) -> Result<Document> {
        use lopdf::{Document as LoDocument, Object};
        let doc = LoDocument::load(input)?;
        let pages_map = doc.get_pages();
        let mut image_pages: Vec<ImagePage> = Vec::new();
        for (idx, (_page_num, page_id)) in pages_map.iter().enumerate() {
            // Fetch page object
            let page_obj = doc.get_object(*page_id)?;
            let page_dict = match page_obj.as_dict() {
                Ok(d) => d,
                // Page object is not a dictionary: malformed entry, skip it.
                Err(_) => continue,
            };
            // Resolve Resources dictionary (can be a reference or inline dict).
            // `content_refs` collects the XObject names the content streams
            // actually draw, in drawing order.
            let (mut xobjects_opt, mut content_refs): (Option<lopdf::Dictionary>, Vec<Vec<u8>>) =
                (None, Vec::new());
            if let Ok(obj) = page_dict.get(b"Resources") {
                match obj {
                    Object::Reference(id) => {
                        if let Ok(Object::Dictionary(d)) = doc.get_object(*id) {
                            // Extract XObject dict if present; it too may be a
                            // reference or an inline dictionary.
                            if let Ok(Object::Reference(xid)) = d.get(b"XObject") {
                                if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) {
                                    xobjects_opt = Some(xd.clone());
                                }
                            } else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") {
                                xobjects_opt = Some(xd.clone());
                            }
                        }
                    }
                    Object::Dictionary(d) => {
                        // Same XObject extraction for an inline Resources dict.
                        if let Ok(Object::Reference(xid)) = d.get(b"XObject") {
                            if let Ok(Object::Dictionary(xd)) = doc.get_object(*xid) {
                                xobjects_opt = Some(xd.clone());
                            }
                        } else if let Ok(Object::Dictionary(xd)) = d.get(b"XObject") {
                            xobjects_opt = Some(xd.clone());
                        }
                    }
                    _ => {}
                }
            }
            // Try to track which XObjects are used by parsing Content streams for /Name Do
            if let Ok(contents_obj) = page_dict.get(b"Contents") {
                match contents_obj {
                    Object::Reference(cid) => {
                        if let Ok(Object::Stream(stream)) = doc.get_object(*cid) {
                            content_refs.extend(extract_xobject_names(&stream.content));
                        }
                    }
                    Object::Array(arr) => {
                        // A page may split its content across several streams.
                        for o in arr {
                            if let Object::Reference(cid) = o {
                                if let Ok(Object::Stream(stream)) = doc.get_object(*cid) {
                                    content_refs.extend(extract_xobject_names(&stream.content));
                                }
                            }
                        }
                    }
                    Object::Stream(stream) => {
                        content_refs.extend(extract_xobject_names(&stream.content));
                    }
                    _ => {}
                }
            }
            // If we have XObjects, pick the first image (prefer one referenced in content)
            if let Some(xobjects) = xobjects_opt {
                // Build ordered keys: first those referenced in content, then the rest
                let mut keys: Vec<Vec<u8>> = xobjects.iter().map(|(k, _)| k.clone()).collect();
                // Stable sort by whether referenced first.
                // NOTE(review): stream.content is assumed already decoded by
                // lopdf here, and the `as i32` cast would wrap only for
                // absurdly long reference lists — both look fine in practice
                // but worth confirming against lopdf's docs.
                keys.sort_by_key(|k| {
                    // Dictionary keys may or may not carry the leading '/'
                    // depending on how they were parsed; normalize before
                    // comparing against names scanned from the content stream.
                    let name = if k.starts_with(b"/") {
                        k[1..].to_vec()
                    } else {
                        k.clone()
                    };
                    match content_refs.iter().position(|r| *r == name) {
                        Some(pos) => pos as i32,
                        None => i32::MAX,
                    }
                });
                for key in keys {
                    if let Ok(&Object::Reference(obj_id)) = xobjects.get(&key) {
                        if let Ok(Object::Stream(stream)) = doc.get_object(obj_id) {
                            let dict = &stream.dict;
                            // Only /Subtype /Image XObjects are of interest
                            // (forms etc. are skipped).
                            let is_image = matches!(dict.get(b"Subtype"), Ok(Object::Name(n)) if n == b"Image");
                            if !is_image {
                                continue;
                            }
                            // DCTDecode means the stream body is raw JPEG; the
                            // filter entry may be a single name or a filter array.
                            let is_dct = match dict.get(b"Filter") {
                                Ok(Object::Name(n)) => n == b"DCTDecode",
                                Ok(Object::Array(arr)) => arr
                                    .iter()
                                    .any(|o| matches!(o, Object::Name(n) if n == b"DCTDecode")),
                                _ => false,
                            };
                            let data = stream.content.clone();
                            if is_dct {
                                if let Ok(img) = image::load_from_memory(&data) {
                                    // Preserve the original JPEG bytes so a
                                    // CBZ writer can store them losslessly.
                                    let name = format!("{:03}.jpg", idx + 1);
                                    image_pages.push(ImagePage {
                                        name,
                                        image: img,
                                        jpeg_dct: Some(data),
                                    });
                                    break;
                                } else {
                                    // If JPEG parsing failed, skip
                                    continue;
                                }
                            } else if let Ok(img) = image::load_from_memory(&data) {
                                // Fallback: try to decode arbitrary image stream
                                let name = format!("{:03}.jpg", idx + 1);
                                image_pages.push(ImagePage {
                                    name,
                                    image: img,
                                    jpeg_dct: None,
                                });
                                break;
                            }
                        }
                    }
                }
            }
        }
        Ok(Document::new(image_pages))
    }
}
// Helper to extract XObject names used in a content stream by scanning for "/Name Do"
/// Extract the names of XObjects invoked via the `Do` operator from a PDF
/// content stream, in order of appearance.
///
/// This is a deliberately lightweight scanner rather than a full PDF
/// tokenizer: it walks the stream token by token, remembers the most recent
/// name token (`/Name`), and records it whenever the *immediately following*
/// token is exactly the operator `Do`. Any other intervening token clears
/// the pending name.
///
/// Compared with a pure whitespace split, two PDF lexical rules are honored:
/// name and operator tokens are terminated by the PDF delimiter characters
/// `( ) < > [ ] { } / %` as well as whitespace, and the operator must be
/// exactly `Do` (so `Do5` or `Done` never match).
///
/// Returns the matched names without their leading `/`, possibly with
/// duplicates if the same XObject is drawn more than once.
fn extract_xobject_names(content: &[u8]) -> Vec<Vec<u8>> {
    /// PDF delimiter characters (ISO 32000-1, section 7.2.2): they end the
    /// current token without being part of it.
    fn is_delim(b: u8) -> bool {
        matches!(b, b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%')
    }

    let mut names: Vec<Vec<u8>> = Vec::new();
    // Name seen most recently, still eligible to be the operand of `Do`.
    let mut pending: Option<Vec<u8>> = None;
    let mut i = 0;
    while i < content.len() {
        let b = content[i];
        if b.is_ascii_whitespace() {
            i += 1;
        } else if b == b'/' {
            // Name token: consume up to the next whitespace or delimiter.
            let start = i + 1;
            i = start;
            while i < content.len()
                && !content[i].is_ascii_whitespace()
                && !is_delim(content[i])
            {
                i += 1;
            }
            pending = Some(content[start..i].to_vec());
        } else if is_delim(b) {
            // A lone delimiter (array/string/comment syntax) separates the
            // name from any later `Do`, so the pending name is no longer
            // the direct operand.
            pending = None;
            i += 1;
        } else {
            // Regular token (operator or number): consume it whole.
            let start = i;
            while i < content.len()
                && !content[i].is_ascii_whitespace()
                && !is_delim(content[i])
            {
                i += 1;
            }
            if &content[start..i] == b"Do" {
                if let Some(name) = pending.take() {
                    names.push(name);
                }
            } else {
                // Some other operator consumed the operand stack.
                pending = None;
            }
        }
    }
    names
}

View file

@ -5,9 +5,7 @@ use anyhow::Result;
use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use crate::formats::cbz::CbzReader;
use crate::formats::pdf::PdfWriter;
use crate::formats::{FormatId, FormatReader, FormatWriter};
use crate::formats::{get_reader, get_writer, FormatId};
#[derive(Debug, Clone)]
pub struct Job {
@ -34,22 +32,6 @@ impl Job {
}
}
fn get_reader(format: FormatId) -> Box<dyn FormatReader> {
match format {
FormatId::Cbz => Box::new(CbzReader),
// Placeholder for future formats
FormatId::Pdf => unimplemented!("Reading PDF not implemented"),
}
}
fn get_writer(format: FormatId) -> Box<dyn FormatWriter> {
match format {
FormatId::Pdf => Box::new(PdfWriter),
// Placeholder for future formats
FormatId::Cbz => unimplemented!("Writing CBZ not implemented"),
}
}
pub fn process_jobs(jobs: Vec<Job>) -> Result<()> {
let pb = ProgressBar::new(jobs.len() as u64);
pb.enable_steady_tick(Duration::from_millis(300));
@ -59,8 +41,8 @@ pub fn process_jobs(jobs: Vec<Job>) -> Result<()> {
jobs.par_iter().for_each(|job| {
// Build the pipeline for each job
let reader = get_reader(job.from);
let writer = get_writer(job.to);
let reader = get_reader(job.from).expect("No reader registered for selected input format");
let writer = get_writer(job.to).expect("No writer registered for selected output format");
let doc = reader.read(&job.input_path).expect("Failed to read input");
writer

View file

@ -11,6 +11,21 @@ use walkdir::WalkDir;
use cbz2pdf::formats::FormatId;
use cbz2pdf::job::{process_jobs, Job};
/// Formats selectable on the command line via `--from` / `--to`.
///
/// Presumably kept as a separate enum (rather than deriving `ValueEnum` on
/// `FormatId` directly) so the core format module stays free of clap
/// dependencies — confirm against the formats module.
#[derive(clap::ValueEnum, Clone, Debug)]
enum CliFormat {
    Cbz,
    Pdf,
}
impl From<CliFormat> for FormatId {
    /// Map the CLI-facing format choice onto the core [`FormatId`].
    fn from(value: CliFormat) -> Self {
        match value {
            CliFormat::Pdf => FormatId::Pdf,
            CliFormat::Cbz => FormatId::Cbz,
        }
    }
}
#[derive(Parser)]
#[command()]
struct Cli {
@ -18,7 +33,7 @@ struct Cli {
short = 'i',
long = "input",
value_hint = ValueHint::FilePath,
help = "Path to CBZ file or directory containing CBZ files"
help = "Path to input file or directory"
)]
input_path: String,
@ -27,10 +42,20 @@ struct Cli {
long = "output-directory",
default_value = ".",
value_hint = ValueHint::FilePath,
help = "Output directory for PDF files"
help = "Output directory for converted files"
)]
output_dir: String,
#[arg(
long = "from",
value_enum,
help = "Input format. If omitted, auto-detect from file extension"
)]
from: Option<CliFormat>,
#[arg(long = "to", value_enum, default_value = "pdf", help = "Output format")]
to: CliFormat,
#[arg(short = 'p', long, help = "Ask for confirmation before doing anything")]
interactive: bool,
}
@ -43,29 +68,51 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let input_path = Path::new(&cli.input_path);
let output_dir = Path::new(&cli.output_dir);
let from_opt: Option<FormatId> = cli.from.map(Into::into);
let to_fmt: FormatId = cli.to.into();
// Validate target capability early
if !to_fmt.can_write() {
eprintln!("Selected output format is not supported for writing: {to_fmt:?}");
std::process::exit(1);
}
let mut jobs: Vec<Job> = Vec::new();
if input_path.is_file() {
if let Some(FormatId::Cbz) = FormatId::detect_from_path(input_path) {
jobs.push(Job::new(
input_path.to_path_buf(),
output_dir.to_path_buf(),
FormatId::Cbz,
FormatId::Pdf,
));
} else {
eprintln!("Unsupported input file format");
let detected = FormatId::detect_from_path(input_path);
let from_fmt = from_opt.or(detected).unwrap_or_else(|| {
eprintln!(
"Could not detect input format from file extension and no --from was provided"
);
std::process::exit(1);
});
if !from_fmt.can_read() {
eprintln!("Selected/Detected input format is not supported for reading: {from_fmt:?}");
std::process::exit(1);
}
jobs.push(Job::new(
input_path.to_path_buf(),
output_dir.to_path_buf(),
from_fmt,
to_fmt,
));
} else if input_path.is_dir() {
jobs.extend(walk_directory(input_path, output_dir));
jobs.extend(walk_directory(input_path, output_dir, from_opt, to_fmt));
} else {
eprintln!(
"Invalid input path. Please provide a CBZ file or a directory containing CBZ files."
"Invalid input path. Please provide a valid file or a directory containing supported files."
);
std::process::exit(1);
}
if jobs.is_empty() {
eprintln!("No matching inputs found to process.");
std::process::exit(1);
}
jobs.sort_by_key(|j| j.input_path.clone().into_os_string().into_string());
let proceed = if cli.interactive {
@ -105,20 +152,33 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
fn walk_directory(directory: &Path, output_dir: &Path) -> Vec<Job> {
fn walk_directory(
directory: &Path,
output_dir: &Path,
from_opt: Option<FormatId>,
to_fmt: FormatId,
) -> Vec<Job> {
debug!("Walking {directory:?}");
let mut jobs = Vec::new();
for entry in WalkDir::new(directory) {
let entry = entry.unwrap();
let path = entry.path();
if path.is_file() {
if let Some(FormatId::Cbz) = FormatId::detect_from_path(path) {
jobs.push(Job::new(
path.to_path_buf(),
output_dir.to_path_buf(),
FormatId::Cbz,
FormatId::Pdf,
));
let detected = FormatId::detect_from_path(path);
let from_fmt_opt = match from_opt {
Some(fixed) => detected.filter(|d| *d == fixed),
None => detected,
};
if let Some(from_fmt) = from_fmt_opt {
if from_fmt.can_read() && to_fmt.can_write() {
jobs.push(Job::new(
path.to_path_buf(),
output_dir.to_path_buf(),
from_fmt,
to_fmt,
));
}
}
}
}

96
tests/cbz_writer_tests.rs Normal file
View file

@ -0,0 +1,96 @@
use std::fs::File;
use std::io::Read;
use cbz2pdf::formats::cbz::CbzWriter;
use cbz2pdf::formats::FormatWriter;
use cbz2pdf::model::{Document, ImagePage};
/// Build a 1x1 JPEG fixture, returning both the encoded bytes and the image
/// decoded back from exactly those bytes.
fn make_tiny_jpeg() -> (Vec<u8>, image::DynamicImage) {
    let pixels = image::DynamicImage::new_rgb8(1, 1).to_rgb8();
    let mut encoded = Vec::new();
    {
        let mut cursor = std::io::Cursor::new(&mut encoded);
        image::codecs::jpeg::JpegEncoder::new_with_quality(&mut cursor, 80)
            .encode(&pixels, 1, 1, image::ColorType::Rgb8.into())
            .unwrap();
    }
    let roundtripped = image::load_from_memory(&encoded).unwrap();
    (encoded, roundtripped)
}
#[test]
fn cbz_writer_preserves_dct_and_renames_non_jpg() {
    // A page whose name is not .jpg but whose payload is original JPEG data:
    // the writer must rename the entry to 001.jpg and keep the bytes intact.
    let (original_bytes, decoded) = make_tiny_jpeg();
    let doc = Document::new(vec![ImagePage {
        name: "cover.png".to_string(),
        image: decoded,
        jpeg_dct: Some(original_bytes.clone()),
    }]);

    let tmp = tempfile::tempdir().expect("create temp dir");
    let out_path = tmp.path().join("out.cbz");
    CbzWriter.write(&doc, &out_path).expect("write cbz");

    // Re-open the archive and locate the renamed entry.
    let mut archive = zip::ZipArchive::new(File::open(&out_path).unwrap()).unwrap();
    let mut matched = false;
    for i in 0..archive.len() {
        let mut entry = archive.by_index(i).unwrap();
        let entry_path = entry.enclosed_name().unwrap().to_owned();
        if entry_path.file_name().unwrap() == "001.jpg" {
            let mut payload = Vec::new();
            entry.read_to_end(&mut payload).unwrap();
            assert_eq!(
                payload, original_bytes,
                "writer should preserve original JPEG DCT bytes"
            );
            matched = true;
        }
    }
    assert!(matched, "001.jpg not found in zip");
}
#[test]
fn cbz_writer_keeps_jpg_name() {
    // A page that already carries a .jpg name must keep that exact name.
    let (bytes, decoded) = make_tiny_jpeg();
    let doc = Document::new(vec![ImagePage {
        name: "page01.jpg".to_string(),
        image: decoded,
        jpeg_dct: Some(bytes),
    }]);

    let tmp = tempfile::tempdir().expect("create temp dir");
    let out_path = tmp.path().join("out.cbz");
    CbzWriter.write(&doc, &out_path).expect("write cbz");

    // Collect every entry name from the archive we just wrote.
    let mut archive = zip::ZipArchive::new(File::open(&out_path).unwrap()).unwrap();
    let mut entry_names: Vec<String> = Vec::new();
    for i in 0..archive.len() {
        let entry = archive.by_index(i).unwrap();
        let file_name = entry
            .enclosed_name()
            .unwrap()
            .file_name()
            .unwrap()
            .to_string_lossy()
            .to_string();
        entry_names.push(file_name);
    }
    assert_eq!(
        entry_names,
        vec!["page01.jpg"],
        "existing .jpg name should be kept"
    );
}

View file

@ -34,7 +34,7 @@ fn job_new_sets_output_extension() {
#[test]
fn format_capabilities_consistent() {
assert!(FormatId::Cbz.can_read());
assert!(!FormatId::Cbz.can_write());
assert!(FormatId::Cbz.can_write());
assert!(FormatId::Pdf.can_write());
assert!(!FormatId::Pdf.can_read());
assert!(FormatId::Pdf.can_read());
}

51
tests/pdf_reader_tests.rs Normal file
View file

@ -0,0 +1,51 @@
use cbz2pdf::formats::pdf::{PdfReader, PdfWriter};
use cbz2pdf::formats::{FormatReader, FormatWriter};
use cbz2pdf::model::{Document, ImagePage};
/// Encode a solid-color `w` x `h` JPEG, returning the encoded bytes together
/// with the (pre-encode) dynamic image.
fn make_small_jpeg(w: u32, h: u32, rgb: [u8; 3]) -> (Vec<u8>, image::DynamicImage) {
    let buffer = image::ImageBuffer::from_pixel(w, h, image::Rgb(rgb));
    let dynimg = image::DynamicImage::ImageRgb8(buffer);
    let mut encoded = Vec::new();
    {
        let mut cursor = std::io::Cursor::new(&mut encoded);
        let mut enc = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut cursor, 85);
        enc.encode(&dynimg.to_rgb8(), w, h, image::ColorType::Rgb8.into())
            .unwrap();
    }
    (encoded, dynimg)
}
#[test]
fn pdf_reader_extracts_jpeg_xobject_and_preserves_dct() {
    // Round-trip: write a single JPEG-backed page to a PDF, then read it back.
    let (jpeg_bytes, source_img) = make_small_jpeg(3, 2, [10, 20, 30]);
    let doc = Document::new(vec![ImagePage {
        name: "p1.jpg".into(),
        image: source_img.clone(),
        jpeg_dct: Some(jpeg_bytes.clone()),
    }]);

    let tmp = tempfile::tempdir().expect("tmpdir");
    let pdf_path = tmp.path().join("in.pdf");
    PdfWriter.write(&doc, &pdf_path).expect("write pdf");

    let roundtripped = PdfReader.read(&pdf_path).expect("read pdf");
    assert_eq!(roundtripped.pages.len(), 1, "should have one page extracted");

    let page = &roundtripped.pages[0];
    // Dimensions must survive the round trip.
    assert_eq!(page.image.width(), source_img.width());
    assert_eq!(page.image.height(), source_img.height());
    // The raw DCT stream must be preserved byte-for-byte.
    let dct = page
        .jpeg_dct
        .as_ref()
        .expect("should preserve DCT for JPEG images");
    assert_eq!(dct, &jpeg_bytes, "JPEG bytes should match");
}