cbz2pdf/src/main.rs
2024-10-15 17:30:02 +11:00

238 lines
6.6 KiB
Rust

use anyhow::Result;
use clap::{Parser, ValueHint};
use dialoguer::theme::ColorfulTheme;
use dialoguer::Confirm;
use image::DynamicImage;
use indicatif::{ProgressBar, ProgressStyle};
use log::*;
use pdf_writer::{Content, Filter, Finish, Name, Pdf, Rect, Ref};
use rayon::prelude::*;
use std::ffi::OsStr;
use std::fs::File;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::time::Duration;
use tabled::builder::Builder;
use tabled::settings::Style;
use walkdir::WalkDir;
use zip::ZipArchive;
// Command-line arguments, parsed with clap's derive API.
//
// Plain `//` comments are used here on purpose: clap turns `///` doc comments
// on a `Parser` struct and its fields into about/help text, which would change
// the generated `--help` output.
#[derive(Parser)]
#[command()]
struct Cli {
    // Either a single CBZ file or a directory that is searched for CBZ files,
    // hence the `AnyPath` completion hint rather than `FilePath`.
    #[arg(
        short = 'i',
        long = "input",
        value_hint = ValueHint::AnyPath,
        help = "Path to CBZ file or directory containing CBZ files"
    )]
    input_path: String,
    // Directory that receives the generated PDFs; defaults to the current
    // directory. Hinted as a directory so shell completion offers directories.
    #[arg(
        short = 'o',
        long = "output-directory",
        default_value = ".",
        value_hint = ValueHint::DirPath,
        help = "Output directory for PDF files"
    )]
    output_dir: String,
    // When set, the planned conversions are listed and must be confirmed.
    #[arg(short = 'p', long, help = "Ask for confirmation before doing anything")]
    interactive: bool,
}
/// Program entry point.
///
/// Builds the list of conversion jobs from the command-line arguments: a single
/// job when the input is a file with the `cbz` extension, or one job per CBZ
/// file found when the input is a directory. Any other input prints an error
/// message and exits with status 1.
///
/// In interactive mode the planned conversions are printed as a table and the
/// user is asked to confirm before anything is converted.
///
/// # Errors
///
/// Returns an error if the confirmation prompt fails or if processing the jobs
/// fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    pretty_env_logger::init();
    let cli = Cli::parse();
    let input_path = Path::new(&cli.input_path);
    let output_dir = Path::new(&cli.output_dir);

    let mut jobs = Vec::new();
    if input_path.is_file() && input_path.extension() == Some(OsStr::new("cbz")) {
        jobs.push(Job::new(input_path.to_path_buf(), output_dir.to_path_buf()));
    } else if input_path.is_dir() {
        jobs.extend(walk_directory(input_path, output_dir));
    } else {
        eprintln!(
            "Invalid input path. Please provide a CBZ file or a directory containing CBZ files."
        );
        std::process::exit(1);
    }

    // Order the jobs by source path so the listing is deterministic. Comparing
    // the `OsStr` directly avoids cloning every path on each comparison.
    jobs.sort_by(|a, b| a.cbz_path.as_os_str().cmp(b.cbz_path.as_os_str()));

    let proceed = if cli.interactive {
        let mut table_builder = Builder::default();
        table_builder.push_record(["From", "To"]);
        for job in &jobs {
            // Lossy conversion: a path that is not valid UTF-8 is still shown
            // (with replacement characters) instead of panicking.
            table_builder.push_record([
                job.cbz_path.to_string_lossy().into_owned(),
                job.pdf_path.to_string_lossy().into_owned(),
            ]);
        }
        let mut table = table_builder.build();
        table.with(Style::rounded());
        println!("{}", table);
        Confirm::with_theme(&ColorfulTheme::default())
            .with_prompt("Convert?")
            .interact()?
    } else {
        true
    };

    if proceed {
        process_jobs(jobs)?;
    }
    Ok(())
}
fn walk_directory(directory: &Path, output_dir: &Path) -> Vec<Job> {
debug!("Walking {:?}", directory);
let mut jobs = Vec::new();
for entry in WalkDir::new(directory) {
let entry = entry.unwrap();
let path = entry.path();
if path.is_file() && path.extension() == Some(OsStr::new("cbz")) {
jobs.push(Job::new(path.to_path_buf(), output_dir.to_path_buf()));
}
}
jobs
}
/// An image entry read out of a CBZ archive, still in its encoded form.
struct ImageFile {
    /// File name of the archive entry (final path component only).
    pub name: String,
    /// Raw bytes of the entry exactly as stored in the archive.
    pub data: Vec<u8>,
}
/// An archive image together with its decoded representation.
struct DecodedImageFile {
    /// File name of the archive entry the image came from.
    pub name: String,
    /// The original encoded bytes, kept alongside the decoded image.
    pub data: Vec<u8>,
    /// The image decoded from `data`.
    pub image: DynamicImage,
}
impl From<&ImageFile> for DecodedImageFile {
fn from(value: &ImageFile) -> Self {
let image = image::load_from_memory(&value.data).unwrap();
Self {
name: value.name.clone(),
data: value.data.clone(),
image,
}
}
}
/// A single conversion task: one CBZ archive and the PDF it is written to.
struct Job {
    /// Location of the source CBZ archive.
    pub cbz_path: PathBuf,
    /// Location of the PDF that will be produced.
    pub pdf_path: PathBuf,
}

impl Job {
    /// Builds a job whose PDF is placed in `output_dir` and named after the
    /// archive's file name with its final extension replaced by `pdf`.
    ///
    /// NOTE(review): archives that share a file name but live in different
    /// directories map to the same PDF path — confirm whether that is intended.
    ///
    /// # Panics
    ///
    /// Panics if `cbz_path` has no file name component.
    fn new(cbz_path: PathBuf, output_dir: PathBuf) -> Self {
        let archive_name = cbz_path.file_name().unwrap();
        let pdf_path = output_dir.join(archive_name).with_extension("pdf");
        Self { cbz_path, pdf_path }
    }
}
fn convert_cbz(cbz_path: &Path, output_path: &Path) -> Result<()> {
let a4 = Rect::new(0.0, 0.0, 595.0, 842.0);
let mut zip = ZipArchive::new(File::open(cbz_path)?)?;
let mut files = Vec::new();
for i in 0..zip.len() {
let mut file = zip.by_index(i)?;
let mut image_data = Vec::new();
let name = file.enclosed_name().expect("Failed to read file name");
if name.extension() == Some(OsStr::new("jpg")) {
file.read_to_end(&mut image_data)?;
files.push(ImageFile {
name: name
.file_name()
.expect("Failed to read file name")
.to_string_lossy()
.to_string(),
data: image_data,
});
}
}
let mut images = Vec::new();
files
.par_iter()
.map(DecodedImageFile::from)
.collect_into_vec(&mut images);
images.par_sort_by_key(|img| img.name.clone());
let mut pdf = Pdf::new();
let catalog_id = Ref::new(1);
let page_tree_id = Ref::new(2);
pdf.catalog(catalog_id).pages(page_tree_id);
let mut pages = Vec::new();
let image_count = images.len();
for (pos, image) in images.iter().enumerate() {
let page_id = Ref::new(pos as i32 + 10);
let image_id = Ref::new(image_count as i32 + 10 + pos as i32);
let content_id = Ref::new(image_count as i32 * 3 + 10 + pos as i32);
pages.push(page_id);
let mut page = pdf.page(page_id);
let image_name = Name(b"Im1");
page.media_box(a4);
page.parent(page_tree_id);
page.contents(content_id);
page.resources().x_objects().pair(image_name, image_id);
page.finish();
let mut pdf_image = pdf.image_xobject(image_id, &image.data);
pdf_image.filter(Filter::DctDecode);
pdf_image.width(image.image.width() as i32);
pdf_image.height(image.image.height() as i32);
pdf_image.color_space().device_rgb();
pdf_image.bits_per_component(8);
pdf_image.finish();
let mut content = Content::new();
content.save_state();
content.transform([a4.x2, 0.0, 0.0, a4.y2, 0.0, 0.0]);
content.x_object(image_name);
content.restore_state();
pdf.stream(content_id, &content.finish());
}
let page_count = pages.len();
pdf.pages(page_tree_id).kids(pages).count(page_count as i32);
std::fs::write(output_path, pdf.finish())?;
Ok(())
}
/// Converts all `jobs` in parallel while showing a progress bar.
///
/// The bar advances by one for every archive that converts successfully.
///
/// # Errors
///
/// Returns an error if the progress-bar template is invalid, or if a
/// conversion fails; a conversion error is annotated with the path of the
/// archive that failed rather than panicking inside the worker thread.
fn process_jobs(jobs: Vec<Job>) -> Result<()> {
    let pb = ProgressBar::new(jobs.len() as u64);
    pb.enable_steady_tick(Duration::from_millis(300));
    pb.set_style(ProgressStyle::with_template(
        "[{elapsed_precise}] {wide_bar} {pos:>7}/{len:7} {msg}",
    )?);

    jobs.par_iter().try_for_each(|job| -> Result<()> {
        convert_cbz(&job.cbz_path, &job.pdf_path).map_err(|err| {
            err.context(format!("failed to convert {}", job.cbz_path.display()))
        })?;
        pb.inc(1);
        Ok(())
    })?;

    pb.finish();
    Ok(())
}