Persist links to a database

This commit is contained in:
Marc Plano-Lesay 2025-04-30 21:45:08 +10:00
parent 17fb0c1856
commit b157985bf3
Signed by: kernald
GPG key ID: 66A41B08CC62A6CF
10 changed files with 451 additions and 44 deletions

266
Cargo.lock generated
View file

@ -147,9 +147,9 @@ checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytemuck"
version = "1.22.0"
version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540"
checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c"
[[package]]
name = "bytes"
@ -159,9 +159,9 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "cc"
version = "1.2.19"
version = "1.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362"
checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a"
dependencies = [
"shlex",
]
@ -174,9 +174,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.40"
version = "0.4.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
dependencies = [
"android-tzdata",
"iana-time-zone",
@ -189,9 +189,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.32"
version = "4.5.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83"
checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071"
dependencies = [
"clap_builder",
"clap_derive",
@ -199,9 +199,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.32"
version = "4.5.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8"
checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2"
dependencies = [
"anstream",
"anstyle",
@ -276,6 +276,95 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "darling"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "deranged"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
dependencies = [
"powerfmt",
]
[[package]]
name = "diesel"
version = "2.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff3e1edb1f37b4953dd5176916347289ed43d7119cc2e6c7c3f7849ff44ea506"
dependencies = [
"chrono",
"diesel_derives",
"libsqlite3-sys",
"time",
]
[[package]]
name = "diesel_derives"
version = "2.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68d4216021b3ea446fd2047f5c8f8fe6e98af34508a254a01e4d6bc1e844f84d"
dependencies = [
"diesel_table_macro_syntax",
"dsl_auto_type",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "diesel_migrations"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a73ce704bad4231f001bff3314d91dce4aba0770cee8b233991859abc15c1f6"
dependencies = [
"diesel",
"migrations_internals",
"migrations_macros",
]
[[package]]
name = "diesel_table_macro_syntax"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25"
dependencies = [
"syn",
]
[[package]]
name = "directories"
version = "6.0.0"
@ -308,6 +397,26 @@ dependencies = [
"syn",
]
[[package]]
name = "dsl_auto_type"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "139ae9aca7527f85f26dd76483eb38533fd84bd571065da1739656ef71c5ff5b"
dependencies = [
"darling",
"either",
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -359,7 +468,7 @@ dependencies = [
"pear",
"serde",
"serde_json",
"toml 0.8.20",
"toml",
"uncased",
"version_check",
]
@ -715,6 +824,12 @@ dependencies = [
"syn",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "1.0.3"
@ -808,6 +923,16 @@ dependencies = [
"libc",
]
[[package]]
name = "libsqlite3-sys"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "947e6816f7825b2b45027c2c32e7085da9934defa535de4a6a46b10a4d5257fa"
dependencies = [
"pkg-config",
"vcpkg",
]
[[package]]
name = "linux-raw-sys"
version = "0.9.4"
@ -843,6 +968,27 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "migrations_internals"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd01039851e82f8799046eabbb354056283fb265c8ec0996af940f4e85a380ff"
dependencies = [
"serde",
"toml",
]
[[package]]
name = "migrations_macros"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb161cc72176cb37aa47f1fc520d3ef02263d67d661f44f05d05a079e1237fd"
dependencies = [
"migrations_internals",
"proc-macro2",
"quote",
]
[[package]]
name = "mime"
version = "0.3.17"
@ -895,6 +1041,12 @@ dependencies = [
"tempfile",
]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-traits"
version = "0.2.19"
@ -915,9 +1067,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.21.1"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "openssl"
@ -1023,10 +1175,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro2"
version = "1.0.94"
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
@ -1066,6 +1224,8 @@ dependencies = [
"chrono",
"clap",
"color-eyre",
"diesel",
"diesel_migrations",
"directories",
"figment",
"figment_file_provider_adapter",
@ -1074,9 +1234,8 @@ dependencies = [
"regex",
"roux",
"serde",
"serde_json",
"tempfile",
"tokio",
"toml 0.5.11",
]
[[package]]
@ -1350,9 +1509,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.100"
version = "2.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
dependencies = [
"proc-macro2",
"quote",
@ -1440,6 +1599,37 @@ dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.3.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c"
[[package]]
name = "time-macros"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinystr"
version = "0.7.6"
@ -1489,9 +1679,9 @@ dependencies = [
[[package]]
name = "tokio-util"
version = "0.7.14"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034"
checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df"
dependencies = [
"bytes",
"futures-core",
@ -1502,18 +1692,9 @@ dependencies = [
[[package]]
name = "toml"
version = "0.5.11"
version = "0.8.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
dependencies = [
"serde",
]
[[package]]
name = "toml"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148"
checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae"
dependencies = [
"serde",
"serde_spanned",
@ -1523,26 +1704,33 @@ dependencies = [
[[package]]
name = "toml_datetime"
version = "0.6.8"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.24"
version = "0.22.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"toml_write",
"winnow",
]
[[package]]
name = "toml_write"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076"
[[package]]
name = "tower-service"
version = "0.3.3"
@ -1972,9 +2160,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.7.6"
version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63d3fcd9bba44b03821e7d699eeee959f3126dcc4aa8e4ae18ec617c2a5cea10"
checksum = "6cb8234a863ea0e8cd7284fcdd4f145233eb00fee02bbdd9861aec44e6477bc5"
dependencies = [
"memchr",
]

View file

@ -12,8 +12,6 @@ clap = { version = "4.5.32", features = ["derive"] }
roux = "2.2.14"
figment = { version = "0.10", features = ["toml", "json", "env"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.5"
tokio = { version = "1.44.2", features = ["rt", "rt-multi-thread", "macros"] }
regex = "1.10.3"
figment_file_provider_adapter = "0.1.1"
@ -22,3 +20,6 @@ log = "0.4.27"
color-eyre = "0.6.3"
chrono = { version = "0.4", features = ["serde"] }
multimap = "0.10.0"
diesel = { version = "2.2.10", features = ["sqlite", "chrono"] }
diesel_migrations = "2.2.0"
tempfile = "3.19.1"

9
diesel.toml Normal file
View file

@ -0,0 +1,9 @@
# For documentation on how to configure this file,
# see https://diesel.rs/guides/configuring-diesel-cli
[print_schema]
file = "src/schema.rs"
custom_type_derives = ["diesel::query_builder::QueryId", "Clone"]
[migrations_directory]
dir = "./migrations"

View file

@ -32,13 +32,16 @@
name = "reddit-magnet";
buildInputs = with pkgs; [
cargo
cargo-edit
cargo-machete
cargo-release
cargo-sort
diesel-cli
openssl
pkg-config
rustc
rust-toolchain
sqlite
] ++ lib.optionals stdenv.isDarwin [
libiconv
darwin.apple_sdk.frameworks.SystemConfiguration

View file

@ -0,0 +1 @@
-- Removes the `magnets` table.
-- NOTE(review): presumably the down half of the migration that creates `magnets` — confirm against the migration directory layout.
DROP TABLE magnets

View file

@ -0,0 +1,9 @@
-- One row per magnet link; the post-level columns (title, submitter,
-- subreddit, published_at) are repeated on every row of the same post.
-- No uniqueness constraint is declared on `link`.
CREATE TABLE magnets
(
id INTEGER PRIMARY KEY,
title VARCHAR NOT NULL,
submitter VARCHAR NOT NULL,
subreddit VARCHAR NOT NULL,
link VARCHAR NOT NULL,
published_at DATETIME NOT NULL
)

123
src/db.rs Normal file
View file

@ -0,0 +1,123 @@
use crate::models::{Magnet, NewMagnet};
use crate::schema::magnets;
use crate::PostInfo;
use color_eyre::eyre::{eyre, Result, WrapErr};
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use diesel_migrations::{embed_migrations, EmbeddedMigrations, MigrationHarness};
use std::fs::create_dir_all;
use std::path::Path;
/// Migrations from the `migrations` directory, embedded by `embed_migrations!`.
///
/// `Database::new` runs the pending ones every time a database is opened.
pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations");
/// Database for storing magnet links and associated information.
///
/// Wraps a single SQLite connection; the methods on this type take
/// `&mut self` because they run their queries through that connection.
pub struct Database {
/// Diesel connection to the SQLite file this store was opened on.
conn: SqliteConnection,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::PostInfo;
    use chrono::Utc;
    use tempfile::tempdir;

    /// Opening a database at a fresh path succeeds and creates the file.
    #[test]
    fn test_database_initialization() {
        let temp_dir = tempdir().unwrap();
        let db_path = temp_dir.path().join("test.db");

        let db = Database::new(&db_path);

        // Surface the underlying error instead of a bare "assertion failed".
        assert!(
            db.is_ok(),
            "database initialization failed: {:?}",
            db.as_ref().err()
        );
        assert!(db_path.exists());
    }

    /// Every magnet link of a post is stored as its own row, carrying the
    /// post's title, submitter, subreddit and timestamp.
    #[test]
    fn test_store_and_retrieve_magnet_links() {
        let temp_dir = tempdir().unwrap();
        let db_path = temp_dir.path().join("test.db");
        let mut db = Database::new(&db_path).unwrap();

        let post_info = PostInfo {
            title: "Test Title".to_string(),
            submitter: "test_user".to_string(),
            subreddit: "test_subreddit".to_string(),
            magnet_links: vec![
                "magnet:?xt=urn:btih:test1".to_string(),
                "magnet:?xt=urn:btih:test2".to_string(),
            ],
            timestamp: Utc::now(),
        };
        let expected_timestamp = post_info.timestamp.naive_utc();

        let result = db.store_magnets(&post_info);
        assert!(result.is_ok());

        let magnets = db.get_all_magnets().unwrap();
        assert_eq!(magnets.len(), 2);

        let mut stored_links: Vec<String> = Vec::with_capacity(magnets.len());
        for magnet in magnets {
            assert_eq!(magnet.title, "Test Title");
            assert_eq!(magnet.submitter, "test_user");
            assert_eq!(magnet.subreddit, "test_subreddit");
            assert_eq!(magnet.published_at, expected_timestamp);
            stored_links.push(magnet.link);
        }

        // Compare the full set of links: a per-row "is one of the two" check
        // would also accept the same link stored twice.
        stored_links.sort();
        assert_eq!(
            stored_links,
            vec!["magnet:?xt=urn:btih:test1", "magnet:?xt=urn:btih:test2"]
        );
    }
}
impl Database {
    /// Opens the SQLite database at `path` and brings its schema up to date.
    ///
    /// The parent directory of `path` is created first if it is missing, then
    /// a connection is established and every pending migration from
    /// [`MIGRATIONS`] is applied.
    ///
    /// # Errors
    ///
    /// Returns an error when `path` is not valid UTF-8, when the parent
    /// directory cannot be created, when the connection cannot be opened, or
    /// when applying the migrations fails.
    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path = path.as_ref();
        let database_url = path
            .to_str()
            .ok_or_else(|| eyre!("Database path is not valid UTF-8"))?;

        if let Some(parent) = path.parent() {
            create_dir_all(parent)
                .wrap_err_with(|| format!("Failed to create directory: {:?}", parent))?;
        }

        let mut conn = SqliteConnection::establish(database_url)
            .wrap_err("Failed to open database connection")?;

        // Report a failed migration to the caller rather than panicking: this
        // function already returns `Result`, and the harness hands back a
        // boxed error that is turned into a report here.
        conn.run_pending_migrations(MIGRATIONS)
            .map_err(|e| eyre!("Failed to apply database migrations: {}", e))?;

        Ok(Database { conn })
    }

    /// Loads every row of the `magnets` table.
    ///
    /// # Errors
    ///
    /// Returns an error when the query fails.
    pub fn get_all_magnets(&mut self) -> Result<Vec<Magnet>> {
        magnets::table
            .select(Magnet::as_select())
            .load(&mut self.conn)
            .wrap_err("Failed to load magnets from database")
    }

    /// Stores one row per magnet link found in `post`.
    ///
    /// Each row repeats the post's title, submitter and subreddit, and records
    /// the post timestamp as a naive UTC date-time. No de-duplication is done
    /// here, so storing the same post twice inserts its links twice.
    ///
    /// Returns the row count reported by the insert.
    ///
    /// # Errors
    ///
    /// Returns an error when the insert fails.
    pub fn store_magnets(&mut self, post: &PostInfo) -> Result<usize> {
        let published_at = post.timestamp.naive_utc();

        let links = post
            .magnet_links
            .iter()
            .map(|m| NewMagnet {
                title: post.title.as_str(),
                submitter: post.submitter.as_str(),
                subreddit: post.subreddit.as_str(),
                link: m,
                published_at: &published_at,
            })
            .collect::<Vec<NewMagnet>>();

        diesel::insert_into(magnets::table)
            .values(&links)
            .execute(&mut self.conn)
            .wrap_err("Failed to save new magnet")
    }
}

View file

@ -8,18 +8,23 @@ use figment::{
Figment,
};
use figment_file_provider_adapter::FileAdapter;
use log::debug;
use log::{debug, warn};
use multimap::MultiMap;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs::create_dir_all;
use std::path::{Path, PathBuf};
use crate::db::Database;
use crate::magnet::{extract_magnet_links, Magnet};
use reddit_client::RedditClient;
mod db;
mod magnet;
mod models;
mod reddit_client;
mod schema;
#[derive(Debug, Serialize, Deserialize)]
struct SectionConfig {
@ -36,6 +41,7 @@ struct Config {
#[derive(Debug)]
struct PostInfo {
title: String,
submitter: String,
magnet_links: Vec<Magnet>,
subreddit: String,
timestamp: DateTime<Utc>,
@ -47,6 +53,10 @@ struct Args {
/// Path to the configuration file
#[arg(short, long)]
config: Option<String>,
/// Path to the database file
#[arg(short, long)]
db: Option<String>,
}
/// Filters posts based on a title filter pattern
@ -100,6 +110,23 @@ async fn main() -> Result<()> {
let args = Args::parse();
// Initialize database
let db_path = match args.db {
Some(path) => PathBuf::from(path),
None => ProjectDirs::from("fr", "enoent", "reddit-magnet")
.map(|p| p.data_dir().join("reddit-magnet.db"))
.ok_or_else(|| eyre!("Could not determine data directory"))?,
};
// Create parent directory if it doesn't exist
if let Some(parent) = db_path.parent() {
create_dir_all(parent)
.wrap_err_with(|| format!("Failed to create directory: {:?}", parent))?;
}
let mut db = Database::new(&db_path)
.wrap_err_with(|| format!("Failed to initialize database at {:?}", db_path))?;
let mut conf_extractor = Figment::new();
let config_file_path: Option<PathBuf> = match args.config {
Some(path) => Some(Path::new(&path).to_path_buf()),
@ -166,12 +193,20 @@ async fn main() -> Result<()> {
let magnet_links = extract_magnet_links(body);
if !magnet_links.is_empty() {
filtered_posts.push(PostInfo {
let post_info = PostInfo {
title: title.to_string(),
submitter: username.clone(),
subreddit: subreddit.to_string(),
magnet_links,
timestamp: post.created,
});
};
// Store the post info in the database
if let Err(e) = db.store_magnets(&post_info) {
warn!("Failed to store post info in database: {}", e);
}
filtered_posts.push(post_info);
}
}

26
src/models.rs Normal file
View file

@ -0,0 +1,26 @@
use crate::schema::magnets;
use chrono::NaiveDateTime;
use diesel::prelude::*;
/// A row read back from the `magnets` table.
///
/// Each row holds one magnet link together with the details of the post it
/// was found in.
#[derive(Queryable, Selectable)]
#[diesel(table_name = magnets)]
#[diesel(check_for_backend(diesel::sqlite::Sqlite))]
pub struct Magnet {
/// Row identifier; optional because the schema declares `id` as nullable.
pub id: Option<i32>,
/// Title of the post the link came from.
pub title: String,
/// Author of the post.
pub submitter: String,
/// Subreddit the post was found in.
pub subreddit: String,
/// The magnet link itself.
pub link: String,
/// Post timestamp, without time zone information.
pub published_at: NaiveDateTime,
}
/// Borrowed values for inserting one row into the `magnets` table.
///
/// There is no `id` field, so inserts never supply one. All fields borrow
/// from the caller, so building a batch for one post does not copy the
/// post's strings.
#[derive(Insertable)]
#[diesel(table_name = magnets)]
#[diesel(check_for_backend(diesel::sqlite::Sqlite))]
pub struct NewMagnet<'a> {
/// Title of the post the link came from.
pub title: &'a str,
/// Author of the post.
pub submitter: &'a str,
/// Subreddit the post was found in.
pub subreddit: &'a str,
/// The magnet link itself.
pub link: &'a str,
/// Post timestamp, without time zone information.
pub published_at: &'a NaiveDateTime,
}

12
src/schema.rs Normal file
View file

@ -0,0 +1,12 @@
// @generated automatically by Diesel CLI.
// NOTE(review): hand edits here are presumably overwritten when the schema is
// regenerated (diesel.toml points `print_schema` at this file) — change the
// migrations instead.
diesel::table! {
// `id` is the primary key; it is nullable here, which is why the `Magnet`
// model reads it as `Option<i32>`.
magnets (id) {
id -> Nullable<Integer>,
title -> Text,
submitter -> Text,
subreddit -> Text,
link -> Text,
published_at -> Timestamp,
}
}