From 4f5049f3ab97d68e82594abe9968de2a6db44b37 Mon Sep 17 00:00:00 2001 From: Danilo Reyes Date: Wed, 17 Sep 2025 22:08:36 -0600 Subject: [PATCH] deleted old faulty rust file --- src/download_rust/Cargo.toml | 31 -- src/download_rust/src/args.rs | 97 ------ src/download_rust/src/config.rs | 44 --- src/download_rust/src/download.rs | 474 ----------------------------- src/download_rust/src/functions.rs | 127 -------- src/download_rust/src/gallery.rs | 166 ---------- src/download_rust/src/main.rs | 33 -- src/download_rust/src/user.rs | 246 --------------- 8 files changed, 1218 deletions(-) delete mode 100644 src/download_rust/Cargo.toml delete mode 100644 src/download_rust/src/args.rs delete mode 100644 src/download_rust/src/config.rs delete mode 100644 src/download_rust/src/download.rs delete mode 100644 src/download_rust/src/functions.rs delete mode 100644 src/download_rust/src/gallery.rs delete mode 100644 src/download_rust/src/main.rs delete mode 100644 src/download_rust/src/user.rs diff --git a/src/download_rust/Cargo.toml b/src/download_rust/Cargo.toml deleted file mode 100644 index c6b4f7c..0000000 --- a/src/download_rust/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -##! Cargo.toml -# -# This Cargo manifest defines the Rust version of the jawz download -# manager. It exposes a single binary named `rust_downloader` and -# pulls in a handful of third‐party crates to mirror the features of -# the original Python implementation. The chosen dependencies -# provide command line parsing (clap), configuration loading -# (serde/serde_yaml), regular expressions (regex), home directory -# discovery (dirs), shuffling (rand), logging (log/env_logger) and -# convenient error handling (anyhow). Versions are pegged to -# relatively conservative releases so the project will build cleanly -# against the NixOS 25.05 channel. - -[package] -name = "rust_downloader" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -clap = { version = "4.4", features = ["derive"] } -serde = { version = "1.0", features = ["derive"] } -serde_yaml = "0.9" -regex = "1.10" -dirs = "5.0" -rand = { version = "0.8", features = ["std", "small_rng"] } -log = "0.4" -env_logger = "0.10" -once_cell = "1.17" -anyhow = "1.0" \ No newline at end of file diff --git a/src/download_rust/src/args.rs b/src/download_rust/src/args.rs deleted file mode 100644 index 5b831f6..0000000 --- a/src/download_rust/src/args.rs +++ /dev/null @@ -1,97 +0,0 @@ -//! Command line argument definitions. -//! -//! This module defines the [`Cli`] structure which holds the -//! command line arguments accepted by the application. It uses -//! [`clap`] derive macros to declare flags, options and positional -//! parameters and enforces compile time validation for the -//! enumerated scrapper types. A `--dry-run` flag has been added to -//! allow simulating the run without executing any external -//! commands. - -use clap::{ArgAction, Parser, ValueEnum}; - -/// The supported scraper categories. These values mirror the -/// behaviour of the original Python script and are used to select -/// which downstream logic executes. See `scrapper_manager` in -/// [`crate::download`] for details. -#[derive(ValueEnum, Clone, Debug, PartialEq, Eq)] -pub enum Scrapper { - /// Download from the user's push list - Push, - /// Download from the user's main list - Main, - /// Download from the user's Instagram list - Instagram, - /// Download from the user's Kemono list - Kemono, - /// Download from the comic list - Comic, - /// Download from the manga list - Manga, - /// Download from the webcomic list - Webcomic, -} - -/// Command line interface for the downloader. Deriving [`Parser`] -/// automatically generates argument parsing and help output. The -/// fields directly correspond to the command line options of the -/// original Python implementation with a few refinements: -/// -/// * `scrapper` is optional and uses the [`Scrapper`] enum for -/// compile time validation. -/// * `user` defaults to "everyone" when omitted. -/// * `input` accepts one or more strings and may be specified -/// multiple times on the command line. -/// * Flags use Rust style booleans rather than inverted names; `no -/// archive` and `no skip` have been inverted into `flag_archive` -/// and `flag_skip` with sensible defaults. -/// * A `--dry-run` flag has been introduced to simulate execution -/// without running external commands. -/// * Instagram `post_type` values default to all supported types. -#[derive(Parser, Debug, Clone)] -#[command(name = "Downloader", about = "Download images, galleries and videos from a wide array of websites.")] -pub struct Cli { - /// Selects the scraper to use. When omitted the program - /// interprets input links via `--input` instead. - #[arg(value_enum, index = 1)] - pub scrapper: Option, - - /// Selects the personal user list to process. Defaults to - /// "everyone" which processes all configured users. - #[arg(short = 'u', long = "user", default_value = "everyone")] - pub user: String, - - /// Downloads the provided links immediately instead of using a - /// preconfigured list. May be specified multiple times. - #[arg(short = 'i', long = "input", num_args = 1.., action = ArgAction::Append)] - pub input: Vec, - - /// Prints a numbered list of links and prompts for a selection. - #[arg(short = 'l', long = "list", action = ArgAction::SetTrue, default_value_t = false)] - pub flag_list: bool, - - /// Enables archiving of downloads to prevent duplicates. Use - /// `--no-archive` to disable. - #[arg(long = "no-archive", action = ArgAction::SetFalse, default_value_t = true)] - pub flag_archive: bool, - - /// Skips already downloaded items when true. Use `--no-skip` - /// to download entire galleries. - #[arg(long = "no-skip", action = ArgAction::SetFalse, default_value_t = true)] - pub flag_skip: bool, - - /// Prints generated commands in addition to executing them. - #[arg(short = 'v', long = "verbose", action = ArgAction::SetTrue, default_value_t = false)] - pub flag_verbose: bool, - - /// Performs a dry run. Commands will be printed but never - /// executed. This flag takes precedence over `--verbose`. - #[arg(long = "dry-run", action = ArgAction::SetTrue, default_value_t = false)] - pub flag_dry_run: bool, - - /// Filters Instagram posts by type. When multiple values are - /// provided they will be joined by commas. The default - /// includes all supported types. - #[arg(short = 't', long = "type-post", num_args = 1.., action = ArgAction::Append, default_values_t = vec![String::from("posts"), String::from("reels"), String::from("stories"), String::from("highlights"), String::from("avatar")])] - pub post_type: Vec, -} \ No newline at end of file diff --git a/src/download_rust/src/config.rs b/src/download_rust/src/config.rs deleted file mode 100644 index b113fba..0000000 --- a/src/download_rust/src/config.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Configuration handling. -//! -//! This module is responsible for loading the YAML configuration -//! expected by the downloader. The configuration is read from -//! `~/.config/jawz/config.yaml` and deserialised into a -//! [`serde_yaml::Value`]. Consumers can then index into the value -//! to pull out fields as needed. A global constant is not used in -//! order to avoid surprises during testing and to make error -//! propagation explicit. - -use anyhow::{anyhow, Context, Result}; -use dirs::home_dir; -use serde_yaml::Value; -use std::fs; -use std::path::PathBuf; - -/// Loads the configuration file from the user's home directory. The -/// expected location is `$HOME/.config/jawz/config.yaml`. If the -/// file cannot be read or parsed a descriptive error is returned. -pub fn load_config_variables() -> Result { - let home = home_dir().ok_or_else(|| anyhow!("Could not determine home directory"))?; - let path: PathBuf = [home.to_str().unwrap_or(""), ".config/jawz/config.yaml"] - .iter() - .collect::(); - let content = fs::read_to_string(&path) - .with_context(|| format!("Failed to read configuration file from {}", path.display()))?; - let cfg: Value = serde_yaml::from_str(&content) - .with_context(|| format!("Failed to parse YAML in {}", path.display()))?; - Ok(cfg) -} - -/// Finds the index of a user by name. Returns `None` if no match -/// exists or if the configuration does not contain a `users` list. -pub fn get_user_index(name: &str, cfg: &Value) -> Option { - cfg.get("users")?.as_sequence()?.iter().enumerate().find_map(|(i, user)| { - let map = user.as_mapping()?; - let n = map.get(&Value::String("name".into()))?.as_str()?; - if n.eq_ignore_ascii_case(name) { - Some(i) - } else { - None - } - }) -} \ No newline at end of file diff --git a/src/download_rust/src/download.rs b/src/download_rust/src/download.rs deleted file mode 100644 index 95a1b23..0000000 --- a/src/download_rust/src/download.rs +++ /dev/null @@ -1,474 +0,0 @@ -//! High level download orchestration. -//! -//! This module coordinates the various helper modules to mirror the -//! behaviour of the original Python downloader. It exposes a -//! `run` function which is called from `main.rs` with the parsed -//! command line arguments and the loaded configuration. Where -//! possible iterators and guard clauses replace explicit loops to -//! improve clarity. - -use crate::args::{Cli, Scrapper}; -use crate::config::{get_user_index, load_config_variables}; -use crate::functions::{append_line, list_lines, parse_link, quote, run}; -use crate::gallery::Gallery; -use crate::user::User; -use anyhow::{anyhow, Context, Result}; -use log::{debug, info}; -use rand::seq::SliceRandom; -use regex::Regex; -use serde_yaml::Value; -use std::fs; -use std::io::{self, Write}; -use std::path::Path; - -/// A simple struct representing a video download. It collects -/// command line arguments required to build a `yt-dlp` or -/// `stream-dl` command. The `dest` and `database` fields should be -/// prequoted. -#[derive(Default, Debug, Clone)] -struct Video { - use_archive: bool, - link: String, - dest: String, - database: String, -} - -/// Constructs the `-o include=...` argument for Instagram links. -/// When the provided link does not contain "instagram" an empty -/// string is returned. When multiple post types are supplied they -/// are joined with commas. -fn parse_instagram(link: &str, cli: &Cli) -> String { - if !link.contains("instagram") { - return String::new(); - } - if cli.post_type.is_empty() { - return String::new(); - } - let joined = if cli.post_type.len() > 1 { - cli.post_type.join(",") - } else { - cli.post_type.first().cloned().unwrap_or_default() - }; - format!(" -o include={}", quote(&joined)) -} - -/// Builds a command string for video downloads. The logic mirrors -/// the original Python `video_command` function. See the source -/// comments for more details. Logging of the command and link is -/// performed at the call site. -fn video_command(video: &Video) -> String { - let rgx_yt = Regex::new(r"https://(?:www\.)?youtube|https://youtu.be").expect("invalid regex"); - let rgx_music = Regex::new(r"https://music\.youtube.*").expect("invalid regex"); - // Handle special case for chaturbate: use stream-dl on the last - // path component only. - if video.link.contains("chaturbate") { - let slug = video - .link - .trim_end_matches('/') - .rsplit('/') - .next() - .unwrap_or(""); - return format!("stream-dl {}", slug); - } - let mut command = String::from("yt-dlp"); - if rgx_yt.is_match(&video.link) { - command.push_str(" --embed-subs --embed-thumbnail"); - command.push_str(" --embed-metadata --embed-chapters"); - command.push_str(&format!(" -o {}", quote(&(video.dest.clone() + "/%(title)s.%(ext)s")))); - } else if rgx_music.is_match(&video.link) { - if video.use_archive { - command.push_str(&format!(" --download-archive {}", video.database)); - } - command.push_str(" --no-playlist --newline -x"); - command.push_str(" --audio-format best --add-metadata --audio-quality 0 -o"); - command.push_str(&format!(" {}", quote(&(video.dest.clone() + "/%(title)s.%(ext)s")))); - } else { - command.push_str(&format!(" -f mp4 -o {}", quote(&(video.dest.clone() + "/%(title)s.%(ext)s")))); - } - format!("{} {}", command, quote(&video.link)) -} - -/// Processes a gallery list (main, instagram or kemono) for a single -/// user. Builds and executes the appropriate `gallery-dl` command. -fn parse_gallery(list_name: &str, user: &User, cli: &Cli, cfg: &Value) -> Result<()> { - let mut gallery = Gallery::default(); - gallery.archive = cli.flag_archive; - // If skip is disabled (`flag_skip` false) then we enable skip - // through an option on gallery-dl. Otherwise we leave it empty. - gallery.skip_arg = if cli.flag_skip { String::new() } else { " -o skip=true".to_string() }; - gallery.dest = Some("download".to_string()); - gallery.list = Some(list_name.to_string()); - gallery.opt_args = parse_instagram(list_name, cli); - gallery.generate_command(Some(user), cfg, false)?; - gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?; - Ok(()) -} - -/// Downloads manga or comics based on the provided category. The -/// `skip_arg` string contains the chapter range options and is -/// assembled by the caller. Only lines matching the category are -/// downloaded. -fn comic_manager(skip_arg: &str, category: &Scrapper, cfg: &Value, cli: &Cli) -> Result<()> { - let comic = cfg - .get("comic") - .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?; - let list_path = comic - .get("comic-list") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'comic-list' in comic configuration"))?; - let content = fs::read_to_string(list_path) - .with_context(|| format!("Failed to read comic list from {}", list_path))?; - let pattern = match category { - Scrapper::Manga => "manga|webtoon", - Scrapper::Comic => "readcomiconline", - _ => return Err(anyhow!("Invalid category for comic manager")), - }; - let re_cat = Regex::new(pattern).expect("Failed to compile comic regex"); - content - .lines() - .filter(|line| re_cat.is_match(line)) - .map(str::to_string) - .try_for_each(|link| { - let mut gallery = Gallery::default(); - gallery.archive = cli.flag_archive; - gallery.skip_arg = skip_arg.to_string(); - gallery.link = Some(link.trim().to_string()); - // Generate a comic command; pass `None` for user since - // comic downloads resolve their own directories - gallery.generate_command(None, cfg, true)?; - gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?; - // Save the comic link to the master list - save_comic(&link, cfg)?; - Ok::<(), anyhow::Error>(()) - })?; - Ok(()) -} - -/// Prints a numbered list of webcomics to stdout and returns the -/// selected index. The user is prompted via stdin. Errors during -/// parsing or invalid selections are propagated. -fn print_webcomics(webcomics: &Value) -> Result { - let list = webcomics - .get("webcomics") - .and_then(|v| v.as_sequence()) - .ok_or_else(|| anyhow!("webcomic-list missing 'webcomics' array"))?; - for (index, entry) in list.iter().enumerate() { - let name = entry - .get("name") - .and_then(|v| v.as_str()) - .unwrap_or(""); - println!("{}", list_lines(index, name)); - } - print!("Select a webcomic: "); - io::stdout().flush()?; - let mut input = String::new(); - io::stdin().read_line(&mut input)?; - let choice: usize = input.trim().parse()?; - if choice >= list.len() { - return Err(anyhow!("Invalid selection {}", choice)); - } - Ok(choice) -} - -/// Handles the webcomic download flow. The configuration file -/// referenced by `comic.webcomic-list` is parsed and the user is -/// prompted to choose which webcomic to download. A `webcomix` -/// command is then assembled and executed. -fn webcomic_manager(cfg: &Value, cli: &Cli) -> Result<()> { - let comic = cfg - .get("comic") - .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?; - let list_path = comic - .get("webcomic-list") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'webcomic-list' in comic configuration"))?; - let webcomics_yaml: Value = serde_yaml::from_str(&fs::read_to_string(list_path)?) - .with_context(|| format!("Failed to parse webcomic list at {}", list_path))?; - let idx = print_webcomics(&webcomics_yaml)?; - let list = webcomics_yaml - .get("webcomics") - .and_then(|v| v.as_sequence()) - .ok_or_else(|| anyhow!("webcomic-list missing 'webcomics' array"))?; - let entry = list.get(idx).ok_or_else(|| anyhow!("Invalid webcomic index"))?; - let rating = entry - .get("type") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'type' in webcomic entry"))?; - let global = webcomics_yaml - .get("global") - .and_then(|v| v.as_mapping()) - .ok_or_else(|| anyhow!("Webcomic list missing 'global' section"))?; - let dest_key = format!("{}_directory", rating); - let dest = global - .get(&Value::String(dest_key.clone())) - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing '{}' in webcomic global", dest_key))?; - let name = entry - .get("name") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'name' in webcomic entry"))?; - let link = entry - .get("url") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'url' in webcomic entry"))?; - let nxt_code = entry - .get("next_code") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'next_code' in webcomic entry"))?; - let img_code = entry - .get("image_code") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'image_code' in webcomic entry"))?; - info!("The webcomic is {}", dest); - let mut command = format!("cd {} && webcomix custom {}", quote(dest), quote(name)); - command.push_str(" --start-url "); - command.push_str("e(link)); - command.push_str(&format!(" --next-page-xpath={}", quote(nxt_code))); - command.push_str(&format!(" --image-xpath={}", quote(img_code))); - command.push_str(" -y --cbz"); - run(&command, cli.flag_dry_run, cli.flag_verbose) -} - -/// Appends a comic or manga link to the global comic list if it is -/// not already present. Links are normalised via `parse_link` to -/// avoid duplicates. Logs a message when skipping duplicates. -fn save_comic(link: &str, cfg: &Value) -> Result<()> { - let comic = cfg - .get("comic") - .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?; - let list_path = comic - .get("comic-list") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("Missing 'comic-list' in comic configuration"))?; - let content = fs::read_to_string(list_path).unwrap_or_default().to_lowercase(); - let fixed = parse_link(link).to_lowercase(); - if content.contains(&fixed) { - info!("Graphic novel repeated, not saving"); - return Ok(()); - } - info!("New graphic novel, saving"); - append_line(Path::new(list_path), link) -} - -/// Handles the push list for a user. Links are classified into -/// gallery, comic, video or other categories using regular -/// expressions. Each category is processed appropriately. After -/// processing the push list is truncated. Logging mirrors the -/// original script. -fn push_manager(user: &User, cli: &Cli, cfg: &Value) -> Result<()> { - // Regular expressions used to classify links. These patterns - // mirror the ones in the original Python implementation. - let rgx_gallery = Regex::new( - r"(?x) - (x\.com/\w+((?=.*media)|(?!.*status))) - |(men\.wikifeet) - |(furaffinity\.net/user/) - |((deviantart\.com/\w+(?!.*/art/))) - |(furaffinity\.net/gallery/) - |(furaffinity\.net/scraps/) - |(furaffinity\.net/favorites/) - |(instagram.com(?!/p/)/\w+) - |(e621\.net((?=/post/)|(?!/posts/))) - |(flickr\.com/photos/\w+/(?!\d+)) - |(tumblr\.com(?!/post/)) - |(kemono\.party/(fanbox|gumroad|patreon)(?!/user/\d+/post)) - |(blogspot\.com(?!/)) - |(rule34\.paheal\.net/post/(?!view)) - |(rule34\.xxx/index\.php\?page=post&s=(?!view)) - |(pixiv\.net/(en/)?((?=users)|(?!artwork))) - |(fanbox\.cc/@\w+(?!.*posts/\d+)) - |(reddit\.com/(user|u)) - |(baraag\.net/((@\w+)|(?!/\d+))) - |(pinterest\.com/(?!pin/\d+)) - |(redgifs\.com/(users|u|(?!watch))) - |(bsky\.app/profile/(?!.*?/post/)) - ", - ) - .expect("Failed to compile gallery regex"); - let rgx_video = Regex::new(r"youtu\.be|youtube|pornhub|xtube|xvideos|chaturbate").expect("Failed to compile video regex"); - let rgx_comic = Regex::new(r"readcomiconline|mangahere|mangadex|webtoons|manganato").expect("Failed to compile comic regex"); - - // Read the push list into memory - let push_path = user - .lists - .get("push") - .ok_or_else(|| anyhow!("Push list missing for user {}", user.name))?; - let lines = fs::read_to_string(push_path).unwrap_or_default(); - // Temporary storage for categories - let mut links_galleries: Vec = Vec::new(); - let mut links_videos: Vec = Vec::new(); - let mut links_comics: Vec = Vec::new(); - let mut links_other: Vec = Vec::new(); - // Classify each link exactly once - for line in lines.lines().map(str::trim).filter(|l| !l.is_empty()) { - if rgx_gallery.is_match(line) { - links_galleries.push(line.to_string()); - } else if rgx_video.is_match(line) { - links_videos.push(line.to_string()); - } else if rgx_comic.is_match(line) { - links_comics.push(line.to_string()); - } else { - links_other.push(line.to_string()); - } - } - // Process gallery links - for link in &links_galleries { - let mut gallery = Gallery::default(); - gallery.archive = cli.flag_archive; - gallery.skip_arg = if cli.flag_skip { String::new() } else { " -o skip=true".to_string() }; - gallery.link = Some(parse_link(link)); - gallery.dest = Some("download".to_string()); - gallery.opt_args = parse_instagram(link, cli); - gallery.generate_command(Some(user), cfg, false)?; - gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?; - // Save link into master list to prevent duplicates - user.save_link(link)?; - } - // Process comic links - for link in &links_comics { - let skip_arg = if !cli.flag_skip { - "".to_string() - } else if link.contains("readcomiconline") { - " --chapter-range 1".to_string() - } else { - " --chapter-range 1-5".to_string() - }; - let mut gallery = Gallery::default(); - gallery.archive = cli.flag_archive; - gallery.skip_arg = skip_arg; - gallery.link = Some(link.to_string()); - gallery.generate_command(None, cfg, true)?; - gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?; - save_comic(link, cfg)?; - } - // Process video links - for link in &links_videos { - let mut video = Video::default(); - video.use_archive = cli.flag_archive; - video.link = link.to_string(); - // Use the media directory for the user - if let Some(media_dir) = user.directories.get("media") { - video.dest = media_dir.to_string_lossy().to_string(); - } else { - video.dest = String::new(); - } - video.database = quote( - user - .dbs - .get("media") - .map(|p| p.to_string_lossy()) - .unwrap_or_default() - .as_ref(), - ); - let cmd = video_command(&video); - info!("{} {}", cmd, link); - run(&cmd, cli.flag_dry_run, cli.flag_verbose)?; - } - // Process other links - for link in &links_other { - info!("Other type of download {}", link); - let mut gallery = Gallery::default(); - gallery.archive = false; - gallery.skip_arg = " -o directory='[]'".to_string(); - gallery.link = Some(link.to_string()); - gallery.dest = Some("push".to_string()); - gallery.generate_command(Some(user), cfg, false)?; - gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?; - } - // Truncate the push list - fs::write(push_path, "")?; - Ok(()) -} - -/// Manages the selected scraper for a single user. Delegates to -/// specialised functions based on the scraper type. The user's -/// `list_manager` is invoked up front to prepare per‑site lists. -fn scrapper_manager(user: &User, scrapper: &Scrapper, cli: &Cli, cfg: &Value) -> Result<()> { - user.list_manager()?; - match scrapper { - Scrapper::Main => parse_gallery("main", user, cli, cfg), - Scrapper::Instagram => parse_gallery("instagram", user, cli, cfg), - Scrapper::Kemono => parse_gallery("kemono", user, cli, cfg), - Scrapper::Push => push_manager(user, cli, cfg), - Scrapper::Comic => { - let skip_arg = if cli.flag_skip { - " --chapter-range 1" - } else { - "" - }; - comic_manager(skip_arg, scrapper, cfg, cli) - } - Scrapper::Manga => { - let skip_arg = if cli.flag_skip { - " --chapter-range 1-5" - } else { - "" - }; - comic_manager(skip_arg, scrapper, cfg, cli) - } - Scrapper::Webcomic => webcomic_manager(cfg, cli), - } -} - -/// Invokes the selected scraper for every configured user. Only -/// scrapers which operate on per‑user lists (main, instagram, -/// kemono and push) are executed; others are skipped. -fn scrap_everyone(scrapper: &Scrapper, cli: &Cli, cfg: &Value) -> Result<()> { - let users = cfg - .get("users") - .and_then(|v| v.as_sequence()) - .ok_or_else(|| anyhow!("No users configured"))?; - for user_entry in users.iter() { - let name = user_entry - .get("name") - .and_then(|v| v.as_str()) - .unwrap_or(""); - let idx = get_user_index(name, cfg) - .ok_or_else(|| anyhow!("User '{}' missing from configuration", name))?; - let user = User::new(idx, cfg)?; - info!("Scrapping {:?} for {}", scrapper, name); - scrapper_manager(&user, scrapper, cli, cfg)?; - } - Ok(()) -} - -/// Entry point for the download module. Decides how to dispatch -/// based on the presence or absence of a scrapper argument and -/// whether direct input links were provided. This function is -/// designed to be called from `main`. -pub fn run(cli: Cli, cfg: Value) -> Result<()> { - if let Some(scrapper) = &cli.scrapper { - let is_shared = matches!(scrapper, Scrapper::Push | Scrapper::Main | Scrapper::Instagram | Scrapper::Kemono); - if cli.user.eq_ignore_ascii_case("everyone") && is_shared { - return scrap_everyone(scrapper, &cli, &cfg); - } - // Otherwise operate on a single user - let user_name = &cli.user; - let idx = get_user_index(user_name, &cfg) - .ok_or_else(|| anyhow!("Unknown user '{}'", user_name))?; - let user = User::new(idx, &cfg)?; - return scrapper_manager(&user, scrapper, &cli, &cfg); - } - // No scrapper provided, process input links if present - if !cli.input.is_empty() { - // Determine which user should handle the push list. When - // called as an admin (`everyone` or `jawz`) we use the - // configuration for the user named "jawz". Otherwise we - // operate on the specified user. - let target = if cli.user.eq_ignore_ascii_case("everyone") || cli.user.eq_ignore_ascii_case("jawz") { - "jawz" - } else { - cli.user.as_str() - }; - let idx = get_user_index(target, &cfg) - .ok_or_else(|| anyhow!("Unknown user '{}'", target))?; - let user = User::new(idx, &cfg)?; - // Append each provided link to the user's push list - for link in cli.input.iter() { - user.append_list("push", &parse_link(link))?; - } - // Process the push list immediately - return push_manager(&user, &cli, &cfg); - } - Err(anyhow!("No scrapper selected and no input links provided")) -} \ No newline at end of file diff --git a/src/download_rust/src/functions.rs b/src/download_rust/src/functions.rs deleted file mode 100644 index ed1b6ec..0000000 --- a/src/download_rust/src/functions.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! Miscellaneous helper functions. -//! -//! This module contains a variety of small helpers used throughout -//! the downloader. Where appropriate iterators and guard clauses are -//! employed to keep the code concise and expressive. Error -//! conditions are reported via [`anyhow::Error`]. - -use anyhow::{anyhow, Result}; -use log::{debug, info}; -use regex::Regex; -use std::fs::{self, File}; -use std::io::{self, BufRead, Write}; -use std::path::{Path, PathBuf}; -use std::process::Command; - -/// Ensures that a Twitter/X link ends in `/media` if it does not -/// already. The check is simple: if the string ends with -/// `"/media"` then the original string is returned, otherwise -/// `"/media"` is appended. -pub fn validate_x_link(line: &str) -> String { - if line.trim_end().ends_with("/media") { - line.to_string() - } else { - format!("{}/media", line.trim_end_matches('/')) - } -} - -/// Normalises certain links. At present this function only ensures -/// that X/Twitter links end with `/media`. If the pattern does not -/// match the link is returned unchanged. -pub fn parse_link(link: &str) -> String { - let re = Regex::new(r"(?x) - (?:x\.com/\w+/?(?!.*status)) - ") - .expect("Failed to compile regex"); - if re.is_match(link) { - let fixed = validate_x_link(link); - debug!("Processed link {}", fixed); - fixed - } else { - debug!("No modifications needed for the link {}", link); - link.to_string() - } -} - -/// Surrounds a string with double quotes. This mirrors the Python -/// `quote` helper and is useful when constructing shell commands. -pub fn quote(s: &str) -> String { - format!("\"{}\"", s) -} - -/// Recursively deletes all files and directories inside `directory` and -/// finally removes the directory itself. Missing directories are -/// ignored. Any failure during deletion results in an error. -pub fn clean_cache(directory: &Path) -> Result<()> { - if !directory.is_dir() { - return Ok(()); - } - for entry in fs::read_dir(directory)? { - let entry = entry?; - let path = entry.path(); - if path.is_file() { - fs::remove_file(&path)?; - } else if path.is_dir() { - fs::remove_dir_all(&path)?; - } - } - fs::remove_dir(directory)?; - Ok(()) -} - -/// Runs a shell command. When `dry_run` is true the command is -/// printed and execution is skipped. When `verbose` is true the -/// command is printed prior to execution. The command is executed -/// via the system shell so that complex pipelines are permitted. -pub fn run(command: &str, dry_run: bool, verbose: bool) -> Result<()> { - if dry_run { - println!("{}", command); - return Ok(()); - } - if verbose { - println!("{}", command); - } - // Execute through the system shell. Use `sh -c` so that the - // command string is interpreted as a complete shell command. - let status = Command::new("sh").arg("-c").arg(command).status()?; - if !status.success() { - return Err(anyhow!("Command failed with status {}: {}", status, command)); - } - Ok(()) -} - -/// Formats a numbered list entry. Useful when printing selections to -/// the user. -pub fn list_lines(index: usize, line: &str) -> String { - format!("{}) {}", index, line) -} - -/// Reads all non-empty lines from a file, trimming trailing -/// whitespace. Returns an iterator over the lines. When the file -/// does not exist an empty vector is returned. Errors during file -/// access are propagated. -pub fn read_lines(file: &Path) -> Result> { - if !file.is_file() { - return Ok(vec![]); - } - let file = File::open(file)?; - let buf = io::BufReader::new(file); - let lines: Vec = buf - .lines() - .filter_map(|l| l.ok()) - .map(|l| l.trim_end().to_string()) - .filter(|l| !l.is_empty()) - .collect(); - Ok(lines) -} - -/// Writes a string to a file, creating the file if necessary and -/// appending a newline. Errors are propagated. -pub fn append_line(file: &Path, line: &str) -> Result<()> { - let mut f = fs::OpenOptions::new() - .create(true) - .append(true) - .open(file)?; - writeln!(f, "{}", line)?; - Ok(()) -} \ No newline at end of file diff --git a/src/download_rust/src/gallery.rs b/src/download_rust/src/gallery.rs deleted file mode 100644 index 5685421..0000000 --- a/src/download_rust/src/gallery.rs +++ /dev/null @@ -1,166 +0,0 @@ -//! Gallery command generator. -//! -//! The [`Gallery`] struct encapsulates the state required to build a -//! `gallery-dl` command. It exposes a method to generate the -//! command string based on user configuration and whether the -//! download is for a comic. Guard clauses are used extensively to -//! keep the logic easy to follow. - -use crate::config::load_config_variables; -use crate::functions::quote; -use crate::user::User; -use anyhow::{anyhow, Context, Result}; -use serde_yaml::Value; - -/// Represents a gallery download request. Fields are mutable so -/// callers can configure the desired behaviour before generating the -/// command string. -#[derive(Default, Debug, Clone)] -pub struct Gallery { - /// Whether to append a download archive to prevent duplicates - pub archive: bool, - /// Optional skip argument string. A non‑empty string starting - /// with a space will be appended verbatim to the command. - pub skip_arg: String, - /// The direct link to download. Mutually exclusive with `list`. - pub link: Option, - /// The name of the list to process. Mutually exclusive with - /// `link`. - pub list: Option, - /// The destination folder key (resolved via the user or comic - /// configuration). Ignored when `is_comic` is true and - /// `dest` is empty. - pub dest: Option, - /// Additional options passed verbatim to `gallery-dl` (e.g. - /// Instagram filters). - pub opt_args: String, - /// The generated command string. This field is populated by - /// `generate_command` and consumed by `run_command`. - pub command: String, -} - -impl Gallery { - /// Builds a gallery-dl command based on the current fields. When - /// `is_comic` is true the destination and archive database are - /// read from the `comic` section of the configuration and the - /// provided `user` is ignored. Otherwise the user is used to - /// determine where to download and which archive to use. The - /// generated command is stored in `self.command`. - pub fn generate_command( - &mut self, - user: Option<&User>, - cfg: &Value, - is_comic: bool, - ) -> Result<()> { - // Determine directory, database and queue based on context - let (directory, database, queue): (String, String, String) = if is_comic { - let comic = cfg - .get("comic") - .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?; - let download_dir = comic - .get("download-dir") - .and_then(|v| v.as_str()) - .unwrap_or_default(); - let database = comic - .get("database") - .and_then(|v| v.as_str()) - .unwrap_or_default(); - // When a list name is provided for comics look up - // `${list}-list` in the comic config - let q = if let Some(list_name) = self.list.as_ref() { - let key = format!("{}-list", list_name); - if let Some(v) = comic.get(&Value::String(key.clone())) { - if let Some(s) = v.as_str() { - quote(s) - } else { - String::new() - } - } else { - String::new() - } - } else { - String::new() - }; - ( - quote(download_dir), - quote(database), - q, - ) - } else { - // Non‑comic downloads must have an associated user - let user = user.ok_or_else(|| anyhow!("User is required for non comic downloads"))?; - // Destination directory falls back to an empty string - let dest_key = self.dest.as_ref().unwrap_or(&String::new()); - let dir_path = user - .directories - .get(dest_key) - .ok_or_else(|| anyhow!("Unknown destination '{}' for user {}", dest_key, user.name))?; - let db_path = user - .dbs - .get("gallery") - .ok_or_else(|| anyhow!("Missing gallery database for user {}", user.name))?; - let q = if let Some(list_name) = self.list.as_ref() { - if let Some(p) = user.lists.get(list_name) { - quote(p.to_string_lossy().as_ref()) - } else { - String::new() - } - } else { - String::new() - }; - ( - quote(dir_path.to_string_lossy().as_ref()), - quote(db_path.to_string_lossy().as_ref()), - q, - ) - }; - - // Determine the sleep interval. When a user is provided use - // their configured value. Otherwise fall back to the first - // user's value from the configuration or zero when missing. - let user_sleep: u64 = if let Some(u) = user { - u.sleep - } else { - cfg.get("users") - .and_then(|u| u.as_sequence()) - .and_then(|seq| seq.get(0)) - .and_then(|v| v.get("sleep")) - .and_then(|v| v.as_i64()) - .unwrap_or(0) as u64 - }; - let mut cmd = format!("gallery-dl --sleep {}", user_sleep); - if !self.skip_arg.is_empty() { - cmd.push_str(&self.skip_arg); - } - if is_comic || self.dest.is_some() { - cmd.push_str(&format!(" --dest {}", directory)); - } - if self.archive { - cmd.push_str(&format!(" --download-archive {}", database)); - } - if !self.opt_args.is_empty() { - cmd.push_str(&self.opt_args); - } - // Append either a direct link or an input file list - match (&self.link, &self.list) { - (Some(link), None) if !link.is_empty() => { - cmd.push(' '); - cmd.push_str("e(link)); - } - (None, Some(_)) if !queue.is_empty() => { - cmd.push_str(&format!(" -i {}", queue)); - } - _ => {} - } - self.command = cmd; - Ok(()) - } - - /// Runs the previously generated command. If `dry_run` is true - /// the command is printed and not executed. When `verbose` is - /// true the command is printed prior to execution. The - /// underlying execution is delegated to [`crate::functions::run`]. - pub fn run_command(&self, dry_run: bool, verbose: bool) -> Result<()> { - crate::functions::run(&self.command, dry_run, verbose) - } -} \ No newline at end of file diff --git a/src/download_rust/src/main.rs b/src/download_rust/src/main.rs deleted file mode 100644 index 2cb051a..0000000 --- a/src/download_rust/src/main.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! Program entry point. -//! -//! This module wires together argument parsing, configuration -//! loading, logging initialisation and the high level download -//! orchestration. Errors are propagated via [`anyhow::Result`] and -//! reported to stderr. - -mod args; -mod config; -mod download; -mod functions; -mod gallery; -mod user; - -use anyhow::Result; -use args::Cli; -use clap::Parser; -use env_logger; - -fn main() -> Result<()> { - // Initialise logging. The logger reads the `RUST_LOG` - // environment variable; if not set the default level is "info". - env_logger::init(); - // Parse command line arguments - let cli = Cli::parse(); - // Load configuration from disk - let cfg = config::load_config_variables()?; - // Dispatch to the download logic - if let Err(err) = download::run(cli, cfg) { - eprintln!("error: {}", err); - } - Ok(()) -} \ No newline at end of file diff --git a/src/download_rust/src/user.rs b/src/download_rust/src/user.rs deleted file mode 100644 index 4643314..0000000 --- a/src/download_rust/src/user.rs +++ /dev/null @@ -1,246 +0,0 @@ -//! User management. -//! -//! The `User` struct encapsulates per‑user configuration and -//! filesystem state. It derives its settings from the YAML -//! configuration and provides methods for managing lists, caching -//! directories and avoiding duplicate downloads. Iterators and guard -//! clauses are used throughout to make intent clear. - -use crate::config::load_config_variables; -use crate::functions::{append_line, clean_cache, parse_link, read_lines, validate_x_link}; -use anyhow::{anyhow, Context, Result}; -use log::error; -use rand::seq::SliceRandom; -use regex::Regex; -use serde_yaml::{Mapping, Value}; -use std::collections::HashMap; -use std::fs::{self, File}; -use std::path::{Path, PathBuf}; - -/// Represents a user and all of the paths and lists associated with -/// that user. The `User` is constructed from the global -/// configuration and an index selecting one of the `users` entries. -pub struct User { - /// The merged configuration for this user. User specific keys - /// override global settings. - pub config: Mapping, - /// The human readable name of the user. - pub name: String, - /// Number of seconds to sleep between operations. - pub sleep: u64, - /// Directories keyed by their logical purpose (e.g. "cache", - /// "lists", "downloads", "media", etc.). - pub directories: HashMap, - /// Paths to the databases used for archiving downloads. - pub dbs: HashMap, - /// Paths to various list files. See `list_manager` for details. - pub lists: HashMap, -} - -impl User { - /// Constructs a new user from the given index into the - /// configuration. Returns an error if the configuration is - /// malformed. - pub fn new(index: usize, cfg: &Value) -> Result { - let users = cfg - .get("users") - .and_then(|u| u.as_sequence()) - .ok_or_else(|| anyhow!("Configuration is missing a 'users' array"))?; - let user_cfg = users - .get(index) - .and_then(|v| v.as_mapping()) - .ok_or_else(|| anyhow!("Invalid user index {}", index))? - .clone(); - let global_cfg = cfg - .get("global") - .and_then(|v| v.as_mapping()) - .ok_or_else(|| anyhow!("Configuration is missing a 'global' map"))? - .clone(); - - // Merge global into user specific settings. User values take - // precedence. We perform a simple extend on a mutable copy. - let mut merged: Mapping = global_cfg.clone(); - for (k, v) in user_cfg.iter() { - merged.insert(k.clone(), v.clone()); - } - - let name = merged - .get(&Value::String("name".into())) - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("User configuration missing 'name'"))? - .to_string(); - let sleep = merged - .get(&Value::String("sleep".into())) - .and_then(|v| v.as_i64()) - .unwrap_or(0) as u64; - - // Build directory map from keys ending with '-dir'. Remove - // the suffix when storing the key. Convert each value into - // an absolute PathBuf and append the user name for cache and - // lists directories to mirror the Python behaviour. - let mut directories: HashMap = merged - .iter() - .filter_map(|(k, v)| { - let key = k.as_str()?; - if key.ends_with("-dir") { - let dir_name = key.trim_end_matches("-dir"); - let path_str = v.as_str()?; - Some((dir_name.to_string(), PathBuf::from(path_str))) - } else { - None - } - }) - .collect(); - - // Append user name to cache and lists directories - if let Some(cache) = directories.get_mut("cache") { - *cache = cache.join(&name); - } - if let Some(lists_dir) = directories.get_mut("lists") { - *lists_dir = lists_dir.join(&name); - } - - // Derive database file locations - let mut dbs = HashMap::new(); - if let Some(db_dir) = directories.get("databases") { - dbs.insert( - "gallery".to_string(), - db_dir.join(format!("{}.sqlite3", name)), - ); - dbs.insert( - "media".to_string(), - db_dir.join(format!("{}_ytdl.txt", name)), - ); - } - - // Derive list file locations - let mut lists = HashMap::new(); - if let Some(lists_dir) = directories.get("lists") { - lists.insert("master".to_string(), lists_dir.join("watch.txt")); - lists.insert("push".to_string(), lists_dir.join("instant.txt")); - } - if let Some(cache_dir) = directories.get("cache") { - lists.insert("instagram".to_string(), cache_dir.join("instagram.txt")); - lists.insert("kemono".to_string(), cache_dir.join("kemono.txt")); - lists.insert("main".to_string(), cache_dir.join("main.txt")); - } - - Ok(Self { - config: merged, - name, - sleep, - directories, - dbs, - lists, - }) - } - - /// Creates the necessary directory structure for this user. Any - /// pre‑existing cache directory is cleared. Missing list and - /// database files are touched into existence. Errors during - /// directory manipulation are propagated. - pub fn create_directories(&self) -> Result<()> { - // Clear the cache directory if it exists - if let Some(cache_dir) = self.directories.get("cache") { - let _ = clean_cache(cache_dir); - // Recreate cache directory after cleaning - fs::create_dir_all(cache_dir)?; - } - - // Create all directories - for dir in self.directories.values() { - fs::create_dir_all(dir)?; - } - - // Ensure list directory exists; complain otherwise - if let Some(lists_dir) = self.directories.get("lists") { - if !lists_dir.is_dir() { - error!("Lists directory for user {} doesn't exist", self.name); - } - } - - // Touch database files if missing - for db_path in self.dbs.values() { - if !db_path.is_file() { - File::create(db_path)?; - } - } - - // Touch master and push lists - for key in [&"master", &"push"] { - if let Some(path) = self.lists.get(*key) { - if !path.is_file() { - File::create(path)?; - } - } - } - Ok(()) - } - - /// Appends a line to the specified list. The list name must be - /// one of the keys in the `lists` map. A newline is appended - /// automatically. - pub fn append_list(&self, name: &str, line: &str) -> Result<()> { - let path = self - .lists - .get(name) - .ok_or_else(|| anyhow!("Unknown list {} for user {}", name, self.name))?; - append_line(path, line) - } - - /// Writes a link into its appropriate cache list based on simple - /// pattern matching. See the original Python implementation for - /// category definitions. This method uses guard clauses to keep - /// the matching logic obvious. - fn append_cache_list(&self, line: &str) -> Result<()> { - let lower = line.to_lowercase(); - if lower.contains('x') { - return self.append_list("main", &validate_x_link(line)); - } - if lower.contains("kemono.party") { - return self.append_list("kemono", line); - } - if lower.contains("instagram") { - return self.append_list("instagram", line); - } - // default case - self.append_list("main", line) - } - - /// Reads the master list, shuffles it and creates per‑site cache - /// lists. Empty or duplicate lines are silently ignored. - pub fn list_manager(&self) -> Result<()> { - self.create_directories()?; - let master_path = self - .lists - .get("master") - .ok_or_else(|| anyhow!("Master list missing for user {}", self.name))?; - let mut master_content = read_lines(master_path)?; - // Shuffle the list to randomise downloads; use a small RNG for - // reproducibility - let mut rng = rand::rngs::SmallRng::from_entropy(); - master_content.shuffle(&mut rng); - for line in master_content.iter().filter(|l| !l.is_empty()) { - self.append_cache_list(line)?; - } - Ok(()) - } - - /// Adds a link to the master list if it is not already present. - /// Normalisation of the link is performed via `parse_link` before - /// the check. Duplicates are logged and ignored. - pub fn save_link(&self, link: &str) -> Result<()> { - let master_path = self - .lists - .get("master") - .ok_or_else(|| anyhow!("Master list missing for user {}", self.name))?; - let contents = fs::read_to_string(master_path).unwrap_or_default().to_lowercase(); - let fixed = parse_link(link); - if contents.contains(&fixed.to_lowercase()) { - info!("Gallery repeated, not saving"); - return Ok(()); - } - info!("New gallery, saving"); - self.append_list("master", &fixed) - } -} \ No newline at end of file