deleted old faulty rust file

This commit is contained in:
Danilo Reyes 2025-09-17 22:08:36 -06:00
parent 6deea63497
commit 4f5049f3ab
8 changed files with 0 additions and 1218 deletions

View File

@ -1,31 +0,0 @@
##! Cargo.toml
#
# This Cargo manifest defines the Rust version of the jawz download
# manager. It exposes a single binary named `rust_downloader` and
# pulls in a handful of third-party crates to mirror the features of
# the original Python implementation. The chosen dependencies
# provide command line parsing (clap), configuration loading
# (serde/serde_yaml), regular expressions (regex), home directory
# discovery (dirs), shuffling (rand), logging (log/env_logger) and
# convenient error handling (anyhow). Versions are pegged to
# relatively conservative releases so the project will build cleanly
# against the NixOS 25.05 channel.
[package]
name = "rust_downloader"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.4", features = ["derive"] }
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
regex = "1.10"
dirs = "5.0"
rand = { version = "0.8", features = ["std", "small_rng"] }
log = "0.4"
env_logger = "0.10"
once_cell = "1.17"
anyhow = "1.0"

View File

@ -1,97 +0,0 @@
//! Command line argument definitions.
//!
//! This module defines the [`Cli`] structure which holds the
//! command line arguments accepted by the application. It uses
//! [`clap`] derive macros to declare flags, options and positional
//! parameters and enforces compile time validation for the
//! enumerated scrapper types. A `--dry-run` flag has been added to
//! allow simulating the run without executing any external
//! commands.
use clap::{ArgAction, Parser, ValueEnum};
// NOTE(review): clap's derive macros presumably surface the `///` text on
// each variant as help text for that value, so the doc comments below are
// left untouched — confirm before rewording them.
/// The supported scraper categories. These values mirror the
/// behaviour of the original Python script and are used to select
/// which downstream logic executes. See `scrapper_manager` in
/// [`crate::download`] for details.
#[derive(ValueEnum, Clone, Debug, PartialEq, Eq)]
pub enum Scrapper {
// Per-user list scrapers: these four are the ones `download::run` fans out
// to every configured user when `--user everyone` is in effect.
/// Download from the user's push list
Push,
/// Download from the user's main list
Main,
/// Download from the user's Instagram list
Instagram,
/// Download from the user's Kemono list
Kemono,
// Comic-style scrapers: handled by `comic_manager` (Comic, Manga) and
// `webcomic_manager` (Webcomic) in the download module.
/// Download from the comic list
Comic,
/// Download from the manga list
Manga,
/// Download from the webcomic list
Webcomic,
}
// NOTE(review): clap's derive macros presumably turn the `///` comments in
// this struct into `--help` text, so they are kept verbatim and additional
// maintainer notes use plain `//` comments — confirm before rewording.
/// Command line interface for the downloader. Deriving [`Parser`]
/// automatically generates argument parsing and help output. The
/// fields directly correspond to the command line options of the
/// original Python implementation with a few refinements:
///
/// * `scrapper` is optional and uses the [`Scrapper`] enum for
/// compile time validation.
/// * `user` defaults to "everyone" when omitted.
/// * `input` accepts one or more strings and may be specified
/// multiple times on the command line.
/// * Flags use Rust style booleans rather than inverted names; `no
/// archive` and `no skip` have been inverted into `flag_archive`
/// and `flag_skip` with sensible defaults.
/// * A `--dry-run` flag has been introduced to simulate execution
/// without running external commands.
/// * Instagram `post_type` values default to all supported types.
#[derive(Parser, Debug, Clone)]
#[command(name = "Downloader", about = "Download images, galleries and videos from a wide array of websites.")]
pub struct Cli {
// The only positional argument (index 1); `None` when it is omitted.
/// Selects the scraper to use. When omitted the program
/// interprets input links via `--input` instead.
#[arg(value_enum, index = 1)]
pub scrapper: Option<Scrapper>,
/// Selects the personal user list to process. Defaults to
/// "everyone" which processes all configured users.
#[arg(short = 'u', long = "user", default_value = "everyone")]
pub user: String,
// Each occurrence takes one or more values and all occurrences are
// accumulated into a single vector (`num_args = 1..` plus `Append`).
/// Downloads the provided links immediately instead of using a
/// preconfigured list. May be specified multiple times.
#[arg(short = 'i', long = "input", num_args = 1.., action = ArgAction::Append)]
pub input: Vec<String>,
/// Prints a numbered list of links and prompts for a selection.
#[arg(short = 'l', long = "list", action = ArgAction::SetTrue, default_value_t = false)]
pub flag_list: bool,
// Inverted flag: the field is `true` by default and passing `--no-archive`
// stores `false` (`SetFalse`).
/// Enables archiving of downloads to prevent duplicates. Use
/// `--no-archive` to disable.
#[arg(long = "no-archive", action = ArgAction::SetFalse, default_value_t = true)]
pub flag_archive: bool,
// Inverted flag: the field is `true` by default and passing `--no-skip`
// stores `false` (`SetFalse`).
/// Skips already downloaded items when true. Use `--no-skip`
/// to download entire galleries.
#[arg(long = "no-skip", action = ArgAction::SetFalse, default_value_t = true)]
pub flag_skip: bool,
/// Prints generated commands in addition to executing them.
#[arg(short = 'v', long = "verbose", action = ArgAction::SetTrue, default_value_t = false)]
pub flag_verbose: bool,
/// Performs a dry run. Commands will be printed but never
/// executed. This flag takes precedence over `--verbose`.
#[arg(long = "dry-run", action = ArgAction::SetTrue, default_value_t = false)]
pub flag_dry_run: bool,
// The values are free-form strings; nothing in this struct validates them
// against the five defaults listed in the attribute below.
/// Filters Instagram posts by type. When multiple values are
/// provided they will be joined by commas. The default
/// includes all supported types.
#[arg(short = 't', long = "type-post", num_args = 1.., action = ArgAction::Append, default_values_t = vec![String::from("posts"), String::from("reels"), String::from("stories"), String::from("highlights"), String::from("avatar")])]
pub post_type: Vec<String>,
}

View File

@ -1,44 +0,0 @@
//! Configuration handling.
//!
//! This module is responsible for loading the YAML configuration
//! expected by the downloader. The configuration is read from
//! `~/.config/jawz/config.yaml` and deserialised into a
//! [`serde_yaml::Value`]. Consumers can then index into the value
//! to pull out fields as needed. A global constant is not used in
//! order to avoid surprises during testing and to make error
//! propagation explicit.
use anyhow::{anyhow, Context, Result};
use dirs::home_dir;
use serde_yaml::Value;
use std::fs;
use std::path::PathBuf;
/// Loads the configuration file from the user's home directory.
///
/// The expected location is `$HOME/.config/jawz/config.yaml`.
///
/// # Errors
///
/// Returns an error when the home directory cannot be determined, when the
/// file cannot be read, or when its contents are not valid YAML. The read
/// and parse errors carry the offending path as context.
pub fn load_config_variables() -> Result<Value> {
    let home = home_dir().ok_or_else(|| anyhow!("Could not determine home directory"))?;
    // Join on the `PathBuf` itself: converting the home directory to `&str`
    // first would silently replace a non UTF-8 home with an empty string and
    // resolve the configuration relative to the current directory.
    let path: PathBuf = home.join(".config").join("jawz").join("config.yaml");
    let content = fs::read_to_string(&path)
        .with_context(|| format!("Failed to read configuration file from {}", path.display()))?;
    let cfg: Value = serde_yaml::from_str(&content)
        .with_context(|| format!("Failed to parse YAML in {}", path.display()))?;
    Ok(cfg)
}
/// Returns the position of the first entry in the `users` sequence whose
/// `name` field equals `name`, compared ASCII case-insensitively.
///
/// `None` is returned when `cfg` has no `users` sequence or when no entry
/// matches. Entries that are not mappings, or whose `name` is missing or not
/// a string, are skipped.
pub fn get_user_index(name: &str, cfg: &Value) -> Option<usize> {
    let users = cfg.get("users")?.as_sequence()?;
    let name_key = Value::String("name".into());
    users.iter().position(|user| {
        user.as_mapping()
            .and_then(|fields| fields.get(&name_key))
            .and_then(Value::as_str)
            .map_or(false, |candidate| candidate.eq_ignore_ascii_case(name))
    })
}

View File

@ -1,474 +0,0 @@
//! High level download orchestration.
//!
//! This module coordinates the various helper modules to mirror the
//! behaviour of the original Python downloader. It exposes a
//! `run` function which is called from `main.rs` with the parsed
//! command line arguments and the loaded configuration. Where
//! possible iterators and guard clauses replace explicit loops to
//! improve clarity.
use std::fs;
use std::io::{self, Write};
use std::path::Path;

use anyhow::{anyhow, Context, Result};
use log::{debug, info};
use rand::seq::SliceRandom;
use regex::Regex;
use serde_yaml::Value;

use crate::args::{Cli, Scrapper};
use crate::config::{get_user_index, load_config_variables};
// The shell helper is imported under an alias because this module defines
// its own public `run` entry point; importing it as `run` is a duplicate
// definition in the value namespace.
use crate::functions::{append_line, list_lines, parse_link, quote, run as run_shell};
use crate::gallery::Gallery;
use crate::user::User;
/// A simple struct representing a video download. It collects the
/// values `video_command` needs to build a `yt-dlp` or `stream-dl`
/// command line.
///
/// Quoting differs per field: `video_command` inserts `database` into
/// the command verbatim, so it must already be quoted by the caller,
/// whereas `dest` is a raw directory path that `video_command` quotes
/// itself together with the output file name template.
#[derive(Default, Debug, Clone)]
struct Video {
// When true, `--download-archive <database>` is added; `video_command`
// only does this on its YouTube Music branch.
use_archive: bool,
// The URL to download; it is quoted by `video_command`.
link: String,
// Unquoted destination directory.
dest: String,
// Pre-quoted path of the download archive.
database: String,
}
/// Builds the ` -o include="..."` option for Instagram links.
///
/// An empty string is returned when `link` does not contain "instagram" or
/// when no post types were supplied. Otherwise the post types from
/// `cli.post_type` are joined with commas and quoted.
fn parse_instagram(link: &str, cli: &Cli) -> String {
    let is_instagram = link.contains("instagram");
    if !is_instagram || cli.post_type.is_empty() {
        return String::new();
    }
    let kinds = match cli.post_type.as_slice() {
        [only] => only.clone(),
        many => many.join(","),
    };
    format!(" -o include={}", quote(&kinds))
}
/// Assembles the shell command used to download a single video.
///
/// * Links containing "chaturbate" yield `stream-dl <last path segment>`.
/// * YouTube links get the subtitle/thumbnail/metadata/chapter embedding
///   options.
/// * YouTube Music links are extracted as audio and honour
///   `video.use_archive`.
/// * Everything else is downloaded as mp4.
///
/// The output template `<dest>/%(title)s.%(ext)s` and the link are quoted
/// here; `video.database` is inserted as given. Logging is left to the
/// caller.
fn video_command(video: &Video) -> String {
    let youtube =
        Regex::new(r"https://(?:www\.)?youtube|https://youtu.be").expect("invalid regex");
    let youtube_music = Regex::new(r"https://music\.youtube.*").expect("invalid regex");
    let url = video.link.as_str();

    // Streams are recorded with stream-dl, which only needs the final path
    // component of the link.
    if url.contains("chaturbate") {
        let slug = url.trim_end_matches('/').rsplit('/').next().unwrap_or("");
        return format!("stream-dl {}", slug);
    }

    let output = quote(&format!("{}/%(title)s.%(ext)s", video.dest));
    let options = if youtube.is_match(url) {
        format!(
            " --embed-subs --embed-thumbnail --embed-metadata --embed-chapters -o {}",
            output
        )
    } else if youtube_music.is_match(url) {
        let archive = if video.use_archive {
            format!(" --download-archive {}", video.database)
        } else {
            String::new()
        };
        format!(
            "{} --no-playlist --newline -x --audio-format best --add-metadata --audio-quality 0 -o {}",
            archive, output
        )
    } else {
        format!(" -f mp4 -o {}", output)
    };
    format!("yt-dlp{} {}", options, quote(url))
}
/// Processes a gallery list (main, instagram or kemono) for a single
/// user. Builds and executes the appropriate `gallery-dl` command.
fn parse_gallery(list_name: &str, user: &User, cli: &Cli, cfg: &Value) -> Result<()> {
let mut gallery = Gallery::default();
gallery.archive = cli.flag_archive;
// If skip is disabled (`flag_skip` false) then we enable skip
// through an option on gallery-dl. Otherwise we leave it empty.
gallery.skip_arg = if cli.flag_skip { String::new() } else { " -o skip=true".to_string() };
gallery.dest = Some("download".to_string());
gallery.list = Some(list_name.to_string());
gallery.opt_args = parse_instagram(list_name, cli);
gallery.generate_command(Some(user), cfg, false)?;
gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
Ok(())
}
/// Downloads manga or comics based on the provided category. The
/// `skip_arg` string contains the chapter range options and is
/// assembled by the caller. Only lines matching the category are
/// downloaded.
fn comic_manager(skip_arg: &str, category: &Scrapper, cfg: &Value, cli: &Cli) -> Result<()> {
let comic = cfg
.get("comic")
.ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
let list_path = comic
.get("comic-list")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'comic-list' in comic configuration"))?;
let content = fs::read_to_string(list_path)
.with_context(|| format!("Failed to read comic list from {}", list_path))?;
let pattern = match category {
Scrapper::Manga => "manga|webtoon",
Scrapper::Comic => "readcomiconline",
_ => return Err(anyhow!("Invalid category for comic manager")),
};
let re_cat = Regex::new(pattern).expect("Failed to compile comic regex");
content
.lines()
.filter(|line| re_cat.is_match(line))
.map(str::to_string)
.try_for_each(|link| {
let mut gallery = Gallery::default();
gallery.archive = cli.flag_archive;
gallery.skip_arg = skip_arg.to_string();
gallery.link = Some(link.trim().to_string());
// Generate a comic command; pass `None` for user since
// comic downloads resolve their own directories
gallery.generate_command(None, cfg, true)?;
gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
// Save the comic link to the master list
save_comic(&link, cfg)?;
Ok::<(), anyhow::Error>(())
})?;
Ok(())
}
/// Prints a numbered list of webcomics to stdout and returns the
/// selected index. The user is prompted via stdin. Errors during
/// parsing or invalid selections are propagated.
fn print_webcomics(webcomics: &Value) -> Result<usize> {
let list = webcomics
.get("webcomics")
.and_then(|v| v.as_sequence())
.ok_or_else(|| anyhow!("webcomic-list missing 'webcomics' array"))?;
for (index, entry) in list.iter().enumerate() {
let name = entry
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("<unknown>");
println!("{}", list_lines(index, name));
}
print!("Select a webcomic: ");
io::stdout().flush()?;
let mut input = String::new();
io::stdin().read_line(&mut input)?;
let choice: usize = input.trim().parse()?;
if choice >= list.len() {
return Err(anyhow!("Invalid selection {}", choice));
}
Ok(choice)
}
/// Handles the webcomic download flow. The configuration file
/// referenced by `comic.webcomic-list` is parsed and the user is
/// prompted to choose which webcomic to download. A `webcomix`
/// command is then assembled and executed.
fn webcomic_manager(cfg: &Value, cli: &Cli) -> Result<()> {
let comic = cfg
.get("comic")
.ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
let list_path = comic
.get("webcomic-list")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'webcomic-list' in comic configuration"))?;
let webcomics_yaml: Value = serde_yaml::from_str(&fs::read_to_string(list_path)?)
.with_context(|| format!("Failed to parse webcomic list at {}", list_path))?;
let idx = print_webcomics(&webcomics_yaml)?;
let list = webcomics_yaml
.get("webcomics")
.and_then(|v| v.as_sequence())
.ok_or_else(|| anyhow!("webcomic-list missing 'webcomics' array"))?;
let entry = list.get(idx).ok_or_else(|| anyhow!("Invalid webcomic index"))?;
let rating = entry
.get("type")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'type' in webcomic entry"))?;
let global = webcomics_yaml
.get("global")
.and_then(|v| v.as_mapping())
.ok_or_else(|| anyhow!("Webcomic list missing 'global' section"))?;
let dest_key = format!("{}_directory", rating);
let dest = global
.get(&Value::String(dest_key.clone()))
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing '{}' in webcomic global", dest_key))?;
let name = entry
.get("name")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'name' in webcomic entry"))?;
let link = entry
.get("url")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'url' in webcomic entry"))?;
let nxt_code = entry
.get("next_code")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'next_code' in webcomic entry"))?;
let img_code = entry
.get("image_code")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'image_code' in webcomic entry"))?;
info!("The webcomic is {}", dest);
let mut command = format!("cd {} && webcomix custom {}", quote(dest), quote(name));
command.push_str(" --start-url ");
command.push_str(&quote(link));
command.push_str(&format!(" --next-page-xpath={}", quote(nxt_code)));
command.push_str(&format!(" --image-xpath={}", quote(img_code)));
command.push_str(" -y --cbz");
run(&command, cli.flag_dry_run, cli.flag_verbose)
}
/// Appends a comic or manga link to the global comic list if it is
/// not already present. Links are normalised via `parse_link` to
/// avoid duplicates. Logs a message when skipping duplicates.
fn save_comic(link: &str, cfg: &Value) -> Result<()> {
let comic = cfg
.get("comic")
.ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
let list_path = comic
.get("comic-list")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing 'comic-list' in comic configuration"))?;
let content = fs::read_to_string(list_path).unwrap_or_default().to_lowercase();
let fixed = parse_link(link).to_lowercase();
if content.contains(&fixed) {
info!("Graphic novel repeated, not saving");
return Ok(());
}
info!("New graphic novel, saving");
append_line(Path::new(list_path), link)
}
/// Handles the push list for a user. Links are classified into
/// gallery, comic, video or other categories using regular
/// expressions. Each category is processed appropriately. After
/// processing the push list is truncated. Logging mirrors the
/// original script.
fn push_manager(user: &User, cli: &Cli, cfg: &Value) -> Result<()> {
// Regular expressions used to classify links. These patterns
// mirror the ones in the original Python implementation.
let rgx_gallery = Regex::new(
r"(?x)
(x\.com/\w+((?=.*media)|(?!.*status)))
|(men\.wikifeet)
|(furaffinity\.net/user/)
|((deviantart\.com/\w+(?!.*/art/)))
|(furaffinity\.net/gallery/)
|(furaffinity\.net/scraps/)
|(furaffinity\.net/favorites/)
|(instagram.com(?!/p/)/\w+)
|(e621\.net((?=/post/)|(?!/posts/)))
|(flickr\.com/photos/\w+/(?!\d+))
|(tumblr\.com(?!/post/))
|(kemono\.party/(fanbox|gumroad|patreon)(?!/user/\d+/post))
|(blogspot\.com(?!/))
|(rule34\.paheal\.net/post/(?!view))
|(rule34\.xxx/index\.php\?page=post&s=(?!view))
|(pixiv\.net/(en/)?((?=users)|(?!artwork)))
|(fanbox\.cc/@\w+(?!.*posts/\d+))
|(reddit\.com/(user|u))
|(baraag\.net/((@\w+)|(?!/\d+)))
|(pinterest\.com/(?!pin/\d+))
|(redgifs\.com/(users|u|(?!watch)))
|(bsky\.app/profile/(?!.*?/post/))
",
)
.expect("Failed to compile gallery regex");
let rgx_video = Regex::new(r"youtu\.be|youtube|pornhub|xtube|xvideos|chaturbate").expect("Failed to compile video regex");
let rgx_comic = Regex::new(r"readcomiconline|mangahere|mangadex|webtoons|manganato").expect("Failed to compile comic regex");
// Read the push list into memory
let push_path = user
.lists
.get("push")
.ok_or_else(|| anyhow!("Push list missing for user {}", user.name))?;
let lines = fs::read_to_string(push_path).unwrap_or_default();
// Temporary storage for categories
let mut links_galleries: Vec<String> = Vec::new();
let mut links_videos: Vec<String> = Vec::new();
let mut links_comics: Vec<String> = Vec::new();
let mut links_other: Vec<String> = Vec::new();
// Classify each link exactly once
for line in lines.lines().map(str::trim).filter(|l| !l.is_empty()) {
if rgx_gallery.is_match(line) {
links_galleries.push(line.to_string());
} else if rgx_video.is_match(line) {
links_videos.push(line.to_string());
} else if rgx_comic.is_match(line) {
links_comics.push(line.to_string());
} else {
links_other.push(line.to_string());
}
}
// Process gallery links
for link in &links_galleries {
let mut gallery = Gallery::default();
gallery.archive = cli.flag_archive;
gallery.skip_arg = if cli.flag_skip { String::new() } else { " -o skip=true".to_string() };
gallery.link = Some(parse_link(link));
gallery.dest = Some("download".to_string());
gallery.opt_args = parse_instagram(link, cli);
gallery.generate_command(Some(user), cfg, false)?;
gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
// Save link into master list to prevent duplicates
user.save_link(link)?;
}
// Process comic links
for link in &links_comics {
let skip_arg = if !cli.flag_skip {
"".to_string()
} else if link.contains("readcomiconline") {
" --chapter-range 1".to_string()
} else {
" --chapter-range 1-5".to_string()
};
let mut gallery = Gallery::default();
gallery.archive = cli.flag_archive;
gallery.skip_arg = skip_arg;
gallery.link = Some(link.to_string());
gallery.generate_command(None, cfg, true)?;
gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
save_comic(link, cfg)?;
}
// Process video links
for link in &links_videos {
let mut video = Video::default();
video.use_archive = cli.flag_archive;
video.link = link.to_string();
// Use the media directory for the user
if let Some(media_dir) = user.directories.get("media") {
video.dest = media_dir.to_string_lossy().to_string();
} else {
video.dest = String::new();
}
video.database = quote(
user
.dbs
.get("media")
.map(|p| p.to_string_lossy())
.unwrap_or_default()
.as_ref(),
);
let cmd = video_command(&video);
info!("{} {}", cmd, link);
run(&cmd, cli.flag_dry_run, cli.flag_verbose)?;
}
// Process other links
for link in &links_other {
info!("Other type of download {}", link);
let mut gallery = Gallery::default();
gallery.archive = false;
gallery.skip_arg = " -o directory='[]'".to_string();
gallery.link = Some(link.to_string());
gallery.dest = Some("push".to_string());
gallery.generate_command(Some(user), cfg, false)?;
gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
}
// Truncate the push list
fs::write(push_path, "")?;
Ok(())
}
/// Dispatches one scraper run for a single user.
///
/// `user.list_manager()` is called first, then the scraper-specific function
/// is invoked. Comic and manga runs are limited to a chapter range
/// (`1` and `1-5` respectively) while `cli.flag_skip` is set.
fn scrapper_manager(user: &User, scrapper: &Scrapper, cli: &Cli, cfg: &Value) -> Result<()> {
    user.list_manager()?;
    // Chapter ranges only apply while skipping is enabled.
    let chapter_range =
        |range: &'static str| -> &'static str { if cli.flag_skip { range } else { "" } };
    match scrapper {
        Scrapper::Push => push_manager(user, cli, cfg),
        Scrapper::Main => parse_gallery("main", user, cli, cfg),
        Scrapper::Instagram => parse_gallery("instagram", user, cli, cfg),
        Scrapper::Kemono => parse_gallery("kemono", user, cli, cfg),
        Scrapper::Comic => comic_manager(chapter_range(" --chapter-range 1"), scrapper, cfg, cli),
        Scrapper::Manga => {
            comic_manager(chapter_range(" --chapter-range 1-5"), scrapper, cfg, cli)
        }
        Scrapper::Webcomic => webcomic_manager(cfg, cli),
    }
}
/// Invokes the selected scraper for every configured user. Only
/// scrapers which operate on peruser lists (main, instagram,
/// kemono and push) are executed; others are skipped.
fn scrap_everyone(scrapper: &Scrapper, cli: &Cli, cfg: &Value) -> Result<()> {
let users = cfg
.get("users")
.and_then(|v| v.as_sequence())
.ok_or_else(|| anyhow!("No users configured"))?;
for user_entry in users.iter() {
let name = user_entry
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("<unknown>");
let idx = get_user_index(name, cfg)
.ok_or_else(|| anyhow!("User '{}' missing from configuration", name))?;
let user = User::new(idx, cfg)?;
info!("Scrapping {:?} for {}", scrapper, name);
scrapper_manager(&user, scrapper, cli, cfg)?;
}
Ok(())
}
/// Entry point for the download module. Decides how to dispatch
/// based on the presence or absence of a scrapper argument and
/// whether direct input links were provided. This function is
/// designed to be called from `main`.
pub fn run(cli: Cli, cfg: Value) -> Result<()> {
if let Some(scrapper) = &cli.scrapper {
let is_shared = matches!(scrapper, Scrapper::Push | Scrapper::Main | Scrapper::Instagram | Scrapper::Kemono);
if cli.user.eq_ignore_ascii_case("everyone") && is_shared {
return scrap_everyone(scrapper, &cli, &cfg);
}
// Otherwise operate on a single user
let user_name = &cli.user;
let idx = get_user_index(user_name, &cfg)
.ok_or_else(|| anyhow!("Unknown user '{}'", user_name))?;
let user = User::new(idx, &cfg)?;
return scrapper_manager(&user, scrapper, &cli, &cfg);
}
// No scrapper provided, process input links if present
if !cli.input.is_empty() {
// Determine which user should handle the push list. When
// called as an admin (`everyone` or `jawz`) we use the
// configuration for the user named "jawz". Otherwise we
// operate on the specified user.
let target = if cli.user.eq_ignore_ascii_case("everyone") || cli.user.eq_ignore_ascii_case("jawz") {
"jawz"
} else {
cli.user.as_str()
};
let idx = get_user_index(target, &cfg)
.ok_or_else(|| anyhow!("Unknown user '{}'", target))?;
let user = User::new(idx, &cfg)?;
// Append each provided link to the user's push list
for link in cli.input.iter() {
user.append_list("push", &parse_link(link))?;
}
// Process the push list immediately
return push_manager(&user, &cli, &cfg);
}
Err(anyhow!("No scrapper selected and no input links provided"))
}

View File

@ -1,127 +0,0 @@
//! Miscellaneous helper functions.
//!
//! This module contains a variety of small helpers used throughout
//! the downloader. Where appropriate iterators and guard clauses are
//! employed to keep the code concise and expressive. Error
//! conditions are reported via [`anyhow::Error`].
use anyhow::{anyhow, Result};
use log::{debug, info};
use regex::Regex;
use std::fs::{self, File};
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
/// Ensures that a Twitter/X link ends in `/media`.
///
/// Surrounding whitespace and trailing slashes are removed first, so a link
/// that already ends in `/media/` is not given a second `/media` segment and
/// a trailing newline never ends up in the middle of the result.
pub fn validate_x_link(line: &str) -> String {
    let base = line.trim().trim_end_matches('/');
    if base.ends_with("/media") {
        base.to_string()
    } else {
        format!("{}/media", base)
    }
}
/// Normalises certain links.
///
/// At present only X/Twitter profile links are rewritten: a link matching
/// `x.com/<name>` with no "status" after the profile part is passed through
/// `validate_x_link` so that it ends with `/media`. Every other link is
/// returned unchanged.
pub fn parse_link(link: &str) -> String {
    // The `regex` crate does not support look-ahead, so the "not followed by
    // status" condition is checked on the text after each match instead of
    // inside the pattern.
    let profile = Regex::new(r"x\.com/\w+/?").expect("Failed to compile regex");
    let is_profile = profile
        .find_iter(link)
        .any(|found| !link[found.end()..].contains("status"));
    if is_profile {
        let fixed = validate_x_link(link);
        debug!("Processed link {}", fixed);
        fixed
    } else {
        debug!("No modifications needed for the link {}", link);
        link.to_string()
    }
}
/// Wraps `s` in a pair of double quotes for use in shell command strings.
///
/// The content is copied as is: characters that are special inside double
/// quotes are not escaped.
pub fn quote(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    quoted.push_str(s);
    quoted.push('"');
    quoted
}
/// Empties `directory` and then removes it.
///
/// Regular files are deleted individually and sub-directories recursively.
/// Nothing happens when `directory` is not an existing directory. The first
/// failing filesystem operation aborts the clean-up with an error.
pub fn clean_cache(directory: &Path) -> Result<()> {
    if directory.is_dir() {
        for entry in fs::read_dir(directory)? {
            let child = entry?.path();
            if child.is_file() {
                fs::remove_file(&child)?;
            } else if child.is_dir() {
                fs::remove_dir_all(&child)?;
            }
        }
        fs::remove_dir(directory)?;
    }
    Ok(())
}
/// Executes `command` through `sh -c`.
///
/// The command is echoed to stdout when either `dry_run` or `verbose` is
/// set; with `dry_run` nothing is executed. A non-zero exit status is
/// reported as an error containing the status and the command.
pub fn run(command: &str, dry_run: bool, verbose: bool) -> Result<()> {
    if dry_run || verbose {
        println!("{}", command);
    }
    if dry_run {
        return Ok(());
    }
    // Going through the shell keeps pipelines and `&&` chains working.
    let status = Command::new("sh").args(["-c", command]).status()?;
    if status.success() {
        Ok(())
    } else {
        Err(anyhow!("Command failed with status {}: {}", status, command))
    }
}
/// Renders one entry of a numbered selection list as `<index>) <line>`.
pub fn list_lines(index: usize, line: &str) -> String {
    let number = index.to_string();
    [number.as_str(), ") ", line].concat()
}
/// Reads all non-empty lines from a file, trimming trailing whitespace.
///
/// Returns the lines as a vector. When `file` is not an existing regular
/// file an empty vector is returned.
///
/// # Errors
///
/// Fails when the file cannot be opened or when reading any line fails
/// (for example on invalid UTF-8); read errors are not skipped.
pub fn read_lines(file: &Path) -> Result<Vec<String>> {
    if !file.is_file() {
        return Ok(Vec::new());
    }
    let reader = io::BufReader::new(File::open(file)?);
    let mut lines = Vec::new();
    for line in reader.lines() {
        // Propagate the error instead of dropping the line silently.
        let line = line?;
        let trimmed = line.trim_end();
        if !trimmed.is_empty() {
            lines.push(trimmed.to_string());
        }
    }
    Ok(lines)
}
/// Appends `line` followed by a newline to `file`, creating the file when it
/// does not exist yet. Open and write failures are returned to the caller.
pub fn append_line(file: &Path, line: &str) -> Result<()> {
    let mut options = fs::OpenOptions::new();
    options.append(true).create(true);
    let mut handle = options.open(file)?;
    writeln!(handle, "{}", line)?;
    Ok(())
}

View File

@ -1,166 +0,0 @@
//! Gallery command generator.
//!
//! The [`Gallery`] struct encapsulates the state required to build a
//! `gallery-dl` command. It exposes a method to generate the
//! command string based on user configuration and whether the
//! download is for a comic. Guard clauses are used extensively to
//! keep the logic easy to follow.
use crate::config::load_config_variables;
use crate::functions::quote;
use crate::user::User;
use anyhow::{anyhow, Context, Result};
use serde_yaml::Value;
/// Represents a gallery download request. All fields are public so
/// callers can configure the desired behaviour before calling
/// `generate_command`, which turns the request into a `gallery-dl`
/// command line stored in `command`.
#[derive(Default, Debug, Clone)]
pub struct Gallery {
/// Whether to pass `--download-archive` so already downloaded items
/// are not fetched again.
pub archive: bool,
/// Optional extra argument string. When non-empty it is appended
/// verbatim right after the `--sleep` option, so it must start with
/// a space.
pub skip_arg: String,
/// The direct link to download. Only used when `list` is `None`.
pub link: Option<String>,
/// The name of the list to process. Only used when `link` is
/// `None`.
pub list: Option<String>,
/// The destination folder key, looked up in the user's directories
/// for non-comic downloads. Comic downloads take their directory
/// from the `comic` section of the configuration instead.
pub dest: Option<String>,
/// Additional options appended verbatim to the command (e.g.
/// Instagram filters). Must start with a space when non-empty.
pub opt_args: String,
/// The generated command string. This field is populated by
/// `generate_command` and consumed by `run_command`.
pub command: String,
}
impl Gallery {
/// Builds a gallery-dl command based on the current fields. When
/// `is_comic` is true the destination and archive database are
/// read from the `comic` section of the configuration and the
/// provided `user` is ignored. Otherwise the user is used to
/// determine where to download and which archive to use. The
/// generated command is stored in `self.command`.
pub fn generate_command(
&mut self,
user: Option<&User>,
cfg: &Value,
is_comic: bool,
) -> Result<()> {
// Determine directory, database and queue based on context
let (directory, database, queue): (String, String, String) = if is_comic {
let comic = cfg
.get("comic")
.ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
let download_dir = comic
.get("download-dir")
.and_then(|v| v.as_str())
.unwrap_or_default();
let database = comic
.get("database")
.and_then(|v| v.as_str())
.unwrap_or_default();
// When a list name is provided for comics look up
// `${list}-list` in the comic config
let q = if let Some(list_name) = self.list.as_ref() {
let key = format!("{}-list", list_name);
if let Some(v) = comic.get(&Value::String(key.clone())) {
if let Some(s) = v.as_str() {
quote(s)
} else {
String::new()
}
} else {
String::new()
}
} else {
String::new()
};
(
quote(download_dir),
quote(database),
q,
)
} else {
// Noncomic downloads must have an associated user
let user = user.ok_or_else(|| anyhow!("User is required for non comic downloads"))?;
// Destination directory falls back to an empty string
let dest_key = self.dest.as_ref().unwrap_or(&String::new());
let dir_path = user
.directories
.get(dest_key)
.ok_or_else(|| anyhow!("Unknown destination '{}' for user {}", dest_key, user.name))?;
let db_path = user
.dbs
.get("gallery")
.ok_or_else(|| anyhow!("Missing gallery database for user {}", user.name))?;
let q = if let Some(list_name) = self.list.as_ref() {
if let Some(p) = user.lists.get(list_name) {
quote(p.to_string_lossy().as_ref())
} else {
String::new()
}
} else {
String::new()
};
(
quote(dir_path.to_string_lossy().as_ref()),
quote(db_path.to_string_lossy().as_ref()),
q,
)
};
// Determine the sleep interval. When a user is provided use
// their configured value. Otherwise fall back to the first
// user's value from the configuration or zero when missing.
let user_sleep: u64 = if let Some(u) = user {
u.sleep
} else {
cfg.get("users")
.and_then(|u| u.as_sequence())
.and_then(|seq| seq.get(0))
.and_then(|v| v.get("sleep"))
.and_then(|v| v.as_i64())
.unwrap_or(0) as u64
};
let mut cmd = format!("gallery-dl --sleep {}", user_sleep);
if !self.skip_arg.is_empty() {
cmd.push_str(&self.skip_arg);
}
if is_comic || self.dest.is_some() {
cmd.push_str(&format!(" --dest {}", directory));
}
if self.archive {
cmd.push_str(&format!(" --download-archive {}", database));
}
if !self.opt_args.is_empty() {
cmd.push_str(&self.opt_args);
}
// Append either a direct link or an input file list
match (&self.link, &self.list) {
(Some(link), None) if !link.is_empty() => {
cmd.push(' ');
cmd.push_str(&quote(link));
}
(None, Some(_)) if !queue.is_empty() => {
cmd.push_str(&format!(" -i {}", queue));
}
_ => {}
}
self.command = cmd;
Ok(())
}
/// Executes the command string stored in `self.command`.
///
/// This method performs no work of its own: the command and both
/// flags are handed unchanged to [`crate::functions::run`], and that
/// function's result is returned as is. Per that function's
/// contract, `dry_run` is meant to print the command without
/// executing it and `verbose` to print it before execution.
pub fn run_command(&self, dry_run: bool, verbose: bool) -> Result<()> {
    let command = &self.command;
    crate::functions::run(command, dry_run, verbose)
}
}

View File

@ -1,33 +0,0 @@
//! Program entry point.
//!
//! This module wires together argument parsing, configuration
//! loading, logging initialisation and the high level download
//! orchestration. Errors are propagated via [`anyhow::Result`] and
//! reported to stderr.
mod args;
mod config;
mod download;
mod functions;
mod gallery;
mod user;
use anyhow::Result;
use args::Cli;
use clap::Parser;
use env_logger;
/// Program entry point: initialises logging, parses the command
/// line, loads the configuration and runs the download logic.
///
/// # Errors
///
/// Returns an error when the configuration cannot be loaded. A
/// failure reported by `download::run` is printed to stderr and the
/// process then terminates with exit status 1.
fn main() -> Result<()> {
    // Initialise logging. `RUST_LOG` is honoured when set. A bare
    // `env_logger::init()` would only emit `error` records when the
    // variable is missing, so "info" is configured as the explicit
    // fallback level.
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    // Parse command line arguments
    let cli = Cli::parse();
    // Load configuration from disk
    let cfg = config::load_config_variables()?;
    // Dispatch to the download logic
    if let Err(err) = download::run(cli, cfg) {
        eprintln!("error: {}", err);
        // Exit non-zero so shells and scripts can detect the failure
        // instead of seeing a successful run.
        std::process::exit(1);
    }
    Ok(())
}

View File

@ -1,246 +0,0 @@
//! User management.
//!
//! The `User` struct encapsulates per-user configuration and
//! filesystem state. It derives its settings from the YAML
//! configuration and provides methods for managing lists, caching
//! directories and avoiding duplicate downloads. Iterators and guard
//! clauses are used throughout to make intent clear.
use crate::config::load_config_variables;
use crate::functions::{append_line, clean_cache, parse_link, read_lines, validate_x_link};
use anyhow::{anyhow, Context, Result};
use log::error;
use rand::seq::SliceRandom;
use regex::Regex;
use serde_yaml::{Mapping, Value};
use std::collections::HashMap;
use std::fs::{self, File};
use std::path::{Path, PathBuf};
/// Represents a user and all of the paths and lists associated with
/// that user. The `User` is constructed from the global
/// configuration and an index selecting one of the `users` entries.
///
/// `Debug` is derived so a value can be inspected in log output and
/// test failures.
#[derive(Debug)]
pub struct User {
    /// The merged configuration for this user. User specific keys
    /// override global settings.
    pub config: Mapping,
    /// The human readable name of the user.
    pub name: String,
    /// Number of seconds to sleep between operations.
    pub sleep: u64,
    /// Directories keyed by their logical purpose: the configuration
    /// key with its `-dir` suffix removed (e.g. "cache", "lists",
    /// "databases").
    pub directories: HashMap<String, PathBuf>,
    /// Paths to the databases used for archiving downloads, keyed by
    /// "gallery" and "media".
    pub dbs: HashMap<String, PathBuf>,
    /// Paths to various list files. See `list_manager` for details.
    pub lists: HashMap<String, PathBuf>,
}
impl User {
/// Constructs the user found at position `index` of the `users`
/// sequence in `cfg`.
///
/// The `global` mapping is used as the base configuration and every
/// key of the selected user entry overrides it. Directory, database
/// and list locations are then derived from the merged settings.
///
/// # Errors
///
/// Fails when `cfg` has no `users` sequence, when `index` does not
/// select a mapping inside it, when the `global` mapping is missing
/// or when the merged settings contain no string `name`.
pub fn new(index: usize, cfg: &Value) -> Result<Self> {
    let users = cfg
        .get("users")
        .and_then(Value::as_sequence)
        .ok_or_else(|| anyhow!("Configuration is missing a 'users' array"))?;
    let user_cfg = users
        .get(index)
        .and_then(Value::as_mapping)
        .ok_or_else(|| anyhow!("Invalid user index {}", index))?;
    let global_cfg = cfg
        .get("global")
        .and_then(Value::as_mapping)
        .ok_or_else(|| anyhow!("Configuration is missing a 'global' map"))?;
    // Start from a single copy of the global settings and let the
    // user specific entries overwrite them.
    let mut merged: Mapping = global_cfg.clone();
    for (key, value) in user_cfg.iter() {
        merged.insert(key.clone(), value.clone());
    }
    let name = merged
        .get("name")
        .and_then(Value::as_str)
        .ok_or_else(|| anyhow!("User configuration missing 'name'"))?
        .to_string();
    // Read the value as unsigned: a negative or non-integer `sleep`
    // falls back to 0 instead of wrapping into an enormous delay.
    let sleep = merged
        .get("sleep")
        .and_then(Value::as_u64)
        .unwrap_or(0);
    // Every string valued key ending in "-dir" becomes a directory
    // entry keyed by the name without that suffix. `strip_suffix`
    // removes the suffix exactly once. Paths are stored as written
    // in the configuration.
    let mut directories: HashMap<String, PathBuf> = merged
        .iter()
        .filter_map(|(key, value)| {
            let dir_name = key.as_str()?.strip_suffix("-dir")?;
            let path = value.as_str()?;
            Some((dir_name.to_string(), PathBuf::from(path)))
        })
        .collect();
    // The cache and lists directories get a per-user sub-directory.
    for key in ["cache", "lists"] {
        if let Some(dir) = directories.get_mut(key) {
            dir.push(&name);
        }
    }
    // Derive database file locations
    let mut dbs = HashMap::new();
    if let Some(db_dir) = directories.get("databases") {
        dbs.insert(
            "gallery".to_string(),
            db_dir.join(format!("{}.sqlite3", name)),
        );
        dbs.insert(
            "media".to_string(),
            db_dir.join(format!("{}_ytdl.txt", name)),
        );
    }
    // Derive list file locations
    let mut lists = HashMap::new();
    if let Some(lists_dir) = directories.get("lists") {
        lists.insert("master".to_string(), lists_dir.join("watch.txt"));
        lists.insert("push".to_string(), lists_dir.join("instant.txt"));
    }
    if let Some(cache_dir) = directories.get("cache") {
        lists.insert("instagram".to_string(), cache_dir.join("instagram.txt"));
        lists.insert("kemono".to_string(), cache_dir.join("kemono.txt"));
        lists.insert("main".to_string(), cache_dir.join("main.txt"));
    }
    Ok(Self {
        config: merged,
        name,
        sleep,
        directories,
        dbs,
        lists,
    })
}
/// Prepares the on-disk layout for this user.
///
/// The cache directory, when configured, is passed to `clean_cache`
/// (its outcome is deliberately ignored) and created again. Every
/// configured directory is then created, and the database files plus
/// the "master" and "push" lists are created when they are not
/// already regular files.
///
/// # Errors
///
/// Any failure to create a directory or a file is returned to the
/// caller.
pub fn create_directories(&self) -> Result<()> {
    if let Some(cache) = self.directories.get("cache") {
        // Best effort: a failed clean-up must not abort the run.
        let _ = clean_cache(cache);
        fs::create_dir_all(cache)?;
    }
    self.directories
        .values()
        .try_for_each(|dir| fs::create_dir_all(dir))?;
    // Report a lists directory that is still absent at this point.
    let lists_missing = self
        .directories
        .get("lists")
        .map_or(false, |dir| !dir.is_dir());
    if lists_missing {
        error!("Lists directory for user {} doesn't exist", self.name);
    }
    // Databases first, then the master and push lists: create each
    // file that does not exist yet.
    let list_files = ["master", "push"]
        .into_iter()
        .filter_map(|key| self.lists.get(key));
    for path in self.dbs.values().chain(list_files) {
        if !path.is_file() {
            File::create(path)?;
        }
    }
    Ok(())
}
/// Adds `line` to the list file registered under `name`.
///
/// The write itself is delegated to `append_line`, which is expected
/// to take care of the trailing newline.
///
/// # Errors
///
/// Fails when `name` is not a key of the `lists` map, or when
/// `append_line` reports an error.
pub fn append_list(&self, name: &str, line: &str) -> Result<()> {
    match self.lists.get(name) {
        Some(target) => append_line(target, line),
        None => Err(anyhow!("Unknown list {} for user {}", name, self.name)),
    }
}
/// Writes a link into its appropriate cache list based on simple
/// pattern matching. See the original Python implementation for
/// category definitions. This method uses guard clauses to keep
/// the matching logic obvious.
fn append_cache_list(&self, line: &str) -> Result<()> {
let lower = line.to_lowercase();
if lower.contains('x') {
return self.append_list("main", &validate_x_link(line));
}
if lower.contains("kemono.party") {
return self.append_list("kemono", line);
}
if lower.contains("instagram") {
return self.append_list("instagram", line);
}
// default case
self.append_list("main", line)
}
/// Reads the master list, shuffles it and distributes its entries
/// over the per-site cache lists.
///
/// The directory layout is (re)created first via
/// `create_directories`. Empty lines are skipped; every other line
/// is handed to `append_cache_list` in shuffled order. Duplicate
/// lines are not filtered out.
///
/// # Errors
///
/// Fails when the directories cannot be prepared, when no "master"
/// list is registered, when the master list cannot be read or when
/// writing to a cache list fails.
pub fn list_manager(&self) -> Result<()> {
    self.create_directories()?;
    let master_path = self
        .lists
        .get("master")
        .ok_or_else(|| anyhow!("Master list missing for user {}", self.name))?;
    let mut master_content = read_lines(master_path)?;
    // Shuffle so the download order differs between runs. The
    // generator is seeded from system entropy, so the order is not
    // reproducible. `from_entropy` belongs to the `SeedableRng`
    // trait, which is named in the call path because it is not
    // imported at file level.
    let mut rng: rand::rngs::SmallRng = rand::SeedableRng::from_entropy();
    master_content.shuffle(&mut rng);
    master_content
        .iter()
        .filter(|line| !line.is_empty())
        .try_for_each(|line| self.append_cache_list(line))
}
/// Adds a link to the master list if it is not already present.
/// Normalisation of the link is performed via `parse_link` before
/// the check. Duplicates are logged and ignored.
pub fn save_link(&self, link: &str) -> Result<()> {
let master_path = self
.lists
.get("master")
.ok_or_else(|| anyhow!("Master list missing for user {}", self.name))?;
let contents = fs::read_to_string(master_path).unwrap_or_default().to_lowercase();
let fixed = parse_link(link);
if contents.contains(&fixed.to_lowercase()) {
info!("Gallery repeated, not saving");
return Ok(());
}
info!("New gallery, saving");
self.append_list("master", &fixed)
}
}