download_rust init
parent ee6c1f367f
commit 6deea63497

31 src/download_rust/Cargo.toml Normal file
@@ -0,0 +1,31 @@
##! Cargo.toml
#
# This Cargo manifest defines the Rust version of the jawz download
# manager. It exposes a single binary named `rust_downloader` and
# pulls in a handful of third-party crates to mirror the features of
# the original Python implementation. The chosen dependencies
# provide command line parsing (clap), configuration loading
# (serde/serde_yaml), regular expressions (regex), home directory
# discovery (dirs), shuffling (rand), logging (log/env_logger) and
# convenient error handling (anyhow). Versions are pegged to
# relatively conservative releases so the project will build cleanly
# against the NixOS 25.05 channel.

[package]
name = "rust_downloader"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "4.4", features = ["derive"] }
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
regex = "1.10"
dirs = "5.0"
rand = { version = "0.8", features = ["std", "small_rng"] }
log = "0.4"
env_logger = "0.10"
once_cell = "1.17"
anyhow = "1.0"
97 src/download_rust/src/args.rs Normal file
@@ -0,0 +1,97 @@
//! Command line argument definitions.
//!
//! This module defines the [`Cli`] structure which holds the
//! command line arguments accepted by the application. It uses
//! [`clap`] derive macros to declare flags, options and positional
//! parameters and enforces compile time validation for the
//! enumerated scrapper types. A `--dry-run` flag has been added to
//! allow simulating the run without executing any external
//! commands.

use clap::{ArgAction, Parser, ValueEnum};

/// The supported scraper categories. These values mirror the
/// behaviour of the original Python script and are used to select
/// which downstream logic executes. See `scrapper_manager` in
/// [`crate::download`] for details.
#[derive(ValueEnum, Clone, Debug, PartialEq, Eq)]
pub enum Scrapper {
    /// Download from the user's push list
    Push,
    /// Download from the user's main list
    Main,
    /// Download from the user's Instagram list
    Instagram,
    /// Download from the user's Kemono list
    Kemono,
    /// Download from the comic list
    Comic,
    /// Download from the manga list
    Manga,
    /// Download from the webcomic list
    Webcomic,
}

/// Command line interface for the downloader. Deriving [`Parser`]
/// automatically generates argument parsing and help output. The
/// fields directly correspond to the command line options of the
/// original Python implementation with a few refinements:
///
/// * `scrapper` is optional and uses the [`Scrapper`] enum for
///   compile time validation.
/// * `user` defaults to "everyone" when omitted.
/// * `input` accepts one or more strings and may be specified
///   multiple times on the command line.
/// * Flags use Rust style booleans rather than inverted names; `no
///   archive` and `no skip` have been inverted into `flag_archive`
///   and `flag_skip` with sensible defaults.
/// * A `--dry-run` flag has been introduced to simulate execution
///   without running external commands.
/// * Instagram `post_type` values default to all supported types.
#[derive(Parser, Debug, Clone)]
#[command(name = "Downloader", about = "Download images, galleries and videos from a wide array of websites.")]
pub struct Cli {
    /// Selects the scraper to use. When omitted the program
    /// interprets input links via `--input` instead.
    #[arg(value_enum, index = 1)]
    pub scrapper: Option<Scrapper>,

    /// Selects the personal user list to process. Defaults to
    /// "everyone" which processes all configured users.
    #[arg(short = 'u', long = "user", default_value = "everyone")]
    pub user: String,

    /// Downloads the provided links immediately instead of using a
    /// preconfigured list. May be specified multiple times.
    #[arg(short = 'i', long = "input", num_args = 1.., action = ArgAction::Append)]
    pub input: Vec<String>,

    /// Prints a numbered list of links and prompts for a selection.
    #[arg(short = 'l', long = "list", action = ArgAction::SetTrue, default_value_t = false)]
    pub flag_list: bool,

    /// Enables archiving of downloads to prevent duplicates. Use
    /// `--no-archive` to disable.
    #[arg(long = "no-archive", action = ArgAction::SetFalse, default_value_t = true)]
    pub flag_archive: bool,

    /// Skips already downloaded items when true. Use `--no-skip`
    /// to download entire galleries.
    #[arg(long = "no-skip", action = ArgAction::SetFalse, default_value_t = true)]
    pub flag_skip: bool,

    /// Prints generated commands in addition to executing them.
    #[arg(short = 'v', long = "verbose", action = ArgAction::SetTrue, default_value_t = false)]
    pub flag_verbose: bool,

    /// Performs a dry run. Commands will be printed but never
    /// executed. This flag takes precedence over `--verbose`.
    #[arg(long = "dry-run", action = ArgAction::SetTrue, default_value_t = false)]
    pub flag_dry_run: bool,

    /// Filters Instagram posts by type. When multiple values are
    /// provided they will be joined by commas. The default
    /// includes all supported types.
    #[arg(short = 't', long = "type-post", num_args = 1.., action = ArgAction::Append, default_values_t = vec![String::from("posts"), String::from("reels"), String::from("stories"), String::from("highlights"), String::from("avatar")])]
    pub post_type: Vec<String>,
}
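The derive above is easy to sanity-check by parsing a synthetic argument vector. The following unit-test sketch is illustrative only (it is not part of this commit) and assumes it sits at the bottom of args.rs; the argument values are made up.

#[cfg(test)]
mod tests {
    // Illustrative only — not part of this commit.
    use super::{Cli, Scrapper};
    use clap::Parser;

    #[test]
    fn parses_scrapper_and_flags() {
        // Positional scrapper plus a user override and a dry run.
        let cli = Cli::parse_from(["downloader", "main", "--user", "jawz", "--dry-run"]);
        assert_eq!(cli.scrapper, Some(Scrapper::Main));
        assert_eq!(cli.user, "jawz");
        assert!(cli.flag_dry_run);
        // `--no-archive` flips the default `flag_archive = true`.
        let cli = Cli::parse_from(["downloader", "--no-archive", "-i", "https://example.com/gallery"]);
        assert!(!cli.flag_archive);
        assert_eq!(cli.input, vec!["https://example.com/gallery"]);
    }
}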
44 src/download_rust/src/config.rs Normal file
@@ -0,0 +1,44 @@
//! Configuration handling.
//!
//! This module is responsible for loading the YAML configuration
//! expected by the downloader. The configuration is read from
//! `~/.config/jawz/config.yaml` and deserialised into a
//! [`serde_yaml::Value`]. Consumers can then index into the value
//! to pull out fields as needed. A global constant is not used in
//! order to avoid surprises during testing and to make error
//! propagation explicit.

use anyhow::{anyhow, Context, Result};
use dirs::home_dir;
use serde_yaml::Value;
use std::fs;
use std::path::PathBuf;

/// Loads the configuration file from the user's home directory. The
/// expected location is `$HOME/.config/jawz/config.yaml`. If the
/// file cannot be read or parsed a descriptive error is returned.
pub fn load_config_variables() -> Result<Value> {
    let home = home_dir().ok_or_else(|| anyhow!("Could not determine home directory"))?;
    let path: PathBuf = [home.to_str().unwrap_or(""), ".config/jawz/config.yaml"]
        .iter()
        .collect::<PathBuf>();
    let content = fs::read_to_string(&path)
        .with_context(|| format!("Failed to read configuration file from {}", path.display()))?;
    let cfg: Value = serde_yaml::from_str(&content)
        .with_context(|| format!("Failed to parse YAML in {}", path.display()))?;
    Ok(cfg)
}

/// Finds the index of a user by name. Returns `None` if no match
/// exists or if the configuration does not contain a `users` list.
pub fn get_user_index(name: &str, cfg: &Value) -> Option<usize> {
    cfg.get("users")?.as_sequence()?.iter().enumerate().find_map(|(i, user)| {
        let map = user.as_mapping()?;
        let n = map.get(&Value::String("name".into()))?.as_str()?;
        if n.eq_ignore_ascii_case(name) {
            Some(i)
        } else {
            None
        }
    })
}
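For reference, a minimal sketch of the YAML shape `get_user_index` expects — a top-level `users` sequence whose entries carry a `name` key. Illustrative only, not part of this commit; the user names are made up.

#[cfg(test)]
mod tests {
    // Illustrative only — not part of this commit.
    use super::get_user_index;
    use serde_yaml::Value;

    #[test]
    fn finds_user_case_insensitively() {
        let cfg: Value = serde_yaml::from_str(
            "users:\n  - name: jawz\n    sleep: 5\n  - name: alice\n",
        )
        .unwrap();
        assert_eq!(get_user_index("Alice", &cfg), Some(1));
        assert_eq!(get_user_index("nobody", &cfg), None);
    }
}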
474 src/download_rust/src/download.rs Normal file
@@ -0,0 +1,474 @@
//! High level download orchestration.
//!
//! This module coordinates the various helper modules to mirror the
//! behaviour of the original Python downloader. It exposes a
//! `run` function which is called from `main.rs` with the parsed
//! command line arguments and the loaded configuration. Where
//! possible iterators and guard clauses replace explicit loops to
//! improve clarity.

use crate::args::{Cli, Scrapper};
use crate::config::{get_user_index, load_config_variables};
use crate::functions::{append_line, list_lines, parse_link, quote, run as run_command};
use crate::gallery::Gallery;
use crate::user::User;
use anyhow::{anyhow, Context, Result};
use log::{debug, info};
use rand::seq::SliceRandom;
use regex::Regex;
use serde_yaml::Value;
use std::fs;
use std::io::{self, Write};
use std::path::Path;

/// A simple struct representing a video download. It collects
/// command line arguments required to build a `yt-dlp` or
/// `stream-dl` command. The `database` field should be pre-quoted;
/// `dest` is quoted when the command string is assembled.
#[derive(Default, Debug, Clone)]
struct Video {
    use_archive: bool,
    link: String,
    dest: String,
    database: String,
}

/// Constructs the `-o include=...` argument for Instagram links.
/// When the provided link does not contain "instagram" an empty
/// string is returned. When multiple post types are supplied they
/// are joined with commas.
fn parse_instagram(link: &str, cli: &Cli) -> String {
    if !link.contains("instagram") {
        return String::new();
    }
    if cli.post_type.is_empty() {
        return String::new();
    }
    let joined = if cli.post_type.len() > 1 {
        cli.post_type.join(",")
    } else {
        cli.post_type.first().cloned().unwrap_or_default()
    };
    format!(" -o include={}", quote(&joined))
}

/// Builds a command string for video downloads. The logic mirrors
/// the original Python `video_command` function. See the source
/// comments for more details. Logging of the command and link is
/// performed at the call site.
fn video_command(video: &Video) -> String {
    let rgx_yt = Regex::new(r"https://(?:www\.)?youtube|https://youtu.be").expect("invalid regex");
    let rgx_music = Regex::new(r"https://music\.youtube.*").expect("invalid regex");
    // Handle special case for chaturbate: use stream-dl on the last
    // path component only.
    if video.link.contains("chaturbate") {
        let slug = video
            .link
            .trim_end_matches('/')
            .rsplit('/')
            .next()
            .unwrap_or("");
        return format!("stream-dl {}", slug);
    }
    let mut command = String::from("yt-dlp");
    if rgx_yt.is_match(&video.link) {
        command.push_str(" --embed-subs --embed-thumbnail");
        command.push_str(" --embed-metadata --embed-chapters");
        command.push_str(&format!(" -o {}", quote(&(video.dest.clone() + "/%(title)s.%(ext)s"))));
    } else if rgx_music.is_match(&video.link) {
        if video.use_archive {
            command.push_str(&format!(" --download-archive {}", video.database));
        }
        command.push_str(" --no-playlist --newline -x");
        command.push_str(" --audio-format best --add-metadata --audio-quality 0 -o");
        command.push_str(&format!(" {}", quote(&(video.dest.clone() + "/%(title)s.%(ext)s"))));
    } else {
        command.push_str(&format!(" -f mp4 -o {}", quote(&(video.dest.clone() + "/%(title)s.%(ext)s"))));
    }
    format!("{} {}", command, quote(&video.link))
}

/// Processes a gallery list (main, instagram or kemono) for a single
/// user. Builds and executes the appropriate `gallery-dl` command.
fn parse_gallery(list_name: &str, user: &User, cli: &Cli, cfg: &Value) -> Result<()> {
    let mut gallery = Gallery::default();
    gallery.archive = cli.flag_archive;
    // If skip is disabled (`flag_skip` false) then we enable skip
    // through an option on gallery-dl. Otherwise we leave it empty.
    gallery.skip_arg = if cli.flag_skip { String::new() } else { " -o skip=true".to_string() };
    gallery.dest = Some("download".to_string());
    gallery.list = Some(list_name.to_string());
    gallery.opt_args = parse_instagram(list_name, cli);
    gallery.generate_command(Some(user), cfg, false)?;
    gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
    Ok(())
}

/// Downloads manga or comics based on the provided category. The
/// `skip_arg` string contains the chapter range options and is
/// assembled by the caller. Only lines matching the category are
/// downloaded.
fn comic_manager(skip_arg: &str, category: &Scrapper, cfg: &Value, cli: &Cli) -> Result<()> {
    let comic = cfg
        .get("comic")
        .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
    let list_path = comic
        .get("comic-list")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'comic-list' in comic configuration"))?;
    let content = fs::read_to_string(list_path)
        .with_context(|| format!("Failed to read comic list from {}", list_path))?;
    let pattern = match category {
        Scrapper::Manga => "manga|webtoon",
        Scrapper::Comic => "readcomiconline",
        _ => return Err(anyhow!("Invalid category for comic manager")),
    };
    let re_cat = Regex::new(pattern).expect("Failed to compile comic regex");
    content
        .lines()
        .filter(|line| re_cat.is_match(line))
        .map(str::to_string)
        .try_for_each(|link| {
            let mut gallery = Gallery::default();
            gallery.archive = cli.flag_archive;
            gallery.skip_arg = skip_arg.to_string();
            gallery.link = Some(link.trim().to_string());
            // Generate a comic command; pass `None` for user since
            // comic downloads resolve their own directories
            gallery.generate_command(None, cfg, true)?;
            gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
            // Save the comic link to the master list
            save_comic(&link, cfg)?;
            Ok::<(), anyhow::Error>(())
        })?;
    Ok(())
}

/// Prints a numbered list of webcomics to stdout and returns the
/// selected index. The user is prompted via stdin. Errors during
/// parsing or invalid selections are propagated.
fn print_webcomics(webcomics: &Value) -> Result<usize> {
    let list = webcomics
        .get("webcomics")
        .and_then(|v| v.as_sequence())
        .ok_or_else(|| anyhow!("webcomic-list missing 'webcomics' array"))?;
    for (index, entry) in list.iter().enumerate() {
        let name = entry
            .get("name")
            .and_then(|v| v.as_str())
            .unwrap_or("<unknown>");
        println!("{}", list_lines(index, name));
    }
    print!("Select a webcomic: ");
    io::stdout().flush()?;
    let mut input = String::new();
    io::stdin().read_line(&mut input)?;
    let choice: usize = input.trim().parse()?;
    if choice >= list.len() {
        return Err(anyhow!("Invalid selection {}", choice));
    }
    Ok(choice)
}

/// Handles the webcomic download flow. The configuration file
/// referenced by `comic.webcomic-list` is parsed and the user is
/// prompted to choose which webcomic to download. A `webcomix`
/// command is then assembled and executed.
fn webcomic_manager(cfg: &Value, cli: &Cli) -> Result<()> {
    let comic = cfg
        .get("comic")
        .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
    let list_path = comic
        .get("webcomic-list")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'webcomic-list' in comic configuration"))?;
    let webcomics_yaml: Value = serde_yaml::from_str(&fs::read_to_string(list_path)?)
        .with_context(|| format!("Failed to parse webcomic list at {}", list_path))?;
    let idx = print_webcomics(&webcomics_yaml)?;
    let list = webcomics_yaml
        .get("webcomics")
        .and_then(|v| v.as_sequence())
        .ok_or_else(|| anyhow!("webcomic-list missing 'webcomics' array"))?;
    let entry = list.get(idx).ok_or_else(|| anyhow!("Invalid webcomic index"))?;
    let rating = entry
        .get("type")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'type' in webcomic entry"))?;
    let global = webcomics_yaml
        .get("global")
        .and_then(|v| v.as_mapping())
        .ok_or_else(|| anyhow!("Webcomic list missing 'global' section"))?;
    let dest_key = format!("{}_directory", rating);
    let dest = global
        .get(&Value::String(dest_key.clone()))
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing '{}' in webcomic global", dest_key))?;
    let name = entry
        .get("name")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'name' in webcomic entry"))?;
    let link = entry
        .get("url")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'url' in webcomic entry"))?;
    let nxt_code = entry
        .get("next_code")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'next_code' in webcomic entry"))?;
    let img_code = entry
        .get("image_code")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'image_code' in webcomic entry"))?;
    info!("The webcomic is {}", dest);
    let mut command = format!("cd {} && webcomix custom {}", quote(dest), quote(name));
    command.push_str(" --start-url ");
    command.push_str(&quote(link));
    command.push_str(&format!(" --next-page-xpath={}", quote(nxt_code)));
    command.push_str(&format!(" --image-xpath={}", quote(img_code)));
    command.push_str(" -y --cbz");
    run_command(&command, cli.flag_dry_run, cli.flag_verbose)
}

/// Appends a comic or manga link to the global comic list if it is
/// not already present. Links are normalised via `parse_link` to
/// avoid duplicates. Logs a message when skipping duplicates.
fn save_comic(link: &str, cfg: &Value) -> Result<()> {
    let comic = cfg
        .get("comic")
        .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
    let list_path = comic
        .get("comic-list")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("Missing 'comic-list' in comic configuration"))?;
    let content = fs::read_to_string(list_path).unwrap_or_default().to_lowercase();
    let fixed = parse_link(link).to_lowercase();
    if content.contains(&fixed) {
        info!("Graphic novel repeated, not saving");
        return Ok(());
    }
    info!("New graphic novel, saving");
    append_line(Path::new(list_path), link)
}

/// Handles the push list for a user. Links are classified into
/// gallery, comic, video or other categories using regular
/// expressions. Each category is processed appropriately. After
/// processing the push list is truncated. Logging mirrors the
/// original script.
fn push_manager(user: &User, cli: &Cli, cfg: &Value) -> Result<()> {
    // Regular expressions used to classify links. These patterns
    // mirror the ones in the original Python implementation.
    let rgx_gallery = Regex::new(
        r"(?x)
        (x\.com/\w+((?=.*media)|(?!.*status)))
        |(men\.wikifeet)
        |(furaffinity\.net/user/)
        |((deviantart\.com/\w+(?!.*/art/)))
        |(furaffinity\.net/gallery/)
        |(furaffinity\.net/scraps/)
        |(furaffinity\.net/favorites/)
        |(instagram.com(?!/p/)/\w+)
        |(e621\.net((?=/post/)|(?!/posts/)))
        |(flickr\.com/photos/\w+/(?!\d+))
        |(tumblr\.com(?!/post/))
        |(kemono\.party/(fanbox|gumroad|patreon)(?!/user/\d+/post))
        |(blogspot\.com(?!/))
        |(rule34\.paheal\.net/post/(?!view))
        |(rule34\.xxx/index\.php\?page=post&s=(?!view))
        |(pixiv\.net/(en/)?((?=users)|(?!artwork)))
        |(fanbox\.cc/@\w+(?!.*posts/\d+))
        |(reddit\.com/(user|u))
        |(baraag\.net/((@\w+)|(?!/\d+)))
        |(pinterest\.com/(?!pin/\d+))
        |(redgifs\.com/(users|u|(?!watch)))
        |(bsky\.app/profile/(?!.*?/post/))
        ",
    )
    .expect("Failed to compile gallery regex");
    let rgx_video = Regex::new(r"youtu\.be|youtube|pornhub|xtube|xvideos|chaturbate").expect("Failed to compile video regex");
    let rgx_comic = Regex::new(r"readcomiconline|mangahere|mangadex|webtoons|manganato").expect("Failed to compile comic regex");

    // Read the push list into memory
    let push_path = user
        .lists
        .get("push")
        .ok_or_else(|| anyhow!("Push list missing for user {}", user.name))?;
    let lines = fs::read_to_string(push_path).unwrap_or_default();
    // Temporary storage for categories
    let mut links_galleries: Vec<String> = Vec::new();
    let mut links_videos: Vec<String> = Vec::new();
    let mut links_comics: Vec<String> = Vec::new();
    let mut links_other: Vec<String> = Vec::new();
    // Classify each link exactly once
    for line in lines.lines().map(str::trim).filter(|l| !l.is_empty()) {
        if rgx_gallery.is_match(line) {
            links_galleries.push(line.to_string());
        } else if rgx_video.is_match(line) {
            links_videos.push(line.to_string());
        } else if rgx_comic.is_match(line) {
            links_comics.push(line.to_string());
        } else {
            links_other.push(line.to_string());
        }
    }
    // Process gallery links
    for link in &links_galleries {
        let mut gallery = Gallery::default();
        gallery.archive = cli.flag_archive;
        gallery.skip_arg = if cli.flag_skip { String::new() } else { " -o skip=true".to_string() };
        gallery.link = Some(parse_link(link));
        gallery.dest = Some("download".to_string());
        gallery.opt_args = parse_instagram(link, cli);
        gallery.generate_command(Some(user), cfg, false)?;
        gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
        // Save link into master list to prevent duplicates
        user.save_link(link)?;
    }
    // Process comic links
    for link in &links_comics {
        let skip_arg = if !cli.flag_skip {
            "".to_string()
        } else if link.contains("readcomiconline") {
            " --chapter-range 1".to_string()
        } else {
            " --chapter-range 1-5".to_string()
        };
        let mut gallery = Gallery::default();
        gallery.archive = cli.flag_archive;
        gallery.skip_arg = skip_arg;
        gallery.link = Some(link.to_string());
        gallery.generate_command(None, cfg, true)?;
        gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
        save_comic(link, cfg)?;
    }
    // Process video links
    for link in &links_videos {
        let mut video = Video::default();
        video.use_archive = cli.flag_archive;
        video.link = link.to_string();
        // Use the media directory for the user
        if let Some(media_dir) = user.directories.get("media") {
            video.dest = media_dir.to_string_lossy().to_string();
        } else {
            video.dest = String::new();
        }
        video.database = quote(
            user
                .dbs
                .get("media")
                .map(|p| p.to_string_lossy())
                .unwrap_or_default()
                .as_ref(),
        );
        let cmd = video_command(&video);
        info!("{} {}", cmd, link);
        run_command(&cmd, cli.flag_dry_run, cli.flag_verbose)?;
    }
    // Process other links
    for link in &links_other {
        info!("Other type of download {}", link);
        let mut gallery = Gallery::default();
        gallery.archive = false;
        gallery.skip_arg = " -o directory='[]'".to_string();
        gallery.link = Some(link.to_string());
        gallery.dest = Some("push".to_string());
        gallery.generate_command(Some(user), cfg, false)?;
        gallery.run_command(cli.flag_dry_run, cli.flag_verbose)?;
    }
    // Truncate the push list
    fs::write(push_path, "")?;
    Ok(())
}

/// Manages the selected scraper for a single user. Delegates to
/// specialised functions based on the scraper type. The user's
/// `list_manager` is invoked up front to prepare per‑site lists.
fn scrapper_manager(user: &User, scrapper: &Scrapper, cli: &Cli, cfg: &Value) -> Result<()> {
    user.list_manager()?;
    match scrapper {
        Scrapper::Main => parse_gallery("main", user, cli, cfg),
        Scrapper::Instagram => parse_gallery("instagram", user, cli, cfg),
        Scrapper::Kemono => parse_gallery("kemono", user, cli, cfg),
        Scrapper::Push => push_manager(user, cli, cfg),
        Scrapper::Comic => {
            let skip_arg = if cli.flag_skip {
                " --chapter-range 1"
            } else {
                ""
            };
            comic_manager(skip_arg, scrapper, cfg, cli)
        }
        Scrapper::Manga => {
            let skip_arg = if cli.flag_skip {
                " --chapter-range 1-5"
            } else {
                ""
            };
            comic_manager(skip_arg, scrapper, cfg, cli)
        }
        Scrapper::Webcomic => webcomic_manager(cfg, cli),
    }
}

/// Invokes the selected scraper for every configured user. Only
/// scrapers which operate on per‑user lists (main, instagram,
/// kemono and push) are dispatched here by `run`.
fn scrap_everyone(scrapper: &Scrapper, cli: &Cli, cfg: &Value) -> Result<()> {
    let users = cfg
        .get("users")
        .and_then(|v| v.as_sequence())
        .ok_or_else(|| anyhow!("No users configured"))?;
    for user_entry in users.iter() {
        let name = user_entry
            .get("name")
            .and_then(|v| v.as_str())
            .unwrap_or("<unknown>");
        let idx = get_user_index(name, cfg)
            .ok_or_else(|| anyhow!("User '{}' missing from configuration", name))?;
        let user = User::new(idx, cfg)?;
        info!("Scraping {:?} for {}", scrapper, name);
        scrapper_manager(&user, scrapper, cli, cfg)?;
    }
    Ok(())
}

/// Entry point for the download module. Decides how to dispatch
/// based on the presence or absence of a scrapper argument and
/// whether direct input links were provided. This function is
/// designed to be called from `main`.
pub fn run(cli: Cli, cfg: Value) -> Result<()> {
    if let Some(scrapper) = &cli.scrapper {
        let is_shared = matches!(scrapper, Scrapper::Push | Scrapper::Main | Scrapper::Instagram | Scrapper::Kemono);
        if cli.user.eq_ignore_ascii_case("everyone") && is_shared {
            return scrap_everyone(scrapper, &cli, &cfg);
        }
        // Otherwise operate on a single user
        let user_name = &cli.user;
        let idx = get_user_index(user_name, &cfg)
            .ok_or_else(|| anyhow!("Unknown user '{}'", user_name))?;
        let user = User::new(idx, &cfg)?;
        return scrapper_manager(&user, scrapper, &cli, &cfg);
    }
    // No scrapper provided, process input links if present
    if !cli.input.is_empty() {
        // Determine which user should handle the push list. When
        // called as an admin (`everyone` or `jawz`) we use the
        // configuration for the user named "jawz". Otherwise we
        // operate on the specified user.
        let target = if cli.user.eq_ignore_ascii_case("everyone") || cli.user.eq_ignore_ascii_case("jawz") {
            "jawz"
        } else {
            cli.user.as_str()
        };
        let idx = get_user_index(target, &cfg)
            .ok_or_else(|| anyhow!("Unknown user '{}'", target))?;
        let user = User::new(idx, &cfg)?;
        // Append each provided link to the user's push list
        for link in cli.input.iter() {
            user.append_list("push", &parse_link(link))?;
        }
        // Process the push list immediately
        return push_manager(&user, &cli, &cfg);
    }
    Err(anyhow!("No scrapper selected and no input links provided"))
}
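Because `video_command` is a pure string builder, it can be exercised without touching the network. An illustrative test sketch (not part of this commit; URL and paths are made up) that could sit at the bottom of download.rs:

#[cfg(test)]
mod tests {
    // Illustrative only — not part of this commit.
    use super::{video_command, Video};

    #[test]
    fn youtube_links_get_embedding_flags() {
        let video = Video {
            use_archive: false,
            link: "https://www.youtube.com/watch?v=dQw4w9WgXcQ".to_string(),
            dest: "/tmp/videos".to_string(),
            database: String::new(),
        };
        let cmd = video_command(&video);
        assert!(cmd.starts_with("yt-dlp"));
        assert!(cmd.contains("--embed-subs"));
        assert!(cmd.contains("/tmp/videos/%(title)s.%(ext)s"));
    }
}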
127 src/download_rust/src/functions.rs Normal file
@@ -0,0 +1,127 @@
//! Miscellaneous helper functions.
//!
//! This module contains a variety of small helpers used throughout
//! the downloader. Where appropriate iterators and guard clauses are
//! employed to keep the code concise and expressive. Error
//! conditions are reported via [`anyhow::Error`].

use anyhow::{anyhow, Result};
use log::{debug, info};
use regex::Regex;
use std::fs::{self, File};
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::Command;

/// Ensures that a Twitter/X link ends in `/media` if it does not
/// already. The check is simple: if the string ends with
/// `"/media"` then the original string is returned, otherwise
/// `"/media"` is appended.
pub fn validate_x_link(line: &str) -> String {
    if line.trim_end().ends_with("/media") {
        line.to_string()
    } else {
        format!("{}/media", line.trim_end_matches('/'))
    }
}

/// Normalises certain links. At present this function only ensures
/// that X/Twitter links end with `/media`. If the pattern does not
/// match the link is returned unchanged.
pub fn parse_link(link: &str) -> String {
    let re = Regex::new(
        r"(?x)
        (?:x\.com/\w+/?(?!.*status))
        ",
    )
    .expect("Failed to compile regex");
    if re.is_match(link) {
        let fixed = validate_x_link(link);
        debug!("Processed link {}", fixed);
        fixed
    } else {
        debug!("No modifications needed for the link {}", link);
        link.to_string()
    }
}

/// Surrounds a string with double quotes. This mirrors the Python
/// `quote` helper and is useful when constructing shell commands.
pub fn quote(s: &str) -> String {
    format!("\"{}\"", s)
}

/// Recursively deletes all files and directories inside `directory` and
/// finally removes the directory itself. Missing directories are
/// ignored. Any failure during deletion results in an error.
pub fn clean_cache(directory: &Path) -> Result<()> {
    if !directory.is_dir() {
        return Ok(());
    }
    for entry in fs::read_dir(directory)? {
        let entry = entry?;
        let path = entry.path();
        if path.is_file() {
            fs::remove_file(&path)?;
        } else if path.is_dir() {
            fs::remove_dir_all(&path)?;
        }
    }
    fs::remove_dir(directory)?;
    Ok(())
}

/// Runs a shell command. When `dry_run` is true the command is
/// printed and execution is skipped. When `verbose` is true the
/// command is printed prior to execution. The command is executed
/// via the system shell so that complex pipelines are permitted.
pub fn run(command: &str, dry_run: bool, verbose: bool) -> Result<()> {
    if dry_run {
        println!("{}", command);
        return Ok(());
    }
    if verbose {
        println!("{}", command);
    }
    // Execute through the system shell. Use `sh -c` so that the
    // command string is interpreted as a complete shell command.
    let status = Command::new("sh").arg("-c").arg(command).status()?;
    if !status.success() {
        return Err(anyhow!("Command failed with status {}: {}", status, command));
    }
    Ok(())
}

/// Formats a numbered list entry. Useful when printing selections to
/// the user.
pub fn list_lines(index: usize, line: &str) -> String {
    format!("{}) {}", index, line)
}

/// Reads all non-empty lines from a file, trimming trailing
/// whitespace. Returns a vector of the lines. When the file
/// does not exist an empty vector is returned. Errors during file
/// access are propagated.
pub fn read_lines(file: &Path) -> Result<Vec<String>> {
    if !file.is_file() {
        return Ok(vec![]);
    }
    let file = File::open(file)?;
    let buf = io::BufReader::new(file);
    let lines: Vec<String> = buf
        .lines()
        .filter_map(|l| l.ok())
        .map(|l| l.trim_end().to_string())
        .filter(|l| !l.is_empty())
        .collect();
    Ok(lines)
}

/// Writes a string to a file, creating the file if necessary and
/// appending a newline. Errors are propagated.
pub fn append_line(file: &Path, line: &str) -> Result<()> {
    let mut f = fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(file)?;
    writeln!(f, "{}", line)?;
    Ok(())
}
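Several of these helpers are trivially testable in isolation. An illustrative sketch (not part of this commit; the example links are made up):

#[cfg(test)]
mod tests {
    // Illustrative only — not part of this commit.
    use super::{list_lines, quote, validate_x_link};

    #[test]
    fn small_helpers() {
        assert_eq!(quote("a b"), "\"a b\"");
        assert_eq!(list_lines(2, "https://example.com"), "2) https://example.com");
        // `/media` is appended exactly once, regardless of a trailing slash.
        assert_eq!(validate_x_link("https://x.com/someone/"), "https://x.com/someone/media");
        assert_eq!(validate_x_link("https://x.com/someone/media"), "https://x.com/someone/media");
    }
}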
166 src/download_rust/src/gallery.rs Normal file
@@ -0,0 +1,166 @@
//! Gallery command generator.
//!
//! The [`Gallery`] struct encapsulates the state required to build a
//! `gallery-dl` command. It exposes a method to generate the
//! command string based on user configuration and whether the
//! download is for a comic. Guard clauses are used extensively to
//! keep the logic easy to follow.

use crate::config::load_config_variables;
use crate::functions::quote;
use crate::user::User;
use anyhow::{anyhow, Context, Result};
use serde_yaml::Value;

/// Represents a gallery download request. Fields are mutable so
/// callers can configure the desired behaviour before generating the
/// command string.
#[derive(Default, Debug, Clone)]
pub struct Gallery {
    /// Whether to append a download archive to prevent duplicates
    pub archive: bool,
    /// Optional skip argument string. A non‑empty string starting
    /// with a space will be appended verbatim to the command.
    pub skip_arg: String,
    /// The direct link to download. Mutually exclusive with `list`.
    pub link: Option<String>,
    /// The name of the list to process. Mutually exclusive with
    /// `link`.
    pub list: Option<String>,
    /// The destination folder key (resolved via the user or comic
    /// configuration). Ignored when `is_comic` is true and
    /// `dest` is empty.
    pub dest: Option<String>,
    /// Additional options passed verbatim to `gallery-dl` (e.g.
    /// Instagram filters).
    pub opt_args: String,
    /// The generated command string. This field is populated by
    /// `generate_command` and consumed by `run_command`.
    pub command: String,
}

impl Gallery {
    /// Builds a gallery-dl command based on the current fields. When
    /// `is_comic` is true the destination and archive database are
    /// read from the `comic` section of the configuration and the
    /// provided `user` is ignored. Otherwise the user is used to
    /// determine where to download and which archive to use. The
    /// generated command is stored in `self.command`.
    pub fn generate_command(
        &mut self,
        user: Option<&User>,
        cfg: &Value,
        is_comic: bool,
    ) -> Result<()> {
        // Determine directory, database and queue based on context
        let (directory, database, queue): (String, String, String) = if is_comic {
            let comic = cfg
                .get("comic")
                .ok_or_else(|| anyhow!("Missing 'comic' section in configuration"))?;
            let download_dir = comic
                .get("download-dir")
                .and_then(|v| v.as_str())
                .unwrap_or_default();
            let database = comic
                .get("database")
                .and_then(|v| v.as_str())
                .unwrap_or_default();
            // When a list name is provided for comics look up
            // `${list}-list` in the comic config
            let q = if let Some(list_name) = self.list.as_ref() {
                let key = format!("{}-list", list_name);
                if let Some(v) = comic.get(&Value::String(key.clone())) {
                    if let Some(s) = v.as_str() {
                        quote(s)
                    } else {
                        String::new()
                    }
                } else {
                    String::new()
                }
            } else {
                String::new()
            };
            (
                quote(download_dir),
                quote(database),
                q,
            )
        } else {
            // Non‑comic downloads must have an associated user
            let user = user.ok_or_else(|| anyhow!("User is required for non comic downloads"))?;
            // Destination directory falls back to an empty string
            let dest_key = self.dest.as_deref().unwrap_or("");
            let dir_path = user
                .directories
                .get(dest_key)
                .ok_or_else(|| anyhow!("Unknown destination '{}' for user {}", dest_key, user.name))?;
            let db_path = user
                .dbs
                .get("gallery")
                .ok_or_else(|| anyhow!("Missing gallery database for user {}", user.name))?;
            let q = if let Some(list_name) = self.list.as_ref() {
                if let Some(p) = user.lists.get(list_name) {
                    quote(p.to_string_lossy().as_ref())
                } else {
                    String::new()
                }
            } else {
                String::new()
            };
            (
                quote(dir_path.to_string_lossy().as_ref()),
                quote(db_path.to_string_lossy().as_ref()),
                q,
            )
        };

        // Determine the sleep interval. When a user is provided use
        // their configured value. Otherwise fall back to the first
        // user's value from the configuration or zero when missing.
        let user_sleep: u64 = if let Some(u) = user {
            u.sleep
        } else {
            cfg.get("users")
                .and_then(|u| u.as_sequence())
                .and_then(|seq| seq.get(0))
                .and_then(|v| v.get("sleep"))
                .and_then(|v| v.as_i64())
                .unwrap_or(0) as u64
        };
        let mut cmd = format!("gallery-dl --sleep {}", user_sleep);
        if !self.skip_arg.is_empty() {
            cmd.push_str(&self.skip_arg);
        }
        if is_comic || self.dest.is_some() {
            cmd.push_str(&format!(" --dest {}", directory));
        }
        if self.archive {
            cmd.push_str(&format!(" --download-archive {}", database));
        }
        if !self.opt_args.is_empty() {
            cmd.push_str(&self.opt_args);
        }
        // Append either a direct link or an input file list
        match (&self.link, &self.list) {
            (Some(link), None) if !link.is_empty() => {
                cmd.push(' ');
                cmd.push_str(&quote(link));
            }
            (None, Some(_)) if !queue.is_empty() => {
                cmd.push_str(&format!(" -i {}", queue));
            }
            _ => {}
        }
        self.command = cmd;
        Ok(())
    }

    /// Runs the previously generated command. If `dry_run` is true
    /// the command is printed and not executed. When `verbose` is
    /// true the command is printed prior to execution. The
    /// underlying execution is delegated to [`crate::functions::run`].
    pub fn run_command(&self, dry_run: bool, verbose: bool) -> Result<()> {
        crate::functions::run(&self.command, dry_run, verbose)
    }
}
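The comic path of `generate_command` only needs a `comic` section in the configuration, so it can be exercised with an inline YAML value. Illustrative sketch only, not part of this commit; the paths are made up:

#[cfg(test)]
mod tests {
    // Illustrative only — not part of this commit.
    use super::Gallery;
    use serde_yaml::Value;

    #[test]
    fn builds_a_comic_command() {
        let cfg: Value = serde_yaml::from_str(
            "comic:\n  download-dir: /tmp/comics\n  database: /tmp/comics.sqlite3\n",
        )
        .unwrap();
        let mut gallery = Gallery::default();
        gallery.archive = true;
        gallery.link = Some("https://example.com/comic".to_string());
        gallery.generate_command(None, &cfg, true).unwrap();
        assert!(gallery.command.starts_with("gallery-dl --sleep 0"));
        assert!(gallery.command.contains("--dest \"/tmp/comics\""));
        assert!(gallery.command.contains("--download-archive \"/tmp/comics.sqlite3\""));
    }
}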
33 src/download_rust/src/main.rs Normal file
@@ -0,0 +1,33 @@
//! Program entry point.
//!
//! This module wires together argument parsing, configuration
//! loading, logging initialisation and the high level download
//! orchestration. Errors are propagated via [`anyhow::Result`] and
//! reported to stderr.

mod args;
mod config;
mod download;
mod functions;
mod gallery;
mod user;

use anyhow::Result;
use args::Cli;
use clap::Parser;
use env_logger;

fn main() -> Result<()> {
    // Initialise logging. The logger reads the `RUST_LOG`
    // environment variable; when it is not set only errors are logged.
    env_logger::init();
    // Parse command line arguments
    let cli = Cli::parse();
    // Load configuration from disk
    let cfg = config::load_config_variables()?;
    // Dispatch to the download logic
    if let Err(err) = download::run(cli, cfg) {
        eprintln!("error: {}", err);
    }
    Ok(())
}
246 src/download_rust/src/user.rs Normal file
@@ -0,0 +1,246 @@
//! User management.
//!
//! The `User` struct encapsulates per‑user configuration and
//! filesystem state. It derives its settings from the YAML
//! configuration and provides methods for managing lists, caching
//! directories and avoiding duplicate downloads. Iterators and guard
//! clauses are used throughout to make intent clear.

use crate::config::load_config_variables;
use crate::functions::{append_line, clean_cache, parse_link, read_lines, validate_x_link};
use anyhow::{anyhow, Context, Result};
use log::{error, info};
use rand::{seq::SliceRandom, SeedableRng};
use regex::Regex;
use serde_yaml::{Mapping, Value};
use std::collections::HashMap;
use std::fs::{self, File};
use std::path::{Path, PathBuf};

/// Represents a user and all of the paths and lists associated with
/// that user. The `User` is constructed from the global
/// configuration and an index selecting one of the `users` entries.
pub struct User {
    /// The merged configuration for this user. User specific keys
    /// override global settings.
    pub config: Mapping,
    /// The human readable name of the user.
    pub name: String,
    /// Number of seconds to sleep between operations.
    pub sleep: u64,
    /// Directories keyed by their logical purpose (e.g. "cache",
    /// "lists", "downloads", "media", etc.).
    pub directories: HashMap<String, PathBuf>,
    /// Paths to the databases used for archiving downloads.
    pub dbs: HashMap<String, PathBuf>,
    /// Paths to various list files. See `list_manager` for details.
    pub lists: HashMap<String, PathBuf>,
}

impl User {
    /// Constructs a new user from the given index into the
    /// configuration. Returns an error if the configuration is
    /// malformed.
    pub fn new(index: usize, cfg: &Value) -> Result<Self> {
        let users = cfg
            .get("users")
            .and_then(|u| u.as_sequence())
            .ok_or_else(|| anyhow!("Configuration is missing a 'users' array"))?;
        let user_cfg = users
            .get(index)
            .and_then(|v| v.as_mapping())
            .ok_or_else(|| anyhow!("Invalid user index {}", index))?
            .clone();
        let global_cfg = cfg
            .get("global")
            .and_then(|v| v.as_mapping())
            .ok_or_else(|| anyhow!("Configuration is missing a 'global' map"))?
            .clone();

        // Merge global into user specific settings. User values take
        // precedence. We perform a simple extend on a mutable copy.
        let mut merged: Mapping = global_cfg.clone();
        for (k, v) in user_cfg.iter() {
            merged.insert(k.clone(), v.clone());
        }

        let name = merged
            .get(&Value::String("name".into()))
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow!("User configuration missing 'name'"))?
            .to_string();
        let sleep = merged
            .get(&Value::String("sleep".into()))
            .and_then(|v| v.as_i64())
            .unwrap_or(0) as u64;

        // Build directory map from keys ending with '-dir'. Remove
        // the suffix when storing the key. Convert each value into
        // an absolute PathBuf and append the user name for cache and
        // lists directories to mirror the Python behaviour.
        let mut directories: HashMap<String, PathBuf> = merged
            .iter()
            .filter_map(|(k, v)| {
                let key = k.as_str()?;
                if key.ends_with("-dir") {
                    let dir_name = key.trim_end_matches("-dir");
                    let path_str = v.as_str()?;
                    Some((dir_name.to_string(), PathBuf::from(path_str)))
                } else {
                    None
                }
            })
            .collect();

        // Append user name to cache and lists directories
        if let Some(cache) = directories.get_mut("cache") {
            *cache = cache.join(&name);
        }
        if let Some(lists_dir) = directories.get_mut("lists") {
            *lists_dir = lists_dir.join(&name);
        }

        // Derive database file locations
        let mut dbs = HashMap::new();
        if let Some(db_dir) = directories.get("databases") {
            dbs.insert(
                "gallery".to_string(),
                db_dir.join(format!("{}.sqlite3", name)),
            );
            dbs.insert(
                "media".to_string(),
                db_dir.join(format!("{}_ytdl.txt", name)),
            );
        }

        // Derive list file locations
        let mut lists = HashMap::new();
        if let Some(lists_dir) = directories.get("lists") {
            lists.insert("master".to_string(), lists_dir.join("watch.txt"));
            lists.insert("push".to_string(), lists_dir.join("instant.txt"));
        }
        if let Some(cache_dir) = directories.get("cache") {
            lists.insert("instagram".to_string(), cache_dir.join("instagram.txt"));
            lists.insert("kemono".to_string(), cache_dir.join("kemono.txt"));
            lists.insert("main".to_string(), cache_dir.join("main.txt"));
        }

        Ok(Self {
            config: merged,
            name,
            sleep,
            directories,
            dbs,
            lists,
        })
    }

    /// Creates the necessary directory structure for this user. Any
    /// pre‑existing cache directory is cleared. Missing list and
    /// database files are touched into existence. Errors during
    /// directory manipulation are propagated.
    pub fn create_directories(&self) -> Result<()> {
        // Clear the cache directory if it exists
        if let Some(cache_dir) = self.directories.get("cache") {
            let _ = clean_cache(cache_dir);
            // Recreate cache directory after cleaning
            fs::create_dir_all(cache_dir)?;
        }

        // Create all directories
        for dir in self.directories.values() {
            fs::create_dir_all(dir)?;
        }

        // Ensure list directory exists; complain otherwise
        if let Some(lists_dir) = self.directories.get("lists") {
            if !lists_dir.is_dir() {
                error!("Lists directory for user {} doesn't exist", self.name);
            }
        }

        // Touch database files if missing
        for db_path in self.dbs.values() {
            if !db_path.is_file() {
                File::create(db_path)?;
            }
        }

        // Touch master and push lists
        for key in [&"master", &"push"] {
            if let Some(path) = self.lists.get(*key) {
                if !path.is_file() {
                    File::create(path)?;
                }
            }
        }
        Ok(())
    }

    /// Appends a line to the specified list. The list name must be
    /// one of the keys in the `lists` map. A newline is appended
    /// automatically.
    pub fn append_list(&self, name: &str, line: &str) -> Result<()> {
        let path = self
            .lists
            .get(name)
            .ok_or_else(|| anyhow!("Unknown list {} for user {}", name, self.name))?;
        append_line(path, line)
    }

    /// Writes a link into its appropriate cache list based on simple
    /// pattern matching. See the original Python implementation for
    /// category definitions. This method uses guard clauses to keep
    /// the matching logic obvious.
    fn append_cache_list(&self, line: &str) -> Result<()> {
        let lower = line.to_lowercase();
        if lower.contains("x.com") {
            return self.append_list("main", &validate_x_link(line));
        }
        if lower.contains("kemono.party") {
            return self.append_list("kemono", line);
        }
        if lower.contains("instagram") {
            return self.append_list("instagram", line);
        }
        // default case
        self.append_list("main", line)
    }

    /// Reads the master list, shuffles it and creates per‑site cache
    /// lists. Empty lines are silently ignored.
    pub fn list_manager(&self) -> Result<()> {
        self.create_directories()?;
        let master_path = self
            .lists
            .get("master")
            .ok_or_else(|| anyhow!("Master list missing for user {}", self.name))?;
        let mut master_content = read_lines(master_path)?;
        // Shuffle the list to randomise download order; a small, fast
        // RNG seeded from entropy is enough here.
        let mut rng = rand::rngs::SmallRng::from_entropy();
        master_content.shuffle(&mut rng);
        for line in master_content.iter().filter(|l| !l.is_empty()) {
            self.append_cache_list(line)?;
        }
        Ok(())
    }

    /// Adds a link to the master list if it is not already present.
    /// Normalisation of the link is performed via `parse_link` before
    /// the check. Duplicates are logged and ignored.
    pub fn save_link(&self, link: &str) -> Result<()> {
        let master_path = self
            .lists
            .get("master")
            .ok_or_else(|| anyhow!("Master list missing for user {}", self.name))?;
        let contents = fs::read_to_string(master_path).unwrap_or_default().to_lowercase();
        let fixed = parse_link(link);
        if contents.contains(&fixed.to_lowercase()) {
            info!("Gallery repeated, not saving");
            return Ok(());
        }
        info!("New gallery, saving");
        self.append_list("master", &fixed)
    }
}
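A sketch of how `User::new` derives its paths from a minimal configuration. Illustrative only, not part of this commit; the directory values are made up, though the `*-dir` key convention matches what the constructor looks for:

#[cfg(test)]
mod tests {
    // Illustrative only — not part of this commit.
    use super::User;
    use serde_yaml::Value;
    use std::path::PathBuf;

    #[test]
    fn derives_per_user_paths() {
        let cfg: Value = serde_yaml::from_str(
            "global:\n  cache-dir: /tmp/jawz/cache\n  lists-dir: /tmp/jawz/lists\n  databases-dir: /tmp/jawz/db\nusers:\n  - name: jawz\n    sleep: 5\n",
        )
        .unwrap();
        let user = User::new(0, &cfg).unwrap();
        assert_eq!(user.name, "jawz");
        assert_eq!(user.sleep, 5);
        // Cache and lists directories gain a per-user suffix; databases do not.
        assert_eq!(user.lists["master"], PathBuf::from("/tmp/jawz/lists/jawz/watch.txt"));
        assert_eq!(user.dbs["gallery"], PathBuf::from("/tmp/jawz/db/jawz.sqlite3"));
    }
}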