first flake output test

Danilo Reyes 2024-11-01 00:21:45 -06:00
parent bd50a7ce71
commit aa0e9490be
21 changed files with 3273 additions and 0 deletions

240
dotfiles/gallery-dl/config.json Normal file

@@ -0,0 +1,240 @@
{
"extractor": {
"skip": "abort:5",
"cookies": [
"firefox",
"/home/jawz/.librewolf/jjwvqged.default",
"gnomekeyring"
],
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
"retries": 10,
"sleep-request": 0,
"directlink": {
"directory": [],
"filename": "{filename}.{extension}"
},
"bluesky": {
"username": "jawz.bsky.social",
"password": "isrb-ydbt-oz52-v7z3",
"directory": ["{author['handle']}"],
"include": ["media"],
"reposts": false,
"videos": true
},
"twitter": {
"skip": "abort:1",
"directory": ["{user[name]}"],
"include": ["media"],
"retweets": false,
"videos": "ytdl",
"logout": true
},
"flickr": {
"directory": ["{category}", "{owner[username]}"],
"size-max": "Original",
"access-token": "72157720915197374-51a26dc4fdfdf173",
"access-token-secret": "a1ddb10902f3fa85"
},
"pinterest": {
"directory": ["{board[owner][username]}", "{board[name]}"]
},
"wikifeet": {
"page-reverse": true,
"directory": ["{category}", "{celebrity}"]
},
"instagram": {
"sleep-request": "25-45",
"sleep": "25-45",
"directory": ["{username}"],
"parent-directory": true,
"highlights": {
"reverse": "true",
"directory": ["{username}"]
},
"stories": {
"reverse": "true",
"directory": ["{username}"]
},
"tagged": {
"directory": ["{tagged_username}", "tagged"]
}
},
"kemonoparty": {
"directory": ["{category}", "{user}"],
"retries": 10,
"timeout": 5,
"filename": "{id}_{filename}.{extension}"
},
"exhentai": {
"directory": ["{category}", "{title}"]
},
"tumblr": {
"directory": ["{blog_name}"],
"access-token": "5VwIW8TNBoNVPo9CzvKMza2wcn9gJXd6rnUBy6Ctqb4BCPpI59",
"access-token-secret": "8krZGeauA171aZpXZhwgZN8nZCxKQkXYKXWL473mTQPKrqoP3e",
"external": true,
"inline": true,
"posts": "all",
"reblogs": false,
"parent-directory": true,
"api-key": "uhBUtgPaX9gl7eaD8suGWW6ZInRedQoVT6xsZzopljy0jXHqm5",
"api-secret": "D3FDj1INyPzXikVpp4jmzSqjlC9czFUQ8oj2I883PSYJdqwURv"
},
"deviantart": {
"client-id": "20016",
"client-secret": "52e1f9b0cb26e673da36f69e2ddd0e9a",
"refresh-token": "3fd25b06f97853a93cbe3729edf5d1d196d44700",
"directory": ["{username}"],
"include": "gallery,scraps",
"flat": true,
"original": true,
"mature": true,
"auto-watch": true,
"auto-unwatch": true
},
"furaffinity": {
"directory": ["{user}", "{subcategory}"],
"include": ["scraps", "gallery"]
},
"patreon": {
"directory": [
"(Patreon) {creator[vanity]}",
"({date:%Y%m%d}) {title} ({id})"
],
"filename": "{filename}.{num}.{extension}",
"browser": "firefox"
},
"blogger": {
"directory": [
"{blog[name]}",
"{post[author]}",
"{post[title]} - [{post[id]}]"
],
"filename": "{filename} - {num}.{extension}"
},
"artstation": {
"directory": ["{userinfo[username]}"],
"external": true
},
"gfycat": {
"format": "webm"
},
"reddit": {
"user-agent": "Python:gallery-dl:v1.0 (by /u/captainjawz)",
"client-id": "T7nZ6WZ3_onJWBhLP8r08g",
"refresh-token": "184157546842-UHdPQX1c7kG1kbO09NAHY2O2taEiwg",
"directory": ["{author}"],
"parent-directory": true
},
"redgifs": {
"reverse": "true",
"directory": ["{userName}"]
},
"imgur": {
"mp4": true
},
"paheal": {
"directory": ["Husbands", "{search_tags}"]
},
"rule34": {
"directory": ["Husbands", "{search_tags}"]
},
"e621": {
"directory": ["Husbands", "{search_tags}"]
},
"baraag": {
"directory": ["{account[username]}"]
},
"pixiv": {
"refresh-token": "O4kc9tTzGItuuacDcfmevW6NELjm5CJdWiAbZdUv3Kk",
"directory": ["{user[account]} - {user[id]}"],
"ugoira": true,
"favorite": {
"directory": [
"{user_bookmark[account]} - {user_bookmark[id]}",
"Bookmarks"
]
},
"postprocessors": [
{
"name": "ugoira",
"extension": "webm",
"keep-files": false,
"whitelist": ["pixiv"],
"ffmpeg-twopass": true,
"ffmpeg-args": ["-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"]
}
]
},
"fanbox": {
"directory": ["{category}", "{creatorId}"],
"embeds": true
},
"readcomiconline": {
"chapter-reverse": true,
"directory": ["Comics", "{comic}", "{comic} #{issue}"],
"quality": "hq",
"captcha": "wait",
"postprocessors": ["cbz"]
},
"kissmanga": {
"chapter-reverse": true,
"directory": ["Manga", "{manga}", "{manga} Ch.{chapter}{chapter_minor}"],
"captcha": "wait",
"postprocessors": ["cbz"]
},
"mangahere": {
"chapter-reverse": true,
"directory": ["Manga", "{manga}", "{manga} Ch.{chapter}{chapter_minor}"],
"postprocessors": ["cbz"]
},
"mangadex": {
"chapter-reverse": true,
"chapter-filter": "lang == 'en'",
"directory": ["Manga", "{manga}", "{manga} Ch.{chapter}{chapter_minor}"],
"postprocessors": ["cbz"]
},
"mangareader": {
"chapter-reverse": true,
"directory": ["Manga", "{manga}", "{manga} Ch.{chapter}{chapter_minor}"],
"postprocessors": ["cbz"]
},
"mangapanda": {
"chapter-reverse": true,
"directory": ["Manga", "{manga}", "{manga} Ch.{chapter}{chapter_minor}"],
"postprocessors": ["cbz"]
},
"webtoons": {
"chapter-reverse": true,
"directory": ["Webtoons", "{comic}", "{comic} #{episode}"],
"postprocessors": ["cbz"]
}
},
"output": {
"mode": "auto"
},
"downloader": {
"part": true,
"part-directory": "/home/jawz/.cache/gallery-dl",
"ytdl": {
"logging": true,
"format": "bestvideo+bestaudio/best",
"module": "yt_dlp",
"forward-cookies": true
},
"http": {
"rate": null,
"retries": 5,
"timeout": 10.0,
"verify": true
}
},
"postprocessor": {
"cbz": {
"name": "zip",
"compression": "store",
"mode": "safe",
"extension": "cbz"
}
}
}

27
flake.lock generated Normal file

@@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1730200266,
"narHash": "sha256-l253w0XMT8nWHGXuXqyiIC/bMvh1VRszGXgdpQlfhvU=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "807e9154dcb16384b1b765ebe9cd2bba2ac287fd",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

31
flake.nix Normal file

@@ -0,0 +1,31 @@
{
description = "JawZ scripts flake setup";
inputs.nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
outputs =
{ self, nixpkgs }:
let
pkgs = import nixpkgs {
system = "x86_64-linux";
config.allowUnfree = true;
};
download = import ./pkgs/download.nix { inherit pkgs; };
in
{
packages.x86_64-linux.download = download;
nixosModules.download =
{
config,
lib,
pkgs,
...
}:
import ./modules/download.nix {
inherit
pkgs
lib
config
download
;
};
};
}
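
A minimal usage sketch, assuming a flakes-enabled Nix CLI (commands inferred from the outputs above, not recorded in this commit):

nix build .#download    # builds pkgs/download.nix into ./result
nix run .#download      # runs the download console script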

80
modules/base.nix Normal file

@@ -0,0 +1,80 @@
{
config,
lib,
pkgs,
...
}:
{
options.my.scripts = lib.mkOption {
type = lib.types.attrsOf (
lib.types.submodule {
options = {
enable = lib.mkEnableOption "this script";
install = lib.mkEnableOption "installing the script package";
service = lib.mkEnableOption "running the script as a service";
name = lib.mkOption {
type = lib.types.str;
description = "Name of the script.";
};
timer = lib.mkOption {
type = lib.types.str;
default = "*:0";
description = "Systemd timer schedule.";
};
description = lib.mkOption {
type = lib.types.str;
description = "Description of the service.";
};
package = lib.mkOption {
type = lib.types.package;
description = "Package containing the executable script.";
};
};
}
);
default = { };
description = "Configuration for multiple scripts.";
};
config = lib.mkIf (lib.any (s: s.enable) (lib.attrValues config.my.scripts)) {
users.users.jawz.packages = lib.flatten (
lib.mapAttrsToList (
_name: script: lib.optional (script.enable && script.install) script.package
) config.my.scripts
);
systemd.user.services = lib.mapAttrs' (
_name: script:
lib.nameValuePair script.name (
lib.mkIf (script.enable && script.service) {
restartIfChanged = true;
inherit (script) description;
wantedBy = [ "default.target" ];
path = [
pkgs.nix
script.package
];
serviceConfig = {
Restart = "on-failure";
RestartSec = 30;
ExecStart = "${script.package}/bin/${script.name}";
};
}
)
) config.my.scripts;
systemd.user.timers = lib.mapAttrs' (
_name: script:
lib.nameValuePair script.name (
lib.mkIf (script.enable && script.service) {
enable = true;
inherit (script) description;
wantedBy = [ "timers.target" ];
timerConfig = {
OnCalendar = script.timer;
};
}
)
) config.my.scripts;
};
}
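
For orientation, a hedged sketch of registering a script through this module from a NixOS configuration; the hello name, schedule, and package are hypothetical:

my.scripts.hello = {
  enable = true;
  install = true;
  service = true;
  name = "hello";
  timer = "hourly";
  description = "Prints a greeting";
  package = pkgs.writeShellScriptBin "hello" "echo hello";
};

With service set, the module emits a user unit hello.service plus a matching timer on the given OnCalendar schedule; with install set, the package is added to users.users.jawz.packages.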

109
modules/download.nix Normal file

@@ -0,0 +1,109 @@
{
pkgs,
lib,
config,
download,
...
}:
{
imports = [ ./base.nix ];
options.my.units = {
download.enable = lib.mkEnableOption "enable";
downloadManga.enable = lib.mkEnableOption "enable";
};
config = {
home-manager.users.jawz = {
xdg.configFile."gallery-dl/config.json".source = ../dotfiles/gallery-dl/config.json;
services.lorri.enable = true;
programs.bash = {
shellAliases = {
dl = "download -u jawz -i";
comic = ''dl "$(cat "$LC" | fzf --multi --exact -i)"'';
gallery = ''dl "$(cat "$LW" | fzf --multi --exact -i)"'';
};
initExtra = ''
list_root=$XDG_CONFIG_HOME/jawz/lists/jawz
export LW=$list_root/watch.txt
export LI=$list_root/instant.txt
export LC=$list_root/comic.txt
'';
};
};
systemd.user = {
services =
let
mkDownloadService = desc: execStartCmd: {
restartIfChanged = true;
description = "Downloads ${desc}";
wantedBy = [ "default.target" ];
path = [
pkgs.bash
download
];
serviceConfig = {
TimeoutStartSec = 2000;
TimeoutStopSec = 2000;
Restart = "on-failure";
RestartSec = 30;
ExecStart = "${download}/bin/download ${execStartCmd}";
};
};
in
{
tuhmayto = lib.mkIf config.my.units.download.enable (
mkDownloadService "tuhmayto stuff" ''
-u jawz -i https://x.com/tuhmayto/media \
https://www.furaffinity.net/user/tuhmayto/''
);
"download@" = lib.mkIf (config.my.units.download.enable || config.my.units.downloadManga.enable) (
mkDownloadService "post from multiple sources" "%I"
);
"instagram@" = lib.mkIf config.my.units.download.enable (
mkDownloadService "post types from instagram" "instagram -u jawz -t %I"
);
};
timers =
let
downloadTimer = time: delay: {
enable = true;
description = "Downloads post types from different sites";
wantedBy = [ "timers.target" ];
timerConfig = {
OnCalendar = time;
RandomizedDelaySec = delay;
Persistent = true;
};
};
in
{
"instagram@stories" = lib.mkIf config.my.units.download.enable (
downloadTimer "*-*-* 08:12:00" 120 // { }
);
"download@main" = lib.mkIf config.my.units.download.enable (
downloadTimer "*-*-* 06,18:02:00" 30 // { }
);
"download@push" = lib.mkIf config.my.units.download.enable (downloadTimer "*:0/5" 30 // { });
"download@manga" = lib.mkIf config.my.units.downloadManga.enable (
downloadTimer "Mon,Fri *-*-* 03:08:00" 30 // { }
);
# "download@kemono" = downloadTimer
# "*-*-1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31 18:06:00" 60 // { };
tuhmayto = lib.mkIf config.my.units.download.enable {
enable = true;
description = "Downloads tuhmayto stuff";
wantedBy = [ "timers.target" ];
timerConfig = {
OnCalendar = "*:0/10";
};
};
};
};
my.scripts.download = {
enable = lib.mkDefault false;
install = true;
service = false;
name = "download";
package = download;
};
};
}
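
For reference, a hedged sketch of switching these units on from a host configuration that imports this module (option paths taken from the declarations above):

my.units.download.enable = true;
my.units.downloadManga.enable = true;

The first flag activates the tuhmayto, download@ and instagram@ services and their timers; the second adds the download@manga timer and keeps the shared download@ template service available.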


@@ -0,0 +1,19 @@
#!/usr/bin/env bash
set -e
if [[ ! -d "/home/jawz/Development/NixOS/scripts/download" ]]; then
echo "Cannot find source directory; Did you move it?"
echo "(Looking for "/home/jawz/Development/NixOS/scripts/download")"
echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
exit 1
fi
# rebuild the cache forcefully
_nix_direnv_force_reload=1 direnv exec "/home/jawz/Development/NixOS/scripts/download" true
# Update the mtime for .envrc.
# This will cause direnv to reload again - but without re-building.
touch "/home/jawz/Development/NixOS/scripts/download/.envrc"
# Also update the timestamp of whatever profile_rc we have.
# This makes sure that we know we are up to date.
touch -r "/home/jawz/Development/NixOS/scripts/download/.envrc" "/home/jawz/Development/NixOS/scripts/download/.direnv"/*.rc


@@ -0,0 +1 @@
/nix/store/ilq7gdgibfyxmagbp4hivixvxl44apyr-nix-shell-env

File diff suppressed because it is too large

1
nix/download/.env Normal file

@@ -0,0 +1 @@
CONFIG_FILE = "/home/jawz/.config/jawz/config.yaml"

1
nix/download/.envrc Normal file

@@ -0,0 +1 @@
use nix

96
nix/download/argparser.py Normal file

@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""Setup the argparser"""
import argparse
scrapper_types = (
"push",
"main",
"instagram",
"kemono",
"comic",
"manga",
"webcomic",
)
# Types of Instagram posts that can be selected
instagram_types = ["posts", "reels", "stories", "highlights", "avatar"]
def argparser(users: list) -> argparse.Namespace:
"""Returns an argparser to evaluate user input"""
# ARG PARSER
parser = argparse.ArgumentParser(
prog="Downloader",
description="Download images and galleries from a wide array of websites"
" either by using links or chosing from user define lists."
" This program also takes care of archiving tasks,"
" that keep the run time fast and prevents downloading duplicates.",
)
# Choose the type of scraper
parser.add_argument(
choices=scrapper_types,
nargs="?",
dest="scrapper",
help="Select a scrapper.",
)
# Parse user list
parser.add_argument(
"-u",
"--user",
choices=users,
dest="user",
help="Selects the personal user list to process. Defaults to everyone",
default="everyone",
type=str,
)
# Parse individual links
parser.add_argument(
"-i",
"--input",
nargs="*",
dest="link",
action="append",
help="Download the provided links",
type=str,
)
# Set the print list flag
parser.add_argument(
"-l",
"--list",
dest="flag_list",
action="store_true",
help="Prints a list of all the added links and prompts for a choice",
)
# Set the use archiver flag
parser.add_argument(
"-a",
"--no-archive",
dest="flag_archive",
action="store_false",
help="Disables the archiver flag",
)
# Set the skip flag
parser.add_argument(
"-s",
"--no_skip",
dest="flag_skip",
action="store_false",
help="Disables the skip function, downloads the entire gallery",
)
parser.add_argument(
"-v",
"--verbose",
dest="flag_verbose",
action="store_true",
help="Prints the generated commands instead of running them",
)
parser.add_argument(
"-t",
"--type-post",
choices=instagram_types,
nargs="*",
dest="post_type",
help="Filters posts on instagram by type",
default=instagram_types,
type=str,
)
return parser.parse_args()
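
For illustration, a few invocations this parser accepts (assuming the entry point is installed as download, per setup.cfg later in this commit):

download main -u jawz                     # run the "main" scraper for one user
download push                             # process the push list
download -i https://example.com/gallery   # queue a single link
download instagram -t stories highlights  # restrict Instagram post types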

Binary file not shown.

46
nix/download/classes/gallery.py Normal file

@@ -0,0 +1,46 @@
#!/usr/bin/env python3
from classes.user import User
from functions import LOG
from functions import load_config_variables
from functions import quote
from functions import run
class Gallery:
def __init__(self) -> None:
self.archive: bool = True
self.skip_arg: str = ""
self.link: str = ""
self.dest: str = ""
self.list: str = ""
self.opt_args: str = ""
self.command: str = ""
def generate_command(self, user: User | None = None, is_comic: bool = False) -> None:
"""Generates a command string."""
# Default lazily; User(1) as a default argument would be built at import time
user = user or User(1)
if is_comic:
configs = load_config_variables()
directory = quote(configs["comic"]["download-dir"])
database = quote(configs["comic"]["database"])
queue = quote(configs["comic"][f"{self.list}-list"]) if self.list else ""
else:
directory = quote(str(user.directories[self.dest]))
database = quote(str(user.dbs["gallery"]))
queue = quote(str(user.lists[self.list])) if self.list else ""
command = f"gallery-dl --sleep {str(user.sleep)}"
command += self.skip_arg if self.skip_arg else ""
command += f" --dest {directory}" if self.dest or is_comic else ""
command += f" --download-archive {database}" if self.archive else ""
command += self.opt_args if self.opt_args else ""
if self.link and not self.list:
command += f" {quote(self.link)}"
if self.list and not self.link:
command += f" -i {queue}"
LOG.debug(command)
self.command = command
def run_command(self, verbose: bool):
run(self.command, verbose)
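
To make the assembly concrete: for a hypothetical user with sleep 5, dest "download", list "main", and archiving on, generate_command produces roughly

gallery-dl --sleep 5 --dest "/data/download" --download-archive "/data/jawz.sqlite3" -i "/data/cache/jawz/main.txt"

with a quoted link replacing the -i argument when a single link is set instead of a list.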

105
nix/download/classes/user.py Normal file

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
"""Define the user class to populate and setup the download environment"""
import re
from random import shuffle
from pathlib import Path
from functions import load_config_variables
from functions import validate_x_link
from functions import parse_link
from functions import clean_cache
from functions import LOG
class User:
"""Populate the directory for each user"""
# pylint: disable=too-many-instance-attributes
def __init__(self, index) -> None:
config = load_config_variables()
self.config = config["users"][index] | config["global"]
self.name = self.config["name"]
self.sleep = self.config["sleep"]
# Directories
self.directories = {
str(key).replace("-dir", ""): Path(self.config[f"{key}"])
for key in filter(lambda x: re.search("-dir", x), self.config.keys())
}
self.directories["cache"] = self.directories["cache"] / self.name
self.directories["lists"] = self.directories["lists"] / self.name
# Files
self.dbs = {
"gallery": self.directories["databases"] / f"{self.name}.sqlite3",
"media": self.directories["databases"] / f"{self.name}_ytdl.txt",
}
# Lists
self.lists = {
"master": self.directories["lists"] / "watch.txt",
"push": self.directories["lists"] / "instant.txt",
"instagram": self.directories["cache"] / "instagram.txt",
"kemono": self.directories["cache"] / "kemono.txt",
"main": self.directories["cache"] / "main.txt",
}
def _create_directories(self) -> None:
"""Create user directories if they don't exist"""
clean_cache(self.directories["cache"])
# Create directories
for directory in self.directories.values():
directory.mkdir(parents=True, exist_ok=True)
# Check for the existence of core files
if not self.directories["lists"].is_dir():
LOG.error("Lists directory for user %s doesn't exist", self.name)
# dbs stands for databases, the archives.
for db in filter(lambda x: not self.dbs[x].is_file(), self.dbs.keys()):
self.dbs[db].touch()
for lst in filter(lambda x: not self.lists[x].is_file(), ["master", "push"]):
self.lists[lst].touch()
def append_list(self, name: str, line: str) -> None:
"""Appends a line into the given list"""
with open(self.lists[name], "a+", encoding="utf-8") as a_file:
a_file.write(line + "\n")
def _append_cache_list(self, line) -> None:
"""Writes the input line into it's respective list,
depending on what website it belongs to."""
if re.search("x", line):
self.append_list("main", validate_x_link(line))
elif re.search(r"kemono\.party", line):
self.append_list("kemono", line)
elif re.search("instagram", line):
self.append_list("instagram", line)
else:
self.append_list("main", line)
def list_manager(self) -> None:
"""Manage all the user list and create sub-lists"""
self._create_directories() # Call the function to create necesary cache dirs
with open(self.lists["master"], "r", encoding="utf-8") as r_file:
master_content = list(map(lambda x: x.rstrip(), r_file))
# Create temporary list files segmented per scraper
shuffle(master_content)
for line in master_content:
self._append_cache_list(line)
def save_link(self, link: str) -> None:
"""Checks the master list against a new link
if unmatched, appends it to the end of the list"""
with open(self.lists["master"], "r", encoding="utf-8") as r_file:
links = r_file.read().lower()
if parse_link(link).lower() in links:
LOG.info("Gallery repeated, not saving")
return
LOG.info("New gallery, saving")
self.append_list("master", parse_link(link))

295
nix/download/download.py Normal file

@@ -0,0 +1,295 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Rewrite of the download manager script,
intended to make it more modular through the use of flags,
in order to avoid unnecessary modifications
to the config files.
It also follows POSIX and Python conventions more closely.
"""
import re
import yaml
from typing import Dict
from functions import LOG
from functions import run
from functions import quote
from functions import list_lines
from functions import load_config_variables
from functions import parse_link
from argparser import argparser
from classes.user import User
from classes.gallery import Gallery
# GLOBAL VARIABLE SECTION
CONFIGS = load_config_variables()
# Enable a default "everyone" flag for when running stuff like download gallery
USERS = ["everyone"] + [user["name"] for user in CONFIGS["users"]]
ARGS = argparser(USERS)
class Video:
"""Just a simple class to unify the Video parameters into a single one."""
def __init__(self) -> None:
self.use_archive: bool = True
self.link: str = ""
self.dest: str = ""
self.database: str = ""
def get_index(name: str) -> int:
"""Find the index in the config file"""
return next((i for i, d in enumerate(CONFIGS["users"]) if d["name"] == name), -1)
def parse_gallery(gdl_list: str, user: User) -> None:
"""Processes the gallery-dl command based on the selected gallery"""
gallery = Gallery()
gallery.archive = ARGS.flag_archive
gallery.skip_arg = " -o skip=true" if not ARGS.flag_skip else ""
gallery.dest = "download"
gallery.list = gdl_list
gallery.opt_args = parse_instagram(gdl_list)
gallery.generate_command(user)
gallery.run_command(ARGS.flag_verbose)
def parse_instagram(link: str) -> str:
"""Fix instagram links"""
if "instagram" not in link:
return ""
if isinstance(ARGS.post_type, list):
return f" -o include={quote(','.join(ARGS.post_type))}"
return f" -o include={quote(ARGS.post_type)}"
def video_command(video: Video) -> str:
"""Filters and processes the required command to download videos"""
command = "yt-dlp"
rgx_yt = re.compile(r"(https:\/\/youtube|https:\/\/www.youtube|https:\/\/youtu.be)")
rgx_music = re.compile(r"(https:\/\/music.youtube.*)")
if re.search(r"chaturbate", video.link):
return f"chat-dl {video.link}"
if rgx_yt.search(video.link):
command += " --embed-subs --embed-thumbnail"
command += " --embed-metadata --embed-chapters"
command += f" -o {quote(video.dest + '/%(title)s.%(ext)s')}"
elif rgx_music.search(video.link):
command += f" --download-archive {video.database}" if video.use_archive else ""
command += " --no-playlist --newline -x"
command += " --audio-format best --add-metadata --audio-quality 0 -o"
command += f" {quote(video.dest + '/%(title)s.%(ext)s')}"
else: # Any other video link, just do it generic
command += f" -f mp4 -o {quote(video.dest + '/%(title)s.%(ext)s')}"
LOG.info("%s %s", command, video.link)
return f"{command} {quote(video.link)}"
def comic_manager(skip_arg: str, category: str) -> None:
"""Process the information to download manga"""
re_cat = "manga|webtoon" if category == "manga" else "readcomiconline"
with open(CONFIGS["comic"]["comic-list"], "r", encoding="utf-8") as r_file:
links = list(filter(lambda x: re.search(re_cat, x), r_file))
for link in links:
gallery = Gallery()
gallery.archive = ARGS.flag_archive
gallery.skip_arg = skip_arg
gallery.link = link
gallery.generate_command(is_comic=True)
gallery.run_command(ARGS.flag_verbose)
def print_webcomics(webcomics: Dict[str, Dict]) -> int:
"""Prints a list of webcomics, and returns an index."""
for index, entry in enumerate(webcomics["webcomics"]):
print(list_lines(index, entry["name"]))
return int(input("Select a webcomic: "))
def webcomic_manager():
"""Process the information to download webcomics"""
with open(CONFIGS["comic"]["webcomic-list"], "r", encoding="utf-8") as r_file:
webcomics = yaml.safe_load(r_file)
usr_input = print_webcomics(webcomics)
# Determines where the webcomic will be downloaded
rating = webcomics["webcomics"][usr_input]["type"]
dest = webcomics["global"][f"{rating}_directory"]
name = webcomics["webcomics"][usr_input]["name"]
link = webcomics["webcomics"][usr_input]["url"]
nxt_code = webcomics["webcomics"][usr_input]["next_code"]
img_code = webcomics["webcomics"][usr_input]["image_code"]
LOG.info("The webcomic is %s", dest)
command = f"cd {quote(dest)} && webcomix custom"
command += f" {quote(name)}"
command += " --start-url"
command += f" {quote(link)}"
command += f" --next-page-xpath={quote(nxt_code)}"
command += f" --image-xpath={quote(img_code)}"
command += " -y --cbz"
run(command, ARGS.flag_verbose)
def save_comic(link: str) -> None:
"""Add comic/manga link to the list"""
list_comic = CONFIGS["comic"]["comic-list"]
with open(list_comic, "r", encoding="utf-8") as r_file:
links = r_file.read().lower()
if parse_link(link).lower() in links:
LOG.info("Graphic novel repeated, not saving")
return
LOG.info("New graphic novel, saving")
with open(list_comic, "a", encoding="utf-8") as w_file:
w_file.write(link + "\n")
def push_manager(user: User):
"""Filters out the URL to use the appropiate downloader"""
# Creates an array which will store any links that should use youtube-dl
rgx_gallery = re.compile(
r"(x\.com\/\w+((?=.*media)|(?!.*status)))"
r"|(men\.wikifeet)"
r"|(furaffinity\.net\/user\/)"
r"|((deviantart\.com\/\w+(?!.*\/art\/)))"
r"|(furaffinity\.net\/gallery\/)"
r"|(furaffinity\.net\/scraps\/)"
r"|(furaffinity\.net\/favorites\/)"
r"|(instagram.com(?!\/p\/)\/\w+)"
r"|(e621\.net((?=\/post\/)|(?!\/posts\/)))"
r"|(flickr\.com\/photos\/\w+\/(?!\d+))"
r"|(tumblr\.com(?!\/post\/))"
r"|(kemono\.party\/(fanbox|gumroad|patreon)(?!\/user\/\d+\/post))"
r"|(blogspot\.com(?!\/))"
r"|(rule34\.paheal\.net\/post\/(?!view))"
r"|(rule34\.xxx\/index\.php\?page\=post&s=(?!view))"
r"|(pixiv\.net\/(en\/)?((?=users)|(?!artwork)))"
r"|(fanbox\.cc\/@\w+(?!.*posts\/\d+))"
r"|(reddit\.com\/(user|u))"
r"|(baraag\.net\/((@\w+)|(?!\/\d+)))"
r"|(pinterest\.com\/(?!pin\/\d+))"
r"|(redgifs\.com\/(users|u|(?!watch)))"
r"|(bsky\.app\/profile\/(?!.*\/post\/))"
)
rgx_video = re.compile("youtu.be|youtube|pornhub|xtube|xvideos|chaturbate")
rgx_comic = re.compile("readcomiconline|mangahere|mangadex|webtoons")
with open(user.lists["push"], "r", encoding="utf-8") as r_file:
links = list(map(lambda x: x.rstrip(), r_file))
links_galleries = filter(rgx_gallery.search, links)
links_videos = filter(rgx_video.search, links)
links_comics = filter(rgx_comic.search, links)
links_other = filter(
lambda x: (not rgx_video.search(x))
and (not rgx_gallery.search(x))
and (not rgx_comic.search(x)),
links,
)
for link in links_galleries:
gallery = Gallery()
gallery.archive = ARGS.flag_archive
gallery.skip_arg = " -o skip=true" if not ARGS.flag_skip else ""
gallery.link = parse_link(link)
gallery.dest = "download"
gallery.opt_args = parse_instagram(link)
gallery.generate_command(user)
gallery.run_command(ARGS.flag_verbose)
user.save_link(link)
for link in links_comics:
if ARGS.flag_skip and re.search(r"readcomiconline", link):
skip_arg = " --chapter-range 1"
elif ARGS.flag_skip and re.search(r"mangahere|webtoons", link):
skip_arg = " --chapter-range 1-5"
else:
skip_arg = ""
gallery = Gallery()
gallery.archive = ARGS.flag_archive
gallery.skip_arg = skip_arg
gallery.link = link
gallery.generate_command(is_comic=True)
gallery.run_command(ARGS.flag_verbose)
save_comic(link)
for link in links_videos:
video = Video()
video.use_archive = ARGS.flag_archive
video.link = link
video.dest = f"{user.directories['media']}"
video.database = quote(f"{user.dbs['media']}")
run(video_command(video), ARGS.flag_verbose)
for link in links_other:
LOG.info("Other type of download %s", link)
gallery = Gallery()
gallery.archive = False
gallery.skip_arg = " -o directory='[]'"
gallery.link = link
gallery.dest = "push"
gallery.generate_command(user)
gallery.run_command(ARGS.flag_verbose)
# Flush the push list, cleans all the contents
with open(user.lists["push"], "w", encoding="utf-8") as w_file:
w_file.close()
def scrapper_manager(user: User) -> None:
"""Analyze the user arguments and call in functions"""
user.list_manager()
if re.search(r"main|instagram|kemono", ARGS.scrapper):
skip_arg = "" if ARGS.flag_skip else " -o skip=true"
parse_gallery(ARGS.scrapper, user)
elif ARGS.scrapper in "push":
push_manager(user)
elif re.search("^comic|manga", ARGS.scrapper):
skip_arg = " --chapter-range 1" if ARGS.flag_skip else ""
skip_arg += "-5" if ARGS.scrapper in "manga" else ""
comic_manager(skip_arg, ARGS.scrapper)
elif re.search("webcomic", ARGS.scrapper):
webcomic_manager()
def scrap_everyone() -> None:
"""Iterates over every user of my scrapper"""
for current_user in CONFIGS["users"]:
user = User(get_index(current_user["name"]))
LOG.info("Scrapping %s for %s", ARGS.scrapper, current_user["name"])
scrapper_manager(user)
def main():
"""Main module to decide what to do based on the parsed arguments"""
if ARGS.scrapper:
rgx_shared = re.compile("push|main|instagram|kemono")
if (ARGS.user in "everyone") and (rgx_shared.search(ARGS.scrapper)):
scrap_everyone()
else:
scrapper_manager(User(get_index(ARGS.user)))
elif ARGS.link:
is_admin = re.search(r"everyone|jawz", ARGS.user)
user = User(get_index("jawz" if is_admin else ARGS.user))
for arg_link in ARGS.link[0]:
user.append_list("push", parse_link(arg_link))
push_manager(user)
if __name__ == "__main__":
main()

112
nix/download/functions.py Normal file

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Personal functions to aid on multiple scripts"""
import sys
import fileinput
import re
import os
import shutil
import logging
from pathlib import Path
import yaml
VERBOSE_G = False
LOG = logging.getLogger()
HANDLER = logging.StreamHandler()
FORMATTER = logging.Formatter(
"[%(filename)s][%(levelname)s] %(funcName)s '%(message)s'"
)
HANDLER.setFormatter(FORMATTER)
LOG.addHandler(HANDLER)
LOG.setLevel(logging.INFO)
def validate_x_link(line: str) -> str:
"""returns a fixed link, which ends with /media"""
# if url contains /media at the end just write the line
if re.search(r"\/media$", line):
return line
# if does not contain /media at the end then add /media
return f"{line}/media"
def parse_link(link: str) -> str:
"""Fixes links"""
if not re.search(r"(x\.com\/\w+(\/)?(?!.*status))", link):
LOG.debug("No modifications needed for the link %s", link)
return link
# if url contains /media at the end just write the line
fixed_link = validate_x_link(link)
LOG.debug("Processed link %s", fixed_link)
return fixed_link
def load_config_variables():
"""Loads all the variables from the config file"""
config_file = Path("~/.config/jawz/config.yaml")
with open(config_file.expanduser(), encoding="utf-8") as open_file:
return yaml.safe_load(open_file)
def clean_cache(directory: Path):
"""Recursively deletes all the content of a directory,
including the directory itself."""
if not directory.is_dir():
return
# shutil.rmtree removes nested content in one call, as the docstring promises
shutil.rmtree(directory)
def run(command: str, verbose: bool):
"""Run command in a subprocess"""
# pylint: disable=subprocess-run-check
# This toggle allows for a really wasy debug when using -v
if verbose:
print(command)
else:
os.system(command)
def list_lines(i: int, line: str) -> str:
"""Create a numbered list"""
return f"{i}) {line}"
def quote(line: str) -> str:
"""Quote the line"""
return f'"{line}"'
def sort_txt_file(file_path: Path):
"""Sort every line alphabetically
remove duplicated and empty lines"""
file = str(file_path.resolve())
run(f"sort -u {quote(file)} -o {quote(file)}", VERBOSE_G)
run(f"sed -i '/^$/d' {quote(file)}", VERBOSE_G)
run(f'sed -i -e "s,http:,https:," {quote(file)}', VERBOSE_G)
# fix this using strip on python
# line.strip("/")
run(f'sed -i -e "s,/$,," {quote(file)}', VERBOSE_G) # trailing /
def randomize_txt_file(file_path: Path):
"""Randomize the order of the
lines of the txt file"""
file = str(file_path.resolve())
run(f"sort -R {quote(file)} -o {quote(file)}", VERBOSE_G)
def parse_list(file):
"""Replace http with https and remove trailing /"""
for line in fileinput.input(file, inplace=True):
sys.stdout.write(str(line).replace("http://", "https://"))
with open(file, "r+", encoding="utf-8") as open_file:
f_content = open_file.read()
f_content = re.compile(r"\/$", 0).sub(r"\/$", "")
open_file.seek(0)
open_file.truncate()
print(f_content)
sort_txt_file(file)
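
As a concrete example of these helpers, sort_txt_file turns a hypothetical list file containing

https://b.com/
http://a.com
https://b.com/

into

https://a.com
https://b.com

by shelling out to coreutils sort and sed to deduplicate, drop empty lines, force https, and strip trailing slashes.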

16
nix/download/setup.cfg Normal file

@@ -0,0 +1,16 @@
[metadata]
name = download
version = 1.5
[options]
py_modules =
download
functions
argparser
classes.gallery
classes.user
[options.entry_points]
console_scripts =
download = download:main

3
nix/download/setup.py Normal file

@@ -0,0 +1,3 @@
from setuptools import setup
setup()

37
nix/download/shell.nix Normal file

@@ -0,0 +1,37 @@
{
pkgs ? import <nixpkgs> { },
}:
with pkgs;
mkShell {
packages = [
(python3.withPackages (
ps: with ps; [
setuptools
pyyaml
types-pyyaml
]
))
yt-dlp
gallery-dl
ffmpeg
# (buildPythonApplication rec {
# pname = "webcomix";
# version = "3.9.0";
# src = fetchFromGitHub {
# inherit pname version;
# owner = "J-CPelletier";
# repo = pname;
# rev = "v${version}";
# sha256 = "sha256-hCnic8Rd81qY1R1XMrSME5ntYTSvZu4/ANp03nCmLKU=";
# };
# doCheck = false;
# propagatedBuildInputs =
# [ click scrapy scrapy-splash scrapy-fake-useragent tqdm ];
# })
];
buildInputs = [
];
}

17
pkgs/download.nix Normal file

@@ -0,0 +1,17 @@
{ pkgs, ... }:
with pkgs;
python3Packages.buildPythonApplication {
pname = "download";
version = "2.6";
src = ../nix/download/.;
build-system = with python3Packages; [ setuptools ];
dependencies = with python3Packages; [
pyyaml
types-pyyaml
];
propagatedBuildInputs = [
gallery-dl
ffmpeg
python3Packages.yt-dlp
];
}
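
A note on the packaging choice: gallery-dl, ffmpeg, and yt-dlp sit in propagatedBuildInputs because download.py shells out to those binaries rather than importing them; the wrapper that buildPythonApplication generates should then put them on the entry point's PATH. A hedged smoke test, assuming flakes:

nix build .#download && ./result/bin/download --help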