418 lines
15 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Rewriting of the download manager script
with the intention to make it
more modular with the use of flags
in order to avoid unnecesary modifications
to the cofig files.
Also following in line more posix and python rules.
"""
import re
import time
import logging
import yaml
from functions import run
from functions import quote
from functions import list_lines
from functions import load_config_variables
from argparser import argparser
from gdl_classes import User
# GLOBAL VARIABLE SECTION
# Store the name of the main binaries early in the code
BIN_GALLERY = "gallery-dl"
BIN_YOUTUBE = "yt-dlp"
# SKIP = "3"  (disabled; only referenced by commented-out code in this file)
# Configuration loaded once at import time; this file reads its "users"
# and "comic" sections.
CONFIGS = load_config_variables()
# Root logger with a single stream handler. Every record is rendered as
# "[file][LEVEL] function 'message'". The level is INFO, so the
# LOGGER.debug calls in this file are silent by default.
LOGGER = logging.getLogger()
HANDLER = logging.StreamHandler()
FORMATTER = logging.Formatter(
    "[%(filename)s][%(levelname)s] %(funcName)s '%(message)s'"
)
HANDLER.setFormatter(FORMATTER)
LOGGER.addHandler(HANDLER)
LOGGER.setLevel(logging.INFO)
# Enable a default "everyone" flag for when running stuff like download gallery
USERS = ["everyone"]
# Every user named in the configuration is added after "everyone"
for dictionary in CONFIGS["users"]:
    USERS.append(dictionary["name"])
# The list of user names is handed to the argument parser; this runs at
# import time, before any function in this file is called.
ARGS = argparser(USERS)
def get_index(value: str) -> int:
    """Return the position of the user named *value* in CONFIGS["users"].

    The first entry whose "name" equals *value* wins. When no entry
    matches, -1 is returned.
    """
    position = -1
    for idx, entry in enumerate(CONFIGS["users"]):
        if entry["name"] != value:
            continue
        LOGGER.debug("%s is %s", entry["name"], idx)
        position = idx
        break
    return position
def parse_gallery(gdl_list: str, user: User):
    """Run gallery-dl over the link list *gdl_list* using *user*'s settings.

    The list path is passed as the download queue (no single link), and
    the user's sleep value, download directory and gallery archive are
    forwarded to download_gallery().
    """
    # skip_arg = f" -A {SKIP}" if ARGS.flag_skip else ""
    if ARGS.flag_skip:
        skip_arg = ""
    else:
        skip_arg = " -o skip=true"
    LOGGER.debug(skip_arg)
    # Hand the whole list over to gallery-dl
    download_gallery(
        use_archive=ARGS.flag_archive,
        skip_arg=skip_arg,
        link="",
        sleep=str(user.sleep),
        destination=quote(f"{user.dir_download}"),
        database=quote(f"{user.archive_gallery}"),
        queue=quote(gdl_list),
        opt_args=parse_instagram(gdl_list),
    )
def parse_instagram(link: str) -> str:
    """Return the extra " -o include=..." option for instagram links.

    Anything that does not contain "instagram" yields an empty string.
    ARGS.post_type may be a list (joined with commas) or a single value.
    """
    if "instagram" not in link:
        return ""
    post_type = ARGS.post_type
    included = ",".join(post_type) if isinstance(post_type, list) else post_type
    option = f" -o include={quote(included)}"
    LOGGER.debug(option)
    return option
def parse_link(link: str) -> str:
    """Normalise twitter profile links so they point at the /media page.

    Links that are not twitter profile links (including anything with
    "status" after the user name) are returned untouched.
    """
    is_twitter_profile = re.search(r"(twitter\.com\/\w+(\/)?(?!.*status))", link)
    if is_twitter_profile:
        # Drop a trailing "/" or an existing "/media" suffix, then add "/media"
        trimmed = re.sub(r"\/$|\/media(\/?)$", "", link)
        fixed_link = f"{trimmed}/media"
        LOGGER.debug("Processed link %s", fixed_link)
        return fixed_link
    LOGGER.debug("No modifications needed for the link %s", link)
    return link
def download_gallery(
    use_archive: bool,
    skip_arg: str = "",
    link: str = "",
    sleep: str = "0",
    destination: str = "",
    database: str = "",
    queue: str = "",
    opt_args: str = "",
):
    """Assemble the gallery-dl command line and pass it to run().

    Optional fragments are appended only when they are non-empty. Either
    *link* (a single URL) or *queue* (an input file, sent with -i) is
    added; when both or neither are given, no target is appended.
    *skip_arg* and *opt_args* are appended verbatim, so callers include
    their own leading space.
    """
    pieces = [f"{BIN_GALLERY} --sleep {sleep}"]
    if skip_arg != "":
        pieces.append(skip_arg)
    if destination != "":
        pieces.append(f" --dest {destination}")
    if use_archive:
        pieces.append(f" --download-archive {database}")
    if opt_args != "":
        pieces.append(opt_args)
    if link != "" and queue == "":
        LOGGER.info("link: %s", quote(link))
        pieces.append(f" {link}")
    if queue != "" and link == "":
        LOGGER.info("queue: %s", queue)
        pieces.append(f" -i {queue}")
    command = "".join(pieces)
    LOGGER.debug(command)
    run(command, ARGS.flag_verbose)
def download_youtube(
    use_archive: bool,
    link: str = "",
    destination: str = "",
    database: str = "",
):
    """Build and run the yt-dlp command that fits *link*.

    use_archive: when true, --download-archive *database* is added; this
        is only applied to music.youtube links.
    link: URL to download, interpolated into the command as given.
    destination: directory for the output template "%(title)s.%(ext)s".
    database: archive file used with --download-archive.

    Branches:
    - youtube / youtu.be: download with the output template only.
    - music.youtube: extract best-quality audio with metadata.
    - chaturbate: retry the recording 40 times, 30 seconds apart.
    - anything else: generic mp4 download.
    """
    output_template = quote(destination + "/%(title)s.%(ext)s")
    command = BIN_YOUTUBE
    if re.search(r"(https:\/\/youtube|https:\/\/www.youtube|https:\/\/youtu.be)", link):
        command += f" -o {output_template}"
    elif re.search(r"(https:\/\/music.youtube.*)", link):
        if use_archive:
            command += f" --download-archive {database}"
        command += (
            " --no-playlist --newline -x "
            "--audio-format best --add-metadata --audio-quality 0 -o "
            f"{output_template} "
        )
    elif re.search(r"chaturbate", link):
        stream_command = (
            f"\n{BIN_YOUTUBE} "
            "--hls-use-mpegts --prefer-ffmpeg "
            f"-o {output_template} "
            f"{link}\n"
        )
        # Re-runs the program every 30 seconds in case the stream goes private or dc
        for i in range(1, 41):  # For a 20 minute total
            run(stream_command, ARGS.flag_verbose)
            time.sleep(30)
            LOGGER.info("waited for %s minutes", i * 30 / 60)
        # The retry loop already ran the complete command. Falling through
        # would start one more download without an output template.
        return
    else:  # Any other video link, just do it generic
        command += f" -f mp4 -o {output_template}"
    LOGGER.info("%s %s", command, link)
    run(f"{command} {link}", ARGS.flag_verbose)
def comic_manager(skip_arg: str, category: str):
    """Download every entry of the comic list that belongs to *category*.

    "manga" selects lines matching manga|webtoon, "comic" selects lines
    matching readcomiconline. Any other category uses an empty pattern,
    which matches every line.
    """
    patterns = {"manga": "manga|webtoon", "comic": "readcomiconline"}
    re_cat = patterns.get(category, "")
    with open(CONFIGS["comic"]["list"], encoding="utf-8") as list_comic:
        # The whole list is read before any download starts
        entries = [raw_line.rstrip() for raw_line in list_comic]
        for graphic_novel in entries:
            if re.search(re_cat, graphic_novel):
                download_gallery(
                    use_archive=ARGS.flag_archive,
                    skip_arg=skip_arg,
                    link=quote(graphic_novel),
                    sleep="0",
                    destination=CONFIGS["comic"]["download-directory"],
                    database=CONFIGS["comic"]["archive"],
                    queue="",
                    opt_args="",
                )
            else:
                # Entry belongs to the other category
                LOGGER.debug("%s does not match regex espression", graphic_novel)
def webcomic_manager():
    """Let the user pick a webcomic from the YAML list and download it.

    The YAML file provides a "Webcomics" list (name, type, url, next_code,
    image_code) and a "Global" mapping holding one "<type>_directory"
    entry per webcomic type. The chosen comic is fetched with
    "webcomix custom" from inside that directory.
    """
    with open(CONFIGS["comic"]["webcomic-list"], encoding="utf-8") as open_list:
        webcomic_file = yaml.safe_load(open_list)
    catalogue = webcomic_file["Webcomics"]
    # Show a numbered menu of the available webcomics
    for index, entry in enumerate(catalogue):
        print(list_lines(index, entry["name"]))
    # Ask which one to download
    usr_input = int(input("Select your comic: "))
    chosen = catalogue[usr_input]
    # The comic's type decides which directory it is downloaded into
    rating = chosen["type"]
    webcomic_category = webcomic_file["Global"][f"{rating}_directory"]
    LOGGER.debug("The webcomic is %s", webcomic_category)
    command = (
        f"cd {quote(webcomic_category)} && webcomix custom "
        f"{quote(chosen['name'])} "
        "--start-url "
        f"{quote(chosen['url'])} "
        f"--next-page-xpath={quote(chosen['next_code'])} "
        f"--image-xpath={quote(chosen['image_code'])} "
        "-y --cbz"
    )
    LOGGER.debug(command)
    run(command, ARGS.flag_verbose)
def push_manager(user: User):
    """Process every link in the user's push list with the right downloader.

    The push list is read in full and emptied once before any download
    starts. Links appended to it while downloads are running therefore
    stay in the file for the next run. Each link is then routed:

    - video sites: collected and sent to download_youtube() at the end;
    - gallery/profile links: downloaded with gallery-dl and recorded in
      the user's master list if not already present;
    - comic/manga links: downloaded and recorded in the comic list;
    - anything else: downloaded into the user's push directory.

    Blank lines in the push list are ignored.
    """
    # Links that must be handled by yt-dlp, processed after everything else
    link_video_cache = []
    re_links = re.compile(
        r"(twitter\.com\/\w+((?=.*media)|(?!.*status)))"
        r"|(men\.wikifeet)"
        r"|(furaffinity\.net\/user\/)"
        r"|((deviantart\.com\/\w+(?!.*\/art\/)))"
        r"|(furaffinity\.net\/gallery\/)"
        r"|(furaffinity\.net\/scraps\/)"
        r"|(furaffinity\.net\/favorites\/)"
        r"|(instagram.com(?!\/p\/)\/\w+)"
        r"|(e621\.net((?=\/post\/)|(?!\/posts\/)))"
        r"|(flickr\.com\/photos\/\w+\/(?!\d+))"
        r"|(tumblr\.com(?!\/post\/))"
        r"|(kemono\.party\/(fanbox|gumroad|patreon)(?!\/user\/\d+\/post))"
        r"|(blogspot\.com(?!\/))"
        r"|(rule34\.paheal\.net\/post\/(?!view))"
        r"|(rule34\.xxx\/index\.php\?page\=post&s=(?!view))"
        r"|(pixiv\.net\/(en\/)?((?=users)|(?!artwork)))"
        r"|(reddit\.com\/(user|u))"
        r"|(baraag\.net\/((@\w+)|(?!\/\d+)))"
        r"|(pinterest\.com\/(?!pin\/\d+))"
        r"|(redgifs\.com\/(users|u|(?!watch)))",
    )
    with open(user.list_push, encoding="utf-8") as list_push:
        pending = [line.rstrip() for line in list_push]
    if pending:
        # Flush the push list a single time, now that its content is in
        # memory. Truncating again during the loop would silently drop
        # links appended by another process while a download is running.
        with open(user.list_push, "w", encoding="utf-8"):
            pass
    for link in pending:
        if not link:
            # A blank line is not a link; nothing to download
            continue
        LOGGER.debug("Processing %s", link)
        # VIDEOS
        if re.search(r"youtu.be|youtube|pornhub|xtube|xvideos|chaturbate", link):
            LOGGER.debug("Matched type yt-dlp")
            link_video_cache.append(link)
        # Search for gallery links, these will be added to a list after downloading
        elif re.search(re_links, link):
            LOGGER.debug("Matched type gallery-dl")
            # skip_arg = f" -A {SKIP}" if ARGS.flag_skip else ""
            skip_arg = " -o skip=true" if not ARGS.flag_skip else ""
            gallery_link = parse_link(link)
            instagram_arg = parse_instagram(link)
            LOGGER.debug("Skip: %s, link: %s", skip_arg, instagram_arg)
            download_gallery(
                ARGS.flag_archive,
                skip_arg,
                quote(gallery_link),
                f"{user.sleep}",
                quote(f"{user.dir_download}"),
                quote(f"{user.archive_gallery}"),
                "",
                instagram_arg,
            )
            # Record the gallery link, so it remains on the watch list.
            # Opening in append mode first creates the file if it is missing.
            with open(user.list_master, "a", encoding="utf-8") as w_file, open(
                user.list_master, "r", encoding="utf-8"
            ) as r_file:
                content = r_file.read().lower()
                if gallery_link.lower() in content:
                    LOGGER.info("Gallery repeated, not saving")
                else:
                    LOGGER.info("New gallery, saving")
                    w_file.write(gallery_link + "\n")
        # Searches for comic/manga links
        elif re.search(r"readcomiconline|mangahere|mangadex|webtoons", link):
            # Toggle for comic/manga skip flag
            if ARGS.flag_skip and re.search(r"readcomiconline", link):
                skip_arg = " --chapter-range 1"
            elif ARGS.flag_skip and re.search(r"mangahere|webtoons", link):
                skip_arg = " --chapter-range 1-5"
            else:
                skip_arg = ""
            LOGGER.debug(skip_arg)
            download_gallery(
                ARGS.flag_archive,
                skip_arg,
                quote(link),
                "0",
                CONFIGS["comic"]["download-directory"],
                CONFIGS["comic"]["archive"],
                "",
                "",
            )
            # Add comic/manga link to the list
            list_gn = CONFIGS["comic"]["list"]
            with open(list_gn, "a", encoding="utf-8") as w_file, open(
                list_gn, "r", encoding="utf-8"
            ) as r_file:
                content = r_file.read().lower()
                if parse_link(link).lower() in content:
                    LOGGER.info("Graphic novel repeated, not saving")
                else:
                    LOGGER.info("New graphic novel, saving")
                    w_file.write(link + "\n")
        # Download generic links, the -o flag overwrites config file and
        # downloads the files into the root destination
        else:
            LOGGER.info("Other type of download %s", link)
            download_gallery(
                False,
                " -o directory='[]'",
                quote(link),
                "0",
                quote(str(user.dir_push)),
                "",
                "",
                "",
            )
    # Send the video links to yt-dlp
    for link in link_video_cache:
        download_youtube(
            ARGS.flag_archive,
            quote(link),
            f"{user.dir_media_download}",
            quote(f"{user.archive_media}"),
        )
def scrapper_manager(user: User):
    # pylint: disable=too-many-branches
    """Run the scrapper selected in ARGS.scrapper for *user*.

    Does nothing when no scrapper was selected or when the value is not
    one of: gallery, instagram, kemono, push, comic, manga, webcomic.
    Names are compared for equality; a substring test would let partial
    values such as "a" or "com" select the wrong scrapper.
    """
    scrapper = ARGS.scrapper
    if not scrapper:  # Check if a scrapper was selected
        return
    if scrapper == "gallery":
        parse_gallery(f"{user.list_main}", user)
    elif scrapper == "instagram":
        parse_gallery(f"{user.list_instagram}", user)
    elif scrapper == "kemono":
        parse_gallery(f"{user.list_kemono}", user)
    elif scrapper == "push":
        push_manager(user)
    elif scrapper == "comic":
        # With the skip flag only the first chapter is requested
        skip_arg = " --chapter-range 1" if ARGS.flag_skip else ""
        LOGGER.debug(skip_arg)
        comic_manager(skip_arg, "comic")
    elif scrapper == "manga":
        # With the skip flag only chapters 1-5 are requested
        skip_arg = " --chapter-range 1-5" if ARGS.flag_skip else ""
        LOGGER.debug(skip_arg)
        comic_manager(skip_arg, "manga")
    elif scrapper == "webcomic":
        webcomic_manager()
def main():
    """Decide what to do based on the parsed arguments.

    With a scrapper selected:
    - user "everyone" plus a per-user scrapper (push, gallery, instagram,
      kemono) runs the scrapper once for every configured user;
    - comic, manga and webcomic scrappers always run as the user "jawz";
    - otherwise the scrapper runs for ARGS.user.

    With links given instead, they are appended to the push list of the
    selected user ("jawz" when the user is "everyone") and the push list
    is processed. When ARGS.flag_verbose is set, the links are only
    logged, not written.

    User names are compared for equality; a substring test would treat
    names such as "eve" or "one" as "everyone".
    """
    if ARGS.scrapper:
        if ARGS.user == "everyone" and re.search(
            r"push|gallery|instagram|kemono", ARGS.scrapper
        ):
            for current_user in CONFIGS["users"]:
                user = User(get_index(current_user["name"]))
                user.list_manager()
                LOGGER.info("Scrapping %s for %s", ARGS.scrapper, current_user["name"])
                scrapper_manager(user)
        elif re.search(r"comic|manga|webcomic", ARGS.scrapper):
            # NOTE(review): hard-coded user name; presumably the owner of
            # the comic lists - confirm against the configuration.
            user = User(get_index("jawz"))
            user.list_manager()
            LOGGER.info("Scrapping %s", ARGS.scrapper)
            scrapper_manager(user)
        else:
            # Create the lists to scrap
            user = User(get_index(ARGS.user))
            user.list_manager()
            scrapper_manager(user)
    elif ARGS.link:
        LOGGER.debug(ARGS.link)
        # Links pushed for "everyone" are stored under the user "jawz"
        owner = "jawz" if ARGS.user in ("everyone", "jawz") else ARGS.user
        # Create the lists to scrap
        user = User(get_index(owner))
        user.list_manager()
        for arg_link in ARGS.link[0]:
            LOGGER.debug(arg_link)
            if ARGS.flag_verbose:
                LOGGER.debug(
                    "%s >> %s", quote(parse_link(arg_link)), quote(user.list_push)
                )
            else:
                with open(user.list_push, "a", encoding="utf-8") as open_file:
                    open_file.write(parse_link(arg_link) + "\n")
        push_manager(user)


if __name__ == "__main__":
    main()