#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Download manager script.

Rewriting of the download manager script with the intention to make it
more modular with the use of flags in order to avoid unnecessary
modifications to the config files.  Also following in line more POSIX
and Python rules.
"""
import re
import time
import logging

import yaml

from functions import run
from functions import quote
from functions import list_lines
from functions import load_config_variables
from argparser import argparser
from gdl_classes import User

# GLOBAL VARIABLE SECTION
# Store the name of the main binaries early in the code
BIN_GALLERY = "gallery-dl"
BIN_YOUTUBE = "yt-dlp"
# SKIP = "3"
CONFIGS = load_config_variables()

LOGGER = logging.getLogger()
HANDLER = logging.StreamHandler()
FORMATTER = logging.Formatter(
    "[%(filename)s][%(levelname)s] %(funcName)s '%(message)s'"
)
HANDLER.setFormatter(FORMATTER)
LOGGER.addHandler(HANDLER)
LOGGER.setLevel(logging.INFO)

# Enable a default "everyone" flag for when running stuff like download gallery
USERS = ["everyone"]
for dictionary in CONFIGS["users"]:
    USERS.append(dictionary["name"])
ARGS = argparser(USERS)


def get_index(value: str) -> int:
    """Find the index of a user in the config file.

    Returns -1 when the name is not present.
    NOTE(review): callers pass the result straight to ``User(...)``;
    a -1 sentinel would silently select the last entry — confirm
    ``User`` guards against it.
    """
    for i, dic in enumerate(CONFIGS["users"]):
        if dic["name"] == value:
            LOGGER.debug("%s is %s", dic["name"], i)
            return i
    return -1


def parse_gallery(gdl_list: str, user: User):
    """Processes the gallery-dl command based on the selected gallery.

    Builds the skip argument from the CLI flag and forwards the list
    file plus the user's directories/archives to download_gallery().
    """
    # skip_arg = f" -A {SKIP}" if ARGS.flag_skip else ""
    skip_arg = " -o skip=true" if not ARGS.flag_skip else ""
    LOGGER.debug(skip_arg)
    # Send the list to gallery-dl
    download_gallery(
        ARGS.flag_archive,
        skip_arg,
        "",
        str(user.sleep),
        quote(f"{user.dir_download}"),
        quote(f"{user.archive_gallery}"),
        quote(gdl_list),
        parse_instagram(gdl_list),
    )


def parse_instagram(link: str) -> str:
    """Build the gallery-dl ``include`` option for instagram links.

    Returns an empty string for non-instagram links.  ARGS.post_type
    may be a single string or a list of post types.
    """
    if "instagram" not in link:
        return ""
    post_type = ARGS.post_type
    if isinstance(post_type, list):
        post_type = ",".join(post_type)
    string = f" -o include={quote(post_type)}"
    LOGGER.debug(string)
    return string


def parse_link(link: str) -> str:
    """Fixes links.

    Twitter profile links (anything that is not a status) are rewritten
    to end in ``/media`` so the full media timeline is scraped.
    """
    if not re.search(r"(twitter\.com\/\w+(\/)?(?!.*status))", link):
        LOGGER.debug("No modifications needed for the link %s", link)
        return link
    # if url contains /media at the end just write the line
    fixed_link = re.sub(r"\/$|\/media(\/?)$", "", link) + "/media"
    LOGGER.debug("Processed link %s", fixed_link)
    return fixed_link


def download_gallery(
    use_archive: bool,
    skip_arg: str = "",
    link: str = "",
    sleep: str = "0",
    destination: str = "",
    database: str = "",
    queue: str = "",
    opt_args: str = "",
):
    """Processes the command string to run the gallery archiver.

    Exactly one of ``link`` (a single URL) or ``queue`` (a list file
    passed via ``-i``) should be non-empty; if both or neither are set,
    neither is appended to the command.
    """
    command = f"{BIN_GALLERY} --sleep {sleep}"
    if skip_arg != "":
        command += skip_arg
    if destination != "":
        command += f" --dest {destination}"
    if use_archive:
        command += f" --download-archive {database}"
    if opt_args != "":
        command += opt_args
    if link != "" and queue == "":
        LOGGER.info("link: %s", quote(link))
        command += f" {link}"
    if queue != "" and link == "":
        LOGGER.info("queue: %s", queue)
        command += f" -i {queue}"
    LOGGER.debug(command)
    run(command, ARGS.flag_verbose)


def download_youtube(
    use_archive: bool,
    link: str = "",
    destination: str = "",
    database: str = "",
):
    """Filters and processes the required command to download videos.

    Chooses yt-dlp options based on the link type: plain youtube,
    youtube music (audio extraction), chaturbate streams (re-polled
    every 30 seconds), or a generic mp4 download.
    """
    command = BIN_YOUTUBE
    # NOTE: dots escaped so e.g. "youtuXbe" no longer matches (was unescaped)
    if re.search(r"(https:\/\/youtube|https:\/\/www\.youtube|https:\/\/youtu\.be)", link):
        command += f' -o {quote(destination + "/%(title)s.%(ext)s")}'
    elif re.search(r"(https:\/\/music\.youtube.*)", link):
        if use_archive:
            command += f" --download-archive {database}"
        command += f""" \
--no-playlist --newline -x \
--audio-format best --add-metadata --audio-quality 0 -o \
{quote(destination + '/%(title)s.%(ext)s')} \
"""
    elif re.search(r"chaturbate", link):
        # Re-runs the program every 30 seconds in case the stream goes private or dc
        for i in range(1, 41):  # For a 20 minute total
            run(
                f"""
{BIN_YOUTUBE} \
--hls-use-mpegts --prefer-ffmpeg \
-o {quote(destination + '/%(title)s.%(ext)s')} \
{link}
""",
                ARGS.flag_verbose,
            )
            time.sleep(30)
            LOGGER.info("waited for %s minutes", i * 30 / 60)
    else:
        # Any other video link, just do it generic
        command += f" -f mp4 -o {quote(destination + '/%(title)s.%(ext)s')}"
    LOGGER.info("%s %s", command, link)
    run(f"{command} {link}", ARGS.flag_verbose)


def comic_manager(skip_arg: str, category: str):
    """Process the information to download manga.

    Filters the shared comic list by ``category`` ("manga" or "comic").
    NOTE(review): an unknown category yields an empty regex, which
    matches every line — confirm that is intentional.
    """
    re_cat = ""
    if category == "manga":
        re_cat = "manga|webtoon"
    elif category == "comic":
        re_cat = "readcomiconline"
    with open(CONFIGS["comic"]["list"], encoding="utf-8") as list_comic:
        for graphic_novel in [line.rstrip() for line in list_comic]:
            # Search for mangas but exclude comics
            if not re.search(re_cat, graphic_novel):
                LOGGER.debug("%s does not match regex espression", graphic_novel)
                continue
            download_gallery(
                ARGS.flag_archive,
                skip_arg,
                quote(graphic_novel),
                "0",
                CONFIGS["comic"]["download-directory"],
                CONFIGS["comic"]["archive"],
                "",
                "",
            )


def webcomic_manager():
    """Process the information to download webcomics.

    Lists the webcomics from the YAML config, prompts the user for a
    choice, and runs webcomix in the directory matching the entry's
    rating/type.
    """
    webcomic_list = CONFIGS["comic"]["webcomic-list"]
    with open(webcomic_list, encoding="utf-8") as open_list:
        webcomic_file = yaml.safe_load(open_list)
    # Create a list of all the available webcomics for the user to chose from
    for index, entry in enumerate(webcomic_file["Webcomics"]):
        print(list_lines(index, entry["name"]))
    # Prompt for a choice
    usr_input = int(input("Select your comic: "))
    # Determines where the webcomic will be downloaded
    rating = webcomic_file["Webcomics"][usr_input]["type"]
    webcomic_category = webcomic_file["Global"][f"{rating}_directory"]
    LOGGER.debug("The webcomic is %s", webcomic_category)
    command = f"""cd {quote(webcomic_category)} && webcomix custom \
{quote(webcomic_file["Webcomics"][usr_input]["name"])} \
--start-url \
{quote(webcomic_file["Webcomics"][usr_input]["url"])} \
--next-page-xpath={quote(webcomic_file["Webcomics"][usr_input]["next_code"])} \
--image-xpath={quote(webcomic_file["Webcomics"][usr_input]["image_code"])} \
-y --cbz"""
    LOGGER.debug(command)
    run(command, ARGS.flag_verbose)


def push_manager(user: User):
    """Filters out the URL to use the appropriate downloader.

    Reads the user's push list, flushes it, then dispatches each link:
    videos go to yt-dlp (batched at the end), gallery links go to
    gallery-dl and are saved to the watch list, comics/manga go to the
    comic list, and anything else is downloaded generically.
    """
    # Creates an array which will store any links that should use yt-dlp
    link_video_cache = []
    re_links = re.compile(
        r"(twitter\.com\/\w+((?=.*media)|(?!.*status)))"
        r"|(men\.wikifeet)"
        r"|(furaffinity\.net\/user\/)"
        r"|((deviantart\.com\/\w+(?!.*\/art\/)))"
        r"|(furaffinity\.net\/gallery\/)"
        r"|(furaffinity\.net\/scraps\/)"
        r"|(furaffinity\.net\/favorites\/)"
        r"|(instagram.com(?!\/p\/)\/\w+)"
        r"|(e621\.net((?=\/post\/)|(?!\/posts\/)))"
        r"|(flickr\.com\/photos\/\w+\/(?!\d+))"
        r"|(tumblr\.com(?!\/post\/))"
        r"|(kemono\.party\/(fanbox|gumroad|patreon)(?!\/user\/\d+\/post))"
        r"|(blogspot\.com(?!\/))"
        r"|(rule34\.paheal\.net\/post\/(?!view))"
        r"|(rule34\.xxx\/index\.php\?page\=post&s=(?!view))"
        r"|(pixiv\.net\/(en\/)?((?=users)|(?!artwork)))"
        r"|(reddit\.com\/(user|u))"
        r"|(baraag\.net\/((@\w+)|(?!\/\d+)))"
        r"|(pinterest\.com\/(?!pin\/\d+))"
        r"|(redgifs\.com\/(users|u|(?!watch)))",
    )
    # Read every queued link first, then flush the push list once.
    # (Previously the file was re-opened and truncated on every loop
    # iteration, shadowing the read handle.)
    with open(user.list_push, encoding="utf-8") as list_push:
        links = [line.rstrip() for line in list_push]
    with open(user.list_push, "w", encoding="utf-8"):
        pass  # truncate: cleans all the contents
    for link in links:
        LOGGER.debug("Processing %s", link)
        # VIDEOS
        if re.search(r"youtu.be|youtube|pornhub|xtube|xvideos|chaturbate", link):
            LOGGER.debug("Matched type yt-dlp")
            link_video_cache.append(link)
        # Search for gallery links, these will be added to a list after downloading
        elif re.search(re_links, link):
            LOGGER.debug("Matched type gallery-dl")
            # skip_arg = f" -A {SKIP}" if ARGS.flag_skip else ""
            skip_arg = " -o skip=true" if not ARGS.flag_skip else ""
            LOGGER.debug("Skip: %s, link: %s", skip_arg, parse_instagram(link))
            download_gallery(
                ARGS.flag_archive,
                skip_arg,
                quote(f"{parse_link(link)}"),
                f"{user.sleep}",
                quote(f"{user.dir_download}"),
                quote(f"{user.archive_gallery}"),
                "",
                f"{parse_instagram(link)}",
            )
            # Record the gallery link, so it remains on the watch list
            with open(user.list_master, "a", encoding="utf-8") as w_file, open(
                user.list_master, "r", encoding="utf-8"
            ) as r_file:
                content = r_file.read().lower()
                if parse_link(link).lower() in content:
                    LOGGER.info("Gallery repeated, not saving")
                    continue
                LOGGER.info("New gallery, saving")
                w_file.write(parse_link(str(link)) + "\n")
        # Searches for comic/manga links
        elif re.search(r"readcomiconline|mangahere|mangadex|webtoons", link):
            # Toggle for comic/manga skip flag
            if ARGS.flag_skip and re.search(r"readcomiconline", link):
                skip_arg = " --chapter-range 1"
            elif ARGS.flag_skip and re.search(r"mangahere|webtoons", link):
                skip_arg = " --chapter-range 1-5"
            else:
                skip_arg = ""
            LOGGER.debug(skip_arg)
            download_gallery(
                ARGS.flag_archive,
                skip_arg,
                quote(link),
                "0",
                CONFIGS["comic"]["download-directory"],
                CONFIGS["comic"]["archive"],
                "",
                "",
            )
            # Add comic/manga link to the list
            list_gn = CONFIGS["comic"]["list"]
            with open(list_gn, "a", encoding="utf-8") as w_file, open(
                list_gn, "r", encoding="utf-8"
            ) as r_file:
                content = r_file.read().lower()
                if parse_link(link).lower() in content:
                    LOGGER.info("Graphic novel repeated, not saving")
                    continue
                LOGGER.info("New graphic novel, saving")
                w_file.write(link + "\n")
        # Download generic links, the -o flag overwrites config file and
        # downloads the files into the root destination
        else:
            LOGGER.info("Other type of download %s", link)
            download_gallery(
                False,
                " -o directory='[]'",
                quote(link),
                "0",
                quote(str(user.dir_push)),
                "",
                "",
                "",
            )
    # Send the video links to yt-dlp
    for link in link_video_cache:
        download_youtube(
            ARGS.flag_archive,
            quote(link),
            f"{user.dir_media_download}",
            quote(f"{user.archive_media}"),
        )


def scrapper_manager(user: User):  # pylint: disable=too-many-branches
    """Analyze the user arguments and call in functions."""
    if not ARGS.scrapper:  # Check if a scrapper was selected
        return
    if re.search(r"gallery|instagram|kemono", ARGS.scrapper):
        # skip_arg = f" -A {SKIP}" if ARGS.flag_skip else ""
        skip_arg = " -o skip=true" if not ARGS.flag_skip else ""
        LOGGER.debug(skip_arg)
        if ARGS.scrapper == "gallery":
            parse_gallery(f"{user.list_main}", user)
        elif ARGS.scrapper == "instagram":
            parse_gallery(f"{user.list_instagram}", user)
        elif ARGS.scrapper == "kemono":
            parse_gallery(f"{user.list_kemono}", user)
    # Fixed: these were substring tests (e.g. `ARGS.scrapper in "push"`),
    # which would also match partial values like "us" or "com".
    elif ARGS.scrapper == "push":
        push_manager(user)
    elif ARGS.scrapper == "comic":
        skip_arg = " --chapter-range 1" if ARGS.flag_skip else ""
        LOGGER.debug(skip_arg)
        comic_manager(skip_arg, "comic")
    elif ARGS.scrapper == "manga":
        skip_arg = " --chapter-range 1-5" if ARGS.flag_skip else ""
        LOGGER.debug(skip_arg)
        comic_manager(skip_arg, "manga")
    elif ARGS.scrapper == "webcomic":
        webcomic_manager()


def main():
    """Main module to decide what to do based on the parsed arguments."""
    if ARGS.scrapper:
        # Fixed: `ARGS.user in "everyone"` was a substring test.
        if ARGS.user == "everyone" and re.search(
            r"push|gallery|instagram|kemono", ARGS.scrapper
        ):
            for current_user in CONFIGS["users"]:
                user = User(get_index(current_user["name"]))
                user.list_manager()
                LOGGER.info("Scrapping %s for %s", ARGS.scrapper, current_user["name"])
                scrapper_manager(user)
        elif re.search(r"comic|manga|webcomic", ARGS.scrapper):
            user = User(get_index("jawz"))
            user.list_manager()
            LOGGER.info("Scrapping %s", ARGS.scrapper)
            scrapper_manager(user)
        else:
            # Create the lists to scrap
            user = User(get_index(ARGS.user))
            user.list_manager()
            scrapper_manager(user)
    elif ARGS.link:
        LOGGER.debug(ARGS.link)
        # Fixed: was re.search(r"everyone|jawz", ARGS.user), which also
        # matched user names merely containing those words.
        if ARGS.user in ("everyone", "jawz"):
            # Create the lists to scrap
            user = User(get_index("jawz"))
            user.list_manager()
        else:
            # Create the lists to scrap
            user = User(get_index(ARGS.user))
            user.list_manager()
        for arg_link in ARGS.link[0]:
            LOGGER.debug(arg_link)
            if ARGS.flag_verbose:
                LOGGER.debug(
                    "%s >> %s", quote(parse_link(arg_link)), quote(user.list_push)
                )
            else:
                with open(user.list_push, "a", encoding="utf-8") as open_file:
                    open_file.write(parse_link(arg_link) + "\n")
        push_manager(user)


if __name__ == "__main__":
    main()