diff --git a/src/download/admin.py b/src/download/admin.py index cfb8864..648c710 100644 --- a/src/download/admin.py +++ b/src/download/admin.py @@ -64,6 +64,7 @@ def build_parser() -> argparse.ArgumentParser: p_list.add_argument("--user", action="append") p_list.add_argument("--disabled", action="store_true") p_list.add_argument("--banned", action="store_true") + p_list.add_argument("--requires-revision", action="store_true") p_list.set_defaults(func=cmd_list) p_users = sub.add_parser("users") diff --git a/src/download/admin_links.py b/src/download/admin_links.py index ee443ec..69ace29 100644 --- a/src/download/admin_links.py +++ b/src/download/admin_links.py @@ -114,6 +114,7 @@ def cmd_list(args: argparse.Namespace) -> None: users=users, include_disabled=args.disabled, include_banned=args.banned, + requires_revision_only=args.requires_revision, ) for row in rows: status = "enabled" if row["enabled"] else "disabled" diff --git a/src/download/classes/gallery.py b/src/download/classes/gallery.py index 94fa2cf..7b7894b 100644 --- a/src/download/classes/gallery.py +++ b/src/download/classes/gallery.py @@ -72,5 +72,5 @@ class Gallery: LOG.debug(command) self.command = command - def run_command(self, verbose: bool): - run(self.command, verbose) + def run_command(self, verbose: bool, on_line=None): + run(self.command, verbose, on_line=on_line) diff --git a/src/download/db.py b/src/download/db.py index 7e4c74a..d44bbdf 100644 --- a/src/download/db.py +++ b/src/download/db.py @@ -43,7 +43,8 @@ def ensure_schema(conn: sqlite3.Connection) -> None: updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, disabled_at TEXT, banned_at TEXT, - banned_reason TEXT + banned_reason TEXT, + requires_revision INTEGER NOT NULL DEFAULT 0 ); CREATE UNIQUE INDEX IF NOT EXISTS links_user_url_norm @@ -72,6 +73,19 @@ def ensure_schema(conn: sqlite3.Connection) -> None: ON link_tombstones (user_name, url_normalized); """ ) + _ensure_column( + conn, + "links", + "requires_revision", + "ALTER TABLE links ADD COLUMN requires_revision INTEGER NOT NULL DEFAULT 0", + ) + + +def _ensure_column(conn: sqlite3.Connection, table: str, column: str, ddl: str) -> None: + cols = [row[1] for row in conn.execute(f"PRAGMA table_info({table})").fetchall()] + if column in cols: + return + conn.execute(ddl) def normalize_url(url: str) -> str: @@ -247,6 +261,74 @@ def set_banned( return True +def mark_requires_revision( + conn: sqlite3.Connection, + user_name: str, + url_original: str, + reason: str, +) -> bool: + url_norm = normalize_url(url_original) + rows = conn.execute( + "SELECT id, url_original FROM links WHERE user_name = ? AND url_normalized = ?", + (user_name, url_norm), + ).fetchall() + if not rows: + return False + for row in rows: + conn.execute( + """ + UPDATE links + SET requires_revision = 1, + enabled = 0, + disabled_at = COALESCE(disabled_at, CURRENT_TIMESTAMP), + updated_at = CURRENT_TIMESTAMP + WHERE id = ? + """, + (row["id"],), + ) + add_history( + conn, + user_name, + "requires_revision", + link_id=row["id"], + old_url=row["url_original"], + note=reason, + ) + return True + + +def mark_requires_revision_by_norm( + conn: sqlite3.Connection, url_norm: str, reason: str +) -> int: + rows = conn.execute( + "SELECT id, user_name, url_original FROM links WHERE url_normalized = ?", + (url_norm,), + ).fetchall() + if not rows: + return 0 + for row in rows: + conn.execute( + """ + UPDATE links + SET requires_revision = 1, + enabled = 0, + disabled_at = COALESCE(disabled_at, CURRENT_TIMESTAMP), + updated_at = CURRENT_TIMESTAMP + WHERE id = ? + """, + (row["id"],), + ) + add_history( + conn, + row["user_name"], + "requires_revision", + link_id=row["id"], + old_url=row["url_original"], + note=reason, + ) + return len(rows) + + def rename_link( conn: sqlite3.Connection, user_name: str, @@ -329,6 +411,7 @@ def get_links( users: Iterable[str] | None = None, include_disabled: bool = False, include_banned: bool = False, + requires_revision_only: bool = False, ) -> list[sqlite3.Row]: params: list = [] where = [] @@ -340,6 +423,8 @@ def get_links( where.append("enabled = 1") if not include_banned: where.append("banned_at IS NULL") + if requires_revision_only: + where.append("requires_revision = 1") clause = " AND ".join(where) if clause: clause = "WHERE " + clause diff --git a/src/download/download.py b/src/download/download.py index d641039..a8ec339 100644 --- a/src/download/download.py +++ b/src/download/download.py @@ -73,15 +73,20 @@ def get_index(name: str) -> int: def parse_gallery(gdl_list: str, user: User) -> None: """Processes the gallery-dl command based on the selected gallery""" args = get_args() - gallery = Gallery() - gallery.archive = args.flag_archive - gallery.skip_arg = " -o skip=true" if not args.flag_skip else "" - gallery.dest = "download" - gallery.list = gdl_list - gallery.opt_args = parse_instagram(gdl_list) + list_path = user.lists[gdl_list] + with open(list_path, "r", encoding="utf-8") as r_file: + links = list(map(lambda x: x.rstrip(), r_file)) + for link in filter(None, links): + gallery = Gallery() + gallery.archive = args.flag_archive + gallery.skip_arg = " -o skip=true" if not args.flag_skip else "" + gallery.dest = "download" + gallery.link = link + gallery.opt_args = parse_instagram(link) - gallery.generate_command(user) - gallery.run_command(args.flag_verbose) + gallery.generate_command(user) + handler = _make_gallery_error_handler(link) + gallery.run_command(args.flag_verbose, on_line=handler) def parse_instagram(link: str, post_type: list[str] | str | None = None) -> list[str]: @@ -95,6 +100,48 @@ def parse_instagram(link: str, post_type: list[str] | str | None = None) -> list return ["-o", f"include={use_type}"] +REVISION_ERRORS = { + "NotFoundError: Requested user could not be found", + "Unable to retrieve Tweets from this timeline", + "No results for", +} + +TRANSIENT_ERRORS = { + "User input required (password)", + "cookies", + "429", + "rate limit", + "timed out", + "timeout", + "Network", + "connection", +} + + +def _make_gallery_error_handler(link: str): + norm = db.normalize_url(link) + + def handle(line: str) -> None: + if "[error]" in line: + reason = line.split("[error]", 1)[1].strip() + if reason in REVISION_ERRORS: + with db.connect() as conn: + db.mark_requires_revision_by_norm(conn, norm, reason) + conn.commit() + if any(tok in reason for tok in TRANSIENT_ERRORS): + LOG.warning("Transient error for %s: %s", link, reason) + return + if "No results for" in line: + with db.connect() as conn: + db.mark_requires_revision_by_norm(conn, norm, "No results for") + conn.commit() + return + if any(tok in line for tok in TRANSIENT_ERRORS): + LOG.warning("Transient error for %s: %s", link, line.strip()) + + return handle + + def _comic_skip_arg(link: str, flag_skip: bool) -> str: if not flag_skip: return "" @@ -122,7 +169,8 @@ def _handle_gallery_link(user: User, link: str, args, conn) -> None: gallery.dest = "download" gallery.opt_args = parse_instagram(link) gallery.generate_command(user) - gallery.run_command(args.flag_verbose) + handler = _make_gallery_error_handler(link) + gallery.run_command(args.flag_verbose, on_line=handler) def _handle_comic_link(link: str, args) -> None: @@ -131,7 +179,8 @@ def _handle_comic_link(link: str, args) -> None: gallery.skip_arg = _comic_skip_arg(link, args.flag_skip) gallery.link = link gallery.generate_command(is_comic=True) - gallery.run_command(args.flag_verbose) + handler = _make_gallery_error_handler(link) + gallery.run_command(args.flag_verbose, on_line=handler) save_comic(link) @@ -152,7 +201,8 @@ def _handle_other_link(user: User, link: str, args) -> None: gallery.link = link gallery.dest = "push" gallery.generate_command(user) - gallery.run_command(args.flag_verbose) + handler = _make_gallery_error_handler(link) + gallery.run_command(args.flag_verbose, on_line=handler) def video_command(video: Video): diff --git a/src/download/functions.py b/src/download/functions.py index 3a2ee09..b1959c6 100644 --- a/src/download/functions.py +++ b/src/download/functions.py @@ -66,6 +66,7 @@ def run( verbose: bool, cwd: Path | None = None, check: bool = False, + on_line=None, ) -> None: """Run command in a subprocess""" # pylint: disable=subprocess-run-check @@ -83,9 +84,28 @@ def run( else: args = list(command) - result = subprocess.run(args, check=check, cwd=cwd) - if not check and result.returncode != 0: - LOG.warning("Command failed (%s): %s", result.returncode, args) + if on_line is None: + result = subprocess.run(args, check=check, cwd=cwd) + if not check and result.returncode != 0: + LOG.warning("Command failed (%s): %s", result.returncode, args) + return + + proc = subprocess.Popen( + args, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + assert proc.stdout is not None + for line in proc.stdout: + print(line, end="") + on_line(line) + returncode = proc.wait() + if check and returncode != 0: + raise subprocess.CalledProcessError(returncode, args) + if not check and returncode != 0: + LOG.warning("Command failed (%s): %s", returncode, args) def list_lines(i: int, line: str) -> str: diff --git a/src/download/tests/test_download.py b/src/download/tests/test_download.py index 5b620d2..e4474af 100644 --- a/src/download/tests/test_download.py +++ b/src/download/tests/test_download.py @@ -38,6 +38,7 @@ class TestDownload(unittest.TestCase): self.orig_db_connect = download.db.connect self.orig_db_add_link = download.db.add_link self.orig_save_comic = download.save_comic + self.orig_make_handler = download._make_gallery_error_handler def tearDown(self) -> None: download.Gallery = self.orig_gallery @@ -46,6 +47,7 @@ class TestDownload(unittest.TestCase): download.db.connect = self.orig_db_connect download.db.add_link = self.orig_db_add_link download.save_comic = self.orig_save_comic + download._make_gallery_error_handler = self.orig_make_handler def test_parse_instagram(self): res = download.parse_instagram("https://instagram.com/user") @@ -101,6 +103,7 @@ class TestDownload(unittest.TestCase): download.video_command = fake_video_command download.run = lambda *args, **kwargs: None download.save_comic = lambda *_args, **_kwargs: None + download._make_gallery_error_handler = lambda *_args, **_kwargs: None links = [ "https://x.com/someuser",