diff --git a/src/download/admin_links.py b/src/download/admin_links.py index 4041854..b26d14d 100644 --- a/src/download/admin_links.py +++ b/src/download/admin_links.py @@ -222,19 +222,30 @@ def cmd_fix_x_media(_: argparse.Namespace) -> None: with db.connect() as conn: rows = conn.execute( """ - SELECT id, url_original FROM links + SELECT id, user_name, url_original FROM links WHERE url_original LIKE '%x.com/%//media%' """ ).fetchall() for row in rows: fixed = row["url_original"].replace("//media", "/media") + norm = db.normalize_url(fixed) + conflict = conn.execute( + """ + SELECT id FROM links + WHERE user_name = ? AND url_normalized = ? AND id != ? + """, + (row["user_name"], norm, row["id"]), + ).fetchone() + if conflict: + conn.execute("DELETE FROM links WHERE id = ?", (row["id"],)) + continue conn.execute( """ UPDATE links SET url_original = ?, url_normalized = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ? """, - (fixed, db.normalize_url(fixed), row["id"]), + (fixed, norm, row["id"]), ) conn.commit() print("ok")