scripts/tuhmayto/tracker.py

import re
import sqlite3
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup


def setup_database() -> None:
    """Create the SQLite database and the activity_log table if they do not exist."""
    conn = sqlite3.connect("activity_log.db")
    cursor = conn.cursor()
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS activity_log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT UNIQUE NOT NULL
        )
        """
    )
    conn.commit()
    conn.close()


def log_activity(timestamp: str) -> None:
    """Insert a timestamp; INSERT OR IGNORE skips duplicates thanks to the UNIQUE constraint."""
    conn = sqlite3.connect("activity_log.db")
    cursor = conn.cursor()
    cursor.execute(
        "INSERT OR IGNORE INTO activity_log (timestamp) VALUES (?)", (timestamp,)
    )
    conn.commit()
    conn.close()


def fetch_latest_log() -> datetime | None:
    """Return the most recent logged timestamp, or None if the table is empty."""
    conn = sqlite3.connect("activity_log.db")
    cursor = conn.cursor()
    # The "%Y-%m-%d %H:%M:%S" format sorts lexicographically, so ORDER BY on the
    # TEXT column yields true chronological order.
    cursor.execute("SELECT timestamp FROM activity_log ORDER BY timestamp DESC LIMIT 1")
    result = cursor.fetchone()
    conn.close()
    return datetime.strptime(result[0], "%Y-%m-%d %H:%M:%S") if result else None


def parse_last_seen(text: str) -> datetime | None:
    """Translate the site's Spanish "last seen" text into an approximate datetime."""
    now = datetime.now()
    if "Visto por última vez" in text:  # "Last seen ..."
        # Optional trailing "s" so singular forms ("1 hora", "1 minuto") also match;
        # "día" already covers both "día" and "días".
        days_match = re.search(r"(\d+) días?", text)
        hours_match = re.search(r"(\d+) horas?", text)
        minutes_match = re.search(r"(\d+) minutos?", text)
        if days_match:
            return now - timedelta(days=int(days_match.group(1)))
        if hours_match:
            return now - timedelta(hours=int(hours_match.group(1)))
        if minutes_match:
            return now - timedelta(minutes=int(minutes_match.group(1)))
    elif "online" in text.lower():
        return now
    return None
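

# Reference examples for parse_last_seen (the exact phrasing below is an assumed
# example of the site's Spanish "last seen" strings, shown only to illustrate
# what the parser accepts):
#   parse_last_seen("Visto por última vez hace 3 horas")  -> now - 3 hours
#   parse_last_seen("Visto por última vez hace 2 días")   -> now - 2 days
#   parse_last_seen("online")                             -> now
#   parse_last_seen("texto no reconocido")                -> None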


def scrape_and_log(url: str) -> None:
    """Fetch the profile page, extract the "last seen" text, and log it if it is new."""
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return
    soup = BeautifulSoup(response.text, "html.parser")
    details_row = soup.find("div", class_="details-row")
    if not details_row:
        return
    offline_div = details_row.find("div", class_="offline")
    if not offline_div:
        return
    last_seen_text = offline_div.text.strip()
    last_seen_time = parse_last_seen(last_seen_text)
    if not last_seen_time:
        return
    latest_log = fetch_latest_log()
    # Skip anything that is not newer than the most recently logged day.
    if latest_log and last_seen_time.date() <= latest_log.date():
        print(f"A log already exists for {latest_log} or later. Skipping new log.")
        return
    # Guard against duplicates within the same calendar hour of the same day.
    if (
        latest_log
        and last_seen_time.date() == latest_log.date()
        and last_seen_time.hour == latest_log.hour
    ):
        print("An entry for this hour already exists. Skipping new log.")
        return
    timestamp = last_seen_time.strftime("%Y-%m-%d %H:%M:%S")
    log_activity(timestamp)
    print(f"Logged activity: {timestamp}")


def main() -> None:
    url = "https://es.xhamsterporno.mx/users/johnneal911"
    setup_database()
    scrape_and_log(url)


if __name__ == "__main__":
    main()
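

# Usage sketch (assumptions: Python 3.10+ for the "datetime | None" annotations,
# and a periodic scheduler such as cron; the interpreter and script paths below
# are illustrative, not taken from the repository):
#
#   */30 * * * * /usr/bin/python3 /path/to/scripts/tuhmayto/tracker.py
#
# Because activity_log.timestamp is UNIQUE and inserts use INSERT OR IGNORE,
# repeated runs that resolve to the same timestamp are deduplicated by SQLite.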