"""Scrape a profile page's "last seen" status and record it in a SQLite log."""
import re
import sqlite3
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup
def setup_database() -> None:
    """Create the activity_log table (and its database file) if absent."""
    connection = sqlite3.connect("activity_log.db")
    try:
        # Connection.execute opens an implicit cursor; no explicit one needed.
        connection.execute(
            """
            CREATE TABLE IF NOT EXISTS activity_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT UNIQUE NOT NULL
            )
            """
        )
        connection.commit()
    finally:
        connection.close()
def log_activity(timestamp: str) -> None:
    """Insert *timestamp* into activity_log; duplicates are silently ignored.

    The UNIQUE constraint on the timestamp column plus ``INSERT OR IGNORE``
    makes repeated calls with the same value a no-op.
    """
    connection = sqlite3.connect("activity_log.db")
    try:
        connection.execute(
            "INSERT OR IGNORE INTO activity_log (timestamp) VALUES (?)",
            (timestamp,),
        )
        connection.commit()
    finally:
        connection.close()
def fetch_latest_log() -> datetime | None:
|
|
conn = sqlite3.connect("activity_log.db")
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT timestamp FROM activity_log ORDER BY timestamp DESC LIMIT 1")
|
|
result = cursor.fetchone()
|
|
conn.close()
|
|
return datetime.strptime(result[0], "%Y-%m-%d %H:%M:%S") if result else None
|
|
|
|
|
|
def parse_last_seen(text: str) -> datetime | None:
|
|
now = datetime.now()
|
|
if "Visto por última vez" in text:
|
|
days_match = re.search(r"(\d+) día", text)
|
|
hours_match = re.search(r"(\d+) horas", text)
|
|
minutes_match = re.search(r"(\d+) minutos", text)
|
|
if days_match:
|
|
days_ago = int(days_match.group(1))
|
|
return now - timedelta(days=days_ago)
|
|
if hours_match:
|
|
hours_ago = int(hours_match.group(1))
|
|
return now - timedelta(hours=hours_ago)
|
|
if minutes_match:
|
|
minutes_ago = int(minutes_match.group(1))
|
|
return now - timedelta(minutes=minutes_ago)
|
|
elif "online" in text.lower():
|
|
return now
|
|
return None
|
|
|
|
|
|
def scrape_and_log(url: str) -> None:
    """Scrape the profile page at *url* and record its "last seen" time.

    Fetches the page, extracts the offline-status text from the
    div.details-row > div.offline element, converts it to an absolute
    timestamp and stores it — unless an entry for the same day, or for the
    same hour, is already recorded. Any failure to fetch or parse is treated
    as "nothing to log" and the function returns silently.
    """
    # A timeout prevents the scraper from hanging forever on a stalled server.
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        return

    soup = BeautifulSoup(response.text, "html.parser")
    details_row = soup.find("div", class_="details-row")
    if not details_row:
        return

    offline_div = details_row.find("div", class_="offline")
    if not offline_div:
        return

    last_seen_time = parse_last_seen(offline_div.text.strip())
    if not last_seen_time:
        return

    latest_log = fetch_latest_log()
    if latest_log and last_seen_time.date() <= latest_log.date():
        print(f"A log already exists for {latest_log} or later. Skipping new log.")
        return

    # Compare the full hour (date + hour), not just the hour number: the
    # original check (last_seen_time.hour == latest_log.hour) also matched
    # the same wall-clock hour on a *different* day and wrongly skipped
    # genuinely new entries.
    if latest_log and last_seen_time.replace(
        minute=0, second=0, microsecond=0
    ) == latest_log.replace(minute=0, second=0, microsecond=0):
        print("An entry for this hour already exists. Skipping new log.")
        return

    timestamp = last_seen_time.strftime("%Y-%m-%d %H:%M:%S")
    log_activity(timestamp)
    print(f"Logged activity: {timestamp}")
def main():
    """Entry point: ensure the database exists, then run one scrape pass."""
    profile_url = "https://es.xhamsterporno.mx/users/johnneal911"
    setup_database()
    scrape_and_log(profile_url)


if __name__ == "__main__":
    main()