#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import hashlib
import os
from datetime import datetime
from pathlib import Path

import mysql.connector
from dotenv import load_dotenv

# ======================================================
# Load environment
# ======================================================
# The .env file is expected next to this script.
env_path = Path(__file__).resolve().parent / ".env"
load_dotenv(env_path)


# ======================================================
# MySQL connection
# ======================================================
def get_db_connection():
    """Open a connection to the ``walkfiles`` MySQL database.

    Connection settings are read from the environment variables
    DB_MYSQL_HOST, DB_MYSQL_ROOT, DB_MYSQL_ROOT_PASS and DB_MYSQL_PORT.
    When DB_MYSQL_PORT is unset or empty, port 3306 is used.

    Returns:
        An open connection whose session character set has been switched
        to utf8mb4 / utf8mb4_general_ci.

    Raises:
        mysql.connector.Error: if connecting or the SET NAMES statement fails.
    """
    conn = mysql.connector.connect(
        host=os.getenv("DB_MYSQL_HOST"),
        user=os.getenv("DB_MYSQL_ROOT"),
        password=os.getenv("DB_MYSQL_ROOT_PASS"),
        # int(None) would raise TypeError when the variable is missing.
        port=int(os.getenv("DB_MYSQL_PORT") or 3306),
        database="walkfiles",
        auth_plugin="mysql_native_password",
    )
    try:
        c = conn.cursor()
        try:
            c.execute("SET NAMES utf8mb4 COLLATE utf8mb4_general_ci")
        finally:
            c.close()
    except Exception:
        # Do not leak the connection if session setup fails.
        conn.close()
        raise
    return conn


# ======================================================
# Helpers
# ======================================================
def file_md5(path, chunk_size=1024 * 1024):
    """Compute the MD5 hex digest of a file's content.

    The file is read in blocks of ``chunk_size`` bytes so that it never
    has to be held in memory as a whole.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The hexadecimal MD5 digest as a string.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            md5.update(chunk)
    return md5.hexdigest()


# Suffix -> multiplier. Multi-letter suffixes must be tested before the
# plain "B" suffix because every one of them also ends with "B".
_SIZE_UNITS = (
    ("KB", 1024),
    ("MB", 1024 ** 2),
    ("GB", 1024 ** 3),
    ("TB", 1024 ** 4),
    ("B", 1),
)


def parse_size(size_str) -> int:
    """Convert a human-readable size into a number of bytes.

    Accepted inputs:
      * strings with a unit suffix, case-insensitive: "500kB", "10MB",
        "2GB", "1TB", "128B" (units are powers of 1024);
      * strings holding a plain integer, taken as raw bytes;
      * int / float values, taken as raw bytes.

    Args:
        size_str: The size as a string or a number.

    Returns:
        The size in bytes, truncated to an integer.

    Raises:
        ValueError: if the text cannot be parsed as a number.
    """
    # Already numeric: return as-is (calling .strip() on an int would fail).
    if isinstance(size_str, (int, float)):
        return int(size_str)

    s = str(size_str).strip().upper()
    for suffix, factor in _SIZE_UNITS:
        if s.endswith(suffix):
            return int(float(s[: -len(suffix)]) * factor)
    return int(s)  # assume raw bytes


# ======================================================
# MAIN LOGIC
# ======================================================
def run_md5_calculator(device_name=None, device_id=None, extension=".pdf", max_size="50MB"):
    """Compute and store content MD5 hashes for files of one device.

    Selects non-deleted rows of the ``files`` table that belong to the
    device, whose path ends with ``extension`` and whose size is at most
    ``max_size``. For each row whose file exists on disk, the MD5 is
    (re)computed when no hash is stored yet, or when the stored
    calculation time is missing or older than the row's ``modified``
    value. All updates are committed once at the end.

    Args:
        device_name: Name looked up in the ``devices`` table; only used
            when ``device_id`` is None.
        device_id: Device id used directly when given.
        extension: Path suffix to match, e.g. ".pdf", ".jpg".
        max_size: "10MB", "500KB", "1GB" or a number of bytes.

    Raises:
        RuntimeError: if neither device_name nor device_id is provided,
            or if the named device does not exist.

    MySQL errors are reported on stdout and not re-raised.
    """
    max_bytes = parse_size(max_size)
    conn, cursor = None, None

    try:
        conn = get_db_connection()
        cursor = conn.cursor(dictionary=True)

        # ------------------------------------------
        # Resolve device_id if only device_name given
        # ------------------------------------------
        if device_id is None:
            if device_name is None:
                raise RuntimeError("You must provide device_name or device_id")

            cursor.execute("SELECT id FROM devices WHERE name=%s", (device_name,))
            row = cursor.fetchone()
            if not row:
                raise RuntimeError(f"Device '{device_name}' not found")
            device_id = row["id"]

        print(f"\nšŸ” Filtering: device={device_id}, ext={extension}, max_size={max_bytes} bytes\n")

        # ------------------------------------------
        # SELECT only files that need MD5 calculation
        # ------------------------------------------
        # NOTE: "%" and "_" inside `extension` act as LIKE wildcards.
        cursor.execute("""
            SELECT id, path, size, modified, content_md5, md5_calculated
            FROM files
            WHERE device_id=%s
              AND deleted = 0
              AND path LIKE %s
              AND size <= %s
        """, (device_id, "%" + extension, max_bytes))

        rows = cursor.fetchall()
        total = len(rows)
        print(f"šŸ“ Files matching criteria: {total}")

        updates = 0

        for row in rows:
            file_id = row["id"]
            path = row["path"]
            modified = row["modified"]
            prev_md5 = row["content_md5"]
            prev_calc = row["md5_calculated"]

            # -------------------------------
            # Skip missing files on disk
            # -------------------------------
            if not os.path.isfile(path):
                print(f"āš ļø Missing on disk, skipping: {path}")
                continue

            # -------------------------------
            # Check conditions for recalculation
            # -------------------------------
            # Recalculate when there is no hash yet, or when the stored
            # hash predates the recorded modification time.
            need_md5 = (
                prev_md5 is None
                or prev_calc is None
                or prev_calc < modified
            )
            if not need_md5:
                continue

            # -------------------------------
            # Compute MD5
            # -------------------------------
            print(f"šŸ”„ Calculating MD5: {path}")
            try:
                new_md5 = file_md5(path)
            except OSError as e:
                # One unreadable file must not abort the run and discard
                # the updates already made for other files.
                print(f"Cannot read file, skipping: {path} ({e})")
                continue

            now = datetime.now().replace(microsecond=0)

            cursor.execute("""
                UPDATE files
                SET content_md5=%s,
                    md5_calculated=%s
                WHERE id=%s
            """, (new_md5, now, file_id))

            updates += 1

            # optional commit per-file:
            # conn.commit()

        conn.commit()

        print("\nāœ… MD5 calculation finished.")
        print(f" Updated files: {updates}")
        print(f" Skipped files: {total - updates}\n")

    except mysql.connector.Error as e:
        # The bare name `Error` was never imported; referencing it here
        # turned every exception reaching this clause into a NameError.
        print("MySQL Error:", e)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


# ======================================================
# RUN EXAMPLE
# ======================================================
if __name__ == "__main__":
    # Example usage:
    run_md5_calculator(
        device_name="Z230",
        extension=".pdf",
        max_size="100MB",
    )