diff --git a/70 MD5.py b/70 MD5.py index 8ef237a..7fc57e8 100644 --- a/70 MD5.py +++ b/70 MD5.py @@ -5,6 +5,7 @@ FAST MD5 indexer with in-memory cache - prints every processed file - skips unchanged files instantly +- restart-safe (no reprocessing same files) """ import os @@ -60,7 +61,7 @@ def main(): print("📥 Loading already indexed files into memory...") cur.execute(""" - SELECT full_path, file_size, mtime + SELECT full_path, file_size, UNIX_TIMESTAMP(mtime) FROM file_md5_index """) @@ -84,11 +85,10 @@ def main(): except (OSError, FileNotFoundError): continue - key = ( - full_path, - stat.st_size, - datetime.fromtimestamp(stat.st_mtime), - ) + mtime = int(stat.st_mtime) + size = stat.st_size + + key = (full_path, size, mtime) # FAST PATH if key in indexed: @@ -99,7 +99,7 @@ def main(): continue print("➕ NEW / UPDATED") - print(f" Size: {format_size(stat.st_size)}") + print(f" Size: {format_size(size)}") print(f" File: {full_path}") try: @@ -111,7 +111,7 @@ def main(): cur.execute(""" INSERT INTO file_md5_index (full_path, file_name, directory, file_size, mtime, md5) - VALUES (%s, %s, %s, %s, %s, %s) + VALUES (%s, %s, %s, %s, FROM_UNIXTIME(%s), %s) ON DUPLICATE KEY UPDATE file_size=VALUES(file_size), mtime=VALUES(mtime), @@ -121,8 +121,8 @@ def main(): full_path, fname, root, - stat.st_size, - datetime.fromtimestamp(stat.st_mtime), + size, + mtime, md5, ))