z230
This commit is contained in:
143 MD5.py Normal file
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
FAST MD5 indexer with in-memory cache
- prints every processed file
- skips unchanged files instantly
"""

import os
import hashlib
from datetime import datetime

import pymysql  # third-party driver: pip install pymysql


# ==============================
# CONFIG
# ==============================

ROOT_DIR = r"\\tower1\#ColdData\porno"

DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

CHUNK_SIZE = 1024 * 1024  # 1 MB
PRINT_SKIPPED = True  # set False if too noisy


# ==============================
# HELPERS
# ==============================

def compute_md5(path: str) -> str:
    """Hash the file in CHUNK_SIZE pieces so memory use stays flat."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
            h.update(chunk)
    return h.hexdigest()
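

# Hedged alternative: on Python 3.11+, hashlib.file_digest() performs the same
# chunked read internally. A minimal sketch (the name compute_md5_py311 is
# hypothetical and not used elsewhere in this script):
def compute_md5_py311(path: str) -> str:
    with open(path, "rb") as f:
        return hashlib.file_digest(f, "md5").hexdigest()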


def format_size(size):
    """Render a byte count human-readably, e.g. format_size(1536) -> '1.5 KB'."""
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if size < 1024:
            return f"{size:.1f} {unit}"
        size /= 1024
    return f"{size:.1f} PB"


# ==============================
# MAIN
# ==============================

def main():
    # autocommit=True in DB_CONFIG, so every upsert is committed immediately
    db = pymysql.connect(**DB_CONFIG)
    cur = db.cursor()

    print("📥 Loading already indexed files into memory...")

    cur.execute("""
        SELECT full_path, file_size, mtime
        FROM file_md5_index
    """)

    # Set of (full_path, file_size, mtime) tuples: membership tests are O(1),
    # so unchanged files are skipped without another database round trip.
    indexed = {
        (row[0], row[1], row[2])
        for row in cur.fetchall()
    }

    print(f"✅ Loaded {len(indexed):,} indexed entries")
    print("======================================")

    new_files = 0
    skipped = 0

    for root, _, files in os.walk(ROOT_DIR):
        for fname in files:
            full_path = os.path.join(root, fname)

            try:
                stat = os.stat(full_path)
            except OSError:  # covers FileNotFoundError, which is a subclass
                continue

            key = (
                full_path,
                stat.st_size,
                datetime.fromtimestamp(stat.st_mtime),
            )
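
            # Caveat (assumption worth verifying against the schema): a plain
            # MySQL DATETIME column keeps whole seconds only, while st_mtime
            # carries microseconds. If the precisions differ, this key never
            # matches and every file is re-hashed on each run; the mtime
            # column would need to be DATETIME(6) for the fast path to work.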

            # FAST PATH
            if key in indexed:
                skipped += 1
                if PRINT_SKIPPED:
                    print("⏭ SKIP")
                    print(f"   File: {full_path}")
                continue

            print("➕ NEW / UPDATED")
            print(f"   Size: {format_size(stat.st_size)}")
            print(f"   File: {full_path}")

            try:
                md5 = compute_md5(full_path)
            except Exception as e:
                print(f"❌ MD5 failed: {e}")
                continue

            cur.execute("""
                INSERT INTO file_md5_index
                    (full_path, file_name, directory, file_size, mtime, md5)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE
                    file_size=VALUES(file_size),
                    mtime=VALUES(mtime),
                    md5=VALUES(md5),
                    updated_at=CURRENT_TIMESTAMP
            """, (
                full_path,
                fname,
                root,
                stat.st_size,
                datetime.fromtimestamp(stat.st_mtime),
                md5,
            ))
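
            # Note: VALUES() inside ON DUPLICATE KEY UPDATE is deprecated as
            # of MySQL 8.0.20 (row aliases replace it); it still works there
            # and remains fully supported on MariaDB.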

            new_files += 1
            print(f"   MD5 : {md5}")
            print("--------------------------------------")

    print("======================================")
    print(f"✅ New / updated : {new_files}")
    print(f"⏭ Skipped       : {skipped}")
    print("======================================")

    cur.close()
    db.close()


if __name__ == "__main__":
    main()
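
The script assumes an existing file_md5_index table with a unique key on full_path; without one, the ON DUPLICATE KEY UPDATE clause never fires and re-runs would insert duplicate rows. A plausible schema sketch, inferred from the INSERT above (the column types, the 768-character index prefix, and the DATETIME(6) precision are assumptions, not part of this commit):

CREATE TABLE IF NOT EXISTS file_md5_index (
    id         INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    full_path  VARCHAR(1024) NOT NULL,
    file_name  VARCHAR(255)  NOT NULL,
    directory  VARCHAR(1024) NOT NULL,
    file_size  BIGINT UNSIGNED NOT NULL,
    mtime      DATETIME(6)   NOT NULL,
    md5        CHAR(32)      NOT NULL,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    UNIQUE KEY uq_full_path (full_path(768))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;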