This commit is contained in:
2025-12-18 07:53:48 +01:00
parent 6e8395890d
commit 0769bd2670

143
70 MD5.py Normal file
View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FAST MD5 indexer with in-memory cache
- prints every processed file
- skips unchanged files instantly
"""
import os
import hashlib
from datetime import datetime
import pymysql
# ==============================
# CONFIG
# ==============================
# Root of the directory tree that main() walks (UNC path to a network share).
ROOT_DIR = r"\\tower1\#ColdData\porno"

# Keyword arguments passed verbatim to pymysql.connect().
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    # SECURITY: a database credential should not live in source control.
    # The MD5_INDEX_DB_PASSWORD environment variable takes precedence; the
    # literal fallback only keeps existing deployments working.
    # TODO: rotate this password and remove the fallback once the variable
    # is set wherever the script runs.
    "password": os.environ.get("MD5_INDEX_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
    # main() never calls commit(), so every statement must commit on its own.
    "autocommit": True,
}

# Number of bytes read per call while hashing a file.
CHUNK_SIZE = 1024 * 1024  # 1 MB

# When True, main() also prints a line for every file it skips.
PRINT_SKIPPED = True  # set False if too noisy
# ==============================
# HELPERS
# ==============================
def compute_md5(path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and consumed in pieces of at most
    ``CHUNK_SIZE`` bytes, so the whole file is never held in memory at
    once. Any error raised while opening or reading the file propagates
    to the caller.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(CHUNK_SIZE)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
def format_size(size):
    """Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the unit
    list (B, KB, MB, GB, TB) is exhausted; anything still larger is
    reported in PB. The number is always shown with one decimal place.
    """
    suffixes = ("B", "KB", "MB", "GB", "TB")
    value = size
    position = 0
    while position < len(suffixes):
        if value < 1024:
            return f"{value:.1f} {suffixes[position]}"
        value = value / 1024
        position += 1
    # Ran past TB: whatever is left is expressed in petabytes.
    return f"{value:.1f} PB"
# ==============================
# MAIN
# ==============================
def _load_indexed_keys(cur):
    """Return the set of (full_path, file_size, mtime) tuples already stored."""
    cur.execute("""
        SELECT full_path, file_size, mtime
        FROM file_md5_index
    """)
    return {(row[0], row[1], row[2]) for row in cur.fetchall()}


def _upsert_file(cur, full_path, fname, root, size, mtime, md5):
    """Insert one file row, or refresh size/mtime/md5 if the key already exists."""
    cur.execute("""
        INSERT INTO file_md5_index
        (full_path, file_name, directory, file_size, mtime, md5)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        file_size=VALUES(file_size),
        mtime=VALUES(mtime),
        md5=VALUES(md5),
        updated_at=CURRENT_TIMESTAMP
    """, (full_path, fname, root, size, mtime, md5))


def _scan_tree(cur, indexed):
    """Hash and store every file under ROOT_DIR whose key is not in *indexed*.

    Returns a ``(new_files, skipped)`` tuple of counters.
    """
    new_files = 0
    skipped = 0
    for root, _, files in os.walk(ROOT_DIR):
        for fname in files:
            full_path = os.path.join(root, fname)
            try:
                info = os.stat(full_path)
            except OSError:
                # Covers FileNotFoundError too (it is an OSError subclass):
                # the file vanished or is inaccessible, so ignore it.
                continue

            # Computed once and reused for both the cache key and the INSERT
            # so the two can never disagree.
            # NOTE(review): the fast path only hits if the `mtime` column
            # returns exactly the datetime that was inserted. If the column
            # does not keep fractional seconds, files with a sub-second
            # mtime would be re-hashed on every run -- confirm the column
            # definition.
            mtime = datetime.fromtimestamp(info.st_mtime)

            # FAST PATH: same path, size and mtime as in the database.
            if (full_path, info.st_size, mtime) in indexed:
                skipped += 1
                if PRINT_SKIPPED:
                    print("⏭ SKIP")
                    print(f" File: {full_path}")
                continue

            print(" NEW / UPDATED")
            print(f" Size: {format_size(info.st_size)}")
            print(f" File: {full_path}")
            try:
                md5 = compute_md5(full_path)
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort
                # the whole scan.
                print(f"❌ MD5 failed: {e}")
                continue

            _upsert_file(cur, full_path, fname, root, info.st_size, mtime, md5)
            new_files += 1
            print(f" MD5 : {md5}")
            print("--------------------------------------")
    return new_files, skipped


def main():
    """Index ROOT_DIR into the ``file_md5_index`` table.

    Loads the (path, size, mtime) keys already stored, walks ROOT_DIR,
    skips files whose key is unchanged, and hashes and upserts the rest.
    Progress and a final summary are printed to stdout. The cursor and
    connection are closed even if the scan raises.
    """
    db = pymysql.connect(**DB_CONFIG)
    try:
        cur = db.cursor()
        try:
            print("📥 Loading already indexed files into memory...")
            indexed = _load_indexed_keys(cur)
            print(f"✅ Loaded {len(indexed):,} indexed entries")
            print("======================================")

            new_files, skipped = _scan_tree(cur, indexed)

            print("======================================")
            print(f"✅ New / updated : {new_files}")
            print(f"⏭ Skipped : {skipped}")
            print("======================================")
        finally:
            cur.close()
    finally:
        db.close()


if __name__ == "__main__":
    main()