z230
This commit is contained in:
143 MD5.py Normal file
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
FAST MD5 indexer with in-memory cache
- prints every processed file
- skips unchanged files instantly
"""

import os
import hashlib
from datetime import datetime

import pymysql  # third-party driver: pip install pymysql


# ==============================
# CONFIG
# ==============================

ROOT_DIR = r"\\tower1\#ColdData\porno"

DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

CHUNK_SIZE = 1024 * 1024  # 1 MB
PRINT_SKIPPED = True  # set False if too noisy


# ==============================
# HELPERS
# ==============================

def compute_md5(path: str) -> str:
    """Hash the file in CHUNK_SIZE pieces so memory use stays flat."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
            h.update(chunk)
    return h.hexdigest()
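

# Hedged alternative: on Python 3.11+, hashlib.file_digest() performs the same
# chunked read internally. A minimal sketch (the name compute_md5_py311 is
# hypothetical and not used elsewhere in this script):
def compute_md5_py311(path: str) -> str:
    with open(path, "rb") as f:
        return hashlib.file_digest(f, "md5").hexdigest()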


def format_size(size):
    """Render a byte count human-readably, e.g. format_size(1536) -> '1.5 KB'."""
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if size < 1024:
            return f"{size:.1f} {unit}"
        size /= 1024
    return f"{size:.1f} PB"


# ==============================
# MAIN
# ==============================

def main():
    # autocommit=True in DB_CONFIG, so every upsert is committed immediately
    db = pymysql.connect(**DB_CONFIG)
    cur = db.cursor()

    print("📥 Loading already indexed files into memory...")

    cur.execute("""
        SELECT full_path, file_size, mtime
        FROM file_md5_index
    """)

    # Set of (full_path, file_size, mtime) tuples: membership tests are O(1),
    # so unchanged files are skipped without another database round trip.
    indexed = {
        (row[0], row[1], row[2])
        for row in cur.fetchall()
    }

    print(f"✅ Loaded {len(indexed):,} indexed entries")
    print("======================================")

    new_files = 0
    skipped = 0

    for root, _, files in os.walk(ROOT_DIR):
        for fname in files:
            full_path = os.path.join(root, fname)

            try:
                stat = os.stat(full_path)
            except OSError:  # covers FileNotFoundError, which is a subclass
                continue

            key = (
                full_path,
                stat.st_size,
                datetime.fromtimestamp(stat.st_mtime),
            )
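
            # Caveat (assumption worth verifying against the schema): a plain
            # MySQL DATETIME column keeps whole seconds only, while st_mtime
            # carries microseconds. If the precisions differ, this key never
            # matches and every file is re-hashed on each run; the mtime
            # column would need to be DATETIME(6) for the fast path to work.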

            # FAST PATH
            if key in indexed:
                skipped += 1
                if PRINT_SKIPPED:
                    print("⏭ SKIP")
                    print(f"   File: {full_path}")
                continue

            print("➕ NEW / UPDATED")
            print(f"   Size: {format_size(stat.st_size)}")
            print(f"   File: {full_path}")

            try:
                md5 = compute_md5(full_path)
            except Exception as e:
                print(f"❌ MD5 failed: {e}")
                continue

            cur.execute("""
                INSERT INTO file_md5_index
                    (full_path, file_name, directory, file_size, mtime, md5)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE
                    file_size=VALUES(file_size),
                    mtime=VALUES(mtime),
                    md5=VALUES(md5),
                    updated_at=CURRENT_TIMESTAMP
            """, (
                full_path,
                fname,
                root,
                stat.st_size,
                datetime.fromtimestamp(stat.st_mtime),
                md5,
            ))
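
            # Note: VALUES() inside ON DUPLICATE KEY UPDATE is deprecated as
            # of MySQL 8.0.20 (row aliases replace it); it still works there
            # and remains fully supported on MariaDB.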

            new_files += 1
            print(f"   MD5 : {md5}")
            print("--------------------------------------")

    print("======================================")
    print(f"✅ New / updated : {new_files}")
    print(f"⏭ Skipped       : {skipped}")
    print("======================================")

    cur.close()
    db.close()


if __name__ == "__main__":
    main()
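
The script assumes an existing file_md5_index table with a unique key on full_path; without one, the ON DUPLICATE KEY UPDATE clause never fires and re-runs would insert duplicate rows. A plausible schema sketch, inferred from the INSERT above (the column types, the 768-character index prefix, and the DATETIME(6) precision are assumptions, not part of this commit):

CREATE TABLE IF NOT EXISTS file_md5_index (
    id         INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    full_path  VARCHAR(1024) NOT NULL,
    file_name  VARCHAR(255)  NOT NULL,
    directory  VARCHAR(1024) NOT NULL,
    file_size  BIGINT UNSIGNED NOT NULL,
    mtime      DATETIME(6)   NOT NULL,
    md5        CHAR(32)      NOT NULL,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    UNIQUE KEY uq_full_path (full_path(768))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;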