Merge remote-tracking branch 'origin/master'
This commit is contained in:
92
PST/10 ReadKulhavaPST.py
Normal file
92
PST/10 ReadKulhavaPST.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import win32com.client
|
||||
import os
|
||||
|
||||
# Your specific file path
|
||||
pst_path = r'd:\Dropbox\!!!Days\Downloads Z230\PST\tkulhava.pst'
|
||||
|
||||
|
||||
def main():
    """Mount the configured PST file in Outlook, print every e-mail subject
    recursively, then detach the PST again.

    Uses the locally installed Outlook through COM (win32com); all output
    goes to stdout and nothing is returned.
    """
    # Fail fast if the PST is missing — AddStore would otherwise raise a
    # much less helpful COM error.
    if not os.path.exists(pst_path):
        print(f"Error: File not found at {pst_path}")
        return

    try:
        # Connect to Outlook's MAPI namespace (starts Outlook if needed).
        outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")

        # 1. Add the PST to Outlook (This makes it visible in the sidebar)
        print(f"Mounting PST: {pst_path}...")
        outlook.AddStore(pst_path)

        # 2. Find the folder object for this PST.
        # We search specifically for the folder that matches the filename 'tkulhava'
        # or grab the last added store if the name doesn't match exactly.
        pst_name = "tkulhava"  # derived from filename usually
        root_folder = None

        # Loop through all stores to find the new one
        for folder in outlook.Folders:
            if pst_name.lower() in folder.Name.lower():
                root_folder = folder
                break

        # Fallback: Just grab the last folder in the list if name didn't match
        # (presumably the newly added store is appended last — TODO confirm).
        if not root_folder:
            root_folder = outlook.Folders.GetLast()

        print(f"Successfully opened root folder: {root_folder.Name}")
        print("=" * 50)

        # 3. Start the recursive walk
        print_subjects_recursively(root_folder)

        # 4. Cleanup: Remove the PST from Outlook
        # (Comment this out if you want to keep it open in Outlook to inspect manually)
        outlook.RemoveStore(root_folder)
        print("\nDone. PST detached.")

    except Exception as e:
        # Broad catch at the top-level boundary: any COM failure (Outlook
        # missing, locked PST, ...) is reported instead of crashing.
        print(f"An error occurred: {e}")
|
||||
|
||||
|
||||
def print_subjects_recursively(folder):
    """
    Recursively prints subjects of emails in a folder and its subfolders.

    folder: an Outlook MAPI Folder COM object (has .Items, .Folders, .Name).
    """
    try:
        # Print current folder name for context, but only when the folder
        # actually contains items.
        if folder.Items.Count > 0:
            print(f"\n--- Folder: {folder.Name} ---")

        # Iterate through items
        for item in folder.Items:
            try:
                # Class 43 is a standard MailItem.
                # Other items (meeting requests, reports) might not have a Subject or behave differently.
                if item.Class == 43:
                    print(f"Subject: {item.Subject}")
                else:
                    # Attempt to print subject anyway (e.g., for Meeting Items)
                    print(f"[{type_name(item.Class)}] Subject: {item.Subject}")
            except Exception:
                # Skip items that are corrupted or unreadable
                pass

        # Recursion: Go deeper into subfolders
        for subfolder in folder.Folders:
            print_subjects_recursively(subfolder)

    except Exception as e:
        # Some folders deny access (e.g., system/search folders) — report
        # and continue with the rest of the tree.
        print(f"Skipping restricted folder '{folder.Name}': {e}")
|
||||
|
||||
|
||||
def type_name(class_id):
    """Map an Outlook item Class id to a short human-readable label.

    Known non-mail classes get a named label; anything else falls back to
    a generic "Type <id>" tag.
    """
    labels = {53: "Meeting", 46: "Report"}
    return labels.get(class_id, f"Type {class_id}")
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
142
PST/20 ReadKulhavaSavePhotos.py
Normal file
142
PST/20 ReadKulhavaSavePhotos.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import win32com.client
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
# --- CONFIGURATION ---
|
||||
pst_path = r'd:\Dropbox\!!!Days\Downloads Z230\PST\tkulhava.pst'
|
||||
output_dir = r'd:\Dropbox\!!!Days\Downloads Z230\PST\pictures'
|
||||
|
||||
# Image extensions to look for (case insensitive)
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tif', '.tiff'}
|
||||
|
||||
|
||||
def fix_encoding(text):
    """Repairs text wrongly decoded as cp1252 instead of cp1250.

    Re-encodes with the wrong codec and decodes with the correct one;
    returns the input unchanged when the round trip is impossible, and
    "" for empty/None input.
    """
    if not text:
        return ""
    try:
        repaired = text.encode('cp1252').decode('cp1250')
    except Exception:
        # Unmappable characters usually mean the text was fine already.
        return text
    return repaired
|
||||
|
||||
|
||||
def get_unique_filepath(directory, filename):
    """
    Checks if a file exists. If so, adds a counter (_1, _2) to the filename
    until a unique name is found.

    Args:
        directory: target directory (str or Path); assumed to exist.
        filename:  desired file name; illegal characters are stripped.

    Returns:
        A pathlib.Path inside *directory* that does not currently exist.
    """
    # Clean filename of illegal characters just in case
    filename = "".join(x for x in filename if x.isalnum() or x in "._- ")

    # Fix: a name made entirely of illegal characters sanitizes to "" (or
    # just dots/spaces), which would make the returned path the directory
    # itself — substitute a safe fallback name instead.
    if not filename.strip(". "):
        filename = "attachment"

    path = pathlib.Path(directory) / filename
    if not path.exists():
        return path

    # Split name and extension so the counter goes before the suffix.
    stem = path.stem
    suffix = path.suffix
    counter = 1

    while True:
        new_filename = f"{stem}_{counter}{suffix}"
        new_path = pathlib.Path(directory) / new_filename
        if not new_path.exists():
            return new_path
        counter += 1
|
||||
|
||||
|
||||
def process_item_attachments(item, save_folder):
    """Checks an item for attachments and saves pictures.

    item:        an Outlook item COM object (mail, meeting, ...).
    save_folder: destination directory for image attachments.
    """
    try:
        # Check if item has attachments
        if item.Attachments.Count > 0:
            for attachment in item.Attachments:
                try:
                    # Get filename and extension; some attachment types may
                    # lack a FileName attribute entirely.
                    fname = getattr(attachment, 'FileName', '')
                    if not fname: continue

                    # Fix encoding on filename if needed (sometimes attachments inherit bad encoding)
                    fname = fix_encoding(fname)

                    ext = os.path.splitext(fname)[1].lower()

                    # Only save files whose extension marks them as images.
                    if ext in IMAGE_EXTENSIONS:
                        # Determine unique path (avoids overwriting duplicates)
                        save_path = get_unique_filepath(save_folder, fname)

                        # Save the file
                        attachment.SaveAsFile(str(save_path))
                        print(f" [SAVED] {save_path.name}")
                except Exception as e:
                    # One bad attachment must not abort the whole item.
                    print(f" [ERROR saving attachment]: {e}")
    except Exception:
        # Some items (like corrupted notes) fail when accessing .Attachments
        pass
|
||||
|
||||
|
||||
def scan_folder_recursively(folder, save_folder):
    """Recursively walks folders and processes items.

    folder:      an Outlook MAPI Folder COM object.
    save_folder: destination directory passed through to the attachment saver.
    """
    try:
        # Folder names from old PSTs may carry the cp1252/cp1250 mixup.
        folder_name = fix_encoding(folder.Name)

        # Optional: Print folder progress (only for non-empty folders)
        if folder.Items.Count > 0:
            print(f"Scanning Folder: {folder_name}...")

        # Process items in this folder
        for item in folder.Items:
            process_item_attachments(item, save_folder)

        # Recursion into all subfolders
        for subfolder in folder.Folders:
            scan_folder_recursively(subfolder, save_folder)

    except Exception as e:
        # Restricted/corrupted folders are skipped, not fatal.
        print(f"Skipping folder '{fix_encoding(folder.Name)}': {e}")
|
||||
|
||||
|
||||
def main():
    """Mount the configured PST in Outlook, save every image attachment
    found anywhere in it to *output_dir*, then detach the PST.

    Uses Outlook via COM (win32com); prints progress to stdout.
    """
    # 1. Ensure output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"Created directory: {output_dir}")

    # Fail fast when the PST itself is missing.
    if not os.path.exists(pst_path):
        print(f"Error: PST file not found at {pst_path}")
        return

    try:
        # 2. Connect to Outlook
        outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")

        print(f"Mounting PST: {pst_path}...")
        outlook.AddStore(pst_path)

        # 3. Find the PST folder by (partial, case-insensitive) name match.
        pst_name = "tkulhava"  # Usually derived from filename
        root_folder = None
        for folder in outlook.Folders:
            if pst_name.lower() in folder.Name.lower():
                root_folder = folder
                break

        # Fallback: presumably the newly added store is last — TODO confirm.
        if not root_folder:
            root_folder = outlook.Folders.GetLast()

        print(f"Opened: {fix_encoding(root_folder.Name)}")
        print(f"Saving pictures to: {output_dir}")
        print("=" * 50)

        # 4. Start processing
        scan_folder_recursively(root_folder, output_dir)

        # 5. Cleanup: detach the PST from Outlook again.
        outlook.RemoveStore(root_folder)
        print("\nDone. PST detached.")

    except Exception as e:
        # Top-level boundary: report any COM/IO failure instead of crashing.
        print(f"Critical Error: {e}")
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
295
WalkFilesOnBackupHDD/10 WalkBackupHDD.py
Normal file
295
WalkFilesOnBackupHDD/10 WalkBackupHDD.py
Normal file
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
FAST FILE HASH INDEXER – WINDOWS CLIENT (EXTERNAL DISKS)
|
||||
- Mode: PHYSICAL BACKUP
|
||||
- Hostname in DB = Disk Label (e.g., #HD015)
|
||||
- Path in DB = Relative path (e.g., /Movies/Film.mkv)
|
||||
"""
|
||||
|
||||
import os, time
|
||||
import pymysql
|
||||
import socket
|
||||
import platform
|
||||
import sys
|
||||
from blake3 import blake3
|
||||
|
||||
# ==============================
# CONFIG
# ==============================
CHUNK_SIZE = 5 * 1024 * 1024  # 5 MB
PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MB — files below this get no progress output
PROGRESS_INTERVAL = 1.0  # seconds between progress lines

# Directories skipped entirely during os.walk.
EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}

# --- Size limits ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024 * 1024* 1024  # 1TB

# --- Database settings ---
# NOTE(review): credentials are hardcoded in source — consider moving them
# to an environment variable or config file outside version control.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

# NOTE(review): CHUNK_SIZE is re-bound here, overriding the 5 MB value
# above — the effective read chunk is 4 MB. Probably one of the two
# definitions should be removed.
CHUNK_SIZE = 4 * 1024 * 1024  # 4 MB
PRINT_SKIPPED = False  # True = also print skipped (already-indexed) files

# ==============================
# SYSTEM INFO
# ==============================

# Physical PC name (console output only; the DB gets the disk label instead)
REAL_PC_HOSTNAME = socket.gethostname()
OS_NAME = platform.system()
|
||||
|
||||
|
||||
# ==============================
|
||||
# FUNCTIONS
|
||||
# ==============================
|
||||
|
||||
def compute_blake3(path: str) -> bytes:
    """Hash the file at *path* with BLAKE3 and return the raw digest bytes.

    Reads the file in CHUNK_SIZE pieces. For files of at least
    PROGRESS_MIN_SIZE, a progress line (percent, MB done, speed, ETA) is
    printed at most once per PROGRESS_INTERVAL seconds, plus a final
    summary line.

    Raises: re-raises any error (I/O, permission, ...) after printing a
    warning, so the caller can count it.
    """
    h = blake3()
    total_size = os.path.getsize(path)
    # Small files finish instantly — no point printing progress for them.
    show_progress = total_size >= PROGRESS_MIN_SIZE

    processed = 0
    start_time = time.time()
    last_report = start_time

    try:
        with open(path, "rb") as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break

                h.update(chunk)
                processed += len(chunk)

                if show_progress:
                    now = time.time()
                    # Throttle: at most one progress line per interval.
                    if now - last_report >= PROGRESS_INTERVAL:
                        elapsed = now - start_time
                        speed = processed / elapsed if elapsed > 0 else 0
                        percent = processed / total_size * 100
                        remaining = total_size - processed
                        eta = remaining / speed if speed > 0 else 0

                        print(
                            f" ⏳ {percent:6.2f}% | "
                            f"{processed/1024/1024:8.1f} / {total_size/1024/1024:.1f} MB | "
                            f"{speed/1024/1024:6.1f} MB/s | "
                            f"ETA {time.strftime('%H:%M:%S', time.gmtime(eta))}",
                            flush=True
                        )
                        last_report = now

        # Final summary once the whole file has been hashed.
        if show_progress:
            total_time = time.time() - start_time
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                f" ✅ DONE | "
                f"{total_size/1024/1024:.1f} MB | "
                f"avg {avg_speed/1024/1024:.1f} MB/s | "
                f"time {time.strftime('%H:%M:%S', time.gmtime(total_time))}",
                flush=True
            )

        return h.digest()

    except Exception as e:
        # Report and propagate — caller decides how to count the failure.
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise
|
||||
|
||||
|
||||
|
||||
def get_drive_info():
    """Interactively ask for the drive letter and the disk ID (used as host_name).

    Returns:
        (drive_root, disk_label) — e.g. ("E:\\", "#HD015"). Loops until a
        mounted drive letter and a label of at least 2 characters are given.
    """
    print("\n💿 --- NASTAVENÍ SKENOVÁNÍ (EXTERNÍ DISK) ---")

    # 1. Drive letter — accept "E", "e:", "E:\" etc., normalize to one letter.
    while True:
        drive_input = input("📂 Zadejte písmeno disku ve Windows (např. 'E'): ").strip().upper()
        drive_letter = drive_input.replace(":", "").replace("\\", "").replace("/", "")

        if len(drive_letter) == 1 and drive_letter.isalpha():
            drive_root = f"{drive_letter}:\\"
            # Verify the drive is actually mounted before accepting it.
            if os.path.isdir(drive_root):
                break
            else:
                print(f"❌ Disk {drive_root} není dostupný.")
        else:
            print("❌ Neplatný formát.")

    # 2. Disk label -> stored as HOST_NAME in the database.
    while True:
        disk_label = input("🏷️ Zadejte ID disku (bude uloženo jako 'host_name', např. '#HD015'): ").strip()
        if len(disk_label) >= 2:
            break
        print("❌ Název je příliš krátký.")

    return drive_root, disk_label
|
||||
|
||||
|
||||
def size_allowed(size: int) -> bool:
    """Return True when *size* (bytes) passes the configured min/max limits.

    A limit set to None is treated as "no limit".
    """
    lower_ok = FILE_MIN_SIZE is None or size >= FILE_MIN_SIZE
    upper_ok = FILE_MAX_SIZE is None or size <= FILE_MAX_SIZE
    return lower_ok and upper_ok
|
||||
|
||||
|
||||
# ==============================
|
||||
# MAIN
|
||||
# ==============================
|
||||
|
||||
def main():
    """Interactively index an external disk: walk every file, hash new or
    changed ones with BLAKE3, and upsert them into the `file_md5_index`
    MySQL table using the disk label as host_name.

    Files whose (path, size, mtime) already match the DB are skipped.
    """
    print("🚀 BLAKE3 External Disk Indexer", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)

    # Ask the user for drive letter + disk label (the DB "hostname").
    scan_root, disk_hostname = get_drive_info()

    print(f"✅ Konfigurace:")
    print(f" Zdroj (Windows) : {scan_root}")
    print(f" DB Hostname : {disk_hostname}")
    print(f" DB Cesty : /Složka/Soubor...")

    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
    except Exception as e:
        # Without a DB connection there is nothing to do; wait for Enter so
        # the console window doesn't vanish before the error is read.
        print(f"❌ DB Connection failed: {e}")
        input("Enter pro konec...")
        return

    print(f"📥 Načítám index pro disk: '{disk_hostname}'...", flush=True)

    # Optimization: fetch only the rows for this disk's host_name,
    # so the skip check below is a local dict lookup.
    cur.execute("""
        SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
        FROM file_md5_index
        WHERE host_name = %s
    """, (disk_hostname,))

    # Map: { "/Slozka/Soubor.ext": (size, mtime) }
    indexed_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}

    print(f"✅ Nalezeno {len(indexed_map):,} souborů v DB pro tento disk.", flush=True)
    print("======================================", flush=True)

    new_files = 0
    skipped = 0
    filtered = 0
    errors = 0
    seen_paths = set()  # guards against duplicate normalized paths

    # --- SCAN ---
    for root, dirs, files in os.walk(scan_root):
        # Prune system folders in-place so os.walk never descends into them.
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]

        for fname in files:
            disk_path = os.path.join(root, fname)

            # 1. Stat (size, mtime); unreadable entries count as errors.
            try:
                stat = os.stat(disk_path)
            except OSError:
                errors += 1
                continue

            size = stat.st_size
            if not size_allowed(size):
                filtered += 1
                continue

            # 2. Build the clean DB path:
            # E:\Filmy\Avatar.mkv -> Filmy\Avatar.mkv
            try:
                rel_path = os.path.relpath(disk_path, scan_root)
            except ValueError:
                errors += 1
                continue

            # Normalize to Linux style: Filmy/Avatar.mkv
            clean_path = rel_path.replace("\\", "/")

            # Prepend slash: /Filmy/Avatar.mkv
            if not clean_path.startswith("/"):
                clean_path = "/" + clean_path

            if clean_path in seen_paths:
                continue
            seen_paths.add(clean_path)

            mtime = int(stat.st_mtime)

            # === STRICT CHECK ===
            # Skip only when path, size AND mtime all match the DB record.
            is_match = False
            if clean_path in indexed_map:
                db_size, db_mtime = indexed_map[clean_path]
                if size == db_size and mtime == db_mtime:
                    is_match = True

            if is_match:
                skipped += 1
                if PRINT_SKIPPED:
                    print(f"⏭ SKIP {clean_path}", flush=True)
                continue

            # === INSERT / UPDATE ===
            print("➕ NEW / UPDATED", flush=True)
            print(f" File: {clean_path}", flush=True)
            print(f" Size: {size:,} B", flush=True)

            try:
                b3 = compute_blake3(disk_path)
            except Exception:
                # compute_blake3 already printed the warning.
                errors += 1
                continue

            # Upsert keyed on (host_name, full_path); autocommit is on.
            cur.execute("""
                INSERT INTO file_md5_index
                (os_name, host_name, full_path, file_name, directory,
                 file_size, mtime, blake3)
                VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                ON DUPLICATE KEY UPDATE
                    file_size = VALUES(file_size),
                    mtime = VALUES(mtime),
                    blake3 = VALUES(blake3),
                    updated_at = CURRENT_TIMESTAMP
            """, (
                OS_NAME,          # e.g. 'Windows' (where the scan ran)
                disk_hostname,    # stored value like '#HD015'
                clean_path,       # stored value like '/Filmy/Avatar.mkv'
                fname,
                os.path.dirname(clean_path),
                size,
                mtime,
                b3,
            ))

            new_files += 1
            print(f" Hash: {b3.hex()}", flush=True)
            print("--------------------------------------", flush=True)

    # Final summary.
    # NOTE(review): the `filtered` counter is collected but never printed.
    print("======================================", flush=True)
    print(f"✅ Hotovo : {new_files}")
    print(f"⏭ Shoda : {skipped}")
    print(f"⚠️ Chyby : {errors}")
    print("🏁 Konec.")

    cur.close()
    db.close()
    # input("\nStiskněte Enter pro ukončení...")
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
351
WalkFilesOnBackupHDD/20 WalkBackupHDD.py
Normal file
351
WalkFilesOnBackupHDD/20 WalkBackupHDD.py
Normal file
@@ -0,0 +1,351 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
r"""
|
||||
FAST FILE HASH INDEXER – WINDOWS CLIENT (HARDCODED CONFIG)
|
||||
- Mode: PHYSICAL BACKUP
|
||||
- Hostname in DB = Disk Label (e.g., #HD015)
|
||||
- Path in DB = Relative path (e.g., /Movies/Film.mkv)
|
||||
"""
|
||||
|
||||
import os, time
|
||||
import pymysql
|
||||
import socket
|
||||
import platform
|
||||
import sys
|
||||
from blake3 import blake3
|
||||
|
||||
# ==============================
# ⚙️ USER CONFIGURATION
# ==============================
DISK_DRIVE_LETTER = "P"  # (e.g., "E", "F", "P")
DISK_HOSTNAME = "#HD08"  # (e.g., "#HD015") — stored as host_name in the DB

# 🔒 SAFETY SWITCH
# True = LIST ONLY (No DB changes). "Simulates" the run.
# False = EXECUTE (Deletes and Inserts into DB).
# NOTE(review): the default is LIVE mode — DB rows for missing files WILL
# be deleted on a plain run; set to True first when testing a new disk.
DRY_RUN = False

# ==============================
# TECHNICAL CONFIG
# ==============================
CHUNK_SIZE = 5 * 1024 * 1024  # 5 MB read chunk for hashing
PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MB — smaller files get no progress output
PROGRESS_INTERVAL = 1.0  # seconds between progress lines

# Directories skipped entirely during os.walk.
EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}

# --- File Size Limits ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024*1024*1024  # 1TB

# --- DB Config ---
# NOTE(review): credentials are hardcoded in source — consider moving them
# to an environment variable or config file outside version control.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

PRINT_SKIPPED = False  # Set True to see files that were already in DB

# ==============================
# SYSTEM INFO
# ==============================
# Physical PC name (console output only; the DB gets DISK_HOSTNAME instead).
REAL_PC_HOSTNAME = socket.gethostname()
OS_NAME = platform.system()
|
||||
|
||||
|
||||
# ==============================
|
||||
# FUNCTIONS
|
||||
# ==============================
|
||||
|
||||
def compute_blake3(path: str) -> bytes:
    """Hash the file at *path* with BLAKE3 and return the raw digest bytes.

    Reads the file in CHUNK_SIZE pieces. For files of at least
    PROGRESS_MIN_SIZE a throttled progress line (percent, MB done, speed,
    ETA) is printed roughly every PROGRESS_INTERVAL seconds, followed by a
    final summary line.

    Raises: re-raises any error after printing a warning, so the caller
    can count the failure.
    """
    h = blake3()
    total_size = os.path.getsize(path)
    # Only large files get progress output — small ones finish instantly.
    show_progress = total_size >= PROGRESS_MIN_SIZE

    processed = 0
    start_time = time.time()
    last_report = start_time

    try:
        with open(path, "rb") as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break

                h.update(chunk)
                processed += len(chunk)

                if show_progress:
                    now = time.time()
                    # Throttle: at most one progress line per interval.
                    if now - last_report >= PROGRESS_INTERVAL:
                        elapsed = now - start_time
                        speed = processed / elapsed if elapsed > 0 else 0
                        percent = processed / total_size * 100
                        remaining = total_size - processed
                        eta = remaining / speed if speed > 0 else 0

                        print(
                            f" ⏳ {percent:6.2f}% | "
                            f"{processed / 1024 / 1024:8.1f} / {total_size / 1024 / 1024:.1f} MB | "
                            f"{speed / 1024 / 1024:6.1f} MB/s | "
                            f"ETA {time.strftime('%H:%M:%S', time.gmtime(eta))}",
                            flush=True
                        )
                        last_report = now

        # Final summary once the whole file has been hashed.
        if show_progress:
            total_time = time.time() - start_time
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                f" ✅ DONE | "
                f"{total_size / 1024 / 1024:.1f} MB | "
                f"avg {avg_speed / 1024 / 1024:.1f} MB/s | "
                f"time {time.strftime('%H:%M:%S', time.gmtime(total_time))}",
                flush=True
            )

        return h.digest()

    except Exception as e:
        # Report and propagate — caller decides how to count the failure.
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise
|
||||
|
||||
|
||||
def size_allowed(size: int) -> bool:
    """Tell whether *size* (bytes) passes the configured size filter.

    A limit of None disables that bound.
    """
    too_small = FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE
    too_big = FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE
    return not (too_small or too_big)
|
||||
|
||||
|
||||
def normalize_db_path(scan_root, disk_path):
    r"""
    Converts a physical Windows path to the standardized DB format.
    E:\Movies\File.mkv -> /Movies/File.mkv

    Returns None when *disk_path* has no relative form under *scan_root*
    (e.g. a different drive).
    """
    try:
        relative = os.path.relpath(disk_path, scan_root)
    except ValueError:
        return None

    # Backslashes become forward slashes, then guarantee one leading slash.
    unified = relative.replace("\\", "/")
    return unified if unified.startswith("/") else "/" + unified
|
||||
|
||||
|
||||
# ==============================
|
||||
# MAIN
|
||||
# ==============================
|
||||
|
||||
def main():
    """Two-phase sync of an external disk against the `file_md5_index` table.

    Phase 1 deletes DB rows whose files no longer exist on disk; Phase 2
    hashes new/changed files with BLAKE3 and upserts them. Configuration
    (drive letter, disk label, DRY_RUN switch) is hardcoded at the top of
    the file. With DRY_RUN=True nothing is written to the DB.
    """
    print("🚀 BLAKE3 External Disk Indexer", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)

    # Announce the mode loudly — LIVE mode deletes rows.
    if DRY_RUN:
        print("🛡️ DRY RUN MODE ACTIVE: No changes will be made to DB.", flush=True)
    else:
        print("⚠️ LIVE MODE: Changes WILL be committed to DB.", flush=True)

    # Build root path from the configured drive letter.
    scan_root = f"{DISK_DRIVE_LETTER}:\\"

    if not os.path.isdir(scan_root):
        print(f"❌ ERROR: Drive '{scan_root}' not found!")
        print(f" Please check DISK_DRIVE_LETTER in config.")
        return

    print(f"✅ Config:")
    print(f" Source (Win) : {scan_root}")
    print(f" DB Hostname : {DISK_HOSTNAME}")

    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
    except Exception as e:
        print(f"❌ DB Connection failed: {e}")
        return

    print(f"📥 Loading DB index for: '{DISK_HOSTNAME}'...", flush=True)

    # === LOAD EXISTING DB RECORDS ===
    # Only rows for this disk's host_name — skip check becomes a dict lookup.
    cur.execute("""
        SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
        FROM file_md5_index
        WHERE host_name = %s
    """, (DISK_HOSTNAME,))

    # Map: { "/Folder/File.ext": (size, mtime) }
    indexed_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}

    print(f"✅ Found {len(indexed_map):,} files in DB for this disk.", flush=True)

    # =========================================================
    # PHASE 1: CLEANUP (DELETE MISSING FILES)
    # =========================================================
    print("======================================", flush=True)
    print("🧹 PHASE 1: Checking for deleted files...", flush=True)

    current_disk_paths = set()

    # Fast walk just to get paths (no stat, no hashing).
    for root, dirs, files in os.walk(scan_root):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]

        for fname in files:
            disk_path = os.path.join(root, fname)
            clean_path = normalize_db_path(scan_root, disk_path)
            if clean_path:
                current_disk_paths.add(clean_path)

    # Rows in the DB whose file no longer exists on disk.
    paths_to_delete = set(indexed_map.keys()) - current_disk_paths

    if paths_to_delete:
        print(f"🗑️ Found {len(paths_to_delete):,} files to delete from DB.")

        if DRY_RUN:
            # Preview only — show at most the first 20 paths.
            print("🛡️ [DRY RUN] Listing files to be deleted (No action taken):")
            for p in sorted(list(paths_to_delete))[:20]:  # Print first 20
                print(f" - {p}")
            if len(paths_to_delete) > 20:
                print(f" ... and {len(paths_to_delete) - 20} more.")
        else:
            # Delete in batches to keep the IN (...) list bounded.
            batch_size = 1000
            to_delete_list = list(paths_to_delete)

            for i in range(0, len(to_delete_list), batch_size):
                batch = to_delete_list[i: i + batch_size]
                # Placeholders only — values are bound by the driver.
                format_strings = ','.join(['%s'] * len(batch))

                query = f"DELETE FROM file_md5_index WHERE host_name = %s AND full_path IN ({format_strings})"

                try:
                    cur.execute(query, [DISK_HOSTNAME] + batch)
                    print(f" ... deleted batch {i}-{i + len(batch)}")
                except Exception as e:
                    print(f"❌ Error deleting batch: {e}")

            # Keep the local map consistent with what the DB now holds.
            for p in paths_to_delete:
                del indexed_map[p]
            print("✅ Cleanup complete.")
    else:
        print("✅ No deleted files detected.")

    # =========================================================
    # PHASE 2: SCAN & UPDATE (HASHING)
    # =========================================================
    print("======================================", flush=True)
    print("🚀 PHASE 2: Scanning for changes & new files...", flush=True)

    new_files = 0
    skipped = 0
    filtered = 0
    errors = 0
    seen_paths = set()  # guards against duplicate normalized paths

    for root, dirs, files in os.walk(scan_root):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]

        for fname in files:
            disk_path = os.path.join(root, fname)

            # Stat; unreadable entries count as errors.
            try:
                stat = os.stat(disk_path)
            except OSError:
                errors += 1
                continue

            size = stat.st_size
            if not size_allowed(size):
                filtered += 1
                continue

            clean_path = normalize_db_path(scan_root, disk_path)
            if not clean_path:
                errors += 1
                continue

            if clean_path in seen_paths:
                continue
            seen_paths.add(clean_path)

            mtime = int(stat.st_mtime)

            # === MATCH CHECK ===
            # Skip only when path, size AND mtime all match the DB record.
            is_match = False
            if clean_path in indexed_map:
                db_size, db_mtime = indexed_map[clean_path]
                if size == db_size and mtime == db_mtime:
                    is_match = True

            if is_match:
                skipped += 1
                if PRINT_SKIPPED:
                    print(f"⏭ SKIP {clean_path}", flush=True)
                continue

            # === INSERT / UPDATE ===
            print("➕ NEW / UPDATED", flush=True)
            print(f" File: {clean_path}", flush=True)
            print(f" Size: {size:,} B", flush=True)

            try:
                b3 = compute_blake3(disk_path)
            except Exception:
                # compute_blake3 already printed the warning.
                errors += 1
                continue

            if DRY_RUN:
                # Hash is computed even in dry-run so the output is realistic.
                print(f"🛡️ [DRY RUN] Would INSERT/UPDATE: {clean_path}")
                print(f" Hash: {b3.hex()}")
                new_files += 1
            else:
                # Upsert keyed on (host_name, full_path); autocommit is on.
                cur.execute("""
                    INSERT INTO file_md5_index
                    (os_name, host_name, full_path, file_name, directory,
                     file_size, mtime, blake3)
                    VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                    ON DUPLICATE KEY UPDATE
                        file_size = VALUES(file_size),
                        mtime = VALUES(mtime),
                        blake3 = VALUES(blake3),
                        updated_at = CURRENT_TIMESTAMP
                """, (
                    OS_NAME,
                    DISK_HOSTNAME,
                    clean_path,
                    fname,
                    os.path.dirname(clean_path),
                    size,
                    mtime,
                    b3,
                ))
                new_files += 1
                print(f" Hash: {b3.hex()}", flush=True)

            print("--------------------------------------", flush=True)

    # Final summary.
    # NOTE(review): the `filtered` counter is collected but never printed.
    print("======================================", flush=True)
    print(f"✅ Processed : {new_files}")
    print(f"⏭ Skipped : {skipped}")
    print(f"🗑 Deleted : {len(paths_to_delete)} " + ("(DRY RUN)" if DRY_RUN else ""))
    print(f"⚠️ Errors : {errors}")
    print("🏁 Done.")

    cur.close()
    db.close()
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user