diff --git a/.idea/WalkFiles.iml b/.idea/WalkFiles.iml
index 3786a27..6cb8b9a 100644
--- a/.idea/WalkFiles.iml
+++ b/.idea/WalkFiles.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/22 WalkandSave.py b/22 WalkandSave.py
index 8bf2f4f..7ab97c5 100644
--- a/22 WalkandSave.py
+++ b/22 WalkandSave.py
@@ -169,28 +169,59 @@ def load_folder_state(cursor, device_id: int):
return out
-def get_or_create_folder(cursor, conn, folder_state, device_id, folder_path, parent_path, now):
- if folder_path in folder_state:
- folder_id = folder_state[folder_path]["id"]
- cursor.execute(
- "UPDATE folders SET last_seen=%s, deleted=0 WHERE id=%s",
- (now, folder_id)
- )
- folder_state[folder_path]["deleted"] = 0
+import unicodedata
+
+import unicodedata
+from datetime import datetime
+
+def get_or_create_folder(cursor, conn, folder_state, device_id, folder_path, parent_id):
+ # Normalize to NFC so composed and decomposed spellings of the same name (e.g. "Černý") compare equal
+ folder_path = unicodedata.normalize("NFC", folder_path)
+
+ # Cache key is folder_path
+ key = folder_path
+
+ # 1) If we already know this folder → return cached ID
+ if key in folder_state:
+ return folder_state[key]["id"]
+
+ now = datetime.now()
+
+ # 2) Try to SELECT existing record
+ cursor.execute("""
+ SELECT id
+ FROM folders
+ WHERE device_id = %s AND path = %s
+ LIMIT 1
+ """, (device_id, folder_path))
+ row = cursor.fetchone()
+
+ if row:
+ folder_id = row[0]
+ folder_state[key] = {"id": folder_id, "deleted": 0}
return folder_id
- parent_id = folder_state.get(parent_path, {}).get("id") if parent_path else None
-
+ # 3) INSERT new folder (idempotent: on a duplicate key the existing id is returned via LAST_INSERT_ID)
cursor.execute("""
- INSERT INTO folders (path, parent_id, device_id, first_seen, last_seen, deleted)
- VALUES (%s, %s, %s, %s, %s, 0)
+ INSERT INTO folders (path, parent_id, device_id, first_seen, last_seen)
+ VALUES (%s, %s, %s, %s, %s)
+ ON DUPLICATE KEY UPDATE
+ id = LAST_INSERT_ID(id),
+ last_seen = VALUES(last_seen)
""", (folder_path, parent_id, device_id, now, now))
+ conn.commit()
+
folder_id = cursor.lastrowid
- folder_state[folder_path] = {"id": folder_id, "deleted": 0}
+
+ # 4) Save to memory cache
+ folder_state[key] = {"id": folder_id, "deleted": 0}
+
return folder_id
+
+
# ======================================================
# LOAD LAST FILE STATE
# ======================================================
@@ -225,8 +256,11 @@ def load_last_file_state(cursor, device_id: int):
def walk_and_store_bulk():
BATCH_SIZE = 10000
- target_dir = r"\\tower1\#colddata"
- device_name = "TOWER1"
+ # target_dir = r"\\tower1\#colddata"
+ # target_dir = r"z:"
+ target_dir = r"\\tower\ebooks"
+ # device_name = "TW22"
+ device_name = "TOWER"
if not os.path.isdir(target_dir):
print("Invalid directory:", target_dir)
@@ -256,12 +290,20 @@ def walk_and_store_bulk():
# -------------------------------------------------
for root, dirs, files in os.walk(target_dir):
folder_path = os.path.normpath(root)
- parent_path = os.path.normpath(os.path.dirname(root)) if root != target_dir else None
+ # 1) Resolve parent_id: the walk root has no parent; otherwise get or create the parent folder
+ if root == target_dir:
+ parent_id = None
+ else:
+ parent_folder_path = os.path.normpath(os.path.dirname(root))
+ parent_id = get_or_create_folder(cursor, conn, folder_state,
+ device_id, parent_folder_path,
+ None)
+ # 2) Get or create the current folder, passing the parent_id resolved above
seen_folders.add(folder_path)
folder_id = get_or_create_folder(cursor, conn, folder_state,
device_id, folder_path,
- parent_path, now)
+ parent_id)
# -------------------------------------------------
# FILE LOOP
diff --git a/51 MD5Calculate.py b/51 MD5Calculate.py
index c1ad0d7..bd5743a 100644
--- a/51 MD5Calculate.py
+++ b/51 MD5Calculate.py
@@ -229,7 +229,7 @@ def run_md5_calculator(device_name=None,
if __name__ == "__main__":
# Example usage:
run_md5_calculator(
- device_name="TWW11",
+ device_name="TOWER",
extension="ANY",
max_size="ANY",
path_prefix=r"ANY"