tw22
This commit is contained in:
2
.idea/WalkFiles.iml
generated
2
.idea/WalkFiles.iml
generated
@@ -4,7 +4,7 @@
|
|||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="Python 3.13 (walkfiles)" jdkType="Python SDK" />
|
<orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
@@ -169,28 +169,59 @@ def load_folder_state(cursor, device_id: int):
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def get_or_create_folder(cursor, conn, folder_state, device_id, folder_path, parent_path, now):
|
import unicodedata
|
||||||
if folder_path in folder_state:
|
|
||||||
folder_id = folder_state[folder_path]["id"]
|
import unicodedata
|
||||||
cursor.execute(
|
from datetime import datetime
|
||||||
"UPDATE folders SET last_seen=%s, deleted=0 WHERE id=%s",
|
|
||||||
(now, folder_id)
|
def get_or_create_folder(cursor, conn, folder_state, device_id, folder_path, parent_id):
|
||||||
)
|
# Normalize Unicode to avoid Černý vs Černý issue
|
||||||
folder_state[folder_path]["deleted"] = 0
|
folder_path = unicodedata.normalize("NFC", folder_path)
|
||||||
|
|
||||||
|
# Cache key is folder_path
|
||||||
|
key = folder_path
|
||||||
|
|
||||||
|
# 1) If we already know this folder → return cached ID
|
||||||
|
if key in folder_state:
|
||||||
|
return folder_state[key]["id"]
|
||||||
|
|
||||||
|
now = datetime.now()
|
||||||
|
|
||||||
|
# 2) Try to SELECT existing record
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT id
|
||||||
|
FROM folders
|
||||||
|
WHERE device_id = %s AND path = %s
|
||||||
|
LIMIT 1
|
||||||
|
""", (device_id, folder_path))
|
||||||
|
row = cursor.fetchone()
|
||||||
|
|
||||||
|
if row:
|
||||||
|
folder_id = row[0]
|
||||||
|
folder_state[key] = {"id": folder_id, "deleted": 0}
|
||||||
return folder_id
|
return folder_id
|
||||||
|
|
||||||
parent_id = folder_state.get(parent_path, {}).get("id") if parent_path else None
|
# 3) INSERT new folder (idempotent)
|
||||||
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
INSERT INTO folders (path, parent_id, device_id, first_seen, last_seen, deleted)
|
INSERT INTO folders (path, parent_id, device_id, first_seen, last_seen)
|
||||||
VALUES (%s, %s, %s, %s, %s, 0)
|
VALUES (%s, %s, %s, %s, %s)
|
||||||
|
ON DUPLICATE KEY UPDATE
|
||||||
|
id = LAST_INSERT_ID(id),
|
||||||
|
last_seen = VALUES(last_seen)
|
||||||
""", (folder_path, parent_id, device_id, now, now))
|
""", (folder_path, parent_id, device_id, now, now))
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
folder_id = cursor.lastrowid
|
folder_id = cursor.lastrowid
|
||||||
folder_state[folder_path] = {"id": folder_id, "deleted": 0}
|
|
||||||
|
# 4) Save to memory cache
|
||||||
|
folder_state[key] = {"id": folder_id, "deleted": 0}
|
||||||
|
|
||||||
return folder_id
|
return folder_id
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ======================================================
|
# ======================================================
|
||||||
# LOAD LAST FILE STATE
|
# LOAD LAST FILE STATE
|
||||||
# ======================================================
|
# ======================================================
|
||||||
@@ -225,8 +256,11 @@ def load_last_file_state(cursor, device_id: int):
|
|||||||
def walk_and_store_bulk():
|
def walk_and_store_bulk():
|
||||||
|
|
||||||
BATCH_SIZE = 10000
|
BATCH_SIZE = 10000
|
||||||
target_dir = r"\\tower1\#colddata"
|
# target_dir = r"\\tower1\#colddata"
|
||||||
device_name = "TOWER1"
|
# target_dir = r"z:"
|
||||||
|
target_dir = r"\\tower\ebooks"
|
||||||
|
# device_name = "TW22"
|
||||||
|
device_name = "TOWER"
|
||||||
|
|
||||||
if not os.path.isdir(target_dir):
|
if not os.path.isdir(target_dir):
|
||||||
print("Invalid directory:", target_dir)
|
print("Invalid directory:", target_dir)
|
||||||
@@ -256,12 +290,20 @@ def walk_and_store_bulk():
|
|||||||
# -------------------------------------------------
|
# -------------------------------------------------
|
||||||
for root, dirs, files in os.walk(target_dir):
|
for root, dirs, files in os.walk(target_dir):
|
||||||
folder_path = os.path.normpath(root)
|
folder_path = os.path.normpath(root)
|
||||||
parent_path = os.path.normpath(os.path.dirname(root)) if root != target_dir else None
|
# 1️⃣ determine parent_id correctly
|
||||||
|
if root == target_dir:
|
||||||
|
parent_id = None
|
||||||
|
else:
|
||||||
|
parent_folder_path = os.path.normpath(os.path.dirname(root))
|
||||||
|
parent_id = get_or_create_folder(cursor, conn, folder_state,
|
||||||
|
device_id, parent_folder_path,
|
||||||
|
None)
|
||||||
|
|
||||||
|
# 2️⃣ now insert current folder with correct parent_id
|
||||||
seen_folders.add(folder_path)
|
seen_folders.add(folder_path)
|
||||||
folder_id = get_or_create_folder(cursor, conn, folder_state,
|
folder_id = get_or_create_folder(cursor, conn, folder_state,
|
||||||
device_id, folder_path,
|
device_id, folder_path,
|
||||||
parent_path, now)
|
parent_id)
|
||||||
|
|
||||||
# -------------------------------------------------
|
# -------------------------------------------------
|
||||||
# FILE LOOP
|
# FILE LOOP
|
||||||
|
|||||||
@@ -229,7 +229,7 @@ def run_md5_calculator(device_name=None,
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Example usage:
|
# Example usage:
|
||||||
run_md5_calculator(
|
run_md5_calculator(
|
||||||
device_name="TWW11",
|
device_name="TOWER",
|
||||||
extension="ANY",
|
extension="ANY",
|
||||||
max_size="ANY",
|
max_size="ANY",
|
||||||
path_prefix=r"ANY"
|
path_prefix=r"ANY"
|
||||||
|
|||||||
Reference in New Issue
Block a user