This commit is contained in:
2025-11-23 17:53:47 +01:00
commit f41b2c43bc
8 changed files with 185 additions and 0 deletions

106
20 Walkandsave.py Normal file
View File

@@ -0,0 +1,106 @@
import os
import mysql.connector
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()  # Reads the .env file and adds its entries to the environment; init_db() picks up its DB_MYSQL_* settings from there via os.getenv()
# Database setup with explicit UTF8MB4 collation
def init_db():
    """Connect to the MySQL server and make sure the ``walkfiles`` schema exists.

    Connection settings come from the environment variables
    ``DB_MYSQL_HOST``, ``DB_MYSQL_ROOT``, ``DB_MYSQL_ROOT_PASS`` and
    ``DB_MYSQL_PORT`` (3306 when unset or empty).  The ``walkfiles`` database
    and the ``devices``, ``folders`` and ``files`` tables are created when
    missing, all with utf8mb4 / utf8mb4_general_ci.

    Returns:
        A ``(conn, cursor)`` tuple with ``walkfiles`` selected as the current
        database.  The caller is responsible for closing the connection.

    Raises:
        Any exception raised while connecting or running the DDL statements
        is propagated; the connection is closed first so it does not leak.
    """
    # No ``database=`` argument on purpose: the database may not exist yet,
    # so it is created and selected with explicit statements below.
    conn = mysql.connector.connect(
        host=os.getenv("DB_MYSQL_HOST"),
        user=os.getenv("DB_MYSQL_ROOT"),
        password=os.getenv("DB_MYSQL_ROOT_PASS"),
        # ``or`` also covers an empty ``DB_MYSQL_PORT=`` entry in the .env file.
        port=int(os.getenv("DB_MYSQL_PORT") or 3306),
        charset="utf8mb4",
        collation="utf8mb4_general_ci",
    )
    try:
        cursor = conn.cursor()
        cursor.execute(
            "CREATE DATABASE IF NOT EXISTS walkfiles "
            "CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci"
        )
        cursor.execute("USE walkfiles")

        # One row per scanned machine; ``name`` is the lookup key.
        cursor.execute('''CREATE TABLE IF NOT EXISTS devices (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
            scanned_at DATETIME
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

        # NOTE(review): stock MySQL rejects a UNIQUE index on a TEXT column
        # without a prefix length (error 1170), while recent MariaDB versions
        # accept it.  Confirm the target server before relying on the
        # ``path ... UNIQUE`` constraints in ``folders`` and ``files``.
        cursor.execute('''CREATE TABLE IF NOT EXISTS folders (
            id INT AUTO_INCREMENT PRIMARY KEY,
            path TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
            parent_id INT,
            device_id INT,
            FOREIGN KEY(device_id) REFERENCES devices(id)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

        cursor.execute('''CREATE TABLE IF NOT EXISTS files (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
            path TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
            size BIGINT,
            modified DATETIME,
            type VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
            folder_id INT,
            device_id INT,
            FOREIGN KEY(folder_id) REFERENCES folders(id),
            FOREIGN KEY(device_id) REFERENCES devices(id)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')
        conn.commit()
    except Exception:
        # Do not leak the connection when schema setup fails half-way.
        conn.close()
        raise
    return conn, cursor
def insert_bulk_files(cursor, conn, files_data):
    """Insert a batch of rows into the ``files`` table and commit.

    Args:
        cursor: cursor object whose ``executemany`` runs the insert.
        conn: connection object whose ``commit`` is called afterwards.
        files_data: sequence of 7-item rows in the column order
            ``(name, path, size, modified, type, folder_id, device_id)``.
            A falsy value (``None`` or empty) makes the call a no-op.

    Returns:
        None.
    """
    if files_data:
        # Adjacent literals reproduce the original two-line statement exactly.
        insert_sql = (
            "INSERT IGNORE INTO files (name, path, size, modified, type, folder_id, device_id)\n"
            "VALUES (%s,%s,%s,%s,%s,%s,%s)"
        )
        cursor.executemany(insert_sql, files_data)
        conn.commit()
def walk_and_store_bulk(
    target_dir=r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování",
    device_name="NTB",
    batch_size=1000,
):
    """Walk ``target_dir`` and record its folders and files in the database.

    The device row is created if missing (an existing row is left unchanged
    because the insert uses ``INSERT IGNORE``).  Every directory visited by
    ``os.walk`` gets a ``folders`` row, and every readable file is queued and
    written through ``insert_bulk_files``.

    Args:
        target_dir: root directory to scan.
        device_name: value stored in ``devices.name`` for this scan.
        batch_size: integer number of queued file rows that triggers a flush
            to the database.  This bounds both memory use and the size of
            each ``executemany`` call (the connector may send it as a single
            multi-row INSERT).

    Returns:
        None.

    Raises:
        Database errors are propagated; the connection is always closed.
    """
    conn, cursor = init_db()
    try:
        cursor.execute(
            "INSERT IGNORE INTO devices (name, scanned_at) VALUES (%s, %s)",
            (device_name, datetime.now()),
        )
        conn.commit()
        cursor.execute("SELECT id FROM devices WHERE name=%s", (device_name,))
        device_id = cursor.fetchone()[0]

        folder_cache = {}  # folder path -> folders.id, used to resolve parent_id
        pending_files = []
        skipped = 0

        for root, _dirs, files in os.walk(target_dir):
            # os.walk is top-down, so a parent is cached before its children;
            # the scan root itself has no cached parent and gets NULL.
            parent_id = folder_cache.get(os.path.dirname(root))
            cursor.execute(
                "INSERT IGNORE INTO folders (path, parent_id, device_id) VALUES (%s, %s, %s)",
                (root, parent_id, device_id),
            )
            conn.commit()
            cursor.execute("SELECT id FROM folders WHERE path=%s", (root,))
            folder_id = cursor.fetchone()[0]
            folder_cache[root] = folder_id

            for file in files:
                file_path = os.path.join(root, file)
                try:
                    stats = os.stat(file_path)
                    modified = datetime.fromtimestamp(stats.st_mtime)
                except (OSError, OverflowError, ValueError):
                    # Vanished, unreadable, or carrying an unrepresentable
                    # timestamp: skip this file rather than abort the scan.
                    skipped += 1
                    continue
                ftype = os.path.splitext(file)[1]
                pending_files.append(
                    (file, file_path, stats.st_size, modified, ftype, folder_id, device_id)
                )
                if len(pending_files) >= batch_size:
                    insert_bulk_files(cursor, conn, pending_files)
                    pending_files = []

        # Flush whatever is left over after the walk.
        insert_bulk_files(cursor, conn, pending_files)
        if skipped:
            print(f"Skipped {skipped} file(s) that could not be read.")
    finally:
        conn.close()
if __name__ == '__main__':
    # Script entry point: scan the configured directory only if it exists,
    # otherwise report the problem and do nothing.
    scan_root = r"u:\\Dropbox\\Ordinace\\Dokumentace_ke_zpracování"
    if os.path.isdir(scan_root):
        walk_and_store_bulk()
        done_message = "Scan completed for directory 'u:\\Dropbox\\Ordinace\\Dokumentace_ke_zpracování' on device 'NTB'. Bulk data stored efficiently in MySQL database 'walkfiles'."
        print(done_message)
    else:
        print("Invalid directory path.")