import os
from datetime import datetime

import mysql.connector
from dotenv import load_dotenv

load_dotenv()  # Reads the .env file and adds its variables to the environment

TARGET_DIR = r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování"
DEVICE_NAME = "NTB"


# Database setup with explicit UTF8MB4 collation
def init_db():
    # Connect without selecting a database; 'walkfiles' is created below if missing.
    conn = mysql.connector.connect(
        host=os.getenv("DB_MYSQL_HOST"),
        user=os.getenv("DB_MYSQL_ROOT"),
        password=os.getenv("DB_MYSQL_ROOT_PASS"),
        port=int(os.getenv("DB_MYSQL_PORT", 3306)),
        charset="utf8mb4",
        collation="utf8mb4_general_ci",
    )
    cursor = conn.cursor()
    cursor.execute(
        "CREATE DATABASE IF NOT EXISTS walkfiles "
        "CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci"
    )
    cursor.execute("USE walkfiles")
    cursor.execute('''CREATE TABLE IF NOT EXISTS devices (
        id INT AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
        scanned_at DATETIME
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')
    # Paths use VARCHAR(768) instead of TEXT: MySQL cannot place a UNIQUE index
    # on a TEXT column without a prefix length, and 768 utf8mb4 characters
    # (3072 bytes) is the InnoDB index key limit.
    cursor.execute('''CREATE TABLE IF NOT EXISTS folders (
        id INT AUTO_INCREMENT PRIMARY KEY,
        path VARCHAR(768) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
        parent_id INT,
        device_id INT,
        FOREIGN KEY(device_id) REFERENCES devices(id)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')
    cursor.execute('''CREATE TABLE IF NOT EXISTS files (
        id INT AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
        path VARCHAR(768) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
        size BIGINT,
        modified DATETIME,
        type VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
        folder_id INT,
        device_id INT,
        FOREIGN KEY(folder_id) REFERENCES folders(id),
        FOREIGN KEY(device_id) REFERENCES devices(id)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')
    conn.commit()
    return conn, cursor


def insert_bulk_files(cursor, conn, files_data):
    if not files_data:
        return
    query = '''INSERT IGNORE INTO files
               (name, path, size, modified, type, folder_id, device_id)
               VALUES (%s, %s, %s, %s, %s, %s, %s)'''
    cursor.executemany(query, files_data)
    conn.commit()


def walk_and_store_bulk():
    conn, cursor = init_db()
    now = datetime.now()

    # Register the device; INSERT IGNORE keeps an existing row (and its original
    # scanned_at) if the device was recorded on an earlier run.
    cursor.execute(
        "INSERT IGNORE INTO devices (name, scanned_at) VALUES (%s, %s)",
        (DEVICE_NAME, now),
    )
    conn.commit()
    cursor.execute("SELECT id FROM devices WHERE name=%s", (DEVICE_NAME,))
    device_id = cursor.fetchone()[0]

    folder_cache = {}
    files_to_insert = []

    for root, dirs, files in os.walk(TARGET_DIR):
        # os.walk is top-down, so a folder's parent has already been cached;
        # the root of the scan has no cached parent and gets NULL.
        parent_id = folder_cache.get(os.path.dirname(root))
        cursor.execute(
            "INSERT IGNORE INTO folders (path, parent_id, device_id) VALUES (%s, %s, %s)",
            (root, parent_id, device_id),
        )
        conn.commit()
        cursor.execute("SELECT id FROM folders WHERE path=%s", (root,))
        folder_id = cursor.fetchone()[0]
        folder_cache[root] = folder_id

        for file in files:
            file_path = os.path.join(root, file)
            try:
                stats = os.stat(file_path)
                modified = datetime.fromtimestamp(stats.st_mtime)
                ftype = os.path.splitext(file)[1]
                files_to_insert.append(
                    (file, file_path, stats.st_size, modified, ftype, folder_id, device_id)
                )
            except FileNotFoundError:
                # Skip files that disappeared between the walk and os.stat().
                continue

    insert_bulk_files(cursor, conn, files_to_insert)
    conn.close()


if __name__ == '__main__':
    if not os.path.isdir(TARGET_DIR):
        print("Invalid directory path.")
    else:
        walk_and_store_bulk()
        print(f"Scan completed for directory '{TARGET_DIR}' on device '{DEVICE_NAME}'. "
              "Bulk data stored efficiently in MySQL database 'walkfiles'.")
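
# --- Example .env (a sketch only: the variable names match the os.getenv() calls
# --- in init_db(); the values below are placeholders, not real credentials) ---
#
# DB_MYSQL_HOST=localhost
# DB_MYSQL_ROOT=root
# DB_MYSQL_ROOT_PASS=change_me
# DB_MYSQL_PORT=3306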