import os
import mysql.connector
from datetime import datetime
from dotenv import load_dotenv

load_dotenv()  # Reads .env file and adds its variables to the environment
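
# The connection settings below are read from the .env file loaded above.
# A minimal sketch of that file, with illustrative values only (the variable
# names match the os.getenv() calls in init_db; actual values depend on your
# MySQL setup):
#
#   DB_MYSQL_HOST=localhost
#   DB_MYSQL_ROOT=root
#   DB_MYSQL_ROOT_PASS=changeme
#   DB_MYSQL_PORT=3306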


# Database setup with explicit UTF8MB4 collation
def init_db():
    # Connect without selecting a default database; the walkfiles database is
    # created and selected just below, so it does not need to exist yet.
    conn = mysql.connector.connect(
        host=os.getenv("DB_MYSQL_HOST"),
        user=os.getenv("DB_MYSQL_ROOT"),
        password=os.getenv("DB_MYSQL_ROOT_PASS"),
        port=int(os.getenv("DB_MYSQL_PORT", 3306)),
        charset="utf8mb4",
        collation="utf8mb4_general_ci"
    )
    cursor = conn.cursor()
    cursor.execute("CREATE DATABASE IF NOT EXISTS walkfiles CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci")
    cursor.execute("USE walkfiles")
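
    # Schema: one devices row per scanned machine; folders reference their
    # device and parent folder; files reference both their folder and device.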

    cursor.execute('''CREATE TABLE IF NOT EXISTS devices (
        id INT AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
        scanned_at DATETIME
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

    # A UNIQUE index on a TEXT column needs an explicit key length in MySQL,
    # so paths are stored as VARCHAR(768), the longest utf8mb4 column an
    # InnoDB index can fully cover with the default row format.
    cursor.execute('''CREATE TABLE IF NOT EXISTS folders (
        id INT AUTO_INCREMENT PRIMARY KEY,
        path VARCHAR(768) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
        parent_id INT,
        device_id INT,
        FOREIGN KEY(device_id) REFERENCES devices(id)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

    cursor.execute('''CREATE TABLE IF NOT EXISTS files (
        id INT AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
        path VARCHAR(768) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
        size BIGINT,
        modified DATETIME,
        type VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
        folder_id INT,
        device_id INT,
        FOREIGN KEY(folder_id) REFERENCES folders(id),
        FOREIGN KEY(device_id) REFERENCES devices(id)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

    conn.commit()
    return conn, cursor


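# Bulk path: mysql.connector's executemany() sends the collected rows as a
# batched INSERT, and INSERT IGNORE skips rows whose path is already present
# (relying on the UNIQUE index on files.path), so re-running a scan does not
# duplicate file rows.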
def insert_bulk_files(cursor, conn, files_data):
    if not files_data:
        return
    query = '''INSERT IGNORE INTO files (name, path, size, modified, type, folder_id, device_id)
               VALUES (%s,%s,%s,%s,%s,%s,%s)'''
    cursor.executemany(query, files_data)
    conn.commit()


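# walk_and_store_bulk() walks the target directory top-down with os.walk, so a
# folder's parent row has always been inserted (and cached) before the folder
# itself; file rows are collected in memory and written in a single bulk
# insert at the end.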
def walk_and_store_bulk():
    target_dir = r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování"
    device_name = "NTB"
    conn, cursor = init_db()
    now = datetime.now()

    cursor.execute("INSERT IGNORE INTO devices (name, scanned_at) VALUES (%s, %s)", (device_name, now))
    conn.commit()
    cursor.execute("SELECT id FROM devices WHERE name=%s", (device_name,))
    device_id = cursor.fetchone()[0]

    folder_cache = {}
    files_to_insert = []

    for root, dirs, files in os.walk(target_dir):
        parent_path = os.path.dirname(root)
        parent_id = folder_cache.get(parent_path)

        cursor.execute("INSERT IGNORE INTO folders (path, parent_id, device_id) VALUES (%s, %s, %s)", (root, parent_id, device_id))
        conn.commit()
        cursor.execute("SELECT id FROM folders WHERE path=%s", (root,))
        folder_id = cursor.fetchone()[0]
        folder_cache[root] = folder_id

        for file in files:
            file_path = os.path.join(root, file)
            try:
                stats = os.stat(file_path)
                modified = datetime.fromtimestamp(stats.st_mtime)
                ftype = os.path.splitext(file)[1]
                files_to_insert.append((file, file_path, stats.st_size, modified, ftype, folder_id, device_id))
            except FileNotFoundError:
                continue

    insert_bulk_files(cursor, conn, files_to_insert)
    cursor.close()
    conn.close()


if __name__ == '__main__':
    target_dir = r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování"
    if not os.path.isdir(target_dir):
        print("Invalid directory path.")
    else:
        walk_and_store_bulk()
        print(f"Scan completed for directory '{target_dir}' on device 'NTB'. "
              "Bulk data stored in MySQL database 'walkfiles'.")