ntb
This commit is contained in:
106
20 Walkandsave.py
Normal file
106
20 Walkandsave.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import os
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()  # Read the .env file and add its entries to the process environment, where the os.getenv() lookups below find the DB_MYSQL_* settings
|
||||
|
||||
# Database setup with explicit UTF8MB4 collation
|
||||
def init_db():
    """Connect to MySQL and make sure the ``walkfiles`` schema exists.

    Connection settings come from the environment variables
    ``DB_MYSQL_HOST``, ``DB_MYSQL_ROOT``, ``DB_MYSQL_ROOT_PASS`` and
    ``DB_MYSQL_PORT`` (3306 when unset or empty).  The ``walkfiles``
    database and its ``devices``, ``folders`` and ``files`` tables are
    created when missing, all using utf8mb4 / utf8mb4_general_ci.

    Returns:
        tuple: ``(conn, cursor)`` -- the open connection with ``walkfiles``
        selected, and a cursor on that connection.  The caller owns both
        and is responsible for closing the connection.

    Raises:
        Any error raised by the connector while connecting or while
        running the setup statements is propagated; when it happens after
        the connection was opened, the connection is closed first.
    """
    conn = mysql.connector.connect(
        host=os.getenv("DB_MYSQL_HOST"),
        user=os.getenv("DB_MYSQL_ROOT"),
        password=os.getenv("DB_MYSQL_ROOT_PASS"),
        # No ``database=`` argument on purpose: the database may not exist
        # yet, so it is created and selected explicitly below.
        # ``or`` also covers an empty DB_MYSQL_PORT value, not just a missing one.
        port=int(os.getenv("DB_MYSQL_PORT") or 3306),
        charset="utf8mb4",
        collation="utf8mb4_general_ci",
    )
    try:
        cursor = conn.cursor()
        cursor.execute(
            "CREATE DATABASE IF NOT EXISTS walkfiles "
            "CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci"
        )
        cursor.execute("USE walkfiles")

        # One row per scanned machine; ``name`` is the lookup key.
        cursor.execute('''CREATE TABLE IF NOT EXISTS devices (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
            scanned_at DATETIME
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

        # NOTE(review): a UNIQUE index on a TEXT column without a prefix
        # length is, as far as I know, rejected by stock MySQL (error 1170)
        # but accepted by recent MariaDB -- confirm against the target server
        # before changing the schema.
        cursor.execute('''CREATE TABLE IF NOT EXISTS folders (
            id INT AUTO_INCREMENT PRIMARY KEY,
            path TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
            parent_id INT,
            device_id INT,
            FOREIGN KEY(device_id) REFERENCES devices(id)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

        cursor.execute('''CREATE TABLE IF NOT EXISTS files (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
            path TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci UNIQUE,
            size BIGINT,
            modified DATETIME,
            type VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
            folder_id INT,
            device_id INT,
            FOREIGN KEY(folder_id) REFERENCES folders(id),
            FOREIGN KEY(device_id) REFERENCES devices(id)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci''')

        conn.commit()
    except Exception:
        # Do not leak the connection when schema setup fails half-way.
        conn.close()
        raise
    return conn, cursor
|
||||
|
||||
|
||||
def insert_bulk_files(cursor, conn, files_data, batch_size=1000):
    """Insert file rows into the ``files`` table in bounded batches.

    Rows whose unique key already exists are skipped (``INSERT IGNORE``).
    The rows are sent in slices of at most ``batch_size`` so that a very
    large scan does not end up in one oversized statement (the connector
    may fold an ``executemany`` INSERT into a single multi-row statement).
    Everything is committed once, after the last batch.

    Args:
        cursor: DB-API cursor providing ``executemany``.
        conn: Connection providing ``commit`` and ``rollback``.
        files_data: Iterable of 7-tuples
            ``(name, path, size, modified, type, folder_id, device_id)``.
            When falsy (e.g. an empty list) nothing is executed or committed.
        batch_size: Maximum number of rows per ``executemany`` call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Any error raised by ``executemany`` is re-raised after the pending
        transaction has been rolled back.
    """
    if not files_data:
        return
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    query = (
        "INSERT IGNORE INTO files "
        "(name, path, size, modified, type, folder_id, device_id) "
        "VALUES (%s,%s,%s,%s,%s,%s,%s)"
    )
    rows = list(files_data)  # allow any iterable, and make slicing safe
    try:
        for start in range(0, len(rows), batch_size):
            cursor.executemany(query, rows[start:start + batch_size])
    except Exception:
        # Leave the connection in a clean state for the caller.
        conn.rollback()
        raise
    conn.commit()
|
||||
|
||||
|
||||
def walk_and_store_bulk(
    target_dir=r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování",
    device_name="NTB",
    batch_size=1000,
):
    """Walk a directory tree and record its folders and files in MySQL.

    The device row is created if missing, every visited directory is stored
    in ``folders`` (with the id of its parent directory when that parent was
    visited earlier in the walk), and every file is queued with its size,
    modification time and extension and written via ``insert_bulk_files``.

    Args:
        target_dir: Root directory to scan.
        device_name: Name under which the scan is registered in ``devices``.
        batch_size: Number of queued file rows that triggers a flush to the
            database, which keeps memory use bounded on large trees.

    The database connection is always closed, including when the scan fails.
    """
    conn, cursor = init_db()
    try:
        now = datetime.now()

        # NOTE(review): INSERT IGNORE leaves an existing device row untouched,
        # so scanned_at is only written the first time a device is seen --
        # confirm whether it should be refreshed on every scan.
        cursor.execute("INSERT IGNORE INTO devices (name, scanned_at) VALUES (%s, %s)", (device_name, now))
        conn.commit()
        cursor.execute("SELECT id FROM devices WHERE name=%s", (device_name,))
        device_id = cursor.fetchone()[0]

        folder_cache = {}  # directory path -> folders.id
        pending = []       # file rows waiting to be inserted

        # os.walk is top-down, so a directory's parent is cached before the
        # directory itself is reached; the scan root gets parent_id NULL.
        for root, _dirs, files in os.walk(target_dir):
            parent_id = folder_cache.get(os.path.dirname(root))

            cursor.execute("INSERT IGNORE INTO folders (path, parent_id, device_id) VALUES (%s, %s, %s)", (root, parent_id, device_id))
            conn.commit()
            # Re-select instead of using lastrowid: the row may have existed
            # already, in which case the insert above was ignored.
            cursor.execute("SELECT id FROM folders WHERE path=%s", (root,))
            folder_id = cursor.fetchone()[0]
            folder_cache[root] = folder_id

            for file_name in files:
                file_path = os.path.join(root, file_name)
                try:
                    stats = os.stat(file_path)
                except OSError:
                    # Vanished, unreadable or otherwise inaccessible file:
                    # skip it rather than abort the whole scan.
                    continue
                modified = datetime.fromtimestamp(stats.st_mtime)
                ftype = os.path.splitext(file_name)[1]
                pending.append((file_name, file_path, stats.st_size, modified, ftype, folder_id, device_id))

                if len(pending) >= batch_size:
                    insert_bulk_files(cursor, conn, pending)
                    pending = []

        insert_bulk_files(cursor, conn, pending)
    finally:
        conn.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Single definition of the scan root.  A raw string needs single
    # backslashes; doubling them inside r"..." puts literal "\\" in the path.
    scan_root = r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování"

    if not os.path.isdir(scan_root):
        print("Invalid directory path.")
    else:
        walk_and_store_bulk()
        print(
            "Scan completed for directory '{}' on device 'NTB'. "
            "Bulk data stored efficiently in MySQL database 'walkfiles'.".format(scan_root)
        )
|
||||
Reference in New Issue
Block a user