# File viewer metadata (not part of the program): 123 lines, 3.7 KiB, Python
import os
import sys
import time

import psycopg2
from psycopg2 import extras
from tqdm import tqdm

# --- CONFIGURATION ---
# Keyword arguments passed to psycopg2.connect(). Each value may be overridden
# through the matching libpq-style environment variable; the literals below
# are only fallbacks used when the variable is unset or empty.
# SECURITY NOTE(review): a real credential is committed here as a fallback.
# Rotate it, supply it via PGPASSWORD, and then remove the literal.
DB_CONFIG = {
    # Fill in the IP address of your Unraid/Postgres host.
    "host": os.environ.get("PGHOST") or "192.168.1.76",
    "database": os.environ.get("PGDATABASE") or "files",
    "user": os.environ.get("PGUSER") or "vladimir.buzalka",
    "password": os.environ.get("PGPASSWORD") or "Vlado7309208104++",
    "port": os.environ.get("PGPORT") or "5432",
}

# Root of the directory tree that gets indexed.
DIRECTORY_TO_SCAN = "//tower/Library"

# Number of rows written per INSERT/commit.
# Raised to 2000 for better efficiency with roughly 5M files.
BATCH_SIZE = 2000

# ---------------------

def scan_to_postgres():
    """Index every file under DIRECTORY_TO_SCAN into the ``library_files`` table.

    Connects to PostgreSQL with DB_CONFIG, creates ``library_files`` if it is
    missing, walks the directory tree and inserts one
    ``(file_path, file_name, file_size_bytes)`` row per file. Rows are written
    in batches of BATCH_SIZE and each batch is committed on its own.

    Files whose size cannot be read and directories that cannot be listed are
    skipped; the number of skipped entries is printed at the end.

    Returns:
        int: Number of rows committed to the database during this run. Also
        returned (as the count committed so far) when a non-interrupt error
        was caught and printed.

    Raises:
        SystemExit: With exit code 0 after the user interrupts the scan
            (Ctrl+C); the pending batch is saved first when possible.
    """
    insert_query = "INSERT INTO library_files (file_path, file_name, file_size_bytes) VALUES %s"
    create_table_query = """
        CREATE TABLE IF NOT EXISTS library_files
        (
            id SERIAL PRIMARY KEY,
            file_path TEXT NOT NULL,
            file_name TEXT NOT NULL,
            file_size_bytes BIGINT,
            indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
    """

    # All three are pre-set to None so that cleanup in ``finally`` is safe no
    # matter how far initialisation got.
    conn = None
    cur = None
    pbar = None

    total_count = 0  # rows actually committed
    skipped_count = 0  # files/directories that could not be read
    files_batch = []

    def save_batch(batch_data):
        """Write one batch to the database and commit it."""
        # page_size defaults to 100; passing the batch length sends the whole
        # batch as a single INSERT statement instead of many small ones.
        psycopg2.extras.execute_values(
            cur, insert_query, batch_data, page_size=len(batch_data)
        )
        conn.commit()

    def note_walk_error(_error):
        """Count directories that os.walk could not list."""
        nonlocal skipped_count
        skipped_count += 1

    try:
        conn = psycopg2.connect(**DB_CONFIG)
        cur = conn.cursor()

        # Table initialisation.
        cur.execute(create_table_query)
        conn.commit()

        print(f"🚀 Zahajuji indexaci: {DIRECTORY_TO_SCAN}")

        # Progress bar with automatic unit scaling (k, M).
        pbar = tqdm(
            unit=" soubor",
            unit_scale=True,
            unit_divisor=1000,
            desc="Probíhá skenování",
            dynamic_ncols=True,
        )

        # os.walk uses os.scandir internally; without onerror it would drop
        # unreadable directories silently.
        for root, _dirs, files in os.walk(DIRECTORY_TO_SCAN, onerror=note_walk_error):
            for name in files:
                full_path = os.path.join(root, name)

                # Only the metadata lookup is guarded, so that an OSError
                # raised while writing to the database is not mistaken for an
                # unreadable file.
                try:
                    file_size = os.path.getsize(full_path)
                except OSError:
                    skipped_count += 1
                    continue

                files_batch.append((full_path, name, file_size))

                if len(files_batch) >= BATCH_SIZE:
                    save_batch(files_batch)
                    total_count += len(files_batch)
                    pbar.update(len(files_batch))
                    files_batch = []

        # Save the final, incomplete batch.
        if files_batch:
            save_batch(files_batch)
            total_count += len(files_batch)
            pbar.update(len(files_batch))
            files_batch = []

        pbar.close()
        print(f"\n✅ Hotovo! Celkem zaindexováno {total_count} souborů.")
        if skipped_count:
            print(f"⚠️ Přeskočeno {skipped_count} nepřístupných souborů nebo složek.")

    except KeyboardInterrupt:
        print("\n\n⚠️ Skenování přerušeno uživatelem. Ukládám rozpracovaná data...")
        if files_batch and cur is not None:
            try:
                # Discard anything left from an interrupted, uncommitted
                # write so the retry below cannot insert the same rows twice.
                conn.rollback()
                save_batch(files_batch)
                total_count += len(files_batch)
                print(f"Posledních {len(files_batch)} záznamů uloženo.")
            except Exception as save_error:
                print(f"Nepodařilo se uložit poslední dávku: {save_error}")
        sys.exit(0)

    except Exception as e:
        # Top-level boundary of the script: report the error and fall through
        # to cleanup instead of crashing with a traceback.
        print(f"\n❌ Chyba: {e}")

    finally:
        if pbar is not None:
            pbar.close()
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

    return total_count

if __name__ == "__main__":
    # perf_counter is monotonic, so the measured duration is not affected by
    # system clock adjustments during a long scan.
    start_time = time.perf_counter()
    indexed_count = scan_to_postgres()
    duration = time.perf_counter() - start_time

    # Report throughput from the number of files actually indexed rather than
    # an assumed total. If scan_to_postgres() provides no count (returns
    # None), only the elapsed time is printed instead of a made-up rate.
    if isinstance(indexed_count, int) and duration > 0:
        rate = int(indexed_count / duration)
        print(f"⏱️ Celkový čas: {duration / 60:.2f} minut (rychlost: {rate} souborů/s)")
    else:
        print(f"⏱️ Celkový čas: {duration / 60:.2f} minut")