diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 3a20db1..1c731c7 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -8,7 +8,11 @@ "Bash(pip install:*)", "Bash(tasklist:*)", "Bash(wmic process:*)", - "Bash(taskkill:*)" + "Bash(taskkill:*)", + "Bash(C:Pythonpython.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''files count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")", + "Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(''RUNS:'', cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")", + "Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password=''***REDACTED***'', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); rows = cur.fetchall\\(\\); print\\(''RUNS:''\\); [print\\(r\\) for r in rows]; cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)[0]\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM file_events''\\); print\\(''EVENTS count:'', cur.fetchone\\(\\)[0]\\); conn.close\\(\\)\")", + "Bash(/c/Python/python.exe:*)" ] } } diff --git a/backup_report.xlsx b/backup_report.xlsx new file mode 100644 index 0000000..81709f3 Binary files /dev/null and b/backup_report.xlsx differ diff --git a/indexer/config.py b/indexer/config.py index a8bf2b0..bdb01d0 100644 --- a/indexer/config.py +++ b/indexer/config.py @@ -29,5 +29,5 @@ 
BACKUP_PATH = os.getenv("BACKUP_PATH") # Behaviour # ========================= -DRY_RUN = os.getenv("DRY_RUN", "1") == "1" +DRY_RUN = os.getenv("DRY_RUN", "true").lower() in ("1", "true", "yes") BATCH_SIZE = int(os.getenv("BATCH_SIZE", 1000)) diff --git a/indexer/db.py b/indexer/db.py index ac4cae1..f2a9ea9 100644 --- a/indexer/db.py +++ b/indexer/db.py @@ -79,10 +79,15 @@ def batch_insert_files(cur, files_list: list, run_id: int) -> dict: f["size"], f["mtime"], f["content_hash"], run_id, run_id) for f in chunk] ) - # pymysql executemany: lastrowid = first id in batch - first_id = cur.lastrowid - for j, f in enumerate(chunk): - path_to_id[f["relative_path"]] = first_id + j + # Fetch real IDs — lastrowid+j is unreliable with executemany + paths = [f["relative_path"] for f in chunk] + placeholders = ",".join(["%s"] * len(paths)) + cur.execute( + f"SELECT id, relative_path FROM files WHERE relative_path IN ({placeholders})", + paths, + ) + for row in cur.fetchall(): + path_to_id[row[1]] = row[0] return path_to_id diff --git a/indexer/scanner.py b/indexer/scanner.py index a4a50fe..2e0d468 100644 --- a/indexer/scanner.py +++ b/indexer/scanner.py @@ -20,11 +20,14 @@ def scan_files(root_path: str) -> dict: continue rel_path = os.path.relpath(full_path, root_path).replace("\\", "/") rel_dir = os.path.relpath(root, root_path).replace("\\", "/") + # Truncate microseconds — MySQL DATETIME rounds to whole seconds, + # which causes false "modified" detections on every run. 
+ mtime = datetime.fromtimestamp(stat.st_mtime).replace(microsecond=0) result[rel_path] = { "full_path": full_path, "file_name": name, "directory": rel_dir, "size": stat.st_size, - "mtime": datetime.fromtimestamp(stat.st_mtime), + "mtime": mtime, } return result diff --git a/main.py b/main.py index 5fb17d7..5c974bb 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,6 @@ +import os +from datetime import datetime + from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH from indexer.scanner import scan_files from indexer.hasher import blake3_file @@ -195,6 +198,18 @@ def main(): print(f"Unchanged: {stats['unchanged']}") print("=" * 60) + # ── 8. Generate Excel report ── + try: + from report import generate_report + + report_dir = r"u:\Dropbox\!!!Days\Downloads Z230" + timestamp = datetime.now().strftime("%Y-%m-%d %H_%M") + report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx") + print(f"\n[8] Generating report...") + generate_report(report_path) + except Exception as e: + print(f" WARN: Report generation failed: {e}") + if __name__ == "__main__": main() diff --git a/report.py b/report.py new file mode 100644 index 0000000..84d59f2 --- /dev/null +++ b/report.py @@ -0,0 +1,122 @@ +""" +Generate Excel report of backup runs and file events. + +Usage: python report.py [output.xlsx] + +Single sheet with all events from all runs. +Skips runs where total events > THRESHOLD (mass initial imports). +""" + +import os +import sys +from datetime import datetime as dt +import pymysql +from openpyxl import Workbook +from openpyxl.styles import Font, PatternFill, Alignment, Border, Side +from openpyxl.utils import get_column_letter +from indexer.config import DB_CONFIG + +THRESHOLD = 5000 # skip runs with more events than this + + +def generate_report(output_path: str): + conn = pymysql.connect(**DB_CONFIG) + cur = conn.cursor() + + # ── 1. Load runs ── + cur.execute("SELECT * FROM runs ORDER BY id") + runs = cur.fetchall() + + # ── 2. 
Collect all events from non-skipped runs ── + all_events = [] + skipped_runs = [] + for run in runs: + run_id, started, finished, status, total, new, mod, deleted, unchanged = run + total_changes = new + mod + deleted + if total_changes > THRESHOLD: + skipped_runs.append(run_id) + continue + cur.execute( + """SELECT fe.event_type, f.relative_path, f.file_name, f.directory, + fe.old_size, fe.new_size + FROM file_events fe + JOIN files f ON fe.file_id = f.id + WHERE fe.run_id = %s + ORDER BY fe.event_type, f.relative_path""", + (run_id,) + ) + for ev in cur.fetchall(): + all_events.append((run_id, started, *ev)) + + conn.close() + + # ── 3. Build Excel — single sheet ── + wb = Workbook() + ws = wb.active + ws.title = "Events" + + header_font = Font(bold=True, color="FFFFFF", size=11) + header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") + thin_border = Border( + left=Side(style="thin"), right=Side(style="thin"), + top=Side(style="thin"), bottom=Side(style="thin"), + ) + + type_fills = { + "CREATED": PatternFill(start_color="E2EFDA", end_color="E2EFDA", fill_type="solid"), + "MODIFIED": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"), + "DELETED": PatternFill(start_color="FCE4EC", end_color="FCE4EC", fill_type="solid"), + } + + headers = ["Run #", "Run Time", "Event", "Path", "File Name", "Directory", + "Old Size", "New Size", "Size Change"] + for col, h in enumerate(headers, 1): + cell = ws.cell(row=1, column=col, value=h) + cell.font = header_font + cell.fill = header_fill + cell.alignment = Alignment(horizontal="center") + cell.border = thin_border + + for row_idx, ev in enumerate(all_events, 2): + run_id, started, event_type, rel_path, file_name, directory, old_size, new_size = ev + + size_change = "" + if old_size is not None and new_size is not None: + diff = new_size - old_size + if diff != 0: + size_change = f"{'+' if diff > 0 else ''}{diff:,}" + elif new_size is not None: + size_change = 
f"+{new_size:,}" + elif old_size is not None: + size_change = f"-{old_size:,}" + + values = [run_id, started, event_type, rel_path, file_name, directory, + old_size, new_size, size_change] + + fill = type_fills.get(event_type) + for col, val in enumerate(values, 1): + cell = ws.cell(row=row_idx, column=col, value=val) + cell.border = thin_border + if fill: + cell.fill = fill + + # Auto-width + widths = [8, 18, 10, 60, 30, 40, 12, 12, 14] + for col, w in enumerate(widths, 1): + ws.column_dimensions[get_column_letter(col)].width = w + + # Autofilter + ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{len(all_events) + 1}" + + wb.save(output_path) + print(f"Report saved to {output_path}") + print(f" Runs total: {len(runs)}, skipped: {len(skipped_runs)} (threshold: {THRESHOLD})") + print(f" Events: {len(all_events)} rows") + + +if __name__ == "__main__": + REPORT_DIR = r"u:\Dropbox\!!!Days\Downloads Z230" + timestamp = dt.now().strftime("%Y-%m-%d %H_%M") + default_name = f"{timestamp} DropboxBackupReport.xlsx" + output = sys.argv[1] if len(sys.argv) > 1 else os.path.join(REPORT_DIR, default_name) + generate_report(output)