This commit is contained in:
2026-02-10 10:29:20 +01:00
parent 9838164b88
commit f9082f1e5b
7 changed files with 156 additions and 7 deletions

View File

@@ -8,7 +8,11 @@
"Bash(pip install:*)",
"Bash(tasklist:*)",
"Bash(wmic process:*)",
"Bash(taskkill:*)"
"Bash(taskkill:*)",
"Bash(C:Pythonpython.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''files count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")",
"Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(''RUNS:'', cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")",
"Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password=''Vlado9674+'', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); rows = cur.fetchall\\(\\); print\\(''RUNS:''\\); [print\\(r\\) for r in rows]; cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)[0]\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM file_events''\\); print\\(''EVENTS count:'', cur.fetchone\\(\\)[0]\\); conn.close\\(\\)\")",
"Bash(/c/Python/python.exe:*)"
]
}
}

BIN
backup_report.xlsx Normal file

Binary file not shown.

View File

@@ -29,5 +29,5 @@ BACKUP_PATH = os.getenv("BACKUP_PATH")
# Behaviour
# =========================
DRY_RUN = os.getenv("DRY_RUN", "1") == "1"
DRY_RUN = os.getenv("DRY_RUN", "true").lower() in ("1", "true", "yes")
BATCH_SIZE = int(os.getenv("BATCH_SIZE", 1000))

View File

@@ -79,10 +79,15 @@ def batch_insert_files(cur, files_list: list, run_id: int) -> dict:
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
for f in chunk]
)
# pymysql executemany: lastrowid = first id in batch
first_id = cur.lastrowid
for j, f in enumerate(chunk):
path_to_id[f["relative_path"]] = first_id + j
# Fetch real IDs — lastrowid+j is unreliable with executemany
paths = [f["relative_path"] for f in chunk]
placeholders = ",".join(["%s"] * len(paths))
cur.execute(
f"SELECT id, relative_path FROM files WHERE relative_path IN ({placeholders})",
paths,
)
for row in cur.fetchall():
path_to_id[row[1]] = row[0]
return path_to_id

View File

@@ -20,11 +20,14 @@ def scan_files(root_path: str) -> dict:
continue
rel_path = os.path.relpath(full_path, root_path).replace("\\", "/")
rel_dir = os.path.relpath(root, root_path).replace("\\", "/")
# Truncate microseconds — MySQL DATETIME rounds to whole seconds,
# which causes false "modified" detections on every run.
mtime = datetime.fromtimestamp(stat.st_mtime).replace(microsecond=0)
result[rel_path] = {
"full_path": full_path,
"file_name": name,
"directory": rel_dir,
"size": stat.st_size,
"mtime": datetime.fromtimestamp(stat.st_mtime),
"mtime": mtime,
}
return result

15
main.py
View File

@@ -1,3 +1,6 @@
import os
from datetime import datetime
from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH
from indexer.scanner import scan_files
from indexer.hasher import blake3_file
@@ -195,6 +198,18 @@ def main():
print(f"Unchanged: {stats['unchanged']}")
print("=" * 60)
# ── 8. Generate Excel report ──
try:
from report import generate_report
report_dir = r"u:\Dropbox\!!!Days\Downloads Z230"
timestamp = datetime.now().strftime("%Y-%m-%d %H_%M")
report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx")
print(f"\n[8] Generating report...")
generate_report(report_path)
except Exception as e:
print(f" WARN: Report generation failed: {e}")
if __name__ == "__main__":
main()

122
report.py Normal file
View File

@@ -0,0 +1,122 @@
"""
Generate Excel report of backup runs and file events.
Usage: python report.py [output.xlsx]
Single sheet with all events from all runs.
Skips runs where total events > THRESHOLD (mass initial imports).
"""
import os
import sys
from datetime import datetime as dt
import pymysql
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from indexer.config import DB_CONFIG
THRESHOLD = 5000 # skip runs with more events than this
def generate_report(output_path: str):
    """Build a single-sheet Excel report of backup-run file events.

    Connects to the database described by ``DB_CONFIG``, loads every run,
    skips runs whose change count (new + modified + deleted) exceeds
    ``THRESHOLD`` (mass initial imports), and writes the remaining events
    to *output_path* as a styled, auto-filtered worksheet.

    Args:
        output_path: Destination ``.xlsx`` path; parent directory must exist.

    Raises:
        pymysql.MySQLError: on connection or query failure.
        OSError: if the workbook cannot be written to *output_path*.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        cur = conn.cursor()
        # ── 1. Load runs ──
        # NOTE(review): SELECT * relies on the runs table's column order
        # matching the 9-name unpack below — confirm against the schema.
        cur.execute("SELECT * FROM runs ORDER BY id")
        runs = cur.fetchall()
        # ── 2. Collect all events from non-skipped runs ──
        all_events = []
        skipped_runs = []
        for run in runs:
            run_id, started, finished, status, total, new, mod, deleted, unchanged = run
            total_changes = new + mod + deleted
            if total_changes > THRESHOLD:
                # Mass initial import — would swamp the report; record and skip.
                skipped_runs.append(run_id)
                continue
            cur.execute(
                """SELECT fe.event_type, f.relative_path, f.file_name, f.directory,
                           fe.old_size, fe.new_size
                   FROM file_events fe
                   JOIN files f ON fe.file_id = f.id
                   WHERE fe.run_id = %s
                   ORDER BY fe.event_type, f.relative_path""",
                (run_id,)
            )
            for ev in cur.fetchall():
                all_events.append((run_id, started, *ev))
    finally:
        # BUGFIX: the original closed the connection only on the success path,
        # leaking it whenever a query raised. Always release it.
        conn.close()
    # ── 3. Build Excel — single sheet ──
    wb = Workbook()
    ws = wb.active
    ws.title = "Events"
    header_font = Font(bold=True, color="FFFFFF", size=11)
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    thin_border = Border(
        left=Side(style="thin"), right=Side(style="thin"),
        top=Side(style="thin"), bottom=Side(style="thin"),
    )
    # Row background per event type (green / amber / pink).
    type_fills = {
        "CREATED": PatternFill(start_color="E2EFDA", end_color="E2EFDA", fill_type="solid"),
        "MODIFIED": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"),
        "DELETED": PatternFill(start_color="FCE4EC", end_color="FCE4EC", fill_type="solid"),
    }
    headers = ["Run #", "Run Time", "Event", "Path", "File Name", "Directory",
               "Old Size", "New Size", "Size Change"]
    for col, h in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col, value=h)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = Alignment(horizontal="center")
        cell.border = thin_border
    for row_idx, ev in enumerate(all_events, 2):
        run_id, started, event_type, rel_path, file_name, directory, old_size, new_size = ev
        # Human-readable signed delta; blank when unchanged or unknowable.
        size_change = ""
        if old_size is not None and new_size is not None:
            diff = new_size - old_size
            if diff != 0:
                size_change = f"{'+' if diff > 0 else ''}{diff:,}"
        elif new_size is not None:
            size_change = f"+{new_size:,}"   # created: no previous size
        elif old_size is not None:
            size_change = f"-{old_size:,}"   # deleted: no current size
        values = [run_id, started, event_type, rel_path, file_name, directory,
                  old_size, new_size, size_change]
        fill = type_fills.get(event_type)
        for col, val in enumerate(values, 1):
            cell = ws.cell(row=row_idx, column=col, value=val)
            cell.border = thin_border
            if fill:
                cell.fill = fill
    # Fixed column widths (openpyxl has no true auto-fit).
    widths = [8, 18, 10, 60, 30, 40, 12, 12, 14]
    for col, w in enumerate(widths, 1):
        ws.column_dimensions[get_column_letter(col)].width = w
    # Autofilter over the full data range (header + data rows).
    ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{len(all_events) + 1}"
    wb.save(output_path)
    print(f"Report saved to {output_path}")
    print(f"  Runs total: {len(runs)}, skipped: {len(skipped_runs)} (threshold: {THRESHOLD})")
    print(f"  Events: {len(all_events)} rows")
if __name__ == "__main__":
    # CLI entry point: optional explicit output path, otherwise a
    # timestamped default file in the fixed report directory.
    default_dir = r"u:\Dropbox\!!!Days\Downloads Z230"
    stamp = dt.now().strftime("%Y-%m-%d %H_%M")
    fallback = os.path.join(default_dir, f"{stamp} DropboxBackupReport.xlsx")
    if len(sys.argv) > 1:
        target = sys.argv[1]
    else:
        target = fallback
    generate_report(target)