From f9082f1e5bfdec8ba4839eb2c3f198ed04bf9a40 Mon Sep 17 00:00:00 2001 From: "vladimir.buzalka" Date: Tue, 10 Feb 2026 10:29:20 +0100 Subject: [PATCH] z230 --- .claude/settings.local.json | 6 +- backup_report.xlsx | Bin 0 -> 5493 bytes indexer/config.py | 2 +- indexer/db.py | 13 ++-- indexer/scanner.py | 5 +- main.py | 15 +++++ report.py | 122 ++++++++++++++++++++++++++++++++++++ 7 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 backup_report.xlsx create mode 100644 report.py diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 3a20db1..1c731c7 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -8,7 +8,11 @@ "Bash(pip install:*)", "Bash(tasklist:*)", "Bash(wmic process:*)", - "Bash(taskkill:*)" + "Bash(taskkill:*)", + "Bash(C:Pythonpython.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''files count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")", + "Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(''RUNS:'', cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")", + "Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password=''Vlado9674+'', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); rows = cur.fetchall\\(\\); print\\(''RUNS:''\\); [print\\(r\\) for r in rows]; cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)[0]\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM file_events''\\); print\\(''EVENTS count:'', cur.fetchone\\(\\)[0]\\); conn.close\\(\\)\")", + "Bash(/c/Python/python.exe:*)" ] } } diff --git a/backup_report.xlsx b/backup_report.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..81709f36bd44d67d79d38613695493d3b01d5762 GIT binary patch literal 5493 zcmZ`-1yoeq_Z|?CmJ}q#0i<(KLRz|0I)srKi2;W080iKH329JDU}&W!g`pe}P!Nz1 z5b1{h_*`$jC;#`mbI-l&t~KAjcb~n#efIgZ)vn=C0RRAez_G5XnPRVo@^j475N42J zhAqrS+ZE;v=eKrt=JRrN&_HPrbqWyU&9~`#G-O0E6_P%bjLLb7;&Xpi&*_Lb+1kXB zcJlV_qtEAyyu)6fuCUFwwBAplaI)A%@-|xq9s+Ev)=Q2Tq1IW9zgU`}2 zG{FvEC)@h9{+yT$vn(JP9srPa7XYBdOpKQUzZ(SX2>yK){53V3CZ_OdQL2Egsu3@{ z14p8sd%^WcN;uSG2AP4_)MS1}ZWQa_ZJUS)Y*V^NHkeAXnWWL2`2dG!?Q~c{X=PQ^ zd-n(54zX7i?W{6O?~{`N-+~7sGor6!0<*l0{KhPNL_Me<3ogK-d?d@> z6oK41c6fg_{Y46lS`%j!3U(5pH@Y1-3@cgEG>Q1C-`U;%)K1#azISv?IVhyxKLxj0 z<)C1k8efP+LOk1O*u(Fjd-G+?z_c64k#2r*+e@^5a4g!H0cT#PC8slc&!MeQmH=4* z9kHD{n66ALejTf)HGb$nx1I^fB8)ZF8n^fSJRTR>Q!(O2(z>k`-c=ddQ#RtZ4rRZ4 zUTI&rb`v6}xVY{^L0$JvX8bn5$k_w^VeDF}1?P!-3f(8hmd~^7XD9$=$*fr$AHd@VibDl&<0oc5;5GgM-8( zHX+p3G2y!A13PBJ)HmiR)KX52E3oDBfsy}#mXZ^2@>P57oyinMR)v>+yvd2}{td18eRHu9hh?xAl8^F?RHhL}WQuePS@31} zsKe>?BE``IgSWE}7n}JmMmXm=7F{YGt099=&Km>5J_eR4J1XA?|h)lhijKFT(q&J#V!uLwvC=N2l+1mK}w(iv{xe zql&hg_9XMh8tw!A?K;MzO}{8QnRcI63QkXg_&97HxeoR9>Q^OmPKmv2-#CXzP6{qJd$2+=WozQejyRv~V;OY(oMwEf+ z&YRY!l1OSz)P-?H-%Db0&6`9N$~Gx9#j5$cxAa<9xK+9hMB5aE>nYM`-!3lqx@hV( zQh3zgz!qw15qsqW7PKqgnd2oU;4zIbrdZH0-~70enN{g?$Kxc;v%_lV+__)r#y5!- z%RO`X%7BB3{eWJH)n_At?QX z=-m!+6DRd>g_KyJ0#9g|ro|W=1y$4=Vq6v)*Tt=^-oeJfb zMrfvEp*b<<8SyXIES4#G5j{G+0R$jctLHdkpKvt>yjeYb;eXP%*F3J*Td6Z=h+ znH1S_lS`{z_hU}Nvxza?16jsXJ+3IOuIQN-B=!z6+9l3=B03kY@PbbGV|2M#EFnzx zqs%wynF$ie0SSm95!XU6qyApOk|yDU*^4cbH=W&aUpJ%Mu>M>?R^`<)53m6MT0+eD zy?}VaT9zXgV*b^WY@(Hh!r{1W_nX>W$Qe=9no1`8L;Y$dd4#9Va7ROPGonLuJ^+f4>T`T2*idKIK6qkIsjn6;M}I`CEt)^S=wsE;eM zD3YTiY=0^xA{RmTQEyQof0~*z7H$?#Do<+3zUaxe8t${Fwq!r~)jwlAet(IYfs%Tu zFefGD9=>vocnAquw=DHhE@64OHCTfwZieXx?My!4K%%^pbVfGr;~N)B>p`UHlU*@E zj_C)n&I0u2Ut+?*MOD|s9o_|p8j{5iPr3!XbX}<4T$JBA8l2LBJ!3>@xH;XKK$>kv zMa@aC-7%Gxf}BTybOVixGkwxvA`$^`b}*N(1<+lFgXH-Hsj)M2cPK$}`lCf|P@3q>x@h4Yc zdTwVI7_K}a0RS-PuVxGG=IsE6|7x>7KbnJO3XokqBHLnA=`$(?yt0V}K{L7PZuq;0 zV5wbnw7*Ka*x$9jp8)D@cG3c>zS&xA-5PEcRvlZrR&pp(G(AJa*D_I6=TsS*JA>9w z_!0WF6r}vhL>8G*Z!jXl%cgK$Gvs>oCmuo^>oF8X6QzBg1gy+Mx8&jsva_*iZyhr_ zcF1F1N%-RAI^3MV#emAfpSRn6^4KDQEf_eMq}v}1|7sd)%^u>Tg(wNo6rDkqdvZrw zomp*0f2lDrtIN5!rj1Xa{<+rQ>PK-x*&>DdYc!q}cw?(aEQWUGY4S_EJG>ki;uR7Ym2A^*rpZc&B)lmp= z(Y{Jy^(eBO)to&IQ}#E{3}I>l9@>9!?q^AgWwo~(93WVH-(w7P z=GhFkZk%~(*aFZ$)9vQ$ZW5lC}|p!Q_b>5hg9ZqvrkO0&DS z4ym!L!VZ!4Pg2p3~ysq~}h5}s)rv^Q~+yaS1EagknaLMsy^odBW?big9 zqA$}Y2~mR~5JcJF2;rg0R}1vkos>gneWLBCp=X5*z1Fk}t2@ib-vnIUNDA;<@mc6t zj&eQr7PG0&d>Mjnw_NPge^!cQ$+!`RDact zEm%JFMm~b5F><9KqdU>2d+#82s{J;ET*MWmLN}f!7OF>kq*O`ed~Pk=EwXux^qO0} z+#M;Ul00t97AT4wn^YiRhrKz*{j(yhsEiRqFniK0jH5SxSA<`y=VKVm{#Q+iO*DmJ zcGk}>RcTnZ6|L_>BkpLl+|if;v0IcNT^>cVRwe=H+X6JzZbA5cRs9(JrFt_Q`gq~_ z;2kh>-zF`R3*nPh+*;bMES*cobExX7h{yNYP8A%9JF8nQWF_w-&R|hYiX>J>KQi(i z9lPk-sk`>d50@;)Nk=iCdR%2?<0IRewE|X@DCcV(!RI(+mtlf%cD`_8p3&ox zw~(VGjmlu=hL2n$lHUP84Dr02k*`Ms?$O32GA%(2N3$cJ;|aMmEK8oQ2G?agJ;%Xg zEQH%Eg=M~CtXbsX7!C1xGU8Ha6X*xE$7A+|3#H|_ym)-(#B{e*oAZs!9&+wv?d4vj zZ-EO8Es zHVYV|Qriw~?f5}az7jf<5)``-_5pD31{0uAf`M8A{gsM6cE4cZwm*^Pnrt0Dlwq!e z`BqI|oVpG#=_Si_@tTNKML@jh_6-5Qa^_NbTTZTTrb}j>4hQ!-Op-Vfia6HIPHHp@ zg9a(c`SGCfwQI?NK1I^u=ChlRnnX}ajTJ2Od+Q5Ce`a20m>hy0An8!UdNMm4vZ zk9!*eq>x; z*jl*zbjLn*!6rX=!6-&p0YU2krO}7l62WRagQl2KI=0Bgh57v(s@n=2O|E?%JrB*C z<#HlxBUtvhzv>_P1f|4;IdD_HQ|^W4*J7_hum%U|?3#()9jR7x58~8RV+aa+@Pe{< zl}6uFgSHc_&%O*fStT?nFHJZSl7Fhh30%o`SSyR}lS}9`6I(*qZhpFp_~JrxVkoM0 z1T6IUiSz!=2j*SRmv_%i>AY{`op1S~tBR6)v`8YpN>)i@5(W#K3h=)rGECO|bp&B% z`2P!LSJ7A1soz)tAPB4L-{`-!s;ls;8qPoPS dict: f["size"], f["mtime"], f["content_hash"], run_id, run_id) for f in chunk] ) - # pymysql executemany: lastrowid = first id in batch - first_id = cur.lastrowid - for j, f in enumerate(chunk): - path_to_id[f["relative_path"]] = first_id + j + # Fetch real IDs — lastrowid+j is unreliable with executemany + paths = [f["relative_path"] for f in chunk] + placeholders = ",".join(["%s"] * len(paths)) + cur.execute( + f"SELECT id, relative_path FROM files WHERE relative_path IN ({placeholders})", + paths, + ) + for row in cur.fetchall(): + path_to_id[row[1]] = row[0] return path_to_id diff --git a/indexer/scanner.py b/indexer/scanner.py index a4a50fe..2e0d468 100644 --- a/indexer/scanner.py +++ b/indexer/scanner.py @@ -20,11 +20,14 @@ def scan_files(root_path: str) -> dict: continue rel_path = os.path.relpath(full_path, root_path).replace("\\", "/") rel_dir = os.path.relpath(root, root_path).replace("\\", "/") + # Truncate microseconds — MySQL DATETIME rounds to whole seconds, + # which causes false "modified" detections on every run. + mtime = datetime.fromtimestamp(stat.st_mtime).replace(microsecond=0) result[rel_path] = { "full_path": full_path, "file_name": name, "directory": rel_dir, "size": stat.st_size, - "mtime": datetime.fromtimestamp(stat.st_mtime), + "mtime": mtime, } return result diff --git a/main.py b/main.py index 5fb17d7..5c974bb 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,6 @@ +import os +from datetime import datetime + from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH from indexer.scanner import scan_files from indexer.hasher import blake3_file @@ -195,6 +198,18 @@ def main(): print(f"Unchanged: {stats['unchanged']}") print("=" * 60) + # ── 8. Generate Excel report ── + try: + from report import generate_report + + report_dir = r"u:\Dropbox\!!!Days\Downloads Z230" + timestamp = datetime.now().strftime("%Y-%m-%d %H_%M") + report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx") + print(f"\n[8] Generating report...") + generate_report(report_path) + except Exception as e: + print(f" WARN: Report generation failed: {e}") + if __name__ == "__main__": main() diff --git a/report.py b/report.py new file mode 100644 index 0000000..84d59f2 --- /dev/null +++ b/report.py @@ -0,0 +1,122 @@ +""" +Generate Excel report of backup runs and file events. + +Usage: python report.py [output.xlsx] + +Single sheet with all events from all runs. +Skips runs where total events > THRESHOLD (mass initial imports). +""" + +import os +import sys +from datetime import datetime as dt +import pymysql +from openpyxl import Workbook +from openpyxl.styles import Font, PatternFill, Alignment, Border, Side +from openpyxl.utils import get_column_letter +from indexer.config import DB_CONFIG + +THRESHOLD = 5000 # skip runs with more events than this + + +def generate_report(output_path: str): + conn = pymysql.connect(**DB_CONFIG) + cur = conn.cursor() + + # ── 1. Load runs ── + cur.execute("SELECT * FROM runs ORDER BY id") + runs = cur.fetchall() + + # ── 2. Collect all events from non-skipped runs ── + all_events = [] + skipped_runs = [] + for run in runs: + run_id, started, finished, status, total, new, mod, deleted, unchanged = run + total_changes = new + mod + deleted + if total_changes > THRESHOLD: + skipped_runs.append(run_id) + continue + cur.execute( + """SELECT fe.event_type, f.relative_path, f.file_name, f.directory, + fe.old_size, fe.new_size + FROM file_events fe + JOIN files f ON fe.file_id = f.id + WHERE fe.run_id = %s + ORDER BY fe.event_type, f.relative_path""", + (run_id,) + ) + for ev in cur.fetchall(): + all_events.append((run_id, started, *ev)) + + conn.close() + + # ── 3. Build Excel — single sheet ── + wb = Workbook() + ws = wb.active + ws.title = "Events" + + header_font = Font(bold=True, color="FFFFFF", size=11) + header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") + thin_border = Border( + left=Side(style="thin"), right=Side(style="thin"), + top=Side(style="thin"), bottom=Side(style="thin"), + ) + + type_fills = { + "CREATED": PatternFill(start_color="E2EFDA", end_color="E2EFDA", fill_type="solid"), + "MODIFIED": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"), + "DELETED": PatternFill(start_color="FCE4EC", end_color="FCE4EC", fill_type="solid"), + } + + headers = ["Run #", "Run Time", "Event", "Path", "File Name", "Directory", + "Old Size", "New Size", "Size Change"] + for col, h in enumerate(headers, 1): + cell = ws.cell(row=1, column=col, value=h) + cell.font = header_font + cell.fill = header_fill + cell.alignment = Alignment(horizontal="center") + cell.border = thin_border + + for row_idx, ev in enumerate(all_events, 2): + run_id, started, event_type, rel_path, file_name, directory, old_size, new_size = ev + + size_change = "" + if old_size is not None and new_size is not None: + diff = new_size - old_size + if diff != 0: + size_change = f"{'+' if diff > 0 else ''}{diff:,}" + elif new_size is not None: + size_change = f"+{new_size:,}" + elif old_size is not None: + size_change = f"-{old_size:,}" + + values = [run_id, started, event_type, rel_path, file_name, directory, + old_size, new_size, size_change] + + fill = type_fills.get(event_type) + for col, val in enumerate(values, 1): + cell = ws.cell(row=row_idx, column=col, value=val) + cell.border = thin_border + if fill: + cell.fill = fill + + # Auto-width + widths = [8, 18, 10, 60, 30, 40, 12, 12, 14] + for col, w in enumerate(widths, 1): + ws.column_dimensions[get_column_letter(col)].width = w + + # Autofilter + ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{len(all_events) + 1}" + + wb.save(output_path) + print(f"Report saved to {output_path}") + print(f" Runs total: {len(runs)}, skipped: {len(skipped_runs)} (threshold: {THRESHOLD})") + print(f" Events: {len(all_events)} rows") + + +if __name__ == "__main__": + REPORT_DIR = r"u:\Dropbox\!!!Days\Downloads Z230" + timestamp = dt.now().strftime("%Y-%m-%d %H_%M") + default_name = f"{timestamp} DropboxBackupReport.xlsx" + output = sys.argv[1] if len(sys.argv) > 1 else os.path.join(REPORT_DIR, default_name) + generate_report(output)