notebookVb
This commit is contained in:
+41
-10
@@ -214,6 +214,19 @@ def verify_file(local_path: Path, row) -> list[tuple[str, str, str]]:
|
|||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
|
||||||
|
def _progress(msg: str) -> None:
    """Draw *msg* as an in-place, single-line progress indicator on stdout.

    The line is rewritten on every call: the cursor is moved back to the
    start of the current line, the message is written, and whatever is left
    over from a previous, longer message is erased.

    When stdout is a terminal, the message is truncated to fit one terminal
    row. If stdout is not a terminal, the message is written unmodified.

    Args:
        msg: Text to display. It should not contain newlines.
    """
    import shutil  # local import: only needed by this helper

    stream = sys.stdout
    if stream.isatty():
        # A message wider than the terminal wraps onto a second row; "\r"
        # then only returns to the start of that last row, so earlier rows
        # are never overwritten. One column is kept free because writing
        # into the last column can already trigger a wrap on some terminals.
        # NOTE: truncation counts characters, not display cells, so
        # double-width characters may still overflow.
        limit = max(shutil.get_terminal_size().columns - 1, 1)
        msg = msg[:limit]

    # \r = return to the start of the line, \033[K = erase to end of line
    stream.write(f"\r{msg}\033[K")
    stream.flush()
|
||||||
|
|
||||||
|
|
||||||
|
def _progress_clear() -> None:
    """Terminate the current progress line by emitting a newline on stdout.

    Intended to be called before regular log output so that the next message
    starts on a fresh line instead of being appended to the progress line.

    NOTE(review): the newline is written unconditionally, so calling this
    when no progress line is currently displayed produces an empty line.
    """
    out = sys.stdout
    out.write("\n")
    out.flush()
|
||||||
|
|
||||||
|
|
||||||
# ── Zpracování ───────────────────────────────────────────────────────────────
|
# ── Zpracování ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@@ -231,7 +244,8 @@ def reset_state(conn, dry_run: bool) -> None:
|
|||||||
log.info("Reset done. Cleared verified_at on %d rows.", affected)
|
log.info("Reset done. Cleared verified_at on %d rows.", affected)
|
||||||
|
|
||||||
|
|
||||||
def process_batch(conn, batch_size: int, dry_run: bool) -> int:
|
def process_batch(conn, batch_size: int, dry_run: bool,
|
||||||
|
batch_num: int = 0, total_so_far: int = 0) -> int:
|
||||||
where = "" if REVERIFY_ALL else "WHERE verified_at IS NULL"
|
where = "" if REVERIFY_ALL else "WHERE verified_at IS NULL"
|
||||||
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
@@ -249,23 +263,29 @@ def process_batch(conn, batch_size: int, dry_run: bool) -> int:
|
|||||||
if not rows:
|
if not rows:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
batch_total = len(rows)
|
||||||
processed = 0
|
processed = 0
|
||||||
for row in rows:
|
crit_n = warn_n = info_n = 0
|
||||||
|
t_start = time.time()
|
||||||
|
|
||||||
|
for idx, row in enumerate(rows, 1):
|
||||||
photo_id = row["id"]
|
photo_id = row["id"]
|
||||||
local_path = db_path_to_local(row["file_path"])
|
local_path = db_path_to_local(row["file_path"])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
errors = verify_file(local_path, row)
|
errors = verify_file(local_path, row)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
# Vyčistit progress a logovat exception celý
|
||||||
|
_progress_clear()
|
||||||
log.exception("Unexpected error verifying id=%d (%s)", photo_id, local_path)
|
log.exception("Unexpected error verifying id=%d (%s)", photo_id, local_path)
|
||||||
errors = [("critical", "read_error", "Unexpected exception during verification")]
|
errors = [("critical", "read_error", "Unexpected exception during verification")]
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
for sev, code, msg in errors:
|
for sev, code, msg in errors:
|
||||||
|
_progress_clear()
|
||||||
log.info("[DRY RUN] id=%d %s/%s: %s", photo_id, sev, code, msg)
|
log.info("[DRY RUN] id=%d %s/%s: %s", photo_id, sev, code, msg)
|
||||||
else:
|
else:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
# Smažeme staré záznamy pro tuto fotku — vždy chceme čerstvý snapshot.
|
|
||||||
cur.execute("DELETE FROM photo_errors WHERE photo_id = %s", (photo_id,))
|
cur.execute("DELETE FROM photo_errors WHERE photo_id = %s", (photo_id,))
|
||||||
if errors:
|
if errors:
|
||||||
psycopg2.extras.execute_values(
|
psycopg2.extras.execute_values(
|
||||||
@@ -277,14 +297,24 @@ def process_batch(conn, batch_size: int, dry_run: bool) -> int:
|
|||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
processed += 1
|
processed += 1
|
||||||
|
for sev, _, _ in errors:
|
||||||
|
if sev == "critical": crit_n += 1
|
||||||
|
elif sev == "warning": warn_n += 1
|
||||||
|
elif sev == "info": info_n += 1
|
||||||
|
|
||||||
if errors:
|
# Single-line progress
|
||||||
crit = sum(1 for s, _, _ in errors if s == "critical")
|
elapsed = time.time() - t_start
|
||||||
warn = sum(1 for s, _, _ in errors if s == "warning")
|
rate = processed / elapsed if elapsed > 0 else 0
|
||||||
info = sum(1 for s, _, _ in errors if s == "info")
|
name = local_path.name[:40]
|
||||||
log.info("id=%d: %d errors (crit=%d, warn=%d, info=%d) — %s",
|
_progress(
|
||||||
photo_id, len(errors), crit, warn, info, local_path.name)
|
f"[batch {batch_num}] {idx}/{batch_total} "
|
||||||
|
f"total={total_so_far + processed} "
|
||||||
|
f"id={photo_id} "
|
||||||
|
f"crit={crit_n} warn={warn_n} info={info_n} "
|
||||||
|
f"{rate:.1f}/s {name}"
|
||||||
|
)
|
||||||
|
|
||||||
|
_progress_clear()
|
||||||
return processed
|
return processed
|
||||||
|
|
||||||
|
|
||||||
@@ -322,7 +352,8 @@ def main():
|
|||||||
|
|
||||||
batch_num += 1
|
batch_num += 1
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
count = process_batch(conn, remaining, args.dry_run)
|
count = process_batch(conn, remaining, args.dry_run,
|
||||||
|
batch_num=batch_num, total_so_far=total_processed)
|
||||||
elapsed = time.time() - t0
|
elapsed = time.time() - t0
|
||||||
|
|
||||||
if count == 0:
|
if count == 0:
|
||||||
|
|||||||
Reference in New Issue
Block a user