From 1a08b4a8cd0d03db1acc08d5f7424194fe599fa0 Mon Sep 17 00:00:00 2001 From: administrator Date: Mon, 25 May 2026 06:51:07 +0200 Subject: [PATCH] notebookVb --- 00 PictureCollector/collect_pictures.py | 25 ++++++++++++++++--- .../collect_pictures_windows.py | 22 +++++++++++++++- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/00 PictureCollector/collect_pictures.py b/00 PictureCollector/collect_pictures.py index 2e0d843..0c402dc 100644 --- a/00 PictureCollector/collect_pictures.py +++ b/00 PictureCollector/collect_pictures.py @@ -15,6 +15,7 @@ Bezpečné pro souběžný běh na více strojích (ON CONFLICT v SQL). import os import sys +import time import shutil import socket import logging @@ -203,6 +204,8 @@ def process(conn, hostname): log.info(f"Přeskočeno (již v DB): {stats['preskoceno']}") continue + t_start = time.perf_counter() + try: velikost = source.stat().st_size hash_val = compute_blake3(source) @@ -211,13 +214,18 @@ def process(conn, hostname): stats["chyb"] += 1 continue + t_hash = time.perf_counter() + zaloha_id = known_hashes.get(hash_val) if zaloha_id is not None: - # Hash známý z prefetch — duplikát, jen zapíšeme zdroj stats["duplicit"] += 1 + vel_mb = velikost / (1024 * 1024) + log.info( + f"DUPLIKÁT {source.name} " + f"({vel_mb:.1f} MB, hash={t_hash - t_start:.2f}s)" + ) else: - # Nový hash — zkopírovat a zapsat do zaloha_obrazku dest = dest_path_for(source, hostname) try: copy_to_backup(source, dest) @@ -226,21 +234,30 @@ def process(conn, hostname): stats["chyb"] += 1 continue + t_copy = time.perf_counter() + cur = conn.cursor() cur.execute(SQL_INSERT_ZALOHA, (hash_val, str(dest), source.name, velikost)) row = cur.fetchone() if row: zaloha_id = row[0] else: - # Jiný stroj vložil mezitím stejný hash — ON CONFLICT, získáme existující ID cur.execute(SQL_GET_ZALOHA_ID, (hash_val,)) zaloha_id = cur.fetchone()[0] cur.close() conn.commit() + t_db = time.perf_counter() + known_hashes[hash_val] = zaloha_id stats["kopirovano"] += 1 - log.info(f"ZKOPÍROVÁNO [{stats['kopirovano']:>6}] {source}") + vel_mb = velikost / (1024 * 1024) + log.info( + f"ZKOPÍROVÁNO [{stats['kopirovano']:>6}] {source.name} " + f"({vel_mb:.1f} MB, hash={t_hash - t_start:.2f}s " + f"copy={t_copy - t_hash:.2f}s db={t_db - t_copy:.2f}s " + f"celkem={t_db - t_start:.2f}s)" + ) pending_zdroje.append((hostname, src_str, source.name, velikost, hash_val, zaloha_id)) known_sources.add(src_str) diff --git a/00 PictureCollector/collect_pictures_windows.py b/00 PictureCollector/collect_pictures_windows.py index 4d35227..68450ed 100644 --- a/00 PictureCollector/collect_pictures_windows.py +++ b/00 PictureCollector/collect_pictures_windows.py @@ -14,6 +14,7 @@ Bezpečné pro souběžný běh na více strojích (ON CONFLICT v SQL). import os import sys +import time import shutil import socket import string @@ -210,6 +211,8 @@ def process(conn, hostname, drives): log.info(f"Přeskočeno (již v DB): {stats['preskoceno']}") continue + t_start = time.perf_counter() + try: velikost = source.stat().st_size hash_val = compute_blake3(source) @@ -218,10 +221,17 @@ def process(conn, hostname, drives): stats["chyb"] += 1 continue + t_hash = time.perf_counter() + zaloha_id = known_hashes.get(hash_val) if zaloha_id is not None: stats["duplicit"] += 1 + vel_mb = velikost / (1024 * 1024) + log.info( + f"DUPLIKÁT {source.name} " + f"({vel_mb:.1f} MB, hash={t_hash - t_start:.2f}s)" + ) else: dest = dest_path_for(source, hostname) try: @@ -231,6 +241,8 @@ def process(conn, hostname, drives): stats["chyb"] += 1 continue + t_copy = time.perf_counter() + cur = conn.cursor() cur.execute(SQL_INSERT_ZALOHA, (hash_val, str(dest), source.name, velikost)) row = cur.fetchone() @@ -242,9 +254,17 @@ def process(conn, hostname, drives): cur.close() conn.commit() + t_db = time.perf_counter() + known_hashes[hash_val] = zaloha_id stats["kopirovano"] += 1 - log.info(f"ZKOPÍROVÁNO [{stats['kopirovano']:>6}] {source}") + vel_mb = velikost / (1024 * 1024) + log.info( + f"ZKOPÍROVÁNO [{stats['kopirovano']:>6}] {source.name} " + f"({vel_mb:.1f} MB, hash={t_hash - t_start:.2f}s " + f"copy={t_copy - t_hash:.2f}s db={t_db - t_copy:.2f}s " + f"celkem={t_db - t_start:.2f}s)" + ) pending_zdroje.append((hostname, src_str, source.name, velikost, hash_val, zaloha_id)) known_sources.add(src_str)