z230
This commit is contained in:
@@ -12,7 +12,15 @@
|
||||
"Bash(C:Pythonpython.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''files count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")",
|
||||
"Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password='''', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); print\\(''RUNS:'', cur.fetchall\\(\\)\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)\\); conn.close\\(\\)\")",
|
||||
"Bash(/c/Python/python.exe -c \"import pymysql; conn = pymysql.connect\\(host=''192.168.1.76'', port=3306, user=''root'', password=''Vlado9674+'', db=''OrdinaceDropBoxBackup''\\); cur = conn.cursor\\(\\); cur.execute\\(''SELECT * FROM runs''\\); rows = cur.fetchall\\(\\); print\\(''RUNS:''\\); [print\\(r\\) for r in rows]; cur.execute\\(''SELECT COUNT\\(*\\) FROM files''\\); print\\(''FILES count:'', cur.fetchone\\(\\)[0]\\); cur.execute\\(''SELECT COUNT\\(*\\) FROM file_events''\\); print\\(''EVENTS count:'', cur.fetchone\\(\\)[0]\\); conn.close\\(\\)\")",
|
||||
"Bash(/c/Python/python.exe:*)"
|
||||
"Bash(/c/Python/python.exe:*)",
|
||||
"Bash(cd \"u:\\\\OnedriveOrdinace\\\\OneDrive\\\\DropBoxBackupClaude\"\" && powershell -Command \"Get-ChildItem -Recurse -Filter '*.blob')",
|
||||
"Bash(Measure-Object:*)",
|
||||
"Bash(Select-Object -ExpandProperty Count \")",
|
||||
"Bash(powershell:*)",
|
||||
"Bash(\"C:\\\\Python\\\\python.exe\" -m pip list)",
|
||||
"Bash(findstr:*)",
|
||||
"Bash(ls:*)",
|
||||
"Bash(C:Pythonpython.exe -m pip install pyzipper)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
Binary file not shown.
@@ -1,21 +1,24 @@
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import pyzipper
|
||||
|
||||
from indexer.config import BACKUP_PASSWORD
|
||||
|
||||
|
||||
def blob_path(backup_root: str, content_hash: bytes) -> str:
|
||||
"""Vrátí cestu k blob souboru: BACKUP/ab/cd/abcdef...blob"""
|
||||
"""Vrátí cestu k ZIP souboru: BACKUP/ab/cd/abcdef...zip"""
|
||||
hex_hash = content_hash.hex()
|
||||
return os.path.join(backup_root, hex_hash[:2], hex_hash[2:4], hex_hash + ".blob")
|
||||
return os.path.join(backup_root, hex_hash[:2], hex_hash[2:4], hex_hash + ".zip")
|
||||
|
||||
|
||||
def ensure_backed_up(files_with_hash: list, backup_root: str) -> int:
|
||||
"""
|
||||
Zkopíruje soubory do content-addressable storage.
|
||||
Vytvoří AES-256 šifrovaný ZIP pro každý soubor v content-addressable storage.
|
||||
files_with_hash: [(full_path, content_hash_bytes), ...]
|
||||
Přeskočí soubory, jejichž blob už existuje (deduplikace).
|
||||
Přeskočí soubory, jejichž zip už existuje (deduplikace).
|
||||
Returns: počet nově zálohovaných souborů.
|
||||
"""
|
||||
password = BACKUP_PASSWORD.encode("utf-8")
|
||||
backed_up = 0
|
||||
for full_path, content_hash in files_with_hash:
|
||||
target = blob_path(backup_root, content_hash)
|
||||
@@ -25,17 +28,26 @@ def ensure_backed_up(files_with_hash: list, backup_root: str) -> int:
|
||||
target_dir = os.path.dirname(target)
|
||||
os.makedirs(target_dir, exist_ok=True)
|
||||
|
||||
tmp_path = None
|
||||
try:
|
||||
# Atomický zápis: temp soubor + přejmenování
|
||||
fd, tmp_path = tempfile.mkstemp(dir=target_dir, suffix=".tmp")
|
||||
os.close(fd)
|
||||
shutil.copy2(full_path, tmp_path)
|
||||
|
||||
hex_hash = content_hash.hex()
|
||||
with pyzipper.AESZipFile(
|
||||
tmp_path, "w",
|
||||
compression=pyzipper.ZIP_DEFLATED,
|
||||
encryption=pyzipper.WZ_AES,
|
||||
) as zf:
|
||||
zf.setpassword(password)
|
||||
zf.write(full_path, arcname=hex_hash + ".blob")
|
||||
|
||||
os.replace(tmp_path, target)
|
||||
backed_up += 1
|
||||
except (FileNotFoundError, PermissionError, OSError) as e:
|
||||
print(f" WARN: backup failed for {full_path}: {e}")
|
||||
# Uklidíme temp soubor pokud existuje
|
||||
if os.path.exists(tmp_path):
|
||||
if tmp_path and os.path.exists(tmp_path):
|
||||
os.remove(tmp_path)
|
||||
continue
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ DB_CONFIG = {
|
||||
ROOT_PATH = os.getenv("ROOT_PATH")
|
||||
ROOT_NAME = os.getenv("ROOT_NAME", "ORDINACE")
|
||||
BACKUP_PATH = os.getenv("BACKUP_PATH")
|
||||
BACKUP_PASSWORD = os.getenv("BACKUP_PASSWORD")
|
||||
|
||||
# =========================
|
||||
# Behaviour
|
||||
|
||||
134
migrate_to_zip.py
Normal file
134
migrate_to_zip.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
One-time migration: convert plain .bak backup blobs to AES-256 encrypted .zip files.
|
||||
|
||||
Usage: python migrate_to_zip.py
|
||||
|
||||
Walks BACKUP_PATH, finds all .bak files, creates encrypted .zip for each,
|
||||
then deletes the original .bak. Resumable: skips files where .zip already exists.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import pyzipper
|
||||
from indexer.config import BACKUP_PATH, BACKUP_PASSWORD
|
||||
|
||||
|
||||
def collect_bak_files(backup_root: str) -> list:
    """Recursively gather the path of every ``.bak`` file under *backup_root*.

    Returns a list of absolute/joined paths in ``os.walk`` order.
    """
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(backup_root)
        for name in names
        if name.endswith(".bak")
    ]
|
||||
|
||||
|
||||
def _print_progress(i: int, total: int, converted: int, skipped: int,
                    errors: int, start_time: float, with_eta: bool) -> None:
    """Emit one progress line (shared by the skip path and the convert path).

    with_eta: append a rate-based ETA; the rate counts only *converted* files,
    since skipped files are near-free.
    """
    elapsed = time.time() - start_time
    line = (f" [{i}/{total}] ({100*i//total}%) "
            f"converted={converted} skipped={skipped} errors={errors} "
            f"elapsed={elapsed:.0f}s")
    if with_eta:
        rate = converted / elapsed if elapsed > 0 else 0
        eta = (total - i) / rate if rate > 0 else 0
        line += f" ETA={eta:.0f}s"
    print(line)


def migrate(backup_root: str, password: str):
    """Convert every plain ``.bak`` blob under *backup_root* into an AES-256
    encrypted ``.zip`` (same directory, same 64-hex stem), deleting the
    original ``.bak`` only after the new zip has been verified readable.

    Resumable: files whose ``.zip`` already exists are skipped, so the script
    can simply be re-run after an interruption.

    backup_root: content-addressable storage root (ab/cd/<64-hex>.bak layout).
    password:    zip encryption password (encoded as UTF-8).
    """
    print(f"Backup dir: {backup_root}")
    print("Scanning for .bak files...")
    bak_files = collect_bak_files(backup_root)
    total = len(bak_files)
    print(f"Found {total} .bak files to migrate.\n")

    if total == 0:
        print("Nothing to migrate.")
        return

    password_bytes = password.encode("utf-8")
    converted = 0
    skipped = 0
    errors = 0
    start_time = time.time()
    # Pre-initialize so the KeyboardInterrupt message below cannot hit a
    # NameError when the interrupt lands before the first loop iteration.
    i = 0

    try:
        for i, bak_path in enumerate(bak_files, 1):
            # Derive the .zip path from the .bak path:
            # ab/cd/<64-hex>.bak -> ab/cd/<64-hex>.zip
            base = bak_path[:-4]  # strip ".bak"
            zip_path = base + ".zip"
            hex_hash = os.path.basename(base)  # the 64-char hex name

            # Resume support: skip if .zip already exists.
            if os.path.exists(zip_path):
                skipped += 1
                if i % 500 == 0 or i == total:
                    _print_progress(i, total, converted, skipped, errors,
                                    start_time, with_eta=False)
                continue

            try:
                # Create the encrypted zip in a temp file, then rename so a
                # crash never leaves a half-written .zip behind.
                tmp_path = zip_path + ".tmp"
                with pyzipper.AESZipFile(
                    tmp_path, "w",
                    compression=pyzipper.ZIP_DEFLATED,
                    encryption=pyzipper.WZ_AES,
                ) as zf:
                    zf.setpassword(password_bytes)
                    zf.write(bak_path, arcname=hex_hash + ".blob")

                os.replace(tmp_path, zip_path)

                # Verify the zip is valid before deleting the original.
                with pyzipper.AESZipFile(zip_path, "r") as zf:
                    zf.setpassword(password_bytes)
                    names = zf.namelist()
                    if not names:
                        raise ValueError("ZIP is empty after creation")

                # Only now is it safe to drop the plain .bak.
                os.remove(bak_path)
                converted += 1

            except Exception as e:
                print(f" ERROR: {bak_path}: {e}")
                errors += 1
                # Clean up the temp file if it exists.
                if os.path.exists(zip_path + ".tmp"):
                    try:
                        os.remove(zip_path + ".tmp")
                    except OSError:
                        pass
                continue

            # Progress every 500 files (errors/skips continue above, so this
            # only fires on the converted path — matching the ETA's rate).
            if i % 500 == 0 or i == total:
                _print_progress(i, total, converted, skipped, errors,
                                start_time, with_eta=True)

    except KeyboardInterrupt:
        print(f"\n\nInterrupted by user at file {i}/{total}.")
        print("Migration is resumable — run again to continue.")

    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print("Migration complete.")
    print(f" Total .bak files : {total}")
    print(f" Converted : {converted}")
    print(f" Skipped (exists) : {skipped}")
    print(f" Errors : {errors}")
    print(f" Time : {elapsed:.0f}s")
    print(f"{'='*60}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Validate configuration up front — refuse to run against a missing
    # backup directory or without an encryption password.
    if not (BACKUP_PATH and os.path.isdir(BACKUP_PATH)):
        print(f"ERROR: BACKUP_PATH is not a valid directory: {BACKUP_PATH}")
        sys.exit(1)
    if not BACKUP_PASSWORD:
        print("ERROR: BACKUP_PASSWORD not set in .env")
        sys.exit(1)

    banner = "=" * 60
    print(banner)
    print("MIGRATION: .bak -> encrypted .zip")
    print(f"Backup dir: {BACKUP_PATH}")
    print(banner)

    migrate(BACKUP_PATH, BACKUP_PASSWORD)
|
||||
113
reconcile.py
Normal file
113
reconcile.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
reconcile.py — Cross-check MySQL content_hash values against .zip files on disk.
|
||||
|
||||
Reports:
|
||||
1. DB hashes with no blob on disk (missing backups)
|
||||
2. Blob files on disk with no matching DB record (orphan blobs)
|
||||
3. Summary stats
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from indexer.config import BACKUP_PATH
|
||||
from indexer.db import get_connection
|
||||
|
||||
|
||||
def collect_disk_hashes(backup_root: str) -> set:
    """Scan *backup_root* for ``.zip`` blobs and return their hex-hash stems.

    Only filenames whose stem is exactly 64 hex chars are collected; anything
    else ending in ``.zip`` is reported as unexpected and ignored.
    """
    found = set()
    for dirpath, _subdirs, names in os.walk(backup_root):
        for name in names:
            if not name.endswith(".zip"):
                continue
            stem = name[:-4]  # strip ".zip"
            if len(stem) != 64:
                print(f" WARN: unexpected zip name: {os.path.join(dirpath, name)}")
                continue
            found.add(stem)
    return found
|
||||
|
||||
|
||||
def collect_db_hashes(conn) -> set:
    """Return every distinct non-NULL content hash in the ``files`` table.

    Hashes come back from MySQL as upper-case hex; they are lower-cased here
    so they compare directly against the on-disk blob filenames.
    """
    query = "SELECT DISTINCT HEX(content_hash) FROM files WHERE content_hash IS NOT NULL"
    hashes = set()
    with conn.cursor() as cur:
        cur.execute(query)
        for row in cur.fetchall():
            hashes.add(row[0].lower())
    return hashes
|
||||
|
||||
|
||||
def main():
    """Cross-check DB content hashes against .zip blobs on disk and report drift.

    Reports matched hashes, hashes missing on disk (with up to 5 example
    paths each), and orphan blobs (with sizes). Orphan deletion is gated
    behind the PURGE_ORPHANS flag, off by default.

    Exits with status 1 when BACKUP_PATH is unset or not a directory.
    """
    if not BACKUP_PATH or not os.path.isdir(BACKUP_PATH):
        print(f"ERROR: BACKUP_PATH is not a valid directory: {BACKUP_PATH}")
        sys.exit(1)

    print(f"Backup dir : {BACKUP_PATH}")
    print("Scanning disk blobs...")
    disk_hashes = collect_disk_hashes(BACKUP_PATH)
    print(f" Found {len(disk_hashes)} blob files on disk.")

    print("Loading DB hashes...")
    conn = get_connection()
    try:
        db_hashes = collect_db_hashes(conn)
    finally:
        conn.close()
    print(f" Found {len(db_hashes)} distinct hashes in DB.")

    # --- Options ---
    # PURGE_ORPHANS = True  # uncomment to delete orphan blobs
    PURGE_ORPHANS = False

    # --- Reconcile: plain set algebra over the two hash collections ---
    missing_on_disk = db_hashes - disk_hashes
    orphans_on_disk = disk_hashes - db_hashes
    matched = db_hashes & disk_hashes

    print()
    print("=== Reconciliation Results ===")
    print(f" Matched (DB + disk) : {len(matched)}")
    print(f" Missing on disk : {len(missing_on_disk)}")
    print(f" Orphan blobs (no DB) : {len(orphans_on_disk)}")

    if missing_on_disk:
        print(f"\n--- Missing on disk ({len(missing_on_disk)}) ---")
        # Fresh connection: the first one was closed right after loading hashes.
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                for h in sorted(missing_on_disk):
                    cur.execute(
                        "SELECT relative_path FROM files WHERE content_hash = UNHEX(%s) LIMIT 5",
                        (h,)
                    )
                    paths = [row[0] for row in cur.fetchall()]
                    print(f" {h} -> {paths}")
        finally:
            conn.close()

    if orphans_on_disk:
        print(f"\n--- Orphan blobs ({len(orphans_on_disk)}) ---")
        total_orphan_bytes = 0
        for h in sorted(orphans_on_disk):
            blob = os.path.join(BACKUP_PATH, h[:2], h[2:4], h + ".zip")
            # Blob may vanish between the scan and this stat; count it as 0.
            size = os.path.getsize(blob) if os.path.exists(blob) else 0
            # os.path.getsize always returns int, so no type guard is needed.
            total_orphan_bytes += size
            print(f" {h} ({size} bytes)")
        print(f" Total orphan size: {total_orphan_bytes / 1024 / 1024:.1f} MB")

        if PURGE_ORPHANS:
            print("\n PURGING orphan blobs...")
            purged = 0
            for h in sorted(orphans_on_disk):
                blob = os.path.join(BACKUP_PATH, h[:2], h[2:4], h + ".zip")
                try:
                    os.remove(blob)
                    purged += 1
                except OSError as e:
                    print(f" WARN: could not delete {blob}: {e}")
            print(f" Purged {purged}/{len(orphans_on_disk)} orphan blobs.")

    if not missing_on_disk and not orphans_on_disk:
        print("\nAll clean — DB and disk are in sync.")
|
||||
|
||||
|
||||
# Script entry point: run the reconciliation only when executed directly,
# never on import.
if __name__ == "__main__":
    main()
|
||||
28
recovery.py
28
recovery.py
@@ -11,8 +11,8 @@ the original directory structure.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
from indexer.config import DB_CONFIG, BACKUP_PATH
|
||||
import pyzipper
|
||||
from indexer.config import BACKUP_PATH, BACKUP_PASSWORD
|
||||
from indexer.db import get_connection
|
||||
from indexer.backup import blob_path
|
||||
|
||||
@@ -37,21 +37,37 @@ def recover(run_id: int, output_dir: str):
|
||||
print(f"Recovering {len(rows)} files from run #{run_id} to {output_dir}")
|
||||
recovered = 0
|
||||
missing = 0
|
||||
password = BACKUP_PASSWORD.encode("utf-8")
|
||||
|
||||
for relative_path, content_hash in rows:
|
||||
source = blob_path(BACKUP_PATH, content_hash)
|
||||
target = os.path.join(output_dir, relative_path.replace("/", os.sep))
|
||||
|
||||
if not os.path.exists(source):
|
||||
print(f" MISSING blob: {content_hash.hex()} for {relative_path}")
|
||||
print(f" MISSING zip: {content_hash.hex()} for {relative_path}")
|
||||
missing += 1
|
||||
continue
|
||||
|
||||
os.makedirs(os.path.dirname(target), exist_ok=True)
|
||||
shutil.copy2(source, target)
|
||||
recovered += 1
|
||||
|
||||
print(f"\nRecovered: {recovered} Missing blobs: {missing}")
|
||||
try:
|
||||
with pyzipper.AESZipFile(source, "r") as zf:
|
||||
zf.setpassword(password)
|
||||
names = zf.namelist()
|
||||
if not names:
|
||||
print(f" WARN: empty zip: {source}")
|
||||
missing += 1
|
||||
continue
|
||||
data = zf.read(names[0])
|
||||
with open(target, "wb") as f:
|
||||
f.write(data)
|
||||
recovered += 1
|
||||
except Exception as e:
|
||||
print(f" ERROR extracting {source} for {relative_path}: {e}")
|
||||
missing += 1
|
||||
continue
|
||||
|
||||
print(f"\nRecovered: {recovered} Missing/errors: {missing}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pymysql
|
||||
blake3
|
||||
python-dotenv
|
||||
pyzipper
|
||||
|
||||
Reference in New Issue
Block a user