"""Download the parts of one Usenet post and assemble them into a single file.

Article numbers are read from the project database (``db.get_conn``), each
article body is fetched over NNTP from ``EWEKA_HOST``, decoded with
``sabctools.Decoder`` and written into ``FINAL_PATH`` at a fixed per-part
offset.
"""
import binascii
import contextlib
import io
import nntplib
import os

import sabctools
from dotenv import load_dotenv

from db import get_conn

BASE_DIR = "FastLane"
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
os.makedirs(OUTPUT_DIR, exist_ok=True)

load_dotenv()
EWEKA_USER = os.getenv("EWEKA_USER")
EWEKA_PASS = os.getenv("EWEKA_PASS")
EWEKA_HOST = "news.eweka.nl"

NEWSGROUP = 'alt.binaries.e-book.magazines'
SUBJECT_FILTER = '%PC Pro 2011-07.pdf%'
# NOTE(review): the output file name does not match SUBJECT_FILTER; looks like
# a leftover from an earlier download -- confirm which one is intended.
FINAL_PATH = os.path.join(OUTPUT_DIR, "Fast_Lane_Biker_Dec_2011.pdf")


def _load_article_rows():
    """Return the ``(article_number,)`` rows matching NEWSGROUP and SUBJECT_FILTER.

    The cursor and the connection are always closed, even when the query fails.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT article_number FROM articles
                WHERE newsgroup = %s AND metadata->>'subject' LIKE %s
                ORDER BY article_number;
                """,
                (NEWSGROUP, SUBJECT_FILTER),
            )
            return cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()


def _fetch_decoded(server, article_number):
    """Fetch one article body and return the first decoder response, or None.

    The lines returned by ``server.body`` are dot-stuffed again and wrapped in
    a ``222`` status line plus the ``.`` terminator before being handed to
    ``sabctools.Decoder``, i.e. the decoder is fed a buffer shaped like a raw
    NNTP BODY response.
    """
    _resp, info = server.body(str(article_number))
    stuffed = b"\r\n".join(
        b"." + line if line.startswith(b".") else line for line in info.lines
    )
    wrapped = b"222 0 \r\n" + stuffed + b"\r\n.\r\n"
    decoder = sabctools.Decoder(len(wrapped))
    decoder.process(io.BytesIO(wrapped).readinto(decoder))
    return next(decoder, None)


def _crc_status(data, expected_crc):
    """Return the status marker for a part given its expected CRC32.

    A missing expected CRC (``0`` or ``None``) is treated as "nothing to
    compare against" and reported as OK.
    """
    if not expected_crc or binascii.crc32(data) == expected_crc:
        return "✅"
    return "❌ CRC FAIL"


def final_precision_downloader(part_fixed_size: int = 384000) -> None:
    """Download every matching article and write it into FINAL_PATH.

    Args:
        part_fixed_size: Number of bytes assumed for every part; part ``i``
            (zero-based position in the query result) is written at offset
            ``i * part_fixed_size``.

    Failures to connect to the news server and failures on individual parts
    are printed and do not raise. An error while probing the first article or
    opening the output file propagates, after the NNTP session is closed.
    """
    print("🚀 Startuji FINÁLNÍ OPRAVENÉ stahování (bez překryvu bajtů)...")

    articles = _load_article_rows()
    if not articles:
        return

    # ExitStack guarantees the NNTP session is quit on every exit path below.
    with contextlib.ExitStack() as stack:
        try:
            server = stack.enter_context(
                nntplib.NNTP(EWEKA_HOST, user=EWEKA_USER, password=EWEKA_PASS)
            )
            server.group(NEWSGROUP)
        except (nntplib.NNTPError, OSError, EOFError) as e:
            print(f"💥 Chyba NNTP: {e}")
            return

        # Determine the total file size from the first part's metadata.
        meta = _fetch_decoded(server, articles[0][0])
        if meta is None:
            print("💥 Nepodařilo se dekódovat první článek.")
            return
        total_size = meta.file_size
        print(f"📏 Alokuji soubor: {total_size} bajtů.")

        # Pre-size the file, then write every part at its offset.
        with open(FINAL_PATH, "wb") as f_out:
            f_out.truncate(total_size)
            for i, (art_num,) in enumerate(articles):
                try:
                    res = _fetch_decoded(server, art_num)
                    if not (res and res.data):
                        print(f" ⚠️ Part {art_num}: žádná data")
                        continue
                    # Offsets must not overlap: part 0 -> 0, part 1 -> 384000.
                    # NOTE(review): this relies on the row position, so a
                    # missing or duplicated article in the query result shifts
                    # every later part -- confirm the rows are contiguous.
                    current_offset = i * part_fixed_size
                    f_out.seek(current_offset)
                    f_out.write(res.data)
                    status = _crc_status(
                        res.data, getattr(res, 'crc_expected', 0)
                    )
                    print(f" [{status}] Part {art_num} -> Offset: {current_offset}, Len: {len(res.data)}")
                except Exception as e:
                    # Best effort per part: report and continue with the next
                    # one (the decoder's exception types are not visible here).
                    print(f" ❌ Chyba u {art_num}: {e}")

        print("-" * 50)
        print("🏁 HOTOVO! Teď už to PDF musí být perfektní.")


if __name__ == "__main__":
    final_precision_downloader()