z230
This commit is contained in:
97
27 test FastLane.py
Normal file
97
27 test FastLane.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import os
|
||||
import nntplib
|
||||
import sabctools
|
||||
import io
|
||||
import binascii
|
||||
from dotenv import load_dotenv
|
||||
from db import get_conn
|
||||
|
||||
# --- Output location -------------------------------------------------------
BASE_DIR = "FastLane"
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
# exist_ok=True replaces the former "check, then create" pair, which could
# fail if the directory appeared between the check and the makedirs call.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Usenet server ---------------------------------------------------------
# load_dotenv() runs before the credentials are read from the environment.
load_dotenv()
EWEKA_USER = os.getenv("EWEKA_USER")  # None when the variable is not set
EWEKA_PASS = os.getenv("EWEKA_PASS")  # None when the variable is not set
EWEKA_HOST = "news.eweka.nl"

# --- What to download ------------------------------------------------------
NEWSGROUP = 'alt.binaries.e-book.magazines'
# Used as the right-hand side of an SQL LIKE on the stored article subject,
# so the surrounding '%' are wildcards.
SUBJECT_FILTER = '%PC Pro 2011-07.pdf%'
# NOTE(review): the output file name does not match SUBJECT_FILTER above
# (different magazine/date) - confirm whether that is intentional.
FINAL_PATH = os.path.join(OUTPUT_DIR, "Fast_Lane_Biker_Dec_2011.pdf")
|
||||
|
||||
|
||||
def _wrap_article_body(lines):
    """Rebuild a raw NNTP BODY response from the body lines of one article."""
    # Re-stuff dots: every line that starts with "." gets one more "." in
    # front, then the lines are framed with a status line and the "." terminator.
    stuffed = [b"." + line if line.startswith(b".") else line for line in lines]
    return b"222 0 <id>\r\n" + b"\r\n".join(stuffed) + b"\r\n.\r\n"


def _fetch_decoded_part(server, art_num):
    """Download one article body and return its first decoded response, or None."""
    _resp, info = server.body(str(art_num))
    wrapped = _wrap_article_body(info.lines)
    decoder = sabctools.Decoder(len(wrapped))
    decoder.process(io.BytesIO(wrapped).readinto(decoder))
    return next(decoder, None)


def final_precision_downloader():
    """Download all matching articles and assemble them into FINAL_PATH.

    Article numbers are read from the ``articles`` table (filtered by
    NEWSGROUP and SUBJECT_FILTER, ordered by article number). Each body is
    fetched from EWEKA_HOST and decoded with sabctools, and the decoded bytes
    of the i-th article are written at byte offset ``i * 384000`` of a file
    pre-sized to the ``file_size`` reported for the first article.

    Progress and errors are printed; nothing is returned. The function
    returns early when no article matches, when connecting to the server or
    selecting the group fails, or when the first article cannot be decoded.
    A failure on an individual part is reported and the remaining parts are
    still processed. The database and NNTP connections are released on every
    exit path.
    """
    print("🚀 Startuji FINÁLNÍ OPRAVENÉ stahování (bez překryvu bajtů)...")

    # The article list is fully materialised by fetchall(), so the database
    # handles can be released before any network work starts.
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute("""
                SELECT article_number FROM articles
                WHERE newsgroup = %s AND metadata->>'subject' LIKE %s
                ORDER BY article_number;
            """, (NEWSGROUP, SUBJECT_FILTER))
            articles = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    if not articles:
        return

    try:
        server = nntplib.NNTP(EWEKA_HOST, user=EWEKA_USER, password=EWEKA_PASS)
    except Exception as e:
        print(f"💥 Chyba NNTP: {e}")
        return

    # The context manager closes the connection on every exit path below.
    with server:
        try:
            server.group(NEWSGROUP)
        except Exception as e:
            print(f"💥 Chyba NNTP: {e}")
            return

        # Determine the total size from the first article.
        first_part = _fetch_decoded_part(server, articles[0][0])
        if first_part is None:
            # Without a decoded first part there is no file size to allocate.
            print("💥 První článek se nepodařilo dekódovat, velikost souboru není známa.")
            return

        total_size = first_part.file_size
        print(f"📏 Alokuji soubor: {total_size} bajtů.")

        # NOTE(review): offsets assume every part except possibly the last
        # decodes to exactly this many bytes and that article_number order
        # equals part order - confirm for other posts.
        part_fixed_size = 384000

        # Write each part at its offset.
        with open(FINAL_PATH, "wb") as f_out:
            f_out.truncate(total_size)

            for i, (art_num,) in enumerate(articles):
                try:
                    # The first article was already fetched for the size
                    # probe; reuse it instead of downloading it again.
                    res = first_part if i == 0 else _fetch_decoded_part(server, art_num)

                    if not (res and res.data):
                        # Report the gap instead of silently leaving a hole.
                        print(f" ❌ Chyba u {art_num}: žádná dekódovaná data")
                        continue

                    # Offsets must not overlap: part 0 -> 0, part 1 -> 384000
                    # (not 383999).
                    current_offset = i * part_fixed_size
                    f_out.seek(current_offset)
                    f_out.write(res.data)

                    v_crc = binascii.crc32(res.data)
                    e_crc = getattr(res, 'crc_expected', 0)
                    # A missing/zero expected CRC means "nothing to compare".
                    status = "✅" if (not e_crc or v_crc == e_crc) else "❌ CRC FAIL"

                    print(f" [{status}] Part {art_num} -> Offset: {current_offset}, Len: {len(res.data)}")
                except Exception as e:
                    # Best effort: one bad part must not abort the others.
                    print(f" ❌ Chyba u {art_num}: {e}")

        print("-" * 50)
        print("🏁 HOTOVO! Teď už to PDF musí být perfektní.")
|
||||
|
||||
|
||||
# Script entry point: start the download only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    final_precision_downloader()
|
||||
Reference in New Issue
Block a user