import os
import re
import nntplib
from dotenv import load_dotenv
from db import get_conn
def yenc_decode_lines(
    lines: list[bytes],
    debug: bool = False,
    dot_stuffed: bool = False,
) -> bytes:
    """
    Decode the yEnc payload found in the lines of an NNTP article body.

    Lines starting with ``=ybegin``, ``=ypart`` or ``=yend`` are treated as
    yEnc control lines and skipped; every other line is decoded as data.
    A dangling escape character (``=`` as the last byte of a line) is dropped.

    Args:
        lines: Body lines, one ``bytes`` object per line.
        debug: When true, print diagnostics about control lines, dot handling
            and the amount of decoded data.
        dot_stuffed: Set to true only when ``lines`` are raw wire lines whose
            NNTP dot-stuffing has not been undone yet; the leading ``.`` of
            such lines is then removed before decoding. The default is false
            because ``nntplib`` already undoes dot-stuffing on the lines it
            returns; removing a second dot would silently drop a real data
            byte from every data line that begins with ``.``.

    Returns:
        The decoded bytes of all data lines, concatenated in order.
    """
    # yEnc stores each byte as (value + 42) mod 256; this table inverts that
    # for a whole run of unescaped bytes in one C-level pass.
    plain_table = bytes((value - 42) & 0xFF for value in range(256))

    out = bytearray()
    saw_ybegin = False
    data_lines = 0

    for idx, line in enumerate(lines):
        # --- NNTP dot-stuffing (raw wire lines only) ---
        if dot_stuffed and line.startswith(b"."):
            if debug:
                if line.startswith(b".."):
                    print(f"    [dot] line {idx}: '..' -> '.'")
                else:
                    print(f"    [dot] line {idx}: '.' removed")
            line = line[1:]

        # --- yEnc control lines ---
        if line.startswith(b"=ybegin"):
            saw_ybegin = True
            if debug:
                print("    [yEnc] =ybegin detected")
            continue

        if line.startswith(b"=ypart"):
            if debug:
                print("    [yEnc] =ypart detected")
            continue

        if line.startswith(b"=yend"):
            if debug:
                print("    [yEnc] =yend detected")
            continue

        # --- actual yEnc data ---
        data_lines += 1
        pos = 0
        length = len(line)

        while pos < length:
            esc = line.find(b"=", pos)
            if esc < 0:
                # No further escapes: decode the rest of the line in one go.
                out += line[pos:].translate(plain_table)
                break

            out += line[pos:esc].translate(plain_table)
            if esc + 1 >= length:
                # Dangling escape at end of line: nothing left to decode.
                break

            # Escaped byte: undo the extra +64 as well as the usual +42.
            out.append((line[esc + 1] - 106) & 0xFF)
            pos = esc + 2

    if debug:
        print(f"    [yEnc] saw_ybegin={saw_ybegin}, decoded_data_lines={data_lines}")
        print(f"    [yEnc] decoded_bytes={len(out)}")

    if not saw_ybegin:
        print("⚠️  WARNING: yEnc decoder used but =ybegin was NOT seen")

    return bytes(out)


# def yenc_decode_lines(lines: list[bytes]) -> bytes:
#     """
#     Decode yEnc from NNTP BODY lines.
#     Handles NNTP dot-stuffing correctly.
#     """
#     out = bytearray()
#
#     for line in lines:
#         # --- undo NNTP dot-stuffing ---
#         if line.startswith(b".."):
#             line = line[1:]
#         elif line.startswith(b"."):
#             line = line[1:]
#
#         # --- skip yEnc control lines ---
#         if line.startswith(b"=ybegin"):
#             continue
#         if line.startswith(b"=ypart"):
#             continue
#         if line.startswith(b"=yend"):
#             continue
#
#         i = 0
#         length = len(line)
#
#         while i < length:
#             c = line[i]
#
#             if c == ord('='):   # yEnc escape
#                 i += 1
#                 if i >= length:
#                     break
#                 c = (line[i] - 64) & 0xFF
#
#             out.append((c - 42) & 0xFF)
#             i += 1
#
#     return bytes(out)


# ================== CONFIG ==================
# Newsgroup to read, the subject fragment used to find the post, the
# directory for the per-part files and the path of the assembled PDF.
GROUP = "alt.binaries.e-book.magazines"
SUBJECT_KEY = "PC Pro 2011-07.pdf"
OUT_DIR = r"downloads/PC_Pro_2011-07"
FINAL_PDF = r"downloads/PC_Pro_2011-07.pdf"
# ============================================

# load_dotenv() runs first so that anything it loads is visible to the
# environment lookups below; either credential is None when it is not set.
load_dotenv()
EWEKA_USER = os.environ.get("EWEKA_USER")
EWEKA_PASS = os.environ.get("EWEKA_PASS")

# The part directory must exist before any part file is written.
os.makedirs(OUT_DIR, exist_ok=True)

print("🔌 Connecting to PostgreSQL...")
conn = get_conn()
try:
    cur = conn.cursor()
    try:
        # --- load article numbers + subject ---
        cur.execute("""
    SELECT article_number, metadata->>'subject'
    FROM articles
    WHERE newsgroup = %s
      AND metadata->>'subject' LIKE %s
    ORDER BY article_number
""", (GROUP, f"%{SUBJECT_KEY}%"))

        rows = cur.fetchall()
    finally:
        cur.close()
finally:
    # The database is not needed after this point; release the connection
    # instead of holding it open for the whole download.
    conn.close()

print(f"📦 Found {len(rows)} parts")

# --- parse part number from subject ---
# Matches the "(part/total)" counter in the subject; only the part is captured.
part_re = re.compile(r"\((\d+)\s*/\s*\d+\)")

parts = []
seen_part_numbers = set()
for art_num, subject in rows:
    m = part_re.search(subject or "")
    if not m:
        raise RuntimeError(f"Cannot parse part number from subject: {subject}")
    part_no = int(m.group(1))

    # Several articles can carry the same part number (e.g. reposts). Keep
    # only the first one, in article-number order: a repeated entry would be
    # skipped by the downloader as "already exists" yet appended a second
    # time during assembly, corrupting the final file.
    if part_no in seen_part_numbers:
        print(f"⚠️  Duplicate part {part_no} (article {art_num}) ignored")
        continue
    seen_part_numbers.add(part_no)
    parts.append((part_no, art_num))

# sort by part number (1..N)
parts.sort(key=lambda x: x[0])

# Download every part that is not on disk yet, decode it and store it as
# OUT_DIR/part_NNN.bin.
print("🔌 Connecting to Eweka NNTP...")
with nntplib.NNTP_SSL(
    "news.eweka.nl",
    563,
    EWEKA_USER,
    EWEKA_PASS,
    readermode=True
) as nntp:

    nntp.group(GROUP)

    for idx, (part_no, art_num) in enumerate(parts, start=1):
        out_path = os.path.join(OUT_DIR, f"part_{part_no:03d}.bin")

        if os.path.exists(out_path):
            print(f"⏭️  [{idx}/{len(parts)}] part {part_no} already exists, skipping")
            continue

        print(f"⬇️  [{idx}/{len(parts)}] Downloading part {part_no} (article {art_num})")

        _, info = nntp.body(art_num)

        print(f"    BODY lines received: {len(info.lines)}")

        # quick look at the first few raw lines
        for ln in info.lines[:3]:
            print("    RAW:", ln[:80])

        decoded = yenc_decode_lines(info.lines, debug=True)

        print(f"    RESULT bytes: {len(decoded)}")

        # Write under a temporary name and rename afterwards, so an
        # interrupted run never leaves a truncated part file that a later
        # run would skip as "already exists".
        tmp_path = out_path + ".tmp"
        with open(tmp_path, "wb") as f:
            f.write(decoded)
        os.replace(tmp_path, out_path)


# Concatenate the part files, in the order given by `parts`, into FINAL_PDF.
print("🧩 Assembling final PDF...")

part_files = [
    os.path.join(OUT_DIR, f"part_{number:03d}.bin")
    for number, _article in parts
]

with open(FINAL_PDF, "wb") as pdf:
    for path in part_files:
        with open(path, "rb") as chunk:
            pdf.write(chunk.read())

print("🎉 DONE")
print(f"📄 Final PDF: {FINAL_PDF}")