From fdba388228493281539d80095cd3d0b16fb181d9 Mon Sep 17 00:00:00 2001 From: vlado Date: Sun, 28 Dec 2025 07:19:48 +0100 Subject: [PATCH 1/4] reporter --- 21 poslednich 100k.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/21 poslednich 100k.py b/21 poslednich 100k.py index 7383083..666c373 100644 --- a/21 poslednich 100k.py +++ b/21 poslednich 100k.py @@ -15,8 +15,9 @@ def sanitize(value): # ================= CONFIG ================= GROUP = "alt.binaries.e-book.magazines" -TOTAL_ARTICLES = 50_000_000 -BATCH_SIZE = 10_000 +TOTAL_ARTICLES = 75_000_000 +BATCH_SIZE = 1_000 +FIRST=40805000 # ========================================= load_dotenv() @@ -43,8 +44,10 @@ with nntplib.NNTP_SSL( first = int(first) last = int(last) - start_global = first - end_global = min(first + TOTAL_ARTICLES - 1, last) + # start_global = first + start_global = FIRST + # end_global = min(first + TOTAL_ARTICLES - 1, last) + end_global = last print(f"🎯 Target range: {start_global} – {end_global}") From e99d42fa97e17fbdb328b127f2c2d6ec695e572d Mon Sep 17 00:00:00 2001 From: vlado Date: Mon, 29 Dec 2025 05:48:36 +0100 Subject: [PATCH 2/4] Add .gitignore (PyCharm, Python cache) --- .idea/.gitignore | 3 --- .idea/NewsGroups.iml | 10 ---------- .idea/inspectionProfiles/Project_Default.xml | 16 ---------------- .idea/inspectionProfiles/profiles_settings.xml | 6 ------ .idea/modules.xml | 8 -------- .idea/vcs.xml | 6 ------ 6 files changed, 49 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/NewsGroups.iml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 26d3352..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml diff --git a/.idea/NewsGroups.iml b/.idea/NewsGroups.iml deleted file mode 100644 index 0e5de3a..0000000 --- a/.idea/NewsGroups.iml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index c53b08f..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2d..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 2075ecb..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 4678d3fe7fa332ce973d702fb4e703e198fd0451 Mon Sep 17 00:00:00 2001 From: vlado Date: Mon, 29 Dec 2025 06:32:04 +0100 Subject: [PATCH 3/4] reporter --- 90 test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/90 test.py b/90 test.py index 116d95d..e4b9a37 100644 --- a/90 test.py +++ b/90 test.py @@ -4,7 +4,7 @@ import os import binascii import re -# --- KONFIGURACE --- +# --- KONFIGURACE ---+ INPUT_FILE = r"u:\PycharmProjects\NewsGroups\downloads\raw\part_001.raw" OUTPUT_DIR = r"u:\PycharmProjects\NewsGroups\downloads\decoded" From a3dbc934ed26dd1548b1491e6a89fe7b871decd4 Mon Sep 17 00:00:00 2001 From: vlado Date: Tue, 30 Dec 2025 18:40:37 +0100 Subject: [PATCH 4/4] reporter --- 21 poslednich 100k.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/21 poslednich 100k.py b/21 poslednich 100k.py index 666c373..0d3e2b2 100644 --- a/21 poslednich 100k.py +++ b/21 poslednich 100k.py @@ -16,8 +16,8 @@ def sanitize(value): # ================= CONFIG ================= GROUP = "alt.binaries.e-book.magazines" TOTAL_ARTICLES = 75_000_000 -BATCH_SIZE = 1_000 -FIRST=40805000 +BATCH_SIZE = 50_000 +FIRST=70805000 # ========================================= load_dotenv()