From fc13fe9d340a1f9201fd869b910c704a79bef050 Mon Sep 17 00:00:00 2001
From: Vladimir Buzalka <reports@buzalka.cz>
Date: Wed, 20 May 2026 06:09:44 +0200
Subject: [PATCH] notebookvb

---
 Webpagescraping/splcr.cz/convert_to_md.py | 67 +++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 Webpagescraping/splcr.cz/convert_to_md.py

diff --git a/Webpagescraping/splcr.cz/convert_to_md.py b/Webpagescraping/splcr.cz/convert_to_md.py
new file mode 100644
index 0000000..d190e65
--- /dev/null
+++ b/Webpagescraping/splcr.cz/convert_to_md.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""Render output/documents.csv as a Markdown index in output/DOKUMENTY.md.
+
+Rows whose 'found_on' URL contains 'appel' are grouped by the text after
+the last 'appel-rocnik-' in that URL; all other rows are grouped by 'type'.
+"""
+import csv
+from collections import defaultdict
+from pathlib import Path
+
+
+def _link_lines(items):
+    """Return one Markdown bullet link per row, ordered by title."""
+    return [f"- [{d['title']}]({d['url']})\n"
+            for d in sorted(items, key=lambda d: d['title'])]
+
+
+# newline='' leaves line-ending handling to the csv module, as its docs
+# require for correct parsing of quoted fields that contain line breaks.
+with open('output/documents.csv', 'r', encoding='utf-8', newline='') as f:
+    docs = list(csv.DictReader(f))
+
+by_type = defaultdict(list)
+appel_by_year = defaultdict(list)
+for doc in docs:
+    url = doc['found_on']
+    if 'appel' in url:
+        # NOTE(review): a URL lacking 'appel-rocnik-' is used whole as key.
+        year = url.split('appel-rocnik-')[-1].rstrip('/')
+        appel_by_year[year].append(doc)
+    else:
+        by_type[doc['type']].append(doc)
+
+# Distinct loop name: 'docs' itself is still needed for the total below.
+appel_count = sum(len(group) for group in appel_by_year.values())
+other_count = len(docs) - appel_count
+
+# Collect fragments and join once instead of growing a string with '+='.
+parts = [f"""# splcr.cz — Stažené dokumenty
+
+**Datum:** 2026-05-20
+**Celkem:** {len(docs)} dokumentů
+**Appel:** {appel_count} dokumentů
+**Ostatní:** {other_count} dokumentů
+
+## Přehled po typu
+
+| Typ | Počet |
+|-----|-------|
+"""]
+parts += [f"| {t} | {len(by_type[t])} |\n" for t in sorted(by_type)]
+
+parts.append("\n## Apel — Všechny ročníky\n\n")
+for year in sorted(appel_by_year, reverse=True):  # descending string order
+    year_docs = appel_by_year[year]
+    parts.append(f"### Ročník {year} ({len(year_docs)} dokumentů)\n\n")
+    parts += _link_lines(year_docs) + ["\n"]
+
+parts.append("## Ostatní dokumenty\n\n")
+for doc_type in sorted(by_type):
+    type_docs = by_type[doc_type]
+    parts.append(f"### {doc_type} ({len(type_docs)} dokumentů)\n\n")
+    parts += _link_lines(type_docs) + ["\n"]
+
+output_path = Path('output/DOKUMENTY.md')
+output_path.write_text("".join(parts), encoding='utf-8')
+print(f"OK - Ulozeno: {output_path}")