This commit is contained in:
2026-06-11 21:49:04 +02:00
parent 8e760d3adf
commit 8ef7d1cfd1
15 changed files with 1621 additions and 0 deletions
File diff suppressed because one or more lines are too long
@@ -0,0 +1,152 @@
Subject: =?utf-8?q?Pozv=C3=A1nka?= na projekt
From: wp2.cz <support@wp2.cz>
To: MUDr. Michaela =?utf-8?q?Buzalkov=C3=A1?= <michaela.buzalkova@buzalka.cz>
Message-ID: <4gK2ZN3bfTzXZL@m1-u6-ing.websupport.sk>
Date: Mon, 18 May 2026 16:00:16 +0200
MIME-Version: 1.0
Content-Type: multipart/alternative;
boundary="===============8327047951166956212=="
--===============8327047951166956212==
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
VsOhxb5lbsOhIHBhbsOtIGRva3RvcmtvIC8gVsOhxb5lbsO9IHBhbmUgZG9rdG9yZQoKTmEgesOh
a2xhZMSbIHBvdsSbxZllbsOtIGZhcm1hY2V1dGlja291IHNwb2xlxI1ub3N0w60gU3RhZGEgdmUg
dsSbY2kgcHJvbsOham11IHJla2xhbW7DrSBwbG9jaHkgdmUgVmHFocOtIG9yZGluYWNpIHNlIG5h
IFbDoXMgZG92b2x1amVtZSBvYnLDoXRpdCBzIMW+w6Fkb3N0w60gbyBvdsSbxZllbsOtIFZhxaFp
Y2ggw7pkYWrFryBudXRuw71jaCBwcm8gdXphdsWZZW7DrSBzbWx1dm7DrWhvIHZ6dGFodSBtZXpp
IFbDoW1pIGEgc3BvbGXEjW5vc3TDrSBLYXJhdE5ldCBzLnIuby4sIGpha28gcG92xJvFmWVuw71t
IHNwcsOhdmNlbS4KCkNvIGplIHBvdMWZZWJhIHVkxJtsYXQ6CjEuIFDFmWlobGHFoXRlIHNlIG5h
IHdlYm92b3Ugc3Ryw6Fua3UgcHJvamVrdHU6Cmh0dHBzOi8vd3AyLmN6L3Byb2plY3RzL3ZpYT9j
b2RlPXh2Qnc2TWdtZHVObjFnVkdPU29icGFpU2xPNFlJVUpoT1NIVSUyQlZRS1MxY3lweTZlTm5n
UUM0NzdkQThDa2psQjlud0tuYUVOSFJ1bDh1aXd5Q0RIM1JyRk9ZRTVxbVl3Y0FxRHclMkJ3NXJv
V2lGTmxIMnV3VSUyRnEzZEtOdlBIcm9qcTRKWUFLVHAlMkZkM20lMkJ5RGZ0Snpyd0ElM0QlM0QK
Mi4gWmtvbnRyb2x1anRlIGEgcMWZw61wYWRuxJsgdXByYXZ0ZSBzdsOpIMO6ZGFqZSBkbGUgcG90
xZllYnkKMy4gUG8ga29udHJvbGUgxI1pIGRvcGxuxJtuw60gw7pkYWrFryBwxZllamTEm3RlIGsg
cG9kcGlzdSBzbWxvdXZ5CgpWIHDFmcOtcGFkxJsgamFrw71jaGtvbGl2IG90w6F6ZWsgbsOhcywg
cHJvc8OtbSwga29udGFrdHVqdGUgbmEgZW1haWxvdsOpIGFkcmVzZSBzdXBwb3J0QHdwMi5jeiBu
ZWJvIG5hIHRlbGVmb25uw61tIMSNw61zbGUgKzQyMCA3MzAgNTE2IDUyMC4KCkTEm2t1amVtZSBW
w6FtIHphIHNwb2x1cHLDoWNpCg==
--===============8327047951166956212==
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0
<html><head>
<meta http-equiv=3D"Content-Type" content=3D"text/html; charset=3Dutf-8"></he=
ad><body leftmargin=3D"10" topmargin=3D"10" style=3D"background-color:#FFFFFF=
"><table cellpadding=3D"0" cellspacing=3D"0" border=3D"0" style=3D"border-wid=
th:0px; empty-cells:show; width:624px; height:688px; background-color:#FFFFFF=
"><tbody><tr><td style=3D"width:0px; height:0px"></td><td style=3D"height:0px=
; width:624px"></td></tr><tr style=3D"vertical-align:top"><td style=3D"width:=
0px; height:688px"></td><td style=3D"color:#000000; background-color:transpar=
ent; border-left-style:none; border-top-style:none; border-right-style:none; =
border-bottom-style:none; font-family:Arial; font-size:13px; font-weight:norm=
al; font-style:normal; padding-left:2px; width:622px; height:688px; text-deco=
ration:none"><div style=3D"overflow:hidden; width:622px; height:688px"><div><=
p style=3D"text-align:justify; text-indent:0pt; margin:0pt 0pt 8pt 0pt; line-=
height:1.2"><span style=3D"color:#000000; background-color:#FFFFFF; font-fami=
ly:Calibri; font-size:16px; font-weight:normal; font-style:normal">V=C3=A1=C5=
=BEen=C3=A1 pan=C3=AD doktorko / V=C3=A1=C5=BEen=C3=BD pane doktore&nbsp;</sp=
an><span style=3D"color:#000000; background-color:transparent; font-family:Ca=
libri; font-size:16px; font-weight:normal; font-style:normal">&nbsp;</span></=
p><p style=3D"text-align:justify; text-indent:0pt; margin:0pt 0pt 8pt 0pt; li=
ne-height:1.2"><span style=3D"color:#000000; background-color:transparent; fo=
nt-family:Calibri; font-size:16px; font-weight:normal; font-style:normal">&nb=
sp;</span></p><p style=3D"text-align:justify; text-indent:0pt; margin:0pt 0pt=
8pt 0pt; line-height:1.2"><span style=3D"color:#000000; background-color:tra=
nsparent; font-family:Calibri; font-size:16px; font-weight:normal; font-style=
:normal">Na z=C3=A1klad=C4=9B pov=C4=9B=C5=99en=C3=AD farmaceutickou spole=C4=
=8Dnost=C3=AD Stada </span><span style=3D"color:#000000; background-color:tra=
nsparent; font-family:Calibri; font-size:16px; font-weight:bold; font-style:n=
ormal">ve v=C4=9Bci pron=C3=A1jmu reklamn=C3=AD plochy ve Va=C5=A1=C3=AD ordi=
naci</span><span style=3D"color:#000000; background-color:transparent; font-f=
amily:Calibri; font-size:16px; font-weight:normal; font-style:normal"> se na =
V=C3=A1s dovolujeme obr=C3=A1tit s =C5=BE=C3=A1dost=C3=AD o ov=C4=9B=C5=99en=
=C3=AD Va=C5=A1ich =C3=BAdaj=C5=AF nutn=C3=BDch pro uzav=C5=99en=C3=AD smluvn=
=C3=ADho vztahu mezi V=C3=A1mi a spole=C4=8Dnost=C3=AD </span><span style=3D"=
color:#000000; background-color:transparent; font-family:Calibri; font-size:1=
6px; font-weight:bold; font-style:normal">KaratNet s.r.o.</span><span style=
=3D"color:#000000; background-color:transparent; font-family:Calibri; font-si=
ze:16px; font-weight:normal; font-style:normal">, jako pov=C4=9B=C5=99en=C3=
=BDm spr=C3=A1vcem.&nbsp;</span></p><p style=3D"text-align:justify; text-inde=
nt:0pt; margin:0pt 0pt 8pt 0pt; line-height:1.2"><span style=3D"color:#000000=
; background-color:transparent; font-family:Calibri; font-size:16px; font-wei=
ght:bold; font-style:normal">&nbsp;</span></p><p style=3D"text-align:justify;=
text-indent:0pt; margin:0pt 0pt 8pt 0pt; line-height:1.2"><span style=3D"col=
or:#000000; background-color:transparent; font-family:Calibri; font-size:16px=
; font-weight:bold; font-style:normal">Co je pot=C5=99eba ud=C4=9Blat:</span>=
</p><p style=3D"text-align:left; text-indent:0pt; margin:0pt 0pt 8pt 0pt; lin=
e-height:1.2"><span style=3D"color:#000000; background-color:transparent; fon=
t-family:Calibri; font-size:16px; font-weight:normal; font-style:normal">1. P=
=C5=99ihla=C5=A1te se na webovou str=C3=A1nku projektu:</span></p><p style=3D=
"text-align:left; text-indent:0pt; margin:0pt 0pt 8pt 0pt; line-height:1.2"><=
span style=3D"color:#548DD4; background-color:transparent; font-family:Calibr=
i; font-size:16px; font-weight:normal; font-style:normal; text-decoration:und=
erline">https://wp2.cz/projects/via?code=3DxvBw6MgmduNn1gVGOSobpaiSlO4YIUJhOS=
HU%2BVQKS1cypy6eNngQC477dA8CkjlB9nwKnaENHRul8uiwyCDH3RrFOYE5qmYwcAqDw%2Bw5roW=
iFNlH2uwU%2Fq3dKNvPHrojq4JYAKTp%2Fd3m%2ByDftJzrwA%3D%3D</span></p><p style=3D=
"text-align:left; text-indent:0pt; margin:0pt 0pt 8pt 0pt; line-height:1.2"><=
span style=3D"color:#000000; background-color:transparent; font-family:Calibr=
i; font-size:16px; font-weight:normal; font-style:normal">2. Zkontrolujte a p=
=C5=99=C3=ADpadn=C4=9B upravte sv=C3=A9 =C3=BAdaje dle pot=C5=99eby</span></p=
><p style=3D"text-align:left; text-indent:0pt; margin:0pt 0pt 8pt 0pt; line-h=
eight:1.2"><span style=3D"color:#000000; background-color:transparent; font-f=
amily:Calibri; font-size:16px; font-weight:normal; font-style:normal">3. Po k=
ontrole =C4=8Di dopln=C4=9Bn=C3=AD =C3=BAdaj=C5=AF p=C5=99ejd=C4=9Bte k podpi=
su smlouvy</span></p><p style=3D"text-align:justify; text-indent:0pt; margin:=
0pt 0pt 8pt 0pt; line-height:1.2"><span style=3D"color:#000000; background-co=
lor:transparent; font-family:Calibri; font-size:16px; font-weight:bold; font-=
style:normal">&nbsp;</span></p><p style=3D"text-align:left; text-indent:0pt; =
margin:0pt 0pt 8pt 0pt; line-height:1.2"><span style=3D"color:#000000; backgr=
ound-color:transparent; font-family:Calibri; font-size:16px; font-weight:norm=
al; font-style:normal">V p=C5=99=C3=ADpad=C4=9B jak=C3=BDchkoliv ot=C3=A1zek =
n=C3=A1s, pros=C3=ADm, kontaktujte na emailov=C3=A9 adrese&nbsp;<br><a href=
=3D"mailto:support@wp2.cz" style=3D"color:#000000; background-color:transpare=
nt; font-family:Calibri; font-size:16px; font-weight:normal; font-style:norma=
l; text-decoration:none"><span style=3D"color:#467886; background-color:trans=
parent; font-family:Calibri; font-size:16px; font-weight:normal; font-style:n=
ormal; text-decoration:underline">support@wp2.cz</span></a></span><span style=
=3D"color:#000000; background-color:transparent; font-family:Calibri; font-si=
ze:16px; font-weight:normal; font-style:normal"> nebo na telefonn=C3=ADm =C4=
=8D=C3=ADsle +420 730 516 520.&nbsp;</span></p><p style=3D"text-align:justify=
; text-indent:0pt; margin:0pt 0pt 0pt 0pt; line-height:1.2"><span style=3D"co=
lor:#000000; background-color:transparent; font-family:Calibri; font-size:16p=
x; font-weight:normal; font-style:normal">&nbsp;</span></p><p style=3D"text-a=
lign:justify; text-indent:0pt; margin:0pt 0pt 0pt 0pt; line-height:1.2"><span=
style=3D"color:#000000; background-color:transparent; font-family:Calibri; f=
ont-size:16px; font-weight:normal; font-style:normal">D=C4=9Bkujeme V=C3=A1m =
za spolupr=C3=A1ci</span></p><p style=3D"text-align:justify; text-indent:0pt;=
margin:0pt 0pt 0pt 0pt; line-height:1.2"><span style=3D"color:#000000; backg=
round-color:transparent; font-family:Calibri; font-size:16px; font-weight:nor=
mal; font-style:normal">&nbsp;&nbsp;</span></p><p style=3D"text-align:justify=
; text-indent:0pt; margin:0pt 0pt 8pt 0pt; line-height:1.2"><span style=3D"co=
lor:#000000; background-color:transparent; font-family:Calibri; font-size:16p=
x; font-weight:normal; font-style:normal">&nbsp;</span></p><p style=3D"text-a=
lign:justify; text-indent:0pt; margin:0pt 0pt 0pt 0pt; line-height:1.2"><span=
style=3D"color:#000000; background-color:transparent; font-family:Calibri; f=
ont-size:13px; font-weight:normal; font-style:italic">Tato emailov=C3=A1 zpr=
=C3=A1va obsahuje d=C5=AFv=C4=9Brn=C3=A9 anebo pr=C3=A1vn=C4=9B chr=C3=A1n=C4=
=9Bn=C3=A9 informace, kter=C3=A9 jsou v=C3=BDhradn=C4=9B ur=C4=8Den=C3=A9 jen=
adres=C3=A1tovi emailu. V p=C5=99=C3=ADpad=C4=9B, =C5=BEe tento email nebyl =
spr=C3=A1vn=C4=9B adresovan=C3=BD V=C3=A1m, anebo V=C3=A1m byl zaslan=C3=BD o=
mylem, =C5=BE=C3=A1d=C3=A1me V=C3=A1s, abyste o t=C3=A9to skute=C4=8Dnosti be=
zodkladn=C4=9B informovali odes=C3=ADlatele (KaratNet s.r.o., Na Hutmance 104=
5/7e, Praha 5, 158 00, <a href=3D"mailto:support@wp2.cz" style=3D"color:#0000=
00; background-color:transparent; font-family:Calibri; font-size:13px; font-w=
eight:normal; font-style:italic; text-decoration:none"><span style=3D"color:#=
467886; background-color:transparent; font-family:Calibri; font-size:13px; fo=
nt-weight:normal; font-style:italic; text-decoration:underline">support@wp2.c=
z</span></a></span><span style=3D"color:#000000; background-color:transparent=
; font-family:Calibri; font-size:13px; font-weight:normal; font-style:italic"=
>, +420730516520) a zpr=C3=A1vu odstranili ze sv=C3=A9ho syst=C3=A9mu. Neopr=
=C3=A1vn=C4=9Bn=C3=A9 pou=C5=BEit=C3=AD, kop=C3=ADrov=C3=A1n=C3=AD, p=C5=99ep=
os=C3=ADl=C3=A1n=C3=AD, distribuce anebo poskytnut=C3=AD obsahu je t=C5=99et=
=C3=AD stran=C4=9B zak=C3=A1zan=C3=A9.</span><span style=3D"color:#000000; ba=
ckground-color:transparent; font-family:Calibri; font-size:13px; font-weight:=
normal; font-style:normal">&nbsp;</span></p></div></div></td></tr></tbody></t=
able></body></html>
--===============8327047951166956212==--
+99
View File
@@ -0,0 +1,99 @@
# SeaweedFS na Unraidu — S3-kompatibilní úložiště
Vyladěný Docker stack proti původnímu příkladu. Hlavní rozdíly a proč:
| Změna | Proč |
|---|---|
| Pin verze image `:4.32` místo `:latest` | reprodukovatelnost, žádné překvapení po `pull` |
| Master má `-mdir=/data` + namapovaný volume | **metadata masteru** (kde co leží) jinak po restartu zmizí |
| Filer má namapovaný `/data` | leveldb s metadaty souborů musí persistovat |
| `-config=config_s3.json` u s3 | bez něj je S3 **bez autentizace, otevřené komukoliv** v síti |
| `healthcheck` + `depends_on: service_healthy` | služby nestartují dřív, než je master skutečně nahoře |
| `restart: unless-stopped` | přežije restart Unraidu |
| `-ip.bind=0.0.0.0` | dostupnost z LAN, ne jen z kontejneru |
| Data pod `/mnt/user/appdata/...` | zálohovatelné, ne v anonymním volume |
## Kam se ukládají data (DŮLEŽITÉ pro Unraid)
Klíčové pravidlo Unraidu: **cache není pod paritou** → patří tam jen to, co jde
snadno obnovit. Rozhoduje tedy ne velikost, ale obnovitelnost:
| Co | Obnovitelné? | Kam | Cesta |
|---|---|---|---|
| volume (vlastní obsah) | ne (to jsou ta data) | **POLE (parita)** | `/mnt/user/seaweedfs/volume` |
| **filer metadata** | **NE** — bez nich jsou bloby slepé | **externí DB** (Mongo/PG) | `filer.toml` |
| master metadata | ano — poskládá se z heartbeatů | cache / appdata | `/mnt/user/appdata/seaweedfs/master` |
| config_s3.json | triviální | vedle compose | `./config_s3.json` |
### Proč filer metadata nesmí na cache
filer drží mapu *název souboru → které chunky na kterém volume*. Volume servery ji
**neumí zrekonstruovat** — bloby na poli přežijí, ale ztratíš informaci, co je co.
Proto je dáváme do tvé existující DB na 192.168.1.76 (viz `filer.toml`):
- **MongoDB (doporučeno)** — kolekce v DB `seaweedfs` se vytvoří sama, žádné schéma.
- **PostgreSQL** — vytvoř DB a tabulku:
```sql
CREATE DATABASE seaweedfs;
-- v DB seaweedfs:
CREATE TABLE IF NOT EXISTS filemeta (
dirhash BIGINT,
name VARCHAR(65535),
directory VARCHAR(65535),
meta BYTEA,
PRIMARY KEY (dirhash, name)
);
```
Master metadata na cache nechávám schválně — po ztrátě cache si je master poskládá
z hlášení volume serverů, takže spadají do „snadno obnovitelné".
### Nejdřív založ share na poli
Settings → Shares → Add Share:
- Název: `seaweedfs`
- **Primary storage: Array** (NE cache), Secondary: none — jinak by data tekla zpět na cache a zaplnila ji
- Allocation/split dle libosti
Tím vznikne `/mnt/user/seaweedfs`, kam míří volume vrstva. (Alternativa: napřímo na
konkrétní ZFS disk, `/mnt/diskX/seaweedfs/volume`, když chceš obejít user share.)
> Pozn.: Disk 8 (sdd) má v poli **14 223 čtecích chyb** — než tam pustíš nová data,
> mrkni na SMART / zvaž jeho vyřazení. Na takový disk bych volume dir nesměroval.
## Spuštění
```bash
cd /boot/config/plugins/compose/... # nebo kamkoliv stack uložíš
docker compose -p seaweedfs up -d
docker compose -p seaweedfs ps
```
## Endpoints (nahraď `UNRAID-IP`)
- Master UI: `http://UNRAID-IP:9333`
- Filer/web: `http://UNRAID-IP:8888`
- **S3 API:** `http://UNRAID-IP:8333`
## Před produkcí
1. **Změň klíče** v `config_s3.json` (admin i readonly). Dlouhé náhodné secret keys.
2. Soubor `config_s3.json` drž s právy `600`.
3. Zvaž reverzní proxy (SWAG/NPM) s TLS, pokud má být S3 dostupné mimo LAN.
4. `defaultReplication=000` = bez replikace (1 kopie). Pro odolnost přes víc disků/serverů zvyš (např. `001`, `010`) a přidej volume servery.
## Test
```bash
# AWS CLI
S3=http://UNRAID-IP:8333 AK=admin SK=tajne ./test_s3.sh
```
Nebo přes `s3cmd`, `rclone`, MinIO client (`mc`) — vše funguje proti `:8333`.
## Škálování později
Tohle je single-node setup (test / menší nasazení). Pro distribuovaný cluster přidej
další `volume` servery (klidně na jiných strojích) mířící na stejný master a zvyš
replikaci. Master + filer mohou zůstat, volume vrstva se škáluje horizontálně.
+33
View File
@@ -0,0 +1,33 @@
{
"identities": [
{
"name": "admin",
"credentials": [
{
"accessKey": "ZMEN_ME_admin",
"secretKey": "ZMEN_ME_tajny_klic_dlouhy_nahodny"
}
],
"actions": [
"Admin",
"Read",
"Write",
"List",
"Tagging"
]
},
{
"name": "readonly",
"credentials": [
{
"accessKey": "ZMEN_ME_readonly",
"secretKey": "ZMEN_ME_tajny_klic_readonly"
}
],
"actions": [
"Read",
"List"
]
}
]
}
+89
View File
@@ -0,0 +1,89 @@
# =============================================================================
# SeaweedFS — S3-kompatibilní úložiště na Unraidu (Docker)
# Vyladěná verze: persistence VŠECH dat, S3 přihlášení, healthcheck, restart.
#
# Spuštění:
# docker compose -p seaweedfs up -d
# (na starším Unraidu: docker-compose -f docker-compose.yml -p seaweedfs up -d)
#
# Endpoints (nahraď UNRAID-IP IP adresou serveru):
# Master UI : http://UNRAID-IP:9333
# Filer/web : http://UNRAID-IP:8888
# S3 API : http://UNRAID-IP:8333
# Volume : http://UNRAID-IP:8080 (interní, kvůli debugu)
#
# DŮLEŽITÉ: před produkčním použitím změň klíče v config_s3.json!
# =============================================================================
x-image: &swimg chrislusf/seaweedfs:4.32 # pin verze (ne :latest) kvůli reprodukovatelnosti
services:
seaweed-master:
image: *swimg
container_name: seaweed-master
command: >
master -ip=seaweed-master -ip.bind=0.0.0.0
-mdir=/data -volumeSizeLimitMB=1024 -defaultReplication=000
ports:
- "9333:9333"
volumes:
# metadata masteru — cache OK: po ztrátě se poskládá z heartbeatů volume serverů
- /mnt/user/appdata/seaweedfs/master:/data
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "-qO-", "http://localhost:9333/cluster/status"]
interval: 15s
timeout: 5s
retries: 5
start_period: 20s
seaweed-volume:
image: *swimg
container_name: seaweed-volume
command: >
volume -mserver=seaweed-master:9333 -ip=seaweed-volume -ip.bind=0.0.0.0
-dir=/data -max=0
ports:
- "8080:8080"
volumes:
# POZOR: vlastní obsah (roste do TB) -> na POLE, NE do appdata/cache!
# Vyžaduje share "seaweedfs" s primary storage = Array (cache=No).
- /mnt/user/seaweedfs/volume:/data
depends_on:
seaweed-master:
condition: service_healthy
restart: unless-stopped
seaweed-filer:
image: *swimg
container_name: seaweed-filer
command: >
filer -master=seaweed-master:9333 -ip=seaweed-filer -ip.bind=0.0.0.0
ports:
- "8888:8888"
volumes:
# filer metadata -> externí DB dle filer.toml (Mongo/Postgres) = chráněné.
- ./filer.toml:/etc/seaweedfs/filer.toml:ro
# Jen pokud ve filer.toml zvolíš variantu C (leveldb2): odkomentuj a dej na POLE
# - /mnt/user/seaweedfs/filermeta:/data
depends_on:
seaweed-master:
condition: service_healthy
seaweed-volume:
condition: service_started
restart: unless-stopped
seaweed-s3:
image: *swimg
container_name: seaweed-s3
command: >
s3 -filer=seaweed-filer:8888 -ip.bind=0.0.0.0
-config=/etc/seaweedfs/config_s3.json
ports:
- "8333:8333"
volumes:
- ./config_s3.json:/etc/seaweedfs/config_s3.json:ro # S3 přihlašovací údaje (read-only)
depends_on:
seaweed-filer:
condition: service_started
restart: unless-stopped
+38
View File
@@ -0,0 +1,38 @@
# =============================================================================
# SeaweedFS filer — kde se ukládají METADATA (název -> chunky, stromová struktura)
#
# POZOR: filer metadata NEJDOU rekonstruovat z volume serverů. Když se ztratí,
# data na poli sice přežijí, ale ztratíš mapu, co je co. Proto NESMÍ ležet na
# nechráněné cache. Tři varianty (zapni právě JEDNU sekci enabled=true):
#
# A) mongodb (DOPORUČENO u tebe) — metadata do tvého Monga (192.168.1.76),
# kolekce se vytvoří sama. Chrání je, co chrání Mongo (zálohy/replica).
# B) postgres — metadata do tvého Postgresu; vyžaduje ručně založit tabulku
# (CREATE TABLE viz README).
# C) leveldb2 — embedded soubor; pak ho MUSÍŠ mapovat na POLE (parita), ne cache.
# =============================================================================
# --- A) MongoDB (doporučeno) -------------------------------------------------
[mongodb]
enabled = true
uri = "mongodb://192.168.1.76:27017"
option_pool_size = 0
database = "seaweedfs"
# --- B) PostgreSQL -----------------------------------------------------------
[postgres]
enabled = false
hostname = "192.168.1.76"
port = 5432
username = "seaweedfs"
password = "ZMEN_ME"
database = "seaweedfs"
sslmode = "disable"
connection_max_idle = 5
connection_max_open = 30
# --- C) leveldb2 (embedded) — jen když chceš zůstat bez DB --------------------
# Pak v compose mapuj /data na POLE: /mnt/user/seaweedfs/filermeta:/data
[leveldb2]
enabled = false
dir = "/data/filerldb2"
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# =============================================================================
# Rychlý test SeaweedFS S3 API přes AWS CLI.
# Předpoklad: nainstalované awscli (pip install awscli / apt install awscli)
#
# Použití:
# ./test_s3.sh # použije defaulty níže
# S3=http://192.168.1.50:8333 AK=admin SK=tajne ./test_s3.sh
# =============================================================================
set -euo pipefail

# Connection settings; each one can be overridden from the environment.
S3="${S3:-http://UNRAID-IP:8333}"
AK="${AK:-ZMEN_ME_admin}"
SK="${SK:-ZMEN_ME_tajny_klic_dlouhy_nahodny}"
BUCKET="${BUCKET:-test-bucket}"

export AWS_ACCESS_KEY_ID="$AK"
export AWS_SECRET_ACCESS_KEY="$SK"
export AWS_DEFAULT_REGION="us-east-1"

# Fail early with a clear message instead of a cryptic "command not found".
if ! command -v aws >/dev/null 2>&1; then
    echo "CHYBA: awscli (příkaz 'aws') není nainstalované" >&2
    exit 127
fi

AWS=(aws --endpoint-url "$S3")

# Private scratch directory instead of fixed, predictable names in /tmp;
# removed on every exit path (success, failure, Ctrl+C).
WORKDIR="$(mktemp -d)"
trap 'rm -rf -- "$WORKDIR"' EXIT
SRC_FILE="$WORKDIR/sw_test.txt"
BACK_FILE="$WORKDIR/sw_back.txt"

echo "== 1) vytvoření bucketu =="
# The bucket may already exist from a previous run; that is not an error.
"${AWS[@]}" s3 mb "s3://$BUCKET" || true

echo "== 2) upload souboru =="
echo "ahoj ze SeaweedFS $(date)" > "$SRC_FILE"
"${AWS[@]}" s3 cp "$SRC_FILE" "s3://$BUCKET/hello.txt"

echo "== 3) výpis bucketu =="
"${AWS[@]}" s3 ls "s3://$BUCKET/"

echo "== 4) stažení zpět a kontrola =="
"${AWS[@]}" s3 cp "s3://$BUCKET/hello.txt" "$BACK_FILE"
# An explicit if/else is required here: a failing command on the left side of
# `&&` does NOT trigger `set -e`, so `diff ... && echo` would let a content
# mismatch pass silently and the script would still end with "Hotovo.".
if diff "$SRC_FILE" "$BACK_FILE"; then
    echo "OK: obsah sedí"
else
    echo "CHYBA: stažený obsah se liší od nahraného" >&2
    exit 1
fi

echo "== 5) úklid =="
"${AWS[@]}" s3 rm "s3://$BUCKET/hello.txt"

echo "Hotovo."
+38
View File
@@ -0,0 +1,38 @@
---
name: project-mailstore
description: "MailStore Server na 192.168.1.53 — archiv emailů, Management API, IMAP přístup, Claws Mail klient"
metadata:
node_type: memory
type: project
originSessionId: 49cbd8a2-c71e-49be-8c52-59dfa5ac7680
---
MailStore Server v26.2.1.24065 na `192.168.1.53` (hostname MAILSTORE, Win). Archiv ~2,3 mil emailů, data na `Z:\MailArchive`. Největší schránka vladimir.buzalka@buzalka.cz: 1 077 799 zpráv / 273 GB.
**Přístupy:**
- Windows admin (WinRM): `administrator` / `Vlado7309208104++`. WinRM remoting funguje z U:/janssen PC (TrustedHosts nastaveno).
- MailStore admin: `admin` / `*$N(B)vMUym!%`
**Management API (HTTPS, port 8463):** zapnuté v configu `MailStoreServer.json` (`API via HTTPS Configuration.Enabled=true`). Volá se `POST https://192.168.1.53:8463/api/invoke/<Funkce>`, Basic Auth, parametry jako **form body** (`application/x-www-form-urlencoded`), ne JSON. Self-signed cert → `-SkipCertificateCheck`. Async operace vrátí `token`+`statusCode=running`, výsledek se poluje přes `POST /api/get-status` (params `token`, `lastKnownStatusVersion`, `millisecondsTimeout`); `result` je v poslední odpovědi po `succeeded`. ~90 funkcí (GetUsers, GetStores, GetMessages, GetChildFolders, GetFolderStatistics, RunProfile, CreateBackup…). Dokumentace: help.mailstore.com/en/server/Administration_API_-_Function_Reference. GetMessages chce přesnou cestu složky (např. `vladimir.buzalka@buzalka.cz/Exchange vladimir.buzalka/Sent Items`).
**IMAP (port 143, STARTTLS):** zapnuté v configu (`IMAP Server Configuration.Enabled=true`). Po STARTTLS server nabízí jen `AUTH=PLAIN`, ale prostý IMAP `LOGIN` command funguje (Python imaplib.login OK, curl `--ssl-reqd` OK). Jako admin vidět všechny archivy. Lze stáhnout raw EML konkrétní zprávy.
**Claws Mail (Windows) klient:** Metoda autentizace MUSÍ být **"Prostý text"** (= prostý LOGIN command). "PLAIN"/"LOGIN"/"Automaticky" selhávají — Claws/libetpan na Windows je **bez SASL pluginů** ("PLAIN" → "Bad arguments", "LOGIN" → chybějící SASL plugin). Nastavení: server 192.168.1.53:143, STARTTLS, auth "Prostý text", admin / heslo. Na první zobrazení složek nutno "Obnovit strom složek" → Ano.
**Bezpečnost:** port 8463 ani 143 NEjsou forwardované na MikroTiku (192.168.1.2), přístup jen z LAN. MikroTik API na 8728 (admin/Vlado9674+).
**Nástroje v `U:/janssen/mailstore/`** (ruční prohlížeč archivu, schránka→složka→zpráva, spouštět `.venv\Scripts\python.exe`):
- `mailstore_map_v1.0.py <schránka>` — strom složek z API GetChildFolders (+`--no-stats`, `--list`). Arg = top-level složka (např. `vladimir.buzalka@buzalka.cz`).
- `mailstore_folder_v1.0.py "<plná cesta složky>"` — seznam zpráv (datum|od|předmět) přes dávkový IMAP FETCH hlaviček (+`--limit N`, `--all`, `--oldest`).
- `mailstore_read_v1.0.py "<složka>" <číslo>` — plný obsah jedné zprávy (hlavičky, tělo, přílohy) přes IMAP FETCH RFC822 (+`--uid`, `--save DIR`, `--raw`).
**Ingest do Mongo — `mailstore_ingest_v1.0.py <schránka> --since ROK [--dry-run] [--folder X] [--limit N]`**: backfill staré historie z MailStore do Mongo kolekce `emaily`. Dedup podle internet Message-ID (= `_id` v Mongu, shoduje se 1:1 s IMAP hlavičkou). Filtr data client-side z DATE headeru (NE IMAP SEARCH). Schéma dokumentu = jako Graph import. `--dry-run` spočítá kolik chybí bez zápisu. PILOT OVĚŘEN end-to-end 2026-06-11: MailStore IMAP → ingest → Mongo → enrich_fulltext → PG → MCP emaily search našel zprávu z 2020. Header scan ~490 zpráv/s (1M ≈ 30 min). vladimir.buzalka@buzalka.cz: Sent Items má 20k zpráv 2020+ k dobrání (i odeslané chybí z Graphu!). Plán: roztáhnout celou schránku, pak `--since` hlouběji do minulosti.
**Tři gotchas (vyřešené):**
1. Kolekce `emaily` má unique+sparse index na `graph_id`. MailStore dokument musí pole `graph_id` ÚPLNĚ VYNECHAT (ne `None`) — explicitní null koliduje (sparse ignoruje jen chybějící pole). Jinak E11000 duplicate key.
2. Mongo `{'graph_id': None}` matchuje i dokumenty BEZ pole — `delete_many` tím smete i validní dokumenty. Pozor při úklidu.
3. enrich_fulltext spouštět přes `U:/janssen/.venv/Scripts/python.exe` (system Python C:\Python312 nemá psycopg).
IMAP SEARCH je slepá ulička (78s, vrací jen ~10 výsledků). API GetMessages dává jen metadata (id, date, uid1=Message-ID, outgoing), ne obsah — obsah jen přes IMAP. API `id` (1:947923) ≠ IMAP UID, most je Message-ID (=uid1).
Pracovní adresář: `U:/janssen/mailstore/`. Pozn.: [[feedback-admin-powershell]] — admin příkazy (winget) rovnou psát uživateli.
+29
View File
@@ -0,0 +1,29 @@
---
name: project-tower-backups
description: "Unraid user scripts na Toweru (192.168.1.76) — zálohy MongoDB/PostgreSQL/MySQL, MongoDBBackupWithGzip zálohuje dynamicky všechny DB"
metadata:
node_type: memory
type: project
originSessionId: 5338a9b3-9290-4241-8c98-42b86d832dfc
---
Unraid user scripts jsou na Toweru (192.168.1.76, ssh root) v
`/boot/config/plugins/user.scripts/scripts/<název>/script`.
`MongoDBBackupWithGzip` (denně 4:40) od 2026-06-11 zjišťuje seznam databází
dynamicky přes `mongosh listDatabases` (vynechává `local` a `config`) —
nové DB se zálohují automaticky. Dump přes `docker exec MongoDB mongodump
--archive --gzip` do `/mnt/user/Backup/Critical/MongoDBBackup/tower/<db>/<timestamp>/`.
Rotace GFS bez kopírování (selektivní prune dle data v názvu adresáře):
7 denních / 4 týdenní / 4 měsíční, maže se jen po úspěšném dumpu.
Restore ověřen testem 2026-06-11 (covance → temp DB, 100% shoda docs+indexů).
Sesterské skripty: `MongoDBRestoreFromBackup` (sám projde všechny DB složky),
`MongoDBVerifyIntegrity` (ROZBITÝ — natvrdo seznam DB z jiného serveru);
obdobné trio existuje pro PostgreSQL.
Zbývající díry v disaster recovery (k 2026-06-11): zálohy neopouštějí Tower
(žádný rsync na Synology/cloud) a při selhání zálohy nechodí notifikace.
Pozor na Toweru: `du -h` na FUSE `/mnt/user` hlásí čerstvě zapsaným souborům
falešnou velikost (1.0K) — skutečnou délku dá `ls -l`. mongodump píše průběžné
logy na stderr, takže neprázdný stderr ≠ chyba (rozhoduje exit code).
+49
View File
@@ -0,0 +1,49 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Hlida serverovy log (na Unraidu) dokud dany beh neskonci.
Poluje pres SSH, tiskne ridky progress, skonci na koncovem markeru.
Pouziti: _watch_server_log.py <vzdalena_cesta_logu> [marker]
"""
import sys
import time
import paramiko
HOST = "192.168.1.76"
USER = "root"
PASS = "7309208104"
logpath = sys.argv[1] if len(sys.argv) > 1 else "/mnt/user/Scripts/MailStore/dryrun_full.log"
marker = sys.argv[2] if len(sys.argv) > 2 else "Zprav proskenovano"
import shlex  # script-local import: needed only for remote shell quoting below

c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts any host key without verification, and
# the root password is stored in clear text in PASS above. Consider key-based
# authentication and a known_hosts check.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(HOST, username=USER, password=PASS, timeout=10)


def sh(cmd):
    """Run *cmd* on the remote host and return its stdout decoded as UTF-8.

    Undecodable bytes are replaced rather than raising; stderr is ignored.
    """
    _stdin, stdout, _stderr = c.exec_command(cmd)
    return stdout.read().decode("utf-8", "replace")


# Quote the path for the *remote shell*. Python's repr() is not shell quoting:
# it switches to double quotes when the path contains an apostrophe (enabling
# `$`/backtick expansion) and doubles backslashes.
cat_cmd = f"cat {shlex.quote(logpath)} 2>/dev/null"

t0 = time.time()
last_count = -1
try:
    while True:
        content = sh(cat_cmd)
        # Finished when the end marker appears, or when the remote run crashed.
        done = (marker in content) or ("Traceback" in content)
        folders = content.count("k dobrani=")
        # Sparse progress: print only when another folder line has appeared.
        if folders != last_count:
            mins = (time.time() - t0) / 60
            # last processed folder
            folder_lines = [ln for ln in content.splitlines() if "k dobrani=" in ln]
            last = folder_lines[-1].strip() if folder_lines else ""
            print(f"[{mins:4.1f} min] slozek hotovo: {folders:4} | {last[:70]}", flush=True)
            last_count = folders
        if done:
            print("=== HOTOVO ===", flush=True)
            tail = "\n".join(content.splitlines()[-10:])
            print(tail, flush=True)
            break
        time.sleep(30)
finally:
    # Close the SSH session on every exit path, including Ctrl+C and errors.
    c.close()
+54
View File
@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""Test IMAP SEARCH proti MailStore serveru — ověření rychlosti a funkčnosti."""
import imaplib
import ssl
import sys
import time
HOST = "192.168.1.53"
PORT = 143
USER = "admin"
PASS = "*$N(B)vMUym!%"
def connect():
    """Open an authenticated IMAP session to the MailStore server.

    Connects in plain text to HOST:PORT, upgrades the connection with
    STARTTLS and logs in with the plain IMAP LOGIN command using USER/PASS.

    Returns:
        A logged-in ``imaplib.IMAP4`` instance; the caller must log out.

    Raises:
        Whatever ``imaplib``/``ssl`` raise on connection, TLS or login
        failure. The socket is closed before the exception propagates.
    """
    ctx = ssl.create_default_context()
    # Certificate verification is disabled on purpose (presumably the server
    # presents a self-signed certificate — confirm). check_hostname has to be
    # switched off before verify_mode can be set to CERT_NONE.
    # NOTE(review): USER/PASS are clear-text module constants; consider
    # loading them from the environment instead.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(HOST, PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(USER, PASS)
    except Exception:
        # Do not leak the open socket when STARTTLS or LOGIN fails.
        M.shutdown()
        raise
    return M
def _search(M, crit, val):
    """Run one IMAP SEARCH and return imaplib's ``(typ, data)`` pair.

    ASCII values are sent as a quoted string. Non-ASCII values cannot go
    through imaplib's default ASCII command encoding (it raises
    UnicodeEncodeError), so they are sent as a UTF-8 literal together with
    ``CHARSET UTF-8``.
    """
    if val is None:
        return M.search(None, crit)
    try:
        val.encode("ascii")
    except UnicodeEncodeError:
        # imaplib appends the pending literal to the next command it sends.
        M.literal = val.encode("utf-8")
        return M.search("UTF-8", crit)
    return M.search(None, crit, f'"{val}"')


def main():
    """Time a SELECT and several SEARCH criteria against one MailStore folder.

    Prints elapsed-time-stamped progress lines. The IMAP session is logged
    out on every exit path, including errors.
    """
    t0 = time.time()
    M = connect()
    try:
        print(f"[{time.time()-t0:.1f}s] připojeno + login", flush=True)

        # Direct SELECT of one specific folder (LIST does not show other
        # users' archives, SELECT does).
        target = "vladimir.buzalka@buzalka.cz/Exchange vladimir.buzalka/Sent Items"
        typ, data = M.select(f'"{target}"', readonly=True)
        count = int(data[0]) if typ == "OK" and data and data[0] else 0
        print(f"[{time.time()-t0:.1f}s] SELECT '{target}' = {count} zpráv (typ={typ})", flush=True)
        if count == 0:
            return

        # Exercise SEARCH with several criteria.
        for crit, val in [("ALL", None), ("SUBJECT", "re"), ("FROM", "cz"), ("TEXT", "objednávka")]:
            ts = time.time()
            typ, data = _search(M, crit, val)
            label = crit if val is None else f'{crit} "{val}"'
            if typ != "OK":
                # On NO the payload is the server's error text; splitting it
                # would report the words of the message as search hits.
                print(f"[{time.time()-t0:.1f}s] SEARCH {label}: selhalo (typ={typ}, {data})", flush=True)
                continue
            nums = data[0].split() if data and data[0] else []
            print(f"[{time.time()-t0:.1f}s] SEARCH {label}: {len(nums)} výsledků ({time.time()-ts:.2f}s)", flush=True)
    finally:
        M.logout()
    print(f"[{time.time()-t0:.1f}s] hotovo", flush=True)
if __name__ == "__main__":
main()
+176
View File
@@ -0,0 +1,176 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
================================================================================
Nazev: mailstore_folder_v1.0.py
Verze: 1.0
Datum: 2026-06-11
Autor: Vladimir Buzalka (asistovano Claude)
Popis: Vypise obsah jedne MailStore slozky jako seznam zprav
(datum | od | predmet) pres davkovy IMAP FETCH hlavicek.
Predstupen ingestu - overuje davkove cteni hlavicek.
Argument = plna cesta slozky (fullName z mapy), napr.:
"vladimir.buzalka@buzalka.cz/Exchange vladimir.buzalka/Sent Items"
Zdroj: MailStore IMAP server, port 143, STARTTLS, auth Prosty text (LOGIN).
IMAP FETCH BODY.PEEK[HEADER.FIELDS (...)] = hlavicky bez oznaceni
jako precteno. Davkove jednim prikazem, ne po jedne zprave.
Spusteni:
python mailstore_folder_v1.0.py "...slozka..." # poslednich 50
python mailstore_folder_v1.0.py "...slozka..." --limit 200
python mailstore_folder_v1.0.py "...slozka..." --all # vse (pozor velke slozky)
python mailstore_folder_v1.0.py "...slozka..." --oldest # od nejstarsich
================================================================================
"""
from __future__ import annotations
import argparse
import email
import imaplib
import re
import ssl
import sys
from email.header import decode_header
from email.utils import parsedate_to_datetime
# --- konfigurace ------------------------------------------------------------
HOST = "192.168.1.53"
PORT = 143
USER = "admin"
PASS = "*$N(B)vMUym!%"
DEFAULT_LIMIT = 50
# --- helpery ----------------------------------------------------------------
def connect() -> imaplib.IMAP4:
    """Open an authenticated IMAP session to the MailStore server.

    Connects in plain text to HOST:PORT, upgrades the connection with
    STARTTLS and logs in with the plain IMAP LOGIN command using USER/PASS.

    Returns:
        A logged-in ``imaplib.IMAP4`` instance; the caller must log out.

    Raises:
        Whatever ``imaplib``/``ssl`` raise on connection, TLS or login
        failure. The socket is closed before the exception propagates.
    """
    ctx = ssl.create_default_context()
    # Certificate verification is disabled on purpose (presumably the server
    # presents a self-signed certificate — confirm). check_hostname has to be
    # switched off before verify_mode can be set to CERT_NONE.
    # NOTE(review): USER/PASS are clear-text module constants; consider
    # loading them from the environment instead.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(HOST, PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(USER, PASS)
    except Exception:
        # Do not leak the open socket when STARTTLS or LOGIN fails.
        M.shutdown()
        raise
    return M
def encode_mutf7(s: str) -> str:
    """Encode an IMAP mailbox name as modified UTF-7 (RFC 3501, 5.1.3).

    Printable ASCII (0x20-0x7e) is copied through, with "&" written as "&-".
    Every maximal run of other characters becomes "&<base64>-", where the
    base64 is taken over the UTF-16-BE bytes, uses "," instead of "/" and
    carries no "=" padding. The result is pure ASCII.
    """
    import base64 as _b64

    pieces = []   # finished output fragments
    shifted = []  # characters of the non-ASCII run currently being collected

    def _flush():
        # Emit the collected run (if any) as one "&...-" section.
        if not shifted:
            return
        raw = "".join(shifted).encode("utf-16-be")
        b64 = _b64.b64encode(raw).decode("ascii")
        pieces.append("&" + b64.rstrip("=").replace("/", ",") + "-")
        del shifted[:]

    for char in s:
        if " " <= char <= "~":
            _flush()
            pieces.append("&-" if char == "&" else char)
        else:
            shifted.append(char)
    _flush()
    return "".join(pieces)
def dec(s: str | None) -> str:
    """Decode a MIME-encoded header value (=?utf-8?...?=) into readable text.

    Args:
        s: Raw header value, or None/empty.

    Returns:
        The decoded text with CR/LF replaced by spaces and surrounding
        whitespace stripped; "" for a missing header.
    """
    if not s:
        return ""
    from email.errors import HeaderParseError

    try:
        chunks = decode_header(s)
    except HeaderParseError:
        # Malformed encoded-word: show the raw header text instead of failing.
        chunks = [(str(s), None)]
    out = []
    for txt, enc in chunks:
        if isinstance(txt, bytes):
            try:
                out.append(txt.decode(enc or "utf-8", errors="replace"))
            except LookupError:
                # Charset label Python does not know (e.g. "unknown-8bit").
                out.append(txt.decode("utf-8", errors="replace"))
        else:
            out.append(txt)
    return "".join(out).replace("\r", " ").replace("\n", " ").strip()
def fmt_date(raw: str | None) -> str:
    """Format a Date header as "YYYY-MM-DD HH:MM" for the listing.

    Args:
        raw: Raw Date header value, or None/empty.

    Returns:
        "?" for a missing header, the formatted timestamp (in the header's own
        UTC offset) when it parses, otherwise the first 16 characters of the
        raw value.
    """
    if not raw:
        return "?"
    # The email package may hand back a Header object instead of str; coerce
    # once so both the parser and the fallback slice work.
    text = str(raw)
    try:
        dt = parsedate_to_datetime(text)
        return dt.strftime("%Y-%m-%d %H:%M")
    except Exception:
        # Deliberately broad: a display helper must never abort the listing.
        return text[:16]
def short(s: str, n: int) -> str:
    """Truncate *s* to at most *n* characters for fixed-width columns.

    Strings longer than *n* keep their first n-1 characters and get an
    ellipsis appended, so the result is exactly *n* wide. None is treated as
    "" and a non-positive *n* yields "".
    """
    s = s or ""
    if len(s) <= n:
        return s
    if n < 1:
        return ""
    # The marker had degraded to an empty string, which silently dropped one
    # character without signalling the truncation.
    return s[: n - 1] + "\u2026"
# IMAP FETCH header blocks arrive as a tuple (b'N (BODY[...] {len}', b'<headers>').
# The pattern captures N, the leading number of the first tuple element.
_NUM_RX = re.compile(rb"^(\d+)\s")
def main() -> int:
    """List the messages of one MailStore folder as a table.

    Parses the command line, opens the folder read-only, fetches the Date,
    From and Subject headers of the requested range with a single FETCH and
    prints one row per message.

    Returns:
        0 on success (including an empty folder), 1 when the folder cannot be
        opened or the headers cannot be fetched.
    """
    ap = argparse.ArgumentParser(description="Vypis obsahu MailStore slozky")
    ap.add_argument("folder", help="Plna cesta slozky (fullName z mapy)")
    ap.add_argument("--limit", type=int, default=DEFAULT_LIMIT,
                    help=f"Pocet zprav (default {DEFAULT_LIMIT})")
    ap.add_argument("--all", action="store_true", help="Vsechny zpravy (ignoruje --limit)")
    ap.add_argument("--oldest", action="store_true",
                    help="Od nejstarsich (default: od nejnovejsich)")
    args = ap.parse_args()
    if not args.all and args.limit < 1:
        # A zero or negative limit would produce an inverted sequence range.
        ap.error("--limit musi byt kladne cele cislo")

    M = connect()
    try:
        # The name goes into an IMAP quoted string: escape backslash and quote.
        quoted = encode_mutf7(args.folder).replace("\\", "\\\\").replace('"', '\\"')
        typ, data = M.select(f'"{quoted}"', readonly=True)
        if typ != "OK":
            print(f"Slozku nelze otevrit: {data}", file=sys.stderr)
            return 1
        total = int(data[0]) if data and data[0] else 0
        print(f"Slozka: {args.folder}")
        print(f"Zprav celkem: {total:,}")
        if total == 0:
            return 0
        # Pick the sequence-number range (1 = oldest, total = newest).
        if args.all:
            lo, hi = 1, total
        else:
            n = min(args.limit, total)
            lo, hi = (1, n) if args.oldest else (total - n + 1, total)
        rng = f"{lo}:{hi}"
        shown = hi - lo + 1
        order = "nejstarsi" if args.oldest else "nejnovejsi"
        print(f"Zobrazuji {shown} zprav ({order} prvni), rozsah #{rng}")
        print("=" * 100)
        # One batched FETCH of the headers; PEEK keeps the messages unread.
        typ, msgs = M.fetch(rng, "(BODY.PEEK[HEADER.FIELDS (DATE FROM SUBJECT)])")
        if typ != "OK":
            print(f"Hlavicky nelze nacist: {msgs}", file=sys.stderr)
            return 1
    finally:
        # Always release the server session, including on early returns.
        try:
            M.logout()
        except (imaplib.IMAP4.error, OSError):
            pass

    rows = []
    for item in msgs:
        if not isinstance(item, tuple):
            continue  # closing b')' fragments carry no header data
        meta, hdr_bytes = item[0], item[1]
        m = _NUM_RX.match(meta or b"")
        seqno = int(m.group(1)) if m else 0
        hdr = email.message_from_bytes(hdr_bytes or b"")
        rows.append((seqno, fmt_date(hdr.get("Date")),
                     dec(hdr.get("From")), dec(hdr.get("Subject"))))
    rows.sort(key=lambda r: r[0], reverse=not args.oldest)
    print(f"{'#':>6} {'Datum':<16} {'Od':<32} Predmet")
    print("-" * 100)
    for seqno, d, frm, subj in rows:
        print(f"{seqno:>6} {d:<16} {short(frm, 32):<32} {short(subj, 40)}")
    print("=" * 100)
    print(f"Vypsano {len(rows)} zprav.")
    return 0
if __name__ == "__main__":
    # Script entry point: Ctrl+C ends the run with exit status 1, not a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_status = 1
    sys.exit(exit_status)
+427
View File
@@ -0,0 +1,427 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
================================================================================
Nazev: mailstore_ingest_v1.0.py
Verze: 1.0
Datum: 2026-06-11
Autor: Vladimir Buzalka (asistovano Claude)
Popis: Backfill stare historie z MailStore archivu do MongoDB `emaily`.
Dobere do existujici kolekce schranky JEN zpravy, ktere tam jeste
nejsou - dedup podle internet Message-ID (= _id v Mongu).
Cilove schema dokumentu = stejne jako Graph import, takze navazujici
enrich_fulltext_emails + MCP `emaily` search funguji bez uprav.
Strategie:
1. Nacti SET vsech Message-ID (_id) co uz v Mongu pro schranku jsou.
2. Projdi slozky schranky (API GetChildFolders).
3. Per slozka davkove stahni hlavicky (UID, DATE, MESSAGE-ID) - rychle.
4. Kandidat = Message-ID neni v setu AND rok(DATE) >= --since.
5. Pro kandidaty stahni cele telo (RFC822), naparsuj, upsert do Mongo.
Filtr data je client-side z DATE headeru (IMAP SEARCH je u MailStore pomaly - 78 s - a k nicemu).
Spusteni:
# KOLIK by se dobralo (nic nezapise) - delej VZDY prvni:
python mailstore_ingest_v1.0.py "vladimir.buzalka@buzalka.cz" --since 2020 --dry-run
# ostry beh:
python mailstore_ingest_v1.0.py "vladimir.buzalka@buzalka.cz" --since 2020
# test na jedne slozce / s limitem:
python mailstore_ingest_v1.0.py "vladimir.buzalka@buzalka.cz" --since 2020 \
--folder "vladimir.buzalka@buzalka.cz/Exchange vladimir.buzalka/Sent Items" --limit 50
================================================================================
"""
from __future__ import annotations
import argparse
import email
import imaplib
import json
import re
import ssl
import sys
import time
import urllib.parse
import urllib.request
from base64 import b64encode
from datetime import datetime, timezone
from email.header import decode_header
from email.utils import getaddresses, parsedate_to_datetime
from pymongo import MongoClient, UpdateOne
# --- configuration ----------------------------------------------------------
# NOTE(review): the MailStore password is hard-coded in a committed file -
# consider loading it from the environment or a secret store instead.
MS_HOST = "192.168.1.53"
IMAP_PORT = 143
API_PORT = 8463
MS_USER = "admin"
MS_PASS = "*$N(B)vMUym!%"
MONGO_URI = "mongodb://192.168.1.76:27017"
MONGO_DB = "emaily"
HEADER_BATCH = 2000 # how many headers to FETCH per request
UPSERT_BATCH = 100 # how many documents to write to Mongo per batch
# --- API (only GetChildFolders, used to list the folders) --------------------
_API_BASE = f"https://{MS_HOST}:{API_PORT}/api"
_API_AUTH = "Basic " + b64encode(f"{MS_USER}:{MS_PASS}".encode()).decode()
# TLS context for the API calls; certificate and hostname verification are off.
_CTX = ssl.create_default_context()
_CTX.check_hostname = False
_CTX.verify_mode = ssl.CERT_NONE
def api_result(method: str, params: dict | None = None):
    """Invoke one MailStore Administration API method and return its result.

    The parameters are sent as a form-encoded POST body with HTTP Basic auth.

    Raises:
        RuntimeError: when the response's statusCode is not "succeeded".
    """
    form_body = urllib.parse.urlencode(params or {}).encode()
    request_headers = {
        "Authorization": _API_AUTH,
        "Content-Type": "application/x-www-form-urlencoded",
    }
    request = urllib.request.Request(
        f"{_API_BASE}/invoke/{method}",
        data=form_body,
        method="POST",
        headers=request_headers,
    )
    with urllib.request.urlopen(request, context=_CTX, timeout=30) as resp:
        payload = resp.read()
    # "utf-8-sig" drops a leading BOM before the JSON is parsed.
    reply = json.loads(payload.decode("utf-8-sig"))
    if reply.get("statusCode") == "succeeded":
        return reply.get("result")
    detail = (reply.get("error") or {}).get("message")
    raise RuntimeError(f"{method}: {detail}")
def collect_folders(mailbox: str) -> list[str]:
    """Return the full paths of all folders below *mailbox*.

    Asks the API for the folder tree (GetChildFolders, up to 20 levels) and
    flattens it depth-first, each parent before its children.
    """
    tree = api_result("GetChildFolders", {"folder": mailbox, "maxLevels": 20})
    paths: list[str] = []
    # Explicit stack instead of recursion; children are pushed reversed so
    # they pop in their original order.
    stack = list(reversed(tree.get("childFolders") or []))
    while stack:
        node = stack.pop()
        paths.append(node["fullName"])
        stack.extend(reversed(node.get("childFolders") or []))
    return paths
# --- IMAP --------------------------------------------------------------------
def imap_connect() -> imaplib.IMAP4:
    """Open an authenticated IMAP session to the MailStore server.

    Connects to MS_HOST:IMAP_PORT, upgrades the link with STARTTLS and logs in
    with MS_USER/MS_PASS.

    Returns:
        A logged-in ``imaplib.IMAP4`` connection; the caller must log out.

    Raises:
        imaplib.IMAP4.error, OSError: when connecting, STARTTLS or LOGIN fails.
    """
    # Certificate and hostname checks are switched off, so TLS encrypts the
    # session but does not authenticate the server.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(MS_HOST, IMAP_PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(MS_USER, MS_PASS)
    except Exception:
        # This function is also used for reconnects; never leak the socket
        # of a half-established session.
        M.shutdown()
        raise
    return M
# Leading number of a FETCH response line (used as the message sequence number).
_SEQ_RX = re.compile(rb"^(\d+)\s")
# Number that follows the literal "UID " in the FETCH response metadata.
_UID_RX = re.compile(rb"UID (\d+)")
def dec(s) -> str:
    """Decode a MIME-encoded header value (=?utf-8?...?=) into readable text.

    Args:
        s: Raw header value, or None/empty.

    Returns:
        The decoded text with CR/LF replaced by spaces and surrounding
        whitespace stripped; "" for a missing header.
    """
    if not s:
        return ""
    from email.errors import HeaderParseError

    try:
        chunks = decode_header(s)
    except HeaderParseError:
        # Malformed encoded-word: keep the raw text rather than abort an ingest.
        chunks = [(str(s), None)]
    out = []
    for txt, enc in chunks:
        if isinstance(txt, bytes):
            try:
                out.append(txt.decode(enc or "utf-8", errors="replace"))
            except LookupError:
                # Charset label Python does not know (e.g. "unknown-8bit").
                out.append(txt.decode("utf-8", errors="replace"))
        else:
            out.append(txt)
    return "".join(out).replace("\r", " ").replace("\n", " ").strip()
def parse_date(raw) -> datetime | None:
    """Parse a Date header into a naive datetime.

    Values with a UTC offset are converted to UTC and returned without
    tzinfo; values without one are returned as parsed. Missing or
    unparseable input yields None.
    """
    if not raw:
        return None
    try:
        parsed = parsedate_to_datetime(raw)
        if parsed.tzinfo is None:
            return parsed
        return parsed.astimezone(timezone.utc).replace(tzinfo=None)
    except Exception:
        return None
def encode_mutf7(s: str) -> str:
    """Encode an IMAP folder name as modified UTF-7 (RFC 3501).

    Printable ASCII (0x20-0x7e) is copied verbatim, except that "&" becomes
    "&-". Every run of other characters is emitted as "&<base64>-", where the
    base64 is taken over the UTF-16-BE bytes, has its "=" padding removed and
    uses "," in place of "/". The result is pure ASCII, so it passes safely
    through imaplib. (The original note says MailStore does not offer
    UTF8=ACCEPT, hence folders with diacritics must be sent this way.)
    """
    # Imported once per call instead of once per non-ASCII run.
    import base64 as _b64

    def _shifted(run: str) -> str:
        # One base64 section for a whole run of non-printable characters.
        b64 = _b64.b64encode(run.encode("utf-16-be")).decode("ascii")
        return "&" + b64.rstrip("=").replace("/", ",") + "-"

    res = []
    pending = []  # characters waiting to be base64-shifted together
    for ch in s:
        if 0x20 <= ord(ch) <= 0x7e:
            if pending:
                res.append(_shifted("".join(pending)))
                pending = []
            res.append("&-" if ch == "&" else ch)
        else:
            pending.append(ch)
    if pending:
        res.append(_shifted("".join(pending)))
    return "".join(res)
def imap_select(M: imaplib.IMAP4, folder: str):
    """SELECT *folder* read-only, sending its name in modified UTF-7.

    Returns:
        The (typ, data) pair from ``M.select``.
    """
    # The name is sent as an IMAP quoted string, so a backslash or double
    # quote in it must be escaped or the command is malformed.
    name = encode_mutf7(folder).replace("\\", "\\\\").replace('"', '\\"')
    return M.select(f'"{name}"', readonly=True)
def scan_folder_headers(M: imaplib.IMAP4, folder: str):
    """Fetch (seq, uid, message-id, date) for every message of a folder.

    The folder is selected read-only and its headers are fetched in batches
    of HEADER_BATCH sequence numbers.

    Returns:
        (None, []) when the folder cannot be selected, (0, []) when it is
        empty, otherwise (total, items) where each item is
        (seq, uid, message_id, date); uid is 0 when the response carries none.

    Raises:
        imaplib.IMAP4.error: when the server answers a FETCH with a non-OK
            status. Previously such a batch was skipped silently, which made
            its messages look as if they did not exist.
    """
    typ, data = imap_select(M, folder)
    if typ != "OK":
        return None, []
    total = int(data[0]) if data and data[0] else 0
    if total == 0:
        return 0, []
    items = []
    for lo in range(1, total + 1, HEADER_BATCH):
        hi = min(lo + HEADER_BATCH - 1, total)
        typ, msgs = M.fetch(f"{lo}:{hi}",
                            "(UID BODY.PEEK[HEADER.FIELDS (MESSAGE-ID DATE)])")
        if typ != "OK":
            raise imaplib.IMAP4.error(f"FETCH {lo}:{hi} selhal: {typ} {msgs!r}")
        for idx, it in enumerate(msgs):
            if not isinstance(it, tuple):
                continue
            meta, hdr = it[0] or b"", it[1] or b""
            mseq = _SEQ_RX.match(meta)
            if not mseq:
                # Without a sequence number the message cannot be fetched
                # later; a 0 placeholder would make that FETCH fail.
                continue
            muid = _UID_RX.search(meta)
            if muid is None and idx + 1 < len(msgs) and isinstance(msgs[idx + 1], bytes):
                # Some servers send "UID n" after the literal; imaplib then
                # delivers it in the bytes item that follows the tuple.
                muid = _UID_RX.search(msgs[idx + 1])
            h = email.message_from_bytes(hdr)
            mid = (h.get("Message-ID") or "").strip()
            items.append((int(mseq.group(1)),
                          int(muid.group(1)) if muid else 0,
                          mid, parse_date(h.get("Date"))))
    return total, items
def fetch_full(M: imaplib.IMAP4, seq: int) -> bytes | None:
    """Fetch the complete raw message (RFC822) with sequence number *seq*.

    Returns:
        The message bytes, or None when the FETCH status is not OK or the
        first response item is not a (header, literal) tuple.
    """
    status, payload = M.fetch(str(seq), "(RFC822)")
    if status == "OK" and payload:
        first = payload[0]
        if isinstance(first, tuple):
            return first[1]
    return None
# --- mapovani EML -> Mongo dokument -----------------------------------------
def relativize(folder: str, mailbox: str) -> str:
    """Turn a full MailStore folder path into a mailbox-relative path.

    "mailbox/Exchange X/Sent Items" becomes "Sent Items": the mailbox prefix
    and the level right below it are dropped. A path with only that one level
    keeps it, and a path that does not start with *mailbox* is reduced to its
    last segment.
    """
    segments = folder.split("/")
    if len(segments) < 2 or segments[0] != mailbox:
        return segments[-1]
    # Drop the mailbox and the 'Exchange ...' level; with exactly two
    # segments the second one is all that is left to return.
    tail = segments[2:] or segments[1:]
    return "/".join(tail)
def parse_addr_one(raw) -> dict:
    """Parse the first address of a header into {"email": ..., "name": ...}.

    The e-mail is lower-cased (None when empty); the name is the decoded
    display name, falling back to the address itself. A missing header gives
    both values as None.
    """
    if not raw:
        return {"email": None, "name": None}
    parsed = getaddresses([raw])
    if not parsed:
        return {"email": None, "name": None}
    display, address = parsed[0]
    email_value = (address or "").lower() or None
    name_value = dec(display) or (address or None)
    return {"email": email_value, "name": name_value}
def parse_recipients(msg) -> list[dict]:
    """Collect the To, Cc and Bcc recipients of *msg*.

    Returns:
        One {"type", "email", "name"} dict per address, in To, Cc, Bcc order;
        entries without an address are skipped and a missing display name
        falls back to the address.
    """
    header_kinds = {"To": "to", "Cc": "cc", "Bcc": "bcc"}
    found = []
    for header_name, kind in header_kinds.items():
        raw_value = msg.get(header_name)
        if not raw_value:
            continue
        found.extend(
            {"type": kind, "email": address.lower(), "name": dec(display) or address}
            for display, address in getaddresses([raw_value])
            if address
        )
    return found
def extract_bodies(msg):
    """Extract the text body, the HTML body and attachment metadata.

    Walks all non-multipart parts. A part counts as an attachment when its
    Content-Disposition contains "attachment" or when it has a filename and
    is neither text/plain nor text/html. The first text/plain and the first
    text/html part that are not attachments become the bodies.

    Returns:
        (body_text, body_html, attachments); the bodies are "" when absent
        and each attachment is a dict with filename, size_bytes, mime_type
        and is_inline.
    """
    def _decode(part, payload) -> str:
        # Messages in the wild carry charset labels Python does not know;
        # fall back to UTF-8 so one odd message cannot abort a whole ingest.
        data = payload or b""
        try:
            return data.decode(part.get_content_charset() or "utf-8", errors="replace")
        except LookupError:
            return data.decode("utf-8", errors="replace")

    body_text = body_html = ""
    atts = []
    for part in msg.walk():
        if part.is_multipart():
            continue
        ct = part.get_content_type()
        disp = str(part.get("Content-Disposition") or "")
        payload = part.get_payload(decode=True)
        is_att = "attachment" in disp or (part.get_filename() and ct not in ("text/plain", "text/html"))
        if is_att:
            atts.append({
                "filename": dec(part.get_filename()) or "(bez nazvu)",
                "size_bytes": len(payload or b""),
                "mime_type": ct,
                "is_inline": "inline" in disp,
            })
        elif ct == "text/plain" and not body_text:
            body_text = _decode(part, payload)
        elif ct == "text/html" and not body_html:
            body_html = _decode(part, payload)
    return body_text, body_html, atts
def build_doc(raw: bytes, uid: int, folder: str, mailbox: str) -> dict | None:
    """Map one raw message (EML bytes) to the Mongo document for `emaily`.

    Returns:
        The document keyed by the message's Message-ID (as ``_id``), or None
        when the message has no Message-ID and therefore cannot be
        de-duplicated.
    """
    message = email.message_from_bytes(raw)
    message_id = (message.get("Message-ID") or "").strip()
    if not message_id:
        return None
    when = parse_date(message.get("Date"))
    text_body, html_body, attachments = extract_bodies(message)
    stamp = datetime.now(timezone.utc).replace(tzinfo=None)
    folder_lc = folder.lower()
    looks_like_draft = "draft" in folder_lc or "koncept" in folder_lc
    return {
        "_id": message_id,
        "source": "mailstore",
        "mailstore_uid": uid,
        "mailstore_folder": folder,
        # graph_id is left out ON PURPOSE: the collection has a unique+sparse
        # index on graph_id and an explicit None would collide (sparse only
        # ignores documents where the field is MISSING).
        "conversation_id": None,
        "folder_path": relativize(folder, mailbox),
        "subject": dec(message.get("Subject")),
        "sender": parse_addr_one(message.get("From")),
        "recipients": parse_recipients(message),
        "to": dec(message.get("To")),
        "cc": dec(message.get("Cc")),
        "bcc": dec(message.get("Bcc")),
        "sent_at": when,
        "received_at": when,
        "modified_at": stamp,
        "created_at": stamp,
        "parsed_at": stamp,
        "is_read": True,
        "is_draft": looks_like_draft,
        "has_attachments": bool(attachments),
        "attachment_count": len(attachments),
        "attachments": attachments,
        "body_html": html_body or None,
        "body_text": text_body or None,
        "body_preview": (text_body or "")[:255],
    }
# --- hlavni ------------------------------------------------------------------
def main() -> int:
    """Backfill messages missing in MongoDB from the MailStore archive.

    Loads the Message-IDs already stored for the mailbox, scans the headers
    of every folder (or only --folder), picks the messages that are not
    stored yet and fall into the --since/--until year window, downloads them
    in full and upserts them in batches of UPSERT_BATCH. With --dry-run only
    the counts are printed.

    Returns:
        0 when the run completes.
    """
    ap = argparse.ArgumentParser(description="MailStore -> Mongo backfill (dedup dle Message-ID)")
    ap.add_argument("mailbox", help="Schranka (top-level slozka MailStore = Mongo kolekce)")
    ap.add_argument("--since", type=int, default=None,
                    help="Ber jen zpravy s rokem >= SINCE (napr. 2020)")
    ap.add_argument("--until", type=int, default=None,
                    help="Ber jen zpravy s rokem <= UNTIL")
    ap.add_argument("--folder", default=None, help="Jen jedna konkretni slozka (plna cesta)")
    ap.add_argument("--limit", type=int, default=None, help="Max zprav k ingestu (test)")
    ap.add_argument("--max-folders", type=int, default=None, help="Max slozek (diagnostika)")
    ap.add_argument("--dry-run", action="store_true",
                    help="Jen spocitej kolik by se dobralo, NIC nezapisuj")
    args = ap.parse_args()
    t0 = time.time()
    print(f"=== MailStore ingest v1.0 | schranka: {args.mailbox} ===")
    print(f"Filtr: rok >= {args.since or '-'}{' a <= ' + str(args.until) if args.until else ''}"
          f"{' [DRY-RUN]' if args.dry_run else ''}")

    mongo = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    M = None
    coll = None
    grand_seen = grand_cand = grand_ingested = 0
    queue: list[UpdateOne] = []

    def flush():
        # Hand the pending operations over first, so a failing bulk_write is
        # not retried with the same batch by the cleanup code below.
        pending = list(queue)
        queue.clear()
        if pending and not args.dry_run:
            coll.bulk_write(pending, ordered=False)

    def reconnect(old):
        # Best-effort logout of the broken session, then a fresh login.
        try:
            old.logout()
        except Exception:
            pass
        return imap_connect()

    try:
        # Mongo + the set of known Message-IDs.
        mongo.admin.command("ping")
        coll = mongo[MONGO_DB][args.mailbox]
        print("Nacitam existujici Message-ID z Mongo...", flush=True)
        # A projection cursor instead of distinct(): distinct returns one
        # document, which is capped at 16 MB and fails on large mailboxes.
        known = {d["_id"] for d in coll.find({}, {"_id": 1})}
        print(f" v Mongu uz mam: {len(known):,} zprav")
        # Folders.
        if args.folder:
            folders = [args.folder]
        else:
            folders = collect_folders(args.mailbox)
        print(f"Slozek ke kontrole: {len(folders)}")
        M = imap_connect()
        for fidx, folder in enumerate(folders):
            if args.max_folders and fidx >= args.max_folders:
                print(f" (--max-folders {args.max_folders} dosazeno)")
                break
            rel = relativize(folder, args.mailbox)
            try:
                total, items = scan_folder_headers(M, folder)
            except Exception as ex:
                # One bad folder must not bring the whole run down - log it,
                # reconnect (the IMAP session may be dead) and carry on.
                print(f" [{rel[:45]:45}] CHYBA: {type(ex).__name__}: {str(ex)[:80]}", flush=True)
                M = reconnect(M)
                continue
            if not total:
                continue
            # Candidates: have a Message-ID, are not stored yet, year in window.
            cands = []
            for seq, uid, mid, dt in items:
                if not mid or mid in known:
                    continue
                yr = dt.year if dt else None
                if args.since and (yr is None or yr < args.since):
                    continue
                if args.until and (yr is None or yr > args.until):
                    continue
                cands.append((seq, uid, mid))
            grand_seen += total
            grand_cand += len(cands)
            print(f" [{rel[:45]:45}] zprav={total:>6} k dobrani={len(cands):>6}", flush=True)
            if args.dry_run:
                continue
            for seq, uid, mid in cands:
                if args.limit and grand_ingested >= args.limit:
                    break
                try:
                    raw = fetch_full(M, seq)
                except (imaplib.IMAP4.error, OSError) as ex:
                    # Connection-level failure: the sequence numbers of this
                    # selection are gone, so leave the rest of the folder for
                    # the next run (dedup makes a re-run safe).
                    print(f" [{rel[:45]:45}] CHYBA pri stahovani: {type(ex).__name__}: {str(ex)[:80]}",
                          flush=True)
                    M = reconnect(M)
                    break
                if not raw:
                    continue
                try:
                    doc = build_doc(raw, uid, folder, args.mailbox)
                except Exception as ex:
                    # A single unparseable message is skipped, not fatal.
                    print(f" [{rel[:45]:45}] zprava #{seq} preskocena: {type(ex).__name__}: {str(ex)[:80]}",
                          flush=True)
                    continue
                if not doc:
                    continue
                queue.append(UpdateOne({"_id": doc["_id"]}, {"$setOnInsert": doc}, upsert=True))
                known.add(doc["_id"])
                grand_ingested += 1
                if len(queue) >= UPSERT_BATCH:
                    flush()
            flush()
            if args.limit and grand_ingested >= args.limit:
                print(f" (dosazen limit {args.limit})")
                break
    finally:
        # Write whatever is still queued and release both connections, also
        # when the run is interrupted or fails.
        try:
            flush()
        finally:
            if M is not None:
                try:
                    M.logout()
                except Exception:
                    pass
            mongo.close()

    print("-" * 64)
    print(f"Zprav proskenovano: {grand_seen:,}")
    print(f"K dobrani (chybi, v okne): {grand_cand:,}")
    if args.dry_run:
        print(">>> DRY-RUN: nic nezapsano. Pro ostry beh spust bez --dry-run.")
    else:
        print(f"Zapsano do Mongo: {grand_ingested:,}")
    print(f"Trvalo: {time.time()-t0:.1f}s")
    return 0
if __name__ == "__main__":
    # Script entry point: Ctrl+C ends the run with exit status 1, not a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_status = 1
    sys.exit(exit_status)
+176
View File
@@ -0,0 +1,176 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
================================================================================
Nazev: mailstore_map_v1.0.py
Verze: 1.0
Datum: 2026-06-11
Autor: Vladimir Buzalka (asistovano Claude)
Popis: Vykresli "mapu" jedne MailStore schranky - strom slozek z
Administration API (GetChildFolders) + celkovy pocet zprav schranky
z GetFolderStatistics.
Argument = nazev schranky (top-level slozka v MailStore archivu),
napr. "vladimir.buzalka@buzalka.cz" nebo "lenka.hanzalova".
Seznam dostupnych schranek: --list (vola GetChildFolders na koreni archivu).
Zdroj: MailStore Server Administration API, HTTPS port 8463.
Auth: admin / heslo (Basic). Parametry jako form-body. Async operace
(GetFolderStatistics) se poluji pres /api/get-status.
Pozn.: API umi jen strukturu + souhrnne pocty per schranka. Pocty zprav per
jednotliva slozka API levne nedava - to bude dalsi krok (IMAP STATUS).
Spusteni:
python mailstore_map_v1.0.py "lenka.hanzalova"
python mailstore_map_v1.0.py "vladimir.buzalka@buzalka.cz" --no-stats
python mailstore_map_v1.0.py --list
================================================================================
"""
from __future__ import annotations
import argparse
import json
import ssl
import sys
import time
import urllib.parse
import urllib.request
from base64 import b64encode
# --- configuration ----------------------------------------------------------
# MailStore Administration API endpoint and login.
# NOTE(review): the password is hard-coded in a committed file - consider
# loading it from the environment or a secret store instead.
HOST = "192.168.1.53"
PORT = 8463
USER = "admin"
PASS = "*$N(B)vMUym!%"
BASE = f"https://{HOST}:{PORT}/api"
# Pre-built HTTP Basic Authorization header value.
_AUTH = "Basic " + b64encode(f"{USER}:{PASS}".encode()).decode()
# TLS context for the API calls; certificate and hostname verification are off.
_CTX = ssl.create_default_context()
_CTX.check_hostname = False
_CTX.verify_mode = ssl.CERT_NONE
# --- API helper -------------------------------------------------------------
def _post(path: str, params: dict | None = None) -> dict:
    """Send one form-encoded POST to the API and return the parsed JSON.

    *path* is appended to BASE; the response is decoded as "utf-8-sig" so a
    leading BOM is removed before parsing.
    """
    form_body = urllib.parse.urlencode(params or {}).encode()
    request_headers = {
        "Authorization": _AUTH,
        "Content-Type": "application/x-www-form-urlencoded",
    }
    request = urllib.request.Request(
        f"{BASE}/{path}", data=form_body, method="POST", headers=request_headers
    )
    with urllib.request.urlopen(request, context=_CTX, timeout=30) as resp:
        payload = resp.read()
    return json.loads(payload.decode("utf-8-sig"))
def api(method: str, params: dict | None = None, poll_timeout: int = 120) -> dict:
    """Invoke an API method and wait for it when it runs asynchronously.

    When the first response reports statusCode "running", get-status is
    polled with the returned token until the status changes.

    Returns:
        The complete final response object.

    Raises:
        TimeoutError: when polling takes longer than *poll_timeout* seconds.
    """
    reply = _post(f"invoke/{method}", params)
    if reply.get("statusCode") != "running":
        return reply
    token = reply.get("token")
    version = reply.get("statusVersion", 0)
    started = time.time()
    while True:
        if time.time() - started > poll_timeout:
            raise TimeoutError(f"{method}: polling prekrocil {poll_timeout}s")
        reply = _post("get-status", {"token": token,
                                     "lastKnownStatusVersion": version,
                                     "millisecondsTimeout": 5000})
        version = reply.get("statusVersion", version)
        if reply.get("statusCode") != "running":
            return reply
def api_result(method: str, params: dict | None = None):
    """Invoke an API method and return only its ``result`` value.

    Raises:
        RuntimeError: when the final statusCode is not "succeeded".
    """
    reply = api(method, params)
    if reply.get("statusCode") == "succeeded":
        return reply.get("result")
    detail = (reply.get("error") or {}).get("message", "neznama chyba")
    raise RuntimeError(f"{method} selhalo: {detail}")
# --- formatovani ------------------------------------------------------------
def human_size(n: int) -> str:
    """Format a byte count with one decimal and a B/KB/MB/GB/TB unit.

    The value is divided by 1024 until it drops below 1024 or TB is reached.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(n)
    idx = 0
    # "not value < 1024" rather than "value >= 1024" keeps the comparison
    # identical to the original's for every float input.
    while not value < 1024 and idx < len(units) - 1:
        value /= 1024
        idx += 1
    return f"{value:.1f} {units[idx]}"
def print_tree(node: dict, indent: int = 0) -> int:
    """Print the folder tree below *node*, one line per folder.

    Folders that report child folders are marked "+", the others "-";
    nesting is shown by *indent*.

    Returns:
        The number of folders printed, at all levels.
    """
    pad = ' ' * indent
    printed = 0
    for child in node.get("childFolders") or []:
        marker = "-"
        if child.get("hasChildFolders"):
            marker = "+"
        print(f" {pad}{marker} {child.get('name')}")
        printed += 1 + print_tree(child, indent + 1)
    return printed
# --- akce -------------------------------------------------------------------
def list_mailboxes() -> None:
    """Print the top-level folders (mailboxes) of the archive."""
    root = api_result("GetChildFolders", {"maxLevels": 1})
    print("Dostupne schranky (top-level slozky archivu):")
    top_level = root.get("childFolders") or []
    for entry in top_level:
        print(f" - {entry.get('name')}")
def map_mailbox(mailbox: str, with_stats: bool = True) -> None:
    """Print the folder tree of *mailbox*, optionally with its totals.

    With *with_stats* the message count and size are read from
    GetFolderStatistics (announced on stderr because it is slow); the tree
    itself comes from GetChildFolders.
    """
    total = size = None
    if with_stats:
        # 1) Mailbox totals - optional, GetFolderStatistics takes ~20 s.
        print("Nacitam statistiky (GetFolderStatistics, muze trvat ~20s)...",
              file=sys.stderr, flush=True)
        stats = api_result("GetFolderStatistics") or []
        entry = next((s for s in stats if s.get("folder") == mailbox), None)
        if entry is not None:
            total, size = entry.get("count"), entry.get("size")
    # 2) Folder tree.
    tree = api_result("GetChildFolders", {"folder": mailbox, "maxLevels": 20})
    heavy_rule = "=" * 64
    print(heavy_rule)
    print(f"MAILSTORE MAPA SCHRANKY: {mailbox}")
    if total is not None:
        print(f"Celkem zprav: {total:,} Velikost: {human_size(size)}")
    print(heavy_rule)
    folder_count = print_tree(tree)
    print("-" * 64)
    print(f"Slozek celkem: {folder_count}")
def main() -> int:
    """Command-line entry point: list the mailboxes or map one of them.

    Returns:
        0 on success; a missing mailbox name without --list ends the program
        through the argument parser's error handling.
    """
    parser = argparse.ArgumentParser(description="MailStore mapa schranky (API)")
    parser.add_argument("mailbox", nargs="?", help="Nazev schranky (top-level slozka)")
    parser.add_argument("--list", action="store_true",
                        help="Vypsat dostupne schranky a skoncit")
    parser.add_argument("--no-stats", action="store_true",
                        help="Preskocit celkovy pocet zprav (rychlejsi, bez ~20s GetFolderStatistics)")
    opts = parser.parse_args()
    if opts.list:
        list_mailboxes()
    elif opts.mailbox:
        map_mailbox(opts.mailbox, with_stats=not opts.no_stats)
    else:
        # parser.error() prints the usage and exits, so it never returns.
        parser.error("zadej nazev schranky, nebo --list pro seznam")
    return 0
if __name__ == "__main__":
    # Script entry point: Ctrl+C ends the run with exit status 1, not a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_status = 1
    sys.exit(exit_status)
+212
View File
@@ -0,0 +1,212 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
================================================================================
Nazev: mailstore_read_v1.0.py
Verze: 1.0
Datum: 2026-06-11
Autor: Vladimir Buzalka (asistovano Claude)
Popis: Precte JEDNU konkretni zpravu z MailStore slozky a vypise jeji plny
obsah - hlavicky, telo (text), seznam priloh. Volitelne ulozi
prilohy na disk. Posledni dilek rucniho prohlizece archivu.
Argumenty: <slozka> <cislo>
slozka = plna cesta (fullName z mapy / vystupu mailstore_folder)
cislo = poradove cislo zpravy (# z mailstore_folder), nebo UID s --uid
Zdroj: MailStore IMAP, port 143, STARTTLS, auth Prosty text (LOGIN).
FETCH <n> (RFC822) = cely syrovy EML, naparsovan emailem.
Spusteni:
python mailstore_read_v1.0.py "...slozka..." 63627
python mailstore_read_v1.0.py "...slozka..." 12345 --uid # cislo je UID
python mailstore_read_v1.0.py "...slozka..." 63627 --save .\att # ulozi prilohy
python mailstore_read_v1.0.py "...slozka..." 63627 --raw # vypise cely EML
================================================================================
"""
from __future__ import annotations
import argparse
import email
import imaplib
import os
import ssl
import sys
from email.header import decode_header
from email.utils import parsedate_to_datetime
# --- configuration ----------------------------------------------------------
# MailStore IMAP endpoint and login used by connect().
# NOTE(review): the password is hard-coded in a committed file - consider
# loading it from the environment or a secret store instead.
HOST = "192.168.1.53"
PORT = 143
USER = "admin"
PASS = "*$N(B)vMUym!%"
BODY_PREVIEW_CHARS = 4000 # how many characters of the body to print on screen
def connect() -> imaplib.IMAP4:
    """Open an authenticated IMAP session to the MailStore server.

    Connects to HOST:PORT, upgrades the link with STARTTLS and logs in with
    USER/PASS.

    Returns:
        A logged-in ``imaplib.IMAP4`` connection; the caller must log out.

    Raises:
        imaplib.IMAP4.error, OSError: when connecting, STARTTLS or LOGIN fails.
    """
    # Certificate and hostname checks are switched off, so TLS encrypts the
    # session but does not authenticate the server.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(HOST, PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(USER, PASS)
    except Exception:
        # Do not leak the open socket when the handshake or the login fails.
        M.shutdown()
        raise
    return M
def encode_mutf7(s: str) -> str:
    """Encode an IMAP folder name as modified UTF-7 (RFC 3501).

    Printable ASCII (0x20-0x7e) is copied verbatim, except that "&" becomes
    "&-". Every run of other characters is emitted as "&<base64>-", where the
    base64 is taken over the UTF-16-BE bytes, has its "=" padding removed and
    uses "," in place of "/".
    """
    import base64 as _b64

    def _shifted(run: str) -> str:
        # One base64 section for a whole run of non-printable characters.
        b64 = _b64.b64encode(run.encode("utf-16-be")).decode("ascii")
        return "&" + b64.rstrip("=").replace("/", ",") + "-"

    pieces = []
    pending = []  # characters waiting to be base64-shifted together
    for ch in s:
        if 0x20 <= ord(ch) <= 0x7e:
            if pending:
                pieces.append(_shifted("".join(pending)))
                pending = []
            pieces.append("&-" if ch == "&" else ch)
        else:
            pending.append(ch)
    if pending:
        pieces.append(_shifted("".join(pending)))
    return "".join(pieces)
def dec(s: str | None) -> str:
    """Decode a MIME-encoded header value (=?utf-8?...?=) into readable text.

    Args:
        s: Raw header value, or None/empty.

    Returns:
        The decoded text with CR/LF replaced by spaces and surrounding
        whitespace stripped; "" for a missing header.
    """
    if not s:
        return ""
    from email.errors import HeaderParseError

    try:
        chunks = decode_header(s)
    except HeaderParseError:
        # Malformed encoded-word: show the raw header text instead of failing.
        chunks = [(str(s), None)]
    out = []
    for txt, enc in chunks:
        if isinstance(txt, bytes):
            try:
                out.append(txt.decode(enc or "utf-8", errors="replace"))
            except LookupError:
                # Charset label Python does not know (e.g. "unknown-8bit").
                out.append(txt.decode("utf-8", errors="replace"))
        else:
            out.append(txt)
    return "".join(out).replace("\r", " ").replace("\n", " ").strip()
def html_to_text(html: str) -> str:
    """Convert HTML to plain text with blank lines removed.

    BeautifulSoup is tried first (lxml parser, then html.parser). When that
    is unavailable or fails, a regex fallback is used which now also drops
    script/style/head content, breaks lines at common block-level tags and
    decodes HTML entities, so both paths give comparable output.
    """
    try:
        from bs4 import BeautifulSoup
        try:
            soup = BeautifulSoup(html, "lxml")
        except Exception:
            soup = BeautifulSoup(html, "html.parser")
        for t in soup(["script", "style", "head"]):
            t.decompose()
        text = soup.get_text(separator="\n")
    except Exception:
        # Deliberately broad: any bs4 problem (missing or failing) must still
        # give the user a readable body.
        import re
        from html import unescape  # the parameter shadows the module name
        text = re.sub(r"(?is)<(script|style|head)\b.*?</\1\s*>", "", html)
        text = re.sub(r"(?i)<br\s*/?>|</(?:p|div|tr|li|h[1-6])\s*>", "\n", text)
        text = re.sub(r"<[^>]+>", "", text)
        text = unescape(text)
    lines = [ln.strip() for ln in text.splitlines()]
    return "\n".join(ln for ln in lines if ln)
def _decode_part_text(part, payload) -> str:
    """Decode a text part, falling back to UTF-8 for unknown charset labels."""
    data = payload or b""
    try:
        return data.decode(part.get_content_charset() or "utf-8", errors="replace")
    except LookupError:
        return data.decode("utf-8", errors="replace")


def _safe_attachment_name(name: str) -> str:
    """Make an attachment name usable as a single file name on any OS."""
    # Replace path separators, characters Windows rejects, and control chars.
    cleaned = "".join("_" if ch in '<>:"/\\|?*' or ord(ch) < 32 else ch for ch in name)
    cleaned = cleaned.strip().rstrip(". ")
    return cleaned if cleaned not in ("", ".", "..") else "att.bin"


def main() -> int:
    """Read one message from a MailStore folder and print it.

    Prints the main headers, the attachment list and the body (text/plain
    preferred, otherwise HTML converted to text, cut to BODY_PREVIEW_CHARS).
    With --raw the raw EML is written to stdout instead; with --save the
    attachments are written to the given directory.

    Returns:
        0 on success, 1 when the folder cannot be opened or the message
        cannot be fetched.
    """
    ap = argparse.ArgumentParser(description="Precist jednu zpravu z MailStore")
    ap.add_argument("folder", help="Plna cesta slozky")
    ap.add_argument("number", help="Poradove cislo zpravy (nebo UID s --uid)")
    ap.add_argument("--uid", action="store_true", help="Cislo je IMAP UID, ne poradi")
    ap.add_argument("--save", metavar="DIR", help="Ulozit prilohy do adresare")
    ap.add_argument("--raw", action="store_true", help="Vypsat cely syrovy EML a skoncit")
    args = ap.parse_args()
    if not (args.number.isdigit() or args.number == "*"):
        # Ranges or arbitrary text would fetch several messages (only the
        # first would be shown) or make the server reject the command.
        ap.error("cislo zpravy musi byt cele cislo (nebo *)")

    M = connect()
    try:
        # The name goes into an IMAP quoted string: escape backslash and quote.
        quoted = encode_mutf7(args.folder).replace("\\", "\\\\").replace('"', '\\"')
        typ, data = M.select(f'"{quoted}"', readonly=True)
        if typ != "OK":
            print(f"Slozku nelze otevrit: {data}", file=sys.stderr)
            return 1
        # FETCH the whole message (RFC822); UID FETCH with --uid.
        if args.uid:
            typ, msg_data = M.uid("FETCH", args.number, "(RFC822)")
        else:
            typ, msg_data = M.fetch(args.number, "(RFC822)")
        if typ != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
            print(f"Zpravu #{args.number} nelze nacist (typ={typ})", file=sys.stderr)
            return 1
        raw = msg_data[0][1]
    finally:
        # Always release the server session, including on early returns.
        try:
            M.logout()
        except (imaplib.IMAP4.error, OSError):
            pass

    if args.raw:
        sys.stdout.buffer.write(raw)
        return 0
    msg = email.message_from_bytes(raw)
    # --- headers ---
    print("=" * 80)
    print(f"Slozka : {args.folder}")
    print(f"{'UID' if args.uid else 'Cislo'} : {args.number}")
    print("-" * 80)
    print(f"Datum : {msg.get('Date')}")
    print(f"Od : {dec(msg.get('From'))}")
    print(f"Komu : {dec(msg.get('To'))}")
    if msg.get("Cc"):
        print(f"Kopie : {dec(msg.get('Cc'))}")
    print(f"Predmet : {dec(msg.get('Subject'))}")
    print(f"Msg-ID : {msg.get('Message-ID')}")
    print(f"EML velikost: {len(raw):,} bytu")
    # --- body + attachments ---
    body_text = body_html = ""
    attachments = []  # (filename, size, payload)
    for part in msg.walk():
        if part.is_multipart():
            continue
        ct = part.get_content_type()
        disp = str(part.get("Content-Disposition") or "")
        payload = part.get_payload(decode=True)
        if "attachment" in disp or (part.get_filename() and ct not in ("text/plain", "text/html")):
            attachments.append((dec(part.get_filename()) or "(bez nazvu)",
                                len(payload or b""), payload or b""))
        elif ct == "text/plain" and not body_text:
            body_text = _decode_part_text(part, payload)
        elif ct == "text/html" and not body_html:
            body_html = _decode_part_text(part, payload)
    print("-" * 80)
    if attachments:
        print(f"Prilohy ({len(attachments)}):")
        for name, size, _ in attachments:
            print(f" - {name} ({size:,} B)")
    else:
        print("Prilohy: zadne")
    # Body: prefer text/plain, otherwise HTML converted to text.
    text = body_text or (html_to_text(body_html) if body_html else "")
    src = "text/plain" if body_text else ("text/html->text" if body_html else "(zadne)")
    print("-" * 80)
    print(f"TELO ({src}, {len(text):,} znaku):")
    print("-" * 80)
    if text:
        print(text[:BODY_PREVIEW_CHARS])
        if len(text) > BODY_PREVIEW_CHARS:
            print(f"\n... [zkraceno, celkem {len(text):,} znaku] ...")
    else:
        print("(prazdne telo)")
    # --- saving attachments ---
    if args.save and attachments:
        os.makedirs(args.save, exist_ok=True)
        print("-" * 80)
        used = set()  # names written in this run, compared case-insensitively
        for name, size, payload in attachments:
            safe = _safe_attachment_name(name)
            stem, ext = os.path.splitext(safe)
            candidate, counter = safe, 1
            while candidate.lower() in used:
                # Same-named attachments (e.g. image001.png) must not
                # overwrite each other.
                candidate = f"{stem} ({counter}){ext}"
                counter += 1
            used.add(candidate.lower())
            path = os.path.join(args.save, candidate)
            with open(path, "wb") as f:
                f.write(payload)
            print(f"Ulozeno: {path} ({size:,} B)")
    print("=" * 80)
    return 0
if __name__ == "__main__":
    # Script entry point: Ctrl+C ends the run with exit status 1, not a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_status = 1
    sys.exit(exit_status)