Add Outlook/Soubory/Clario/Feasibility scripts and reports; ignore Incoming, Outlook downloads & profile

2026-06-03 16:15:19 +02:00
parent 61c6aeea23
commit 6c57ab3ae6
36 changed files with 4949 additions and 0 deletions
@@ -4,3 +4,6 @@ __pycache__/
 .idea/
 .claude/
 EmailsImport/SouboryRůznéVelikosti/
+IWRS/Patients/Incoming/
+Outlook/downloads/
+Outlook/outlook_profile/
@@ -0,0 +1,44 @@
+"Protocol","Study Population","Country","Site","Principal Investigator","Participant ID","Baseline Stool Frequency","Visit","Visit Date","Endoscopy Completed?","Endoscopy Date","Bowel Preparation Start Date 1","Bowel Preparation End Date 1","Bowel Preparation Start Date 2","Bowel Preparation End Date 2","Central Endoscopy Score","Local Endoscopy Score","PGA Score","Eligible Day (-1)","Day (-1) Excluded Reason(s)","Eligible Day (-2)","Day (-2) Excluded Reason(s)","Eligible Day (-3)","Day (-3) Excluded Reason(s)","Eligible Day (-4)","Day (-4) Excluded Reason(s)","Eligible Day (-5)","Day (-5) Excluded Reason(s)","Eligible Day (-6)","Day (-6) Excluded Reason(s)","Eligible Day (-7)","Day (-7) Excluded Reason(s)","Eligible Day (-8)","Day (-8) Excluded Reason(s)","Eligible Day (-9)","Day (-9) Excluded Reason(s)","Eligible Day (-10)","Day (-10) Excluded Reason(s)","Eligible Day (-1) Stool Count","Eligible Day (-2) Stool Count","Eligible Day (-3) Stool Count","Eligible Day (-4) Stool Count","Eligible Day (-5) Stool Count","Eligible Day (-6) Stool Count","Eligible Day (-7) Stool Count","Eligible Day (-8) Stool Count","Eligible Day (-9) Stool Count","Eligible Day (-10) Stool Count","Stool Frequency Sub-score","Eligible Day (-1) Rectal Bleeding Score","Eligible Day (-2) Rectal Bleeding Score","Eligible Day (-3) Rectal Bleeding Score","Eligible Day (-4) Rectal Bleeding Score","Eligible Day (-5) Rectal Bleeding Score","Eligible Day (-6) Rectal Bleeding Score","Eligible Day (-7) Rectal Bleeding Score","Eligible Day (-8) Rectal Bleeding Score","Eligible Day (-9) Rectal Bleeding Score","Eligible Day (-10) Rectal Bleeding Score","Rectal Bleeding Sub-score","Partial Mayo Score","Modified Mayo Score","Full Mayo Score","Site Action","Last Mayo Score Submission","Week I-12 Clinical Responder","Week I-12 Clinical Remission","Clinical Flare","Loss of Response","Partial Mayo Response Post Loss of Response","Partial Mayo Response for Clinical Non-Responders"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012001","1","I-0","19 Feb 2026","Yes","05 Feb 2026","04 Feb 2026","04 Feb 2026","-","-","2","-","3","18 Feb 2026","-","17 Feb 2026","-","16 Feb 2026","-","15 Feb 2026","-","14 Feb 2026","-","13 Feb 2026","-","12 Feb 2026","-","11 Feb 2026","Day Not Applicable for Calculation","10 Feb 2026","Day Not Applicable for Calculation","09 Feb 2026","Day Not Applicable for Calculation","10","8","7","5","7","8","8","-","-","-","3","1","1","1","0","1","1","1","-","-","-","1","7","6","9","-","08 Apr 2026 07:11:25","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012001","1","I-2","04 Mar 2026","-","-","-","-","-","-","-","-","3","03 Mar 2026","-","02 Mar 2026","-","01 Mar 2026","-","28 Feb 2026","-","27 Feb 2026","-","26 Feb 2026","-","25 Feb 2026","-","24 Feb 2026","Day Not Applicable for Calculation","23 Feb 2026","Day Not Applicable for Calculation","22 Feb 2026","Day Not Applicable for Calculation","5","4","5","4","5","6","6","-","-","-","2","1","0","1","0","1","0","1","-","-","-","1","6","","","-","28 May 2026 10:04:05","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012001","1","I-4","18 Mar 2026","-","-","-","-","-","-","-","-","2","17 Mar 2026","-","16 Mar 2026","-","15 Mar 2026","-","14 Mar 2026","-","13 Mar 2026","-","12 Mar 2026","-","11 Mar 2026","-","10 Mar 2026","Day Not Applicable for Calculation","09 Mar 2026","Day Not Applicable for Calculation","08 Mar 2026","Day Not Applicable for Calculation","5","5","5","4","5","4","5","-","-","-","2","1","0","0","1","1","1","0","-","-","-","1","5","","","-","08 Apr 2026 11:04:49","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012001","1","I-8","05 May 2026","-","-","-","-","-","-","-","-","1","04 May 2026","-","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","-","28 Apr 2026","-","27 Apr 2026","Day Not Applicable for Calculation","26 Apr 2026","Day Not Applicable for Calculation","25 Apr 2026","Day Not Applicable for Calculation","3","3","4","4","5","4","4","-","-","-","2","1","1","1","1","1","1","1","-","-","-","1","4","","","-","28 May 2026 14:42:53","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012001","1","I-12","13 May 2026","Yes","06 May 2026","05 May 2026","05 May 2026","-","-","1","-","1","12 May 2026","-","11 May 2026","-","10 May 2026","-","09 May 2026","-","08 May 2026","-","07 May 2026","-","06 May 2026","Endoscopy","05 May 2026","Bowel Preparation for Procedure;Day Not Applicable for Calculation","04 May 2026","-","03 May 2026","Day Not Applicable for Calculation","5","4","6","5","5","5","-","-","3","-","2","1","0","1","1","1","1","-","-","1","-","1","4","4","5","-","28 May 2026 14:43:11","Clinical Responder","No","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012002","1","I-0","08 Apr 2026","Yes","18 Mar 2026","17 Mar 2026","18 Mar 2026","-","-","2","-","2","07 Apr 2026","-","06 Apr 2026","-","05 Apr 2026","-","04 Apr 2026","Missing Diary","03 Apr 2026","-","02 Apr 2026","-","01 Apr 2026","-","31 Mar 2026","Day Not Applicable for Calculation","30 Mar 2026","Day Not Applicable for Calculation","29 Mar 2026","Day Not Applicable for Calculation","3","3","4","-","3","3","4","-","-","-","1","0","0","0","-","0","0","1","-","-","-","0","3","3","5","-","-","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012002","1","I-2","23 Apr 2026","-","-","-","-","-","-","-","-","2","22 Apr 2026","Missing Diary","21 Apr 2026","-","20 Apr 2026","-","19 Apr 2026","-","18 Apr 2026","-","17 Apr 2026","-","16 Apr 2026","-","15 Apr 2026","Day Not Applicable for Calculation","14 Apr 2026","Day Not Applicable for Calculation","13 Apr 2026","Day Not Applicable for Calculation","-","3","3","6","5","5","4","-","-","-","2","-","0","0","1","1","1","1","-","-","-","1","5","","","-","-","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012002","1","I-4","06 May 2026","-","-","-","-","-","-","-","-","1","05 May 2026","-","04 May 2026","-","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","-","28 Apr 2026","Day Not Applicable for Calculation","27 Apr 2026","Day Not Applicable for Calculation","26 Apr 2026","Day Not Applicable for Calculation","6","3","2","3","3","3","3","-","-","-","1","1","0","0","0","1","1","0","-","-","-","0","2","","","-","28 May 2026 14:43:38","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012003","1","I-0","27 May 2026","Yes","13 May 2026","12 May 2026","12 May 2026","-","-","3","-","2","26 May 2026","-","25 May 2026","-","24 May 2026","-","23 May 2026","-","22 May 2026","-","21 May 2026","-","20 May 2026","-","19 May 2026","Day Not Applicable for Calculation","18 May 2026","Day Not Applicable for Calculation","17 May 2026","Day Not Applicable for Calculation","6","9","7","8","9","7","8","-","-","-","3","2","2","2","2","1","1","1","-","-","-","2","7","8","10","-","27 May 2026 07:24:39","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10006","Michal Konecny","CZ100062001","1","I-0","20 Mar 2026","Yes","19 Feb 2026","-","-","-","-","3","-","3","19 Mar 2026","-","18 Mar 2026","-","17 Mar 2026","-","16 Mar 2026","-","15 Mar 2026","-","14 Mar 2026","-","13 Mar 2026","-","12 Mar 2026","Day Not Applicable for Calculation","11 Mar 2026","Day Not Applicable for Calculation","10 Mar 2026","Day Not Applicable for Calculation","7","7","8","8","7","8","5","-","-","-","3","2","1","1","1","1","1","0","-","-","-","1","7","7","10","-","20 Mar 2026 07:02:44","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10006","Michal Konecny","CZ100062001","1","I-2","08 Apr 2026","-","-","-","-","-","-","-","-","2","07 Apr 2026","Medication For Diarrhea","06 Apr 2026","Medication For Diarrhea","05 Apr 2026","Medication For Diarrhea","04 Apr 2026","Medication For Diarrhea","03 Apr 2026","Medication For Diarrhea","02 Apr 2026","Medication For Diarrhea","01 Apr 2026","Medication For Diarrhea","31 Mar 2026","Medication For Diarrhea;Day Not Applicable for Calculation","30 Mar 2026","Medication For Diarrhea;Day Not Applicable for Calculation","29 Mar 2026","Day Not Applicable for Calculation","-","-","-","-","-","-","-","-","-","-","Non-Evaluable","-","-","-","-","-","-","-","-","-","-","Non-Evaluable","Non-Evaluable","Non-Evaluable","Non-Evaluable","-","-","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10006","Michal Konecny","CZ100062001","1","I-4","15 Apr 2026","-","-","-","-","-","-","-","-","3","14 Apr 2026","-","13 Apr 2026","-","12 Apr 2026","-","11 Apr 2026","-","10 Apr 2026","-","09 Apr 2026","-","08 Apr 2026","-","07 Apr 2026","Medication For Diarrhea;Day Not Applicable for Calculation","06 Apr 2026","Medication For Diarrhea;Day Not Applicable for Calculation","05 Apr 2026","Medication For Diarrhea;Day Not Applicable for Calculation","9","22","20","19","17","18","18","-","-","-","3","1","3","2","2","2","2","2","-","-","-","2","8","","","-","04 May 2026 22:05:32","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10006","Michal Konecny","CZ100062001","1","I-8","18 May 2026","-","-","-","-","-","-","-","-","2","17 May 2026","-","16 May 2026","-","15 May 2026","-","14 May 2026","-","13 May 2026","-","12 May 2026","-","11 May 2026","-","10 May 2026","Day Not Applicable for Calculation","09 May 2026","Day Not Applicable for Calculation","08 May 2026","Day Not Applicable for Calculation","7","5","9","7","7","8","8","-","-","-","3","1","1","1","1","1","1","1","-","-","-","1","6","","","-","29 May 2026 15:43:30","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10006","Michal Konecny","CZ100062002","1","I-0","26 May 2026","Yes","14 May 2026","13 May 2026","13 May 2026","-","-","2","-","2","25 May 2026","-","24 May 2026","-","23 May 2026","-","22 May 2026","-","21 May 2026","-","20 May 2026","-","19 May 2026","-","18 May 2026","Day Not Applicable for Calculation","17 May 2026","Day Not Applicable for Calculation","16 May 2026","Day Not Applicable for Calculation","8","8","6","7","7","6","7","-","-","-","3","2","2","2","2","2","2","2","-","-","-","2","7","7","9","-","29 May 2026 15:45:00","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10009","Jiri Pumprla","CZ100092001","1","I-0","05 May 2026","Yes","24 Apr 2026","23 Apr 2026","23 Apr 2026","-","-","2","-","2","04 May 2026","-","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","-","28 Apr 2026","-","27 Apr 2026","Day Not Applicable for Calculation","26 Apr 2026","Day Not Applicable for Calculation","25 Apr 2026","Day Not Applicable for Calculation","5","5","5","5","5","5","5","-","-","-","2","1","1","1","1","1","1","1","-","-","-","1","5","5","7","-","05 May 2026 11:19:40","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10009","Jiri Pumprla","CZ100092001","1","I-2","19 May 2026","-","-","-","-","-","-","-","-","1","18 May 2026","-","17 May 2026","-","16 May 2026","-","15 May 2026","-","14 May 2026","-","13 May 2026","-","12 May 2026","-","11 May 2026","Day Not Applicable for Calculation","10 May 2026","Day Not Applicable for Calculation","09 May 2026","Day Not Applicable for Calculation","5","4","5","5","5","4","6","-","-","-","2","1","1","1","1","1","1","1","-","-","-","1","4","","","-","19 May 2026 10:38:25","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10012","Stefan Konecny","CZ100122001","5","I-0","07 Apr 2026","Yes","24 Mar 2026","22 Mar 2026","22 Mar 2026","-","-","2","-","2","06 Apr 2026","-","05 Apr 2026","-","04 Apr 2026","-","03 Apr 2026","-","02 Apr 2026","-","01 Apr 2026","-","31 Mar 2026","-","30 Mar 2026","Day Not Applicable for Calculation","29 Mar 2026","Day Not Applicable for Calculation","28 Mar 2026","Day Not Applicable for Calculation","8","11","5","9","11","10","13","-","-","-","3","1","2","2","2","2","2","2","-","-","-","2","7","7","9","-","04 May 2026 08:44:52","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10012","Stefan Konecny","CZ100122001","5","I-2","22 Apr 2026","-","-","-","-","-","-","-","-","2","21 Apr 2026","-","20 Apr 2026","-","19 Apr 2026","-","18 Apr 2026","-","17 Apr 2026","-","16 Apr 2026","-","15 Apr 2026","-","14 Apr 2026","Day Not Applicable for Calculation","13 Apr 2026","Day Not Applicable for Calculation","12 Apr 2026","Day Not Applicable for Calculation","7","5","6","6","7","8","2","-","-","-","1","1","0","1","1","1","2","0","-","-","-","1","4","","","-","04 May 2026 08:45:07","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10012","Stefan Konecny","CZ100122001","5","I-4","07 May 2026","-","-","-","-","-","-","-","-","1","06 May 2026","-","05 May 2026","-","04 May 2026","-","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","Day Not Applicable for Calculation","28 Apr 2026","Day Not Applicable for Calculation","27 Apr 2026","Day Not Applicable for Calculation","8","7","7","8","4","11","7","-","-","-","1","2","1","1","1","0","1","1","-","-","-","1","3","","","-","01 Jun 2026 00:57:35","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10013","David Stepek","CZ100132001","1","I-0","24 Mar 2026","Yes","12 Mar 2026","11 Mar 2026","11 Mar 2026","-","-","2","-","2","23 Mar 2026","-","22 Mar 2026","-","21 Mar 2026","-","20 Mar 2026","-","19 Mar 2026","-","18 Mar 2026","-","17 Mar 2026","-","16 Mar 2026","Day Not Applicable for Calculation","15 Mar 2026","Day Not Applicable for Calculation","14 Mar 2026","Day Not Applicable for Calculation","8","6","5","7","6","7","6","-","-","-","3","1","1","1","0","1","1","1","-","-","-","1","6","6","8","-","05 Apr 2026 22:41:27","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10013","David Stepek","CZ100132001","1","I-2","08 Apr 2026","-","-","-","-","-","-","-","-","2","07 Apr 2026","-","06 Apr 2026","-","05 Apr 2026","-","04 Apr 2026","-","03 Apr 2026","-","02 Apr 2026","-","01 Apr 2026","-","31 Mar 2026","Day Not Applicable for Calculation","30 Mar 2026","Day Not Applicable for Calculation","29 Mar 2026","Day Not Applicable for Calculation","5","2","3","6","5","5","5","-","-","-","2","0","0","0","0","1","1","0","-","-","-","0","4","","","-","27 May 2026 12:53:52","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10013","David Stepek","CZ100132001","1","I-4","21 Apr 2026","-","-","-","-","-","-","-","-","0","20 Apr 2026","-","19 Apr 2026","-","18 Apr 2026","-","17 Apr 2026","-","16 Apr 2026","-","15 Apr 2026","-","14 Apr 2026","-","13 Apr 2026","Day Not Applicable for Calculation","12 Apr 2026","Day Not Applicable for Calculation","11 Apr 2026","Day Not Applicable for Calculation","4","3","4","3","3","4","4","-","-","-","2","0","0","0","0","0","0","0","-","-","-","0","2","","","-","27 May 2026 12:54:41","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10013","David Stepek","CZ100132002","1","I-0","12 May 2026","Yes","21 Apr 2026","20 Apr 2026","21 Apr 2026","-","-","2","-","2","11 May 2026","-","10 May 2026","-","09 May 2026","-","08 May 2026","-","07 May 2026","-","06 May 2026","-","05 May 2026","Missing Diary","04 May 2026","Day Not Applicable for Calculation","03 May 2026","Day Not Applicable for Calculation","02 May 2026","Day Not Applicable for Calculation","2","1","1","1","1","2","-","-","-","-","0","0","0","0","0","0","0","-","-","-","-","0","2","2","4","-","28 May 2026 23:19:30","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10013","David Stepek","CZ100132002","1","I-2","26 May 2026","-","-","-","-","-","-","-","-","1","25 May 2026","-","24 May 2026","Missing Diary","23 May 2026","-","22 May 2026","-","21 May 2026","-","20 May 2026","-","19 May 2026","-","18 May 2026","Missing Diary;Day Not Applicable for Calculation","17 May 2026","Day Not Applicable for Calculation","16 May 2026","Day Not Applicable for Calculation","1","-","1","2","1","2","2","-","-","-","1","0","-","0","0","0","0","0","-","-","-","0","2","","","-","28 May 2026 23:19:51","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10013","David Stepek","CZ100132003","0","I-0","02 Jun 2026","Yes","25 May 2026","24 May 2026","24 May 2026","-","-","2","-","2","01 Jun 2026","-","31 May 2026","-","30 May 2026","-","29 May 2026","-","28 May 2026","-","27 May 2026","-","26 May 2026","-","25 May 2026","Endoscopy;Missing Diary;Day Not Applicable for Calculation","24 May 2026","Bowel Preparation for Procedure;Missing Diary;Day Not Applicable for Calculation","23 May 2026","Missing Diary;Day Not Applicable for Calculation","8","8","11","10","10","11","6","-","-","-","3","2","2","1","2","1","2","2","-","-","-","2","7","7","9","-","02 Jun 2026 08:17:40","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10016","Robert Mudr","CZ100162001","1","I-0","28 May 2026","Yes","19 May 2026","18 May 2026","19 May 2026","-","-","3","-","3","27 May 2026","-","26 May 2026","-","25 May 2026","-","24 May 2026","-","23 May 2026","-","22 May 2026","-","21 May 2026","-","20 May 2026","Day Not Applicable for Calculation","19 May 2026","Endoscopy;Bowel Preparation for Procedure;Day Not Applicable for Calculation","18 May 2026","Bowel Preparation for Procedure;Day Not Applicable for Calculation","14","15","15","15","15","15","15","-","-","-","3","2","3","3","2","2","3","3","-","-","-","3","9","9","12","-","28 May 2026 10:17:25","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adolescent","Czech Republic","DD5-CZ10020","Lucie Gonsorcikova","CZ100201001","1","Unscheduled 1","04 May 2026","Yes","20 Apr 2026","12 Apr 2026","15 Apr 2026","-","-","2","-","3","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","-","28 Apr 2026","-","27 Apr 2026","-","26 Apr 2026","Day Not Applicable for Calculation","25 Apr 2026","Day Not Applicable for Calculation","24 Apr 2026","Day Not Applicable for Calculation","5","6","6","7","6","3","3","-","-","-","2","0","0","0","0","0","0","0","-","-","-","0","5","4","7","-","-","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adolescent","Czech Republic","DD5-CZ10020","Lucie Gonsorcikova","CZ100201001","1","I-0","18 May 2026","Yes","01 May 2026","01 May 2026","01 May 2026","-","-","2","-","3","17 May 2026","-","16 May 2026","-","15 May 2026","-","14 May 2026","-","13 May 2026","-","12 May 2026","-","11 May 2026","-","10 May 2026","Day Not Applicable for Calculation","09 May 2026","Day Not Applicable for Calculation","08 May 2026","Day Not Applicable for Calculation","6","6","6","6","6","6","6","-","-","-","3","0","0","0","0","0","0","0","-","-","-","0","6","5","8","-","18 May 2026 08:38:55","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adolescent","Czech Republic","DD5-CZ10020","Lucie Gonsorcikova","CZ100201001","1","I-2","01 Jun 2026","-","-","-","-","-","-","-","-","3","31 May 2026","-","30 May 2026","Missing Diary","29 May 2026","Missing Diary","28 May 2026","Missing Diary","27 May 2026","-","26 May 2026","-","25 May 2026","-","24 May 2026","Day Not Applicable for Calculation","23 May 2026","Day Not Applicable for Calculation","22 May 2026","Day Not Applicable for Calculation","6","-","-","-","6","6","6","-","-","-","3","0","-","-","-","0","0","0","-","-","-","0","6","","","-","-","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10021","Martin Bortlik","CZ100212001","1","I-0","07 Apr 2026","Yes","16 Mar 2026","15 Mar 2026","16 Mar 2026","-","-","3","-","3","06 Apr 2026","-","05 Apr 2026","-","04 Apr 2026","-","03 Apr 2026","-","02 Apr 2026","-","01 Apr 2026","-","31 Mar 2026","-","30 Mar 2026","Day Not Applicable for Calculation","29 Mar 2026","Day Not Applicable for Calculation","28 Mar 2026","Day Not Applicable for Calculation","11","11","10","11","11","10","9","-","-","-","3","2","2","2","2","2","2","2","-","-","-","2","8","8","11","-","20 Apr 2026 09:27:58","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10021","Martin Bortlik","CZ100212001","1","I-2","20 Apr 2026","-","-","-","-","-","-","-","-","3","19 Apr 2026","-","18 Apr 2026","-","17 Apr 2026","-","16 Apr 2026","-","15 Apr 2026","-","14 Apr 2026","-","13 Apr 2026","-","12 Apr 2026","Day Not Applicable for Calculation","11 Apr 2026","Day Not Applicable for Calculation","10 Apr 2026","Day Not Applicable for Calculation","8","7","9","8","8","7","8","-","-","-","3","2","2","1","1","1","2","1","-","-","-","1","7","","","-","20 Apr 2026 09:29:01","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10021","Martin Bortlik","CZ100212001","1","I-4","05 May 2026","-","-","-","-","-","-","-","-","1","04 May 2026","-","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","-","28 Apr 2026","-","27 Apr 2026","Day Not Applicable for Calculation","26 Apr 2026","Day Not Applicable for Calculation","25 Apr 2026","Day Not Applicable for Calculation","6","6","6","6","7","7","6","-","-","-","3","0","0","1","1","1","1","1","-","-","-","1","5","","","-","-","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10021","Martin Bortlik","CZ100212001","1","I-8","02 Jun 2026","-","-","-","-","-","-","-","-","1","01 Jun 2026","-","31 May 2026","-","30 May 2026","-","29 May 2026","-","28 May 2026","-","27 May 2026","-","26 May 2026","-","25 May 2026","Day Not Applicable for Calculation","24 May 2026","Day Not Applicable for Calculation","23 May 2026","Day Not Applicable for Calculation","3","4","4","4","5","5","5","-","-","-","2","0","0","0","0","0","1","1","-","-","-","0","3","","","-","02 Jun 2026 14:44:34","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222002","1","I-0","19 Feb 2026","Yes","11 Feb 2026","10 Feb 2026","11 Feb 2026","-","-","2","-","2","18 Feb 2026","-","17 Feb 2026","-","16 Feb 2026","-","15 Feb 2026","-","14 Feb 2026","-","13 Feb 2026","-","12 Feb 2026","-","11 Feb 2026","Endoscopy;Bowel Preparation for Procedure;Day Not Applicable for Calculation","10 Feb 2026","Bowel Preparation for Procedure;Day Not Applicable for Calculation","09 Feb 2026","Day Not Applicable for Calculation","3","2","2","3","4","3","2","-","-","-","1","1","1","0","0","0","2","2","-","-","-","1","4","4","6","-","19 Feb 2026 15:41:35","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222003","1","I-0","09 Mar 2026","Yes","11 Feb 2026","10 Feb 2026","11 Feb 2026","-","-","2","-","2","08 Mar 2026","-","07 Mar 2026","-","06 Mar 2026","-","05 Mar 2026","-","04 Mar 2026","-","03 Mar 2026","Missing Diary","02 Mar 2026","Missing Diary","01 Mar 2026","Missing Diary;Day Not Applicable for Calculation","28 Feb 2026","Missing Diary;Day Not Applicable for Calculation","27 Feb 2026","Missing Diary;Day Not Applicable for Calculation","7","7","6","6","7","-","-","-","-","-","3","2","2","2","2","2","-","-","-","-","-","2","7","7","9","-","22 Mar 2026 18:34:58","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222003","1","I-2","27 Mar 2026","-","-","-","-","-","-","-","-","2","26 Mar 2026","-","25 Mar 2026","-","24 Mar 2026","-","23 Mar 2026","-","22 Mar 2026","-","21 Mar 2026","-","20 Mar 2026","-","19 Mar 2026","Day Not Applicable for Calculation","18 Mar 2026","Day Not Applicable for Calculation","17 Mar 2026","Day Not Applicable for Calculation","7","3","3","3","5","5","5","-","-","-","2","0","0","1","1","1","1","2","-","-","-","1","5","","","-","08 Apr 2026 07:36:56","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222003","1","I-4","08 Apr 2026","-","-","-","-","-","-","-","-","2","07 Apr 2026","-","06 Apr 2026","-","05 Apr 2026","-","04 Apr 2026","-","03 Apr 2026","-","02 Apr 2026","-","01 Apr 2026","-","31 Mar 2026","Day Not Applicable for Calculation","30 Mar 2026","Day Not Applicable for Calculation","29 Mar 2026","Day Not Applicable for Calculation","3","3","4","4","5","4","3","-","-","-","2","1","0","0","2","1","1","2","-","-","-","1","5","","","-","08 Apr 2026 07:59:35","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222003","1","I-8","04 May 2026","-","-","-","-","-","-","-","-","2","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","-","28 Apr 2026","-","27 Apr 2026","-","26 Apr 2026","Day Not Applicable for Calculation","25 Apr 2026","Day Not Applicable for Calculation","24 Apr 2026","Missing Diary;Day Not Applicable for Calculation","3","5","3","3","3","2","3","-","-","-","1","0","0","0","0","0","0","0","-","-","-","0","3","","","-","04 May 2026 08:08:40","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222003","1","I-12","01 Jun 2026","Yes","20 May 2026","19 May 2026","20 May 2026","-","-","3","-","2","31 May 2026","-","30 May 2026","-","29 May 2026","-","28 May 2026","-","27 May 2026","-","26 May 2026","-","25 May 2026","-","24 May 2026","Day Not Applicable for Calculation","23 May 2026","Day Not Applicable for Calculation","22 May 2026","Day Not Applicable for Calculation","4","4","6","3","3","3","3","-","-","-","2","1","1","2","1","1","1","2","-","-","-","1","5","6","8","-","01 Jun 2026 14:25:57","Clinical Nonresponder","No","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222005","1","I-0","09 Apr 2026","Yes","08 Apr 2026","31 Mar 2026","01 Apr 2026","-","-","2","-","2","08 Apr 2026","Endoscopy","07 Apr 2026","-","06 Apr 2026","-","05 Apr 2026","-","04 Apr 2026","-","03 Apr 2026","-","02 Apr 2026","-","01 Apr 2026","Bowel Preparation for Procedure;Day Not Applicable for Calculation","31 Mar 2026","Bowel Preparation for Procedure;Day Not Applicable for Calculation","30 Mar 2026","-","-","3","3","4","3","4","3","-","-","3","1","-","2","2","2","2","2","2","-","-","2","2","5","5","7","-","29 May 2026 11:07:08","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222005","1","I-2","22 Apr 2026","-","-","-","-","-","-","-","-","2","21 Apr 2026","-","20 Apr 2026","-","19 Apr 2026","-","18 Apr 2026","-","17 Apr 2026","-","16 Apr 2026","-","15 Apr 2026","-","14 Apr 2026","Day Not Applicable for Calculation","13 Apr 2026","Day Not Applicable for Calculation","12 Apr 2026","Day Not Applicable for Calculation","3","3","5","3","2","3","2","-","-","-","1","1","2","2","1","1","1","2","-","-","-","1","4","","","-","05 May 2026 15:00:39","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222005","1","I-4","05 May 2026","-","-","-","-","-","-","-","-","2","04 May 2026","-","03 May 2026","-","02 May 2026","-","01 May 2026","-","30 Apr 2026","-","29 Apr 2026","-","28 Apr 2026","-","27 Apr 2026","Day Not Applicable for Calculation","26 Apr 2026","Day Not Applicable for Calculation","25 Apr 2026","Day Not Applicable for Calculation","4","2","2","2","2","2","2","-","-","-","1","1","1","1","1","2","1","1","-","-","-","1","4","","","-","05 May 2026 07:30:02","N/A","N/A","N/A","N/A","N/A","N/A"
+"77242113UCO3001","Adult","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222005","1","I-8","02 Jun 2026","-","-","-","-","-","-","-","-","2","01 Jun 2026","-","31 May 2026","-","30 May 2026","-","29 May 2026","-","28 May 2026","-","27 May 2026","-","26 May 2026","-","25 May 2026","Day Not Applicable for Calculation","24 May 2026","Day Not Applicable for Calculation","23 May 2026","Day Not Applicable for Calculation","2","2","2","2","2","4","10","-","-","-","1","2","1","2","1","2","2","2","-","-","-","2","5","","","-","02 Jun 2026 08:19:16","N/A","N/A","N/A","N/A","N/A","N/A"
@@ -0,0 +1,178 @@
+"Protocol","Country","Site","PI Name","Subject ID","Age at Informed Consent","Baseline Stool Count","Confirm Baseline Stool Count","Data Correction ID","Creation Date UTC","Status","Description","Date of Last Action UTC","Total Open Period","Total Open Time (Days)","Current Status Time (Days)","Type","Next Action Required","Category","Query History","Reason for Change","Resolution"
+"77242113UCO3001","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012001","48","1","","SW00703544","13-May-2026","Submitted","Please change answer to clinical remision from no to YES (week 12).  Entry erros ","20-May-2026","8-14 Days","14","9","Query Active  ","Site","New","(1) 20 May 2026 msullivan (Clario): Please confirm your request
+
+Dear Site. Thank you for submitting this Data Clarification Request. 
+
+For us to process your request, please let us know the name of the form (with date) with question. 
+
+Thank you.  ERT/CLARIO   Data Coordination Team
+
+","Entry Error",""
+"77242113UCO3001","Czech Republic","DD5-CZ10001","Matej Falc","CZ100012002","79","1","","SW00696586","09-Apr-2026","ReadyForQC","Please correct date of endoscopy to date: 18 March 2026  (from 25 March 2026)","15-Apr-2026","Over 28 Days","36","32","Query Active  ","Site","Site-Entered Data","","Entry Error","CLARIO RESOLUTION:
+
+Part 1: In Mayo Subscore (1) dated 08 Apr 2026 for I-0 visit, CLARIO to make the following changes:
+- What was the date of endoscopy? (ENDODT1D): from 25 Mar 2026 to 18 Mar 2026
+- Data Flag (QSDFLG1B): from blank to check
+"
+"77242113UCO3001","Czech Republic","DD5-CZ10006","Michal Konecny","CZ100062001","19","1","","SW00704536","19-May-2026","ReadyForQC","Please change the endoscopy date to 19-FEB-2026. 06-MAR-2026 was entered in error. ","26-May-2026","8-14 Days","10","5","Query Active  ","Site","Site-Entered Data","","Entry Error","CLARIO RESOLUTION:
+
+Part 1: In Mayo Subscore (1) dated 20 Mar 2026 for I-0 visit, CLARIO to make the following changes:
+-What was the date of endoscopy? (ENDODT1D): from 06 Mar 2026 to 19 Feb 2026
+- Data Flag (QSDFLG1B): from blank to check
+"
+"77242113UCO3001","Czech Republic","DD5-CZ10012","Stefan Konecny","CZ100122001","22","5","Yes, I confirm this is the correct stool count.","SW00706684","01-Jun-2026","Submitted","The right endoscopy date is 23MAR2026, please change the date","01-Jun-2026","2-3 Days","2","2","","Clario DM","New","","Entry Error",""
+"77242113UCO3001","Czech Republic","DD5-CZ10013","David Stepek","CZ100132002","29","1","","SW00705646","26-May-2026","Submitted","Correct visit date  I-O is 12-May-2026.  All questionaries were filled on paper and entered in tablet later.
+Log-in issue. ","01-Jun-2026","4-7 Days","6","2","","Clario DM","New","(1) 01 Jun 2026 msullivan (Clario): Please confirm your request
+
+Dear Site. Thank you for submitting this Data Clarification. 
+
+     Please provide the timestamps for each of the assessments if you used paper forms and transcribed into the device. 
+     If unknown, ERT will use a dummy timestamp. 
+
+Thank you. ERT/CLARIO Data Coordination Team.  
+
+(2) 01 Jun 2026 dstepek@vnbrno.cz (Site User): time is unknown
+
+","Changed Information",""
+"77242113UCO3001","Czech Republic","DD5-CZ10013","David Stepek","CZ100132003","49","0","","SW00706581","29-May-2026","Submitted","baseline stool count reported by subject is 0, please change to 1 as per CRA request  (subject has 1 stool in 2-3 days if in remission)","29-May-2026","2-3 Days","2","2","","Clario DM","New","","Changed Information",""
+"77242113UCO3001","Czech Republic","DD5-CZ10016","Robert Mudr","CZ100162001","48","1","","SW00705916","27-May-2026","ReadyForEntry","As per ATS investigation (ATS26040111), please remove the below form which was entered as a duplicate    
+
+-  MAYO Diary (5) 24 Apr 2026","02-Jun-2026","4-7 Days","5","1","","Clario DM","Technical Revision","","Technical Revision - Other","CLARIO RESOLUTION:
+
+Part 1: CLARIO to delete MAYO Diary (5) dated 24 Apr 2026
+"
+"77242113UCO3001","Czech Republic","DD5-CZ10020","Lucie Gonsorcikova","CZ100201001","15","1","","SW00701729","06-May-2026","Completed","Dears, please delete data from visit I-0 (reported as 4th of May 2026) as this visit had to be postponed - see the previous DCR of this patient and change data request that was corrected. Patient has left the site before it was resolved and and new date of I-0 was planned. Patient continues to fill in his diary and patient is coming to I=0 visit within allowed window. We need the system and tablet to be ready to run new Mayo Score Report with updated and recent data (e.g. reflect new I-0 visit date, new eligible days -1 to -7.). 
+thank you, Jiri Skopek","19-May-2026","8-14 Days","8","","","","Visit Data","(1) 11 May 2026 msullivan (Clario): Please confirm your request
+
+Dear Site. Thank you for submitting this Data Clarification. 
+
+Please note that the delete forms are allowed if the reason is one of the following.
+If not, forms will move to unscheduled visit.
+
+Data collected by the wrong patient.
+Data collected by someone other than the patient.
+Data collected prior to informed consent, or after withdrawal from the study.
+Duplicate data erroneously entered at an Unscheduled visit via paper transcription.
+Data collected that is not expected per protocol.
+
+Also, I-0 visit is still ongoing. Please close the visit.
+Once the visit was closed, we will process accoridngly.
+
+Thank you.  ERT/CLARIO   Data Coordination Team
+
+(2) 11 May 2026 jskopek (Site User): Dears, 
+I do not see any option that is adequate -from the list. Data are not needed to be deleted fully, they reflect the situation at May4th. Please mark it as unscheduled visit - as exactly that is the case. We need the system to be ready for I-0 visit planned for next week. 
+I will close the visit tomorrow - do you mean in tablet/ipad? 
+Thank you very much for your help! Jiri  
+
+(3) 12 May 2026 venkata.ramana (Clario): Thank you for your response. 
+Please note that the visit I-0 was still ongoing but not closed yet.
+So please close the visit.
+Kind Regards, Clario Data Coordination Team.
+
+(4) 12 May 2026 jskopek (Site User): If I try to close the I-O visit in TABLET, it asks me if patient fulfils eligibility criteria to proceed to next visit based on these old data – if I answer NO, it asks me to DEACTIVATE patient. I do not want to DEACTIVATE patient – can you help WHERE and HOW to close this visit for you to change it to UNSCHEDULED and not to de-activate patient?
+Thank you Jiri
+
+
+","Other-delete visit I-0","CLARIO RESOLUTION:
+
+Part 1: In the following forms dated 04 May 2026, CLARIO to make the following changes:
+-Event ID: from I-0 to Unscheduled Visit 1
+-Event At Entry: from I-0 to Unscheduled Visit 1
+
+Visit Start (49)
+ePRO Availability (1)
+Mayo Subscore (1)
+PGA (1)
+
+Part 2: CLARIO to delete the following forms dated 04 May 2026 for I-0 visit.
+
+C-SSRS Since Last Visit (1)
+C-SSRS Since Last Visit Findings Report (1)
+
+Part 3: CLARIO to manually enter Visit End form for Unscheduled visit 1 with the following information:
+-Protocol: 77242113UCO3001
+-Report Date: 04 May 2026
+-Report Start Date and Time: 04 May 2026 23:59:59
+-Event ID: Unscheduled Visit 1
+-Event End Date: 04 May 2026 23:59:59
+-Visit Status: Incomplete
+-Phase At Entry: Screening
+-Phase At Entry Timestamp: 13 Apr 2026 12:32:20
+-Event At Entry: Unscheduled visit 1
+-Event Start Date: 04 May 2026 23:59:59
+-Event Time Zone Offset in Milliseconds: 7200000
+-Session Repeat Number (SESREP1N): 0
+-Session Instance Id (SESINST1S): 3f1214f0-4788-11f1-a0cf-bb403212adce
+"
+"77242113UCO3001","Czech Republic","DD5-CZ10020","Lucie Gonsorcikova","CZ100201001","15","1","","SW00701226","04-May-2026","Completed","Dears, we would like ask you to change the information I read on assignment form given by patient on April 13, 2026 (Visit 1), Baseline Stool Count (PT.Custom4)  as 3 that should be reported as 1. 
+Patient has entered wrong number as he did not understood it should be number of stools when illness is in remission or absent. He is a child and did not reflected this question correctly. Therefore, please change Baseline Stool Count = 1.
+Thank you, Jiri Skopek  ","04-May-2026","1 Day","1","","","","Demographic","","Changed Information","(Clario instructions)
+
+1. Please make below changes in the assignment form:
+
+Baseline Stool Count (PT. Custom4): 03 to 01."
+"77242113UCO3001","Czech Republic","DD5-CZ10021","Martin Bortlik","CZ100212001","61","1","","SW00699492","23-Apr-2026","ReadyForQC","Please correct the date of endoscopy done during screening visit of patient CZ100212001 to correct date 16-MAR-2026.","29-Apr-2026","22-28 Days","27","23","Query Active  ","Site","Site-Entered Data","","Changed Information","CLARIO RESOLUTION:
+
+Part 1: In the Mayo Subscore (1) dated 07 Apr 2026 for I-0 visit, CLARIO to make the following changes:
+-What was the date of endoscopy? (ENDODT1D): from 24 Mar 2026 to 16 Mar 2026
+- Data Flag (QSDFLG1B): from blank to check
+"
+"77242113UCO3001","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222003","39","1","","SW00703322","12-May-2026","Completed","As per ATS investigation (ATS26040111), please remove the below form that's been entered as a duplicate    
+
+- MAYO Diary (16) - 18 Mar 2026
+","20-May-2026","4-7 Days","6","","","","Technical Revision","","Technical Revision - Other","CLARIO RESOLUTION:
+
+Part 1: CLARIO to delete the MAYO Diary (16) dated 18 Mar 2026.
+"
+"77242113UCO3001","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222003","39","1","","SW00689748","09-Mar-2026","Completed","Dear all,
+
+Patient CZ 100222003 was randomized on 9 Mar 2026. Kindly correct the colonoscopy date to 11 Feb 2025.
+
+The date was initially entered as 21 Feb 2025 because the earlier date could not be entered in the system. The patient was rescreened.","02-Apr-2026","15-21 Days","17","","","","Site-Entered Data","(1) 13 Mar 2026 msullivan (Clario): Please confirm your request
+
+Dear Site. Thank you for submitting this Data Clarification. 
+
+Could you please conform that if you are requesting following?
+
+Mayo Subscore (1) dated 09 Mar 2026 for I-0 visit
+-What was the date of endoscopy? (ENDODT1D): from 23 Feb 2026 to 11 Feb 2025
+
+Could you please confirm the year? This subject was assigned on 02 Mar 2026, you are providing that correct date is 11 Feb 2025 which a year ago.
+If you are not requesting above, please provide us the name of the form with question. 
+
+Thank you.  ERT/CLARIO   Data Coordination Team
+
+
+(2) 13 Mar 2026 katerina.havlikova@clinoxus.com (Site User): confirm date of colonoscopy 11Feb2026
+
+(3) 21 Mar 2026 msullivan (Clario): Dear Site,
+
+The requested changes to the Mayo data have been updated. Please navigate to the Mayo Score Report and resubmit the form for visit to log the updated Mayo Score form. Once done, please respond to this query confirming that the Mayo Score has been resubmitted.
+
+Thank you.  ERT/CLARIO   Data Coordination Team
+
+(4) 24 Mar 2026 jana.pomahacova@clinoxus.com (Site User): Thank you and sent
+
+","New Information","CLARIO RESOLUTION:
+
+Part 1: In the Mayo Subscore (1) dated 09 Mar 2026 for I-0 visit, CLARIO to make the following changes:
+-What was the date of endoscopy? (ENDODT1D): from 23 Feb 2026 to 11 Feb 2025
+-Data Flag (QSDFLG1B): from blank to check"
+"77242113UCO3001","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222005","33","1","","SW00705372","22-May-2026","Submitted","Dear all, please change Colonoscopz date from 8April2026 to date 01Apr2026 Thank you in advance","02-Jun-2026","4-7 Days","7","1","","Clario DM","New","(1) 29 May 2026 msullivan (Clario): Please confirm your request
+
+Dear Site. Thank you for submitting this Data Clarification. 
+
+Please provide us the name of the form for this request.
+
+Thank you.  ERT/CLARIO   Data Coordination Team
+
+(2) 02 Jun 2026 katerina.havlikova@clinoxus.com (Site User): Dear all, please change Colonoscopy for Week I-12 date from 8April2026 to date 01Apr2026 Thank you in advance
+
+","Changed Information",""
+"77242113UCO3001","Czech Republic","DD5-CZ10022","Petr Hrabak","CZ100222005","33","1","","SW00702538","08-May-2026","Completed","This TRR is to document the correction to the Mayo Subscore (1) form, where the following variables were populated with NULL values, due to a known core defect:
+Event At Entry, Event Start Date, Event Time Zone Offset in Milliseconds.","12-May-2026","2-3 Days","2","","","","Technical Revision","","Technical Revision - Other","Please make the below changes in Mayo Subscore (1) dated 22 Apr 2026:
+
+-Event At Entry: I-0
+-Event Start Date: 09 Apr 2026 08:09:19
+-Event Time Zone Offset in Milliseconds: 7200000"
@@ -0,0 +1,6 @@
+"Protocol","Country","Site ID","PI_NAME","Subject Number","Age","Data Correction ID","Creation Date UTC","Status","Date of Last Action UTC","Total Open Period","Total Open Time (Days)","Current Status Time (Days)","Type","Next Action Required","Category","Query History","Reason for Change"
+"77242113UCO3001_ANALYSIS","Czech Republic The","CZ10001","Falc, Matej","CZ100012001","48 Years","16923867","14-May-2026","Escalated","26-May-2026","8-14 Days","13","5","QUERY","Site","Patient","(3) 15 May 2026 Clario:  You can upload scans of your paper ECGs using the Site Upload Tool. ---- Instructions can be found in the ""Reference Materials"" tab of the study portal. Please contact Customer Care for assistance if needed!","Data Checks"
+"77242113UCO3001_ANALYSIS","Czech Republic The","CZ10001","Falc, Matej","CZ100012001","48 Years","16567067","22-Jan-2026","Resolved","28-Jan-2026","4-7 Days","4","","QUERY","","Patient","MD Falc","Data Checks"
+"77242113UCO3001_ANALYSIS","Czech Republic The","CZ10009","Pumprla, Jiri","CZ100092001","49 Years","16776685","31-Mar-2026","Resolved","13-May-2026","Over 28 Days","29","","QUERY","","Patient","(2) 13 May 2026 Clario:  I confirm, that only ONE ECG was collected by mistake.","Data Checks"
+"77242113UCO3001_ANALYSIS","Czech Republic The","CZ10021","Bortlik, Martin","CZ100212001","61 Years","16717619","11-Mar-2026","Resolved","28-Apr-2026","Over 28 Days","32","","QUERY","","Patient","(2) 28 Apr 2026 Clario:  I confirmed that due to technical problems, the ECG was done only twice","Data Checks"
+"77242113UCO3001_ANALYSIS","Czech Republic The","CZ10022","Hrabak, Petr","CZ100222003","39 Years","16945114","21-May-2026","Escalated","27-May-2026","8-14 Days","8","4","DCR","Site","Patient","(6) 27 May 2026 Botdorf, Paul-Daniel:  We still do not have any ECGs for any patients at your site with a collection Date/Time of  20-May-2026 at  14:19:34, 14:20:32, 14:21:15. Please review the records in the portal and let us know if anything more is needed. If you see these ECGs, please double check that this is actually the study they are currently in(77242113UCO3001_ANALYSIS).Thank you",""
@@ -0,0 +1,138 @@
+# Report generator: feasibility/investigators -> Excel
+# Projekt: 77242113UCO2001
+# Ulozeni: u:\Dropbox\!!!Days\Downloads Z230\\
+
+import os
+import sys
+from datetime import datetime
+from pymongo import MongoClient
+import openpyxl
+from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
+from openpyxl.utils import get_column_letter
+
+# --- MongoDB connection ---
+# The URI can be overridden with the MONGO_URI environment variable.
+MONGO_URI = os.environ.get("MONGO_URI", "mongodb://192.168.1.76:27017")
+client = MongoClient(MONGO_URI)
+db = client["feasibility"]
+col = db["investigators"]
+
+# --- Load data ---
+# The whole collection is materialised in memory (no filter).
+docs = list(col.find({}))
+print(f"Načteno {len(docs)} záznamů.")
+
+# --- Target folder ---
+OUTPUT_DIR = r"u:\Dropbox\!!!Days\Downloads Z230"
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# File name carries a minute-resolution timestamp.
+datum = datetime.now().strftime("%Y%m%d_%H%M")
+filename = f"77242113UCO2001_investigators_{datum}.xlsx"
+filepath = os.path.join(OUTPUT_DIR, filename)
+
+# --- Column definitions ---
+# (document key, header label) pairs; these columns always come first, in this order.
+FIXED_COLS = [
+    ("prijmeni",        "Příjmení"),
+    ("jmeno",           "Jméno"),
+    ("email",           "Email"),
+    ("STATUS",          "STATUS"),
+    ("kriticka_poznamka", "Kritická poznámka"),
+    ("zeme",            "Země"),
+    ("pracoviste",      "Pracoviště"),
+    ("internet_summary","Internet summary"),
+]
+
+# Keys that are skipped (complex nested objects)
+SKIP_KEYS = {"_id", "excel", "sites_illuminator", "maf", "zdroje", "studie", "Viper_Performance", "Viper_Contacts"}
+
+# Every other key found in any document becomes an extra column,
+# sorted alphabetically and labelled with the key itself.
+fixed_keys = {c[0] for c in FIXED_COLS}
+extra_keys = set()
+for doc in docs:
+    for k in doc.keys():
+        if k not in fixed_keys and k not in SKIP_KEYS:
+            extra_keys.add(k)
+extra_keys = sorted(extra_keys)
+
+ALL_COLS = FIXED_COLS + [(k, k) for k in extra_keys]
+
+# --- Barvy podle STATUS ---
+def status_color(status):
+    if not status:
+        return None
+    s = status.lower()
+    if "nezájem" in s or "nezajem" in s or "nechceme" in s:
+        return "FFFFC7CE"   # červená
+    if "zájem" in s or "zajem" in s:
+        return "FFC6EFCE"   # zelená
+    if "nedoručen" in s or "nedorucen" in s:
+        return "FFFFEB9C"   # žlutá
+    if "email odeslán" in s or "email odeslan" in s:
+        return "FFDCE6F1"   # modrá
+    return None
+
+# --- Vytvoření workbooku ---
+wb = openpyxl.Workbook()
+ws = wb.active
+ws.title = "Investigators"
+
+# Styly
+header_font = Font(bold=True, color="FFFFFFFF")
+header_fill = PatternFill("solid", fgColor="FF1F4E79")
+header_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
+cell_align = Alignment(vertical="top", wrap_text=True)
+thin = Side(style="thin", color="FFB0B0B0")
+border = Border(left=thin, right=thin, top=thin, bottom=thin)
+
+# Záhlaví
+for col_idx, (key, label) in enumerate(ALL_COLS, 1):
+    cell = ws.cell(row=1, column=col_idx, value=label)
+    cell.font = header_font
+    cell.fill = header_fill
+    cell.alignment = header_align
+    cell.border = border
+
+ws.row_dimensions[1].height = 30
+
+# Data
+for row_idx, doc in enumerate(docs, 2):
+    status_val = str(doc.get("STATUS", "") or "")
+    bg = status_color(status_val)
+
+    for col_idx, (key, label) in enumerate(ALL_COLS, 1):
+        val = doc.get(key, "")
+        # Převod na string pokud je list nebo dict
+        if isinstance(val, list):
+            val = ", ".join(str(v) for v in val)
+        elif isinstance(val, dict):
+            val = str(val)
+        elif val is None:
+            val = ""
+        else:
+            val = str(val)
+
+        cell = ws.cell(row=row_idx, column=col_idx, value=val)
+        cell.alignment = cell_align
+        cell.border = border
+
+        if bg:
+            cell.fill = PatternFill("solid", fgColor=bg)
+
+# Šířky sloupců
+col_widths = {
+    "prijmeni": 18, "jmeno": 15, "email": 35,
+    "STATUS": 45, "kriticka_poznamka": 60,
+    "zeme": 12, "pracoviste": 35, "internet_summary": 60,
+}
+for col_idx, (key, label) in enumerate(ALL_COLS, 1):
+    w = col_widths.get(key, 20)
+    ws.column_dimensions[get_column_letter(col_idx)].width = w
+
+# Zmrazení záhlaví
+ws.freeze_panes = "A2"
+
+# Autofilter
+ws.auto_filter.ref = ws.dimensions
+
+# Uložení
+wb.save(filepath)
+print(f"Ulozeno: {filepath}")
@@ -0,0 +1,56 @@
+# Test: najit posledni odeslany email na klucho@gastroenterolog.com,
+# preposlat na vladimir.buzalka@buzalka.cz, predmet "Ahoj", prvni radek "Ahoj"
+
+from playwright.sync_api import sync_playwright
+
+with sync_playwright() as p:
+    browser = p.chromium.launch(headless=False)
+    context = browser.new_context(storage_state="outlook_auth.json")
+    page = context.new_page()
+
+    # 1. Otevrit Outlook
+    page.goto("https://outlook.cloud.microsoft/mail/")
+    page.wait_for_selector('[placeholder="Search or ask Copilot"]')
+
+    # 2. Prejit do Sent Items
+    page.click('text=Sent Items')
+    page.wait_for_url("**/sentitems")
+
+    # 3. Vyhledat emaily na klucho@gastroenterolog.com
+    search = page.locator('[placeholder="Search or ask Copilot"]')
+    search.click()
+    search.fill("to:klucho@gastroenterolog.com")
+    search.press("Enter")
+    page.wait_for_selector("text=All results")
+    page.wait_for_timeout(1000)
+
+    # 4. Kliknout na prvni (nejnovejsi) email
+    page.locator('[role="option"]').first.click()
+    page.wait_for_selector('button:has-text("Forward"), [aria-label="Forward"]')
+
+    # 5. Kliknout na Forward
+    page.locator('button[aria-label="Forward"]').first.click()
+    page.wait_for_selector('[aria-label="To"]', timeout=5000)
+
+    # 6. Vyplnit prijemce
+    page.locator('[aria-label="To"]').fill("vladimir.buzalka@buzalka.cz")
+    page.keyboard.press("Tab")
+
+    # 7. Zmenit predmet na "Ahoj"
+    subject = page.locator('[aria-label="Subject"]')
+    subject.triple_click()
+    subject.type("Ahoj")
+
+    # 8. Napsat "Ahoj" na prvni radek tela emailu
+    body = page.locator('[aria-label="Message body"]')
+    body.click()
+    page.keyboard.press("Control+Home")
+    page.keyboard.type("Ahoj")
+    page.keyboard.press("Enter")
+
+    # 9. Odeslat
+    page.click('button[aria-label="Send"]')
+    page.wait_for_timeout(2000)
+
+    print("Email uspesne odeslan!")
+    browser.close()
@@ -0,0 +1,39 @@
+"""
+Jednorázový skript — vytvoří/aktualizuje tabulky v MySQL.
+Spusť jednou: python create_iwrs_tables.py
+"""
+import os
+import mysql.connector
+import db_config
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+SQL_FILE = os.path.join(BASE_DIR, "create_iwrs_tables.sql")
+
+conn = mysql.connector.connect(
+    host=db_config.DB_HOST,
+    port=db_config.DB_PORT,
+    user=db_config.DB_USER,
+    password=db_config.DB_PASSWORD,
+    database=db_config.DB_NAME,
+)
+cursor = conn.cursor()
+
+sql = open(SQL_FILE, encoding="utf-8").read()
+# Odstraň komentáře a rozdělíme na příkazy
+stmts = [s.strip() for s in sql.split(";")]
+for stmt in stmts:
+    # Odstraň řádkové komentáře
+    lines = [l for l in stmt.splitlines() if not l.strip().startswith("--")]
+    stmt = "\n".join(lines).strip()
+    if not stmt or stmt.upper().startswith("USE"):
+        continue
+    try:
+        cursor.execute(stmt)
+        print(f"OK: {stmt[:80]}")
+    except Exception as e:
+        print(f"SKIP: {e}")
+
+conn.commit()
+cursor.close()
+conn.close()
+print("\nHotovo.")
@@ -0,0 +1,128 @@
+-- IWRS tabulky pro databázi studie
+-- Spustit jednou: mysql -h 192.168.1.76 -u root -p studie < create_iwrs_tables.sql
+
+USE studie;
+
+-- ── Import log ───────────────────────────────────────────────────────────────
+-- One row per import. The subject summary and subject visits tables in this
+-- file reference it through import_id.
+CREATE TABLE IF NOT EXISTS iwrs_import (
+    import_id   INT AUTO_INCREMENT PRIMARY KEY,
+    study       VARCHAR(20)  NOT NULL,
+    -- filled in by the server when the row is inserted
+    imported_at DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    source_file VARCHAR(500) NOT NULL,
+    INDEX idx_study (study)
+);
+
+-- ── UCO3001 subject summary ───────────────────────────────────────────────────
+-- One row per subject per import (import_id points to iwrs_import).
+-- Presumably mirrors the columns of the UCO3001 Subject Summary Report - TODO confirm.
+CREATE TABLE IF NOT EXISTS iwrs_uco3001_subject_summary (
+    id                                  INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                           INT          NOT NULL,
+    subject                             VARCHAR(20)  NOT NULL,
+    prior_subject_identifier            VARCHAR(20),
+    site                                VARCHAR(50),
+    investigator                        VARCHAR(100),
+    location                            VARCHAR(50),
+    cohort_per_irt                      VARCHAR(100),
+    informed_consent_date               DATE,
+    adolescent_assent_date              DATE,
+    age                                 SMALLINT,
+    weight                              DECIMAL(5,1),
+    rescreened_subject                  VARCHAR(10),
+    adt_ir                              VARCHAR(10),
+    three_or_more_advanced_therapies    VARCHAR(10),
+    only_oral_5asa_compounds            VARCHAR(10),
+    ustekinumab                         VARCHAR(10),
+    isolated_proctitis                  VARCHAR(10),
+    clinical_responder_status_i12_m0    VARCHAR(100),
+    irt_subject_status                  VARCHAR(50),
+    i0_rand_date_local                  DATE,
+    last_irt_transaction                VARCHAR(100),
+    last_irt_transaction_date_local     DATE,
+    -- NOTE(review): stored as DATE, so any time-of-day part is not kept
+    last_irt_transaction_date_utc       DATE,
+    next_irt_transaction                VARCHAR(100),
+    next_irt_transaction_date_local     DATE,
+    most_recent_med_assignment_date     DATE,
+    days_since_last_med_assignment      SMALLINT,
+    patient_forecast_status             VARCHAR(50),
+    patient_forecast_status_changed_date DATE,
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_subject (subject)
+);
+
+-- ── MDD3003 subject summary ───────────────────────────────────────────────────
+-- One row per subject per import (import_id points to iwrs_import).
+-- Presumably mirrors the columns of the MDD3003 Subject Summary Report - TODO confirm.
+CREATE TABLE IF NOT EXISTS iwrs_mdd3003_subject_summary (
+    id                                      INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                               INT          NOT NULL,
+    subject                                 VARCHAR(20)  NOT NULL,
+    prior_subject_identifier                VARCHAR(20),
+    site                                    VARCHAR(50),
+    investigator                            VARCHAR(100),
+    location                                VARCHAR(50),
+    cohort_per_irt                          VARCHAR(50),
+    madrs_criteria_integrated               VARCHAR(50),
+    informed_consent_date                   DATE,
+    age                                     SMALLINT,
+    madrs_criteria_v15                      VARCHAR(10),
+    madrs_criteria_v16                      VARCHAR(10),
+    madrs_criteria_v17                      VARCHAR(10),
+    stratification_country                  VARCHAR(10),
+    age_group                               VARCHAR(20),
+    stable_remitters                        VARCHAR(50),
+    irt_subject_status                      VARCHAR(100),
+    last_irt_transaction                    VARCHAR(100),
+    last_irt_transaction_date_local         DATE,
+    -- NOTE(review): stored as DATE, so any time-of-day part is not kept
+    last_irt_transaction_date_utc           DATE,
+    next_irt_transaction                    VARCHAR(100),
+    next_irt_transaction_date_local         DATE,
+    -- milestone dates, NULL when the milestone has no value
+    date_screened                           DATE,
+    date_screen_failed                      DATE,
+    date_randomized_part1                   DATE,
+    date_early_withdraw_randomized_part1    DATE,
+    date_open_label_induction               DATE,
+    date_early_withdraw_open_label_induction DATE,
+    date_randomized_part2                   DATE,
+    date_early_withdraw_randomized_part2    DATE,
+    date_completed                          DATE,
+    date_unblinded                          DATE,
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_subject (subject)
+);
+
+-- ── Notifications ────────────────────────────────────────────────────────────
+-- One row per notification, with its extracted text and the PDF itself.
+-- Unlike the other IWRS tables this one has no import_id link to iwrs_import.
+CREATE TABLE IF NOT EXISTS iwrs_notifications (
+    id           INT AUTO_INCREMENT PRIMARY KEY,
+    study        VARCHAR(20)   NOT NULL,
+    subject      VARCHAR(20)   NOT NULL,
+    -- presumably the notification pk reported by the IRT API - TODO confirm
+    pk           INT           NOT NULL,
+    title        VARCHAR(100),
+    label        VARCHAR(500),
+    event        VARCHAR(50),
+    actual_date  DATE,
+    text         TEXT,
+    pdf          MEDIUMBLOB,
+    source_file  VARCHAR(500),
+    imported_at  DATETIME      NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    -- NOTE(review): pk is unique across ALL studies here. If pk values are only
+    -- unique within one study, this should be UNIQUE (study, pk) - confirm.
+    UNIQUE KEY uq_pk (pk),
+    INDEX idx_study_subject (study, subject)
+);
+
+-- ── Subject visits / transactions (both studies) ─────────────────────────────
+-- Shared by both studies, which the study column tells apart.
+-- Each row belongs to one import (import_id points to iwrs_import).
+CREATE TABLE IF NOT EXISTS iwrs_subject_visits (
+    id                          INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                   INT          NOT NULL,
+    study                       VARCHAR(20)  NOT NULL,
+    subject                     VARCHAR(20)  NOT NULL,
+    -- only the two listed values are accepted
+    visit_type                  ENUM('Past','Upcoming') NOT NULL,
+    scheduled_date              DATE,
+    -- kept as text, not as a number
+    window_days                 VARCHAR(20),
+    actual_date                 DATE,
+    irt_transaction_no          SMALLINT,
+    irt_transaction_description VARCHAR(200),
+    medication_assignment       VARCHAR(200),
+    quantity_assigned           SMALLINT,
+    medication_id               VARCHAR(20),
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_study_subject (study, subject)
+);
@@ -0,0 +1,201 @@
+from playwright.sync_api import sync_playwright
+import os
+import glob
+import datetime
+import requests
+
+import pandas as pd
+
+# ── CONFIG ──────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+EMAIL    = "vbuzalka@its.jnj.com"
+PASSWORD = "Vlado123++-+"
+
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+DETAILS_DIR  = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+# ────────────────────────────────────────────────────────────────────────────
+
+
+def get_subjects(study):
+    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report.xlsx")
+    files = sorted(
+        [f for f in glob.glob(pattern) if not os.path.basename(f).startswith("~$")],
+        key=os.path.getmtime,
+        reverse=True,
+    )
+    if not files:
+        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    if not os.path.basename(files[0]).startswith(today):
+        raise FileNotFoundError(
+            f"Dnešní Subject Summary Report pro {study} neexistuje — spusť nejdříve download_subject_summary.py"
+        )
+    path = files[0]
+    print(f"  Čtu subjekty z: {os.path.basename(path)}")
+
+    raw = pd.read_excel(path, header=None)
+    header_row = None
+    for i, row in raw.iterrows():
+        if "Subject" in [str(v).strip() for v in row]:
+            header_row = i
+            break
+    if header_row is None:
+        raise ValueError("Hlavičkový řádek nenalezen")
+
+    df = pd.read_excel(path, header=header_row)
+    subjects = df["Subject"].dropna().astype(str).str.strip().tolist()
+    return subjects
+
+
+def get_jwt_and_api_base(page, study):
+    """Získá JWT token a api_base_url pro danou studii."""
+    jwt = page.evaluate("localStorage.getItem('JWT.access')")
+    if not jwt:
+        raise ValueError("JWT token nenalezen v localStorage")
+
+    instances = page.evaluate("""async (jwt) => {
+        const res = await fetch('/_/api/dispatch/app_instances/', {
+            headers: { 'Authorization': `Bearer ${jwt}` }
+        });
+        return res.json();
+    }""", jwt)
+
+    instance = next(
+        (i for i in instances if study in i.get("label", "")),
+        None
+    )
+    if not instance:
+        raise ValueError(f"app_instance pro studii {study} nenalezena")
+
+    return jwt, instance["api_base_url"]
+
+
+def get_notifications(jwt, api_base, study, subject):
+    """Načte seznam notifikací pro daného subjekta přes report_data API."""
+    url = f"{BASE_URL}{api_base}/api/v1/reports_api/report_data"
+    params = {
+        "path": "patient_detail_report",
+        "id": subject,
+        "key": "table_1",
+        "unblinded": "false",
+    }
+    payload = {
+        "path": "patient_detail_report",
+        "study": study,
+        "id": subject,
+        "key": "table_1",
+        "fields": {},
+        "filters": [{"tableId": "table_1", "tableFilters": {}}],
+        "pagination_details": {"order": "type", "reverseOrder": False, "page": 1, "limit": 500},
+        "cache_key": f"py_{subject}_{datetime.datetime.now().timestamp()}",
+    }
+    headers = {
+        "Authorization": f"Bearer {jwt}",
+        "Content-Type": "application/json",
+        "lang": "en",
+    }
+    resp = requests.post(url, params=params, json=payload, headers=headers)
+    resp.raise_for_status()
+    data = resp.json()
+
+    notifications = []
+    for row in data.get("data", []):
+        for notif in row.get("notification", []):
+            item = notif.get("item", {})
+            pk = item.get("pk")
+            title = item.get("et_title")
+            if pk and title:
+                notifications.append({"pk": pk, "title": title, "event": row.get("event_event_id", "")})
+    return notifications
+
+
+def download_pdf(jwt, api_base, pk, title, out_path):
+    """Stáhne PDF notifikaci a uloží ji."""
+    url = f"{BASE_URL}{api_base}/api/v1/meta_api/pdfnotification"
+    params = {"pk": pk, "title": title, "html": "true"}
+    headers = {
+        "Authorization": f"Bearer {jwt}",
+        "lang": "en",
+        "Accept": "*/*",
+    }
+    resp = requests.get(url, params=params, headers=headers)
+    resp.raise_for_status()
+    with open(out_path, "wb") as f:
+        f.write(resp.content)
+
+
+def run(page, study):
+    out_dir = os.path.join(DETAILS_DIR, study)
+    os.makedirs(out_dir, exist_ok=True)
+
+    subjects = get_subjects(study)
+    print(f"  Nalezeno {len(subjects)} subjektů")
+    today = datetime.date.today().strftime("%Y-%m-%d")
+
+    # Načteme stránku aby byl platný session kontext
+    page.goto(f"{BASE_URL}/report/patient_detail_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+
+    jwt, api_base = get_jwt_and_api_base(page, study)
+    print(f"  API base: {api_base}")
+
+    for subject in subjects:
+        print(f"  [{subject}] Stahuji notifikace...")
+        try:
+            notifications = get_notifications(jwt, api_base, study, subject)
+            if not notifications:
+                print(f"  [{subject}] Žádné notifikace")
+                continue
+
+            for notif in notifications:
+                pk = notif["pk"]
+                title = notif["title"]
+                filename = os.path.join(out_dir, f"{today} {study} {subject} Notification {title} pk{pk}.pdf")
+                if os.path.exists(filename):
+                    print(f"  [{subject}] {title} (pk={pk}) — již existuje, přeskakuji")
+                    continue
+                download_pdf(jwt, api_base, pk, title, filename)
+                print(f"  [{subject}] {title} (pk={pk}) OK")
+
+        except Exception as e:
+            print(f"  [{subject}] CHYBA při notifikacích: {e}")
+
+    print(f"  [{study}] Notifikace hotovo.")
+
+
+def main():
+    os.makedirs(DETAILS_DIR, exist_ok=True)
+
+    with sync_playwright() as p:
+        for study in STUDIES:
+            print(f"\n[{study}] Přihlášení...")
+            browser = p.chromium.launch(headless=False)
+            context = browser.new_context(accept_downloads=True)
+            page = context.new_page()
+
+            page.goto(BASE_URL)
+            page.wait_for_load_state("networkidle")
+            page.get_by_label("Email *").fill(EMAIL)
+            page.get_by_label("Password *").fill(PASSWORD)
+            page.locator("#login__submit").click()
+            page.wait_for_load_state("networkidle")
+
+            page.get_by_label("Study *").click()
+            page.get_by_role("option", name=study).click()
+            page.get_by_role("button", name="SELECT").click()
+            page.wait_for_load_state("networkidle")
+
+            try:
+                run(page, study)
+            except Exception as e:
+                print(f"  [{study}] CHYBA: {e}")
+
+            browser.close()
+
+    print("\nVše hotovo.")
+
+
+main()
@@ -0,0 +1,76 @@
+from playwright.sync_api import sync_playwright
+import os
+import datetime
+
+# ── CONFIG ──────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+# SECURITY: credentials do not belong in version control. The environment
+# variables IWRS_EMAIL / IWRS_PASSWORD take precedence; the literals below
+# are kept only as a backward-compatible fallback and should be removed
+# (and the password rotated) once the environment is configured.
+EMAIL    = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
+PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
+
+# Studies processed by this script, in this order.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Output folders live next to this script.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+CREATED_DIR  = os.path.join(BASE_DIR, "CreatedReports")
+# ────────────────────────────────────────────────────────────────────────────
+
+
+def unique_path(directory, stem):
+    """Return a path for ``<stem>.xlsx`` in *directory* that does not exist yet.
+
+    The plain name is preferred. If it is taken, the current time (HHMM) is
+    appended; if that name is taken as well (second rerun within the same
+    minute) a numeric suffix is added so an earlier download is never
+    overwritten.
+    """
+    path = os.path.join(directory, f"{stem}.xlsx")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    tagged = f"{stem} {time_tag}"
+    path = os.path.join(directory, f"{tagged}.xlsx")
+    counter = 2
+    while os.path.exists(path):
+        path = os.path.join(directory, f"{tagged} ({counter}).xlsx")
+        counter += 1
+    return path
+
+
+def download_study(page, study, today):
+    """Log in, select *study* and download its Subject Summary Report.
+
+    The report is saved into INCOMING_DIR under a name built from *today*
+    and *study* (made unique by ``unique_path``). Returns the saved path.
+    """
+    print(f"\n[{study}] Prihlaseni...")
+    page.goto(BASE_URL)
+    page.wait_for_load_state("networkidle")
+    email_box = page.get_by_label("Email *")
+    email_box.fill(EMAIL)
+    password_box = page.get_by_label("Password *")
+    password_box.fill(PASSWORD)
+    page.locator("#login__submit").click()
+    page.wait_for_load_state("networkidle")
+
+    print(f"[{study}] Vyber studie...")
+    page.get_by_label("Study *").click()
+    study_option = page.get_by_role("option", name=study)
+    study_option.click()
+    page.get_by_role("button", name="SELECT").click()
+    page.wait_for_load_state("networkidle")
+
+    print(f"[{study}] Stahuji Subject Summary Report...")
+    report_url = f"{BASE_URL}/report/patient_summary_report"
+    page.goto(report_url)
+    page.wait_for_load_state("networkidle", timeout=120000)
+
+    target = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
+    with page.expect_download(timeout=120000) as pending:
+        page.get_by_role("button", name="Download XLS").click()
+    pending.value.save_as(target)
+    print(f"[{study}] OK -> {target}")
+    return target
+
+
+def main():
+    """Download the Subject Summary Report for every study in STUDIES.
+
+    Each study gets its own browser instance. The browser is closed even
+    when the download raises; the exception itself still propagates.
+    Finally the list of downloaded files is printed.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    os.makedirs(INCOMING_DIR, exist_ok=True)
+    os.makedirs(CREATED_DIR, exist_ok=True)
+
+    downloaded = []
+
+    with sync_playwright() as p:
+        for study in STUDIES:
+            browser = p.chromium.launch(headless=False)
+            try:
+                context = browser.new_context(accept_downloads=True)
+                page = context.new_page()
+
+                filename = download_study(page, study, today)
+                downloaded.append((study, filename))
+            finally:
+                browser.close()
+
+    print("\nVse stazeno:")
+    for study, path in downloaded:
+        print(f"  {study}: {path}")
+
+
+# Run only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,453 @@
+"""
+Importuje data z IWRS Excel reportů do MySQL (databáze studie).
+
+Pořadí spuštění:
+  1. download_subject_summary.py
+  2. download_subject_details.py
+  3. tento skript
+
+Každé spuštění vytvoří nový import_id v iwrs_import.
+Reportovací skripty pracují vždy s MAX(import_id) pro danou studii.
+"""
+
+import os
+import glob
+import datetime
+import re
+
+import numpy as np
+import pandas as pd
+import mysql.connector
+
+import db_config
+
+# Input folders are resolved relative to the directory containing this script.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+# Subject Summary Report workbooks are looked up here.
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+# Per-study subfolders with Subject Detail workbooks and notification files.
+DETAILS_DIR  = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+
+# Studies imported by main(), in this order.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+
+# ── helpers ──────────────────────────────────────────────────────────────────
+
+def get_conn():
+    """Open and return a new MySQL connection configured from ``db_config``."""
+    settings = {
+        "host": db_config.DB_HOST,
+        "port": db_config.DB_PORT,
+        "user": db_config.DB_USER,
+        "password": db_config.DB_PASSWORD,
+        "database": db_config.DB_NAME,
+    }
+    return mysql.connector.connect(**settings)
+
+
+def _py(val):
+    """Return *val* as a native Python object when it is a NumPy scalar.
+
+    Any other value is returned unchanged.
+    """
+    return val.item() if isinstance(val, np.generic) else val
+
+
+def to_date(val):
+    """Convert a Timestamp / datetime / date / string to ``datetime.date``.
+
+    Missing values (None, NaN, NaT, empty or "nan"-like strings) and strings
+    that match none of the supported formats yield None.
+    """
+    val = _py(val)
+    if val is None:
+        return None
+    if isinstance(val, float) and val != val:  # NaN is the only value != itself
+        return None
+    try:
+        if pd.isna(val):
+            return None
+    except (TypeError, ValueError):
+        # pd.isna may not produce a single truth value (e.g. for containers).
+        pass
+    # pd.Timestamp is a datetime subclass; NaT was already handled above.
+    if isinstance(val, (pd.Timestamp, datetime.datetime)):
+        return val.date()
+    if isinstance(val, datetime.date):
+        return val
+
+    text = str(val).strip()
+    if text.lower() in ("nat", "nan", "none", ""):
+        return None
+    supported_formats = ("%Y-%m-%d", "%d-%b-%Y", "%d-%m-%Y", "%Y-%m-%d %H:%M:%S")
+    for fmt in supported_formats:
+        try:
+            parsed = datetime.datetime.strptime(text, fmt)
+        except ValueError:
+            continue
+        return parsed.date()
+    return None
+
+
+def to_int(val):
+    """Convert *val* to ``int`` (via ``float``), or None if not possible.
+
+    None is returned for NaN, for infinite values and for anything that
+    cannot be converted to a float.
+    """
+    val = _py(val)
+    try:
+        number = float(val)
+        if number != number:  # NaN
+            return None
+        # int() raises OverflowError for +/-inf; treat that as "no value".
+        return int(number)
+    except (TypeError, ValueError, OverflowError):
+        return None
+
+
+def to_float(val):
+    """Convert *val* to ``float``; return None for NaN or unconvertible input."""
+    candidate = _py(val)
+    try:
+        number = float(candidate)
+    except (TypeError, ValueError):
+        return None
+    if number != number:  # NaN
+        return None
+    return number
+
+
+def to_str(val):
+    """Return *val* as a stripped string, or None for missing values.
+
+    None, NaN and strings equal (case-insensitively) to "nan", "nat",
+    "none" or the empty string are all treated as missing.
+    """
+    val = _py(val)
+    is_nan = isinstance(val, float) and val != val
+    if val is None or is_nan:
+        return None
+    text = str(val).strip()
+    if text.lower() in ("nan", "nat", "none", ""):
+        return None
+    return text
+
+
+def find_summary_file(study):
+    """Return the most recently modified Subject Summary Report for *study*.
+
+    Files are searched in INCOMING_DIR. Names with a suffix after
+    "Subject Summary Report" (e.g. the " HHMM" time tag added when a report
+    is downloaded twice on one day) are matched as well. Excel lock files
+    ("~$…") are ignored. A warning is printed when the newest file is not
+    from today.
+
+    Raises:
+        FileNotFoundError: no matching report exists.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
+    files = sorted(
+        (f for f in glob.glob(pattern) if not os.path.basename(f).startswith("~$")),
+        key=os.path.getmtime,
+        reverse=True,
+    )
+    if not files:
+        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")
+    newest = files[0]
+    newest_name = os.path.basename(newest)
+    if not newest_name.startswith(today):
+        print(f"  UPOZORNĚNÍ: nejnovější Summary Report pro {study} není z dnešního dne ({newest_name[:10]})")
+    return newest
+
+
+def read_summary_df(path):
+    """Read a Summary workbook and return a DataFrame starting at the header row.
+
+    The header row is the first row containing a cell equal to "Subject"
+    (after stripping whitespace).
+
+    Raises:
+        ValueError: no such row exists.
+    """
+    raw = pd.read_excel(path, header=None)
+    header_row = next(
+        (
+            idx
+            for idx, cells in raw.iterrows()
+            if "Subject" in [str(cell).strip() for cell in cells]
+        ),
+        None,
+    )
+    if header_row is None:
+        raise ValueError(f"Hlavičkový řádek nenalezen v {path}")
+    return pd.read_excel(path, header=header_row)
+
+
+def find_detail_files(study):
+    """Return the sorted Subject Detail workbooks for *study*.
+
+    Only files whose date prefix equals that of the newest Summary Report
+    are returned; Excel lock files ("~$…") are skipped.
+    """
+    # The first 10 characters of the summary file name are "YYYY-MM-DD".
+    file_date = os.path.basename(find_summary_file(study))[:10]
+    study_dir = os.path.join(DETAILS_DIR, study)
+    pattern = os.path.join(study_dir, f"{file_date} {study} * Subject Detail.xlsx")
+    matches = []
+    for candidate in glob.glob(pattern):
+        if os.path.basename(candidate).startswith("~$"):
+            continue
+        matches.append(candidate)
+    matches.sort()
+    return matches
+
+
+def parse_detail_visits(path):
+    """Return the visit rows of a Subject Detail workbook as a list of dicts.
+
+    The sheet "patient_detail_report" is scanned for the row containing
+    "Visit Type"; every following row whose first cell is "Past" or
+    "Upcoming" becomes one dict. An empty list is returned when the header
+    row is not found.
+    """
+    sheet = pd.read_excel(path, sheet_name="patient_detail_report", header=None)
+
+    header_row = next(
+        (
+            idx
+            for idx, cells in sheet.iterrows()
+            if "Visit Type" in [str(cell).strip() for cell in cells]
+        ),
+        None,
+    )
+    if header_row is None:
+        return []
+
+    body = sheet.iloc[header_row + 1:].copy()
+    body.columns = range(body.shape[1])
+
+    # (result key, column position, converter) for the columns after Visit Type
+    layout = (
+        ("scheduled_date", 1, to_date),
+        ("window_days", 2, to_str),
+        ("actual_date", 3, to_date),
+        ("irt_transaction_no", 4, to_int),
+        ("irt_transaction_description", 5, to_str),
+        ("medication_assignment", 6, to_str),
+        ("quantity_assigned", 7, to_int),
+        ("medication_id", 8, to_str),
+    )
+
+    visits = []
+    for _, record in body.iterrows():
+        visit_type = to_str(record.get(0))
+        if visit_type in ("Past", "Upcoming"):
+            visit = {"visit_type": visit_type}
+            for key, position, convert in layout:
+                visit[key] = convert(record.get(position))
+            visits.append(visit)
+    return visits
+
+
+# ── insert helpers ────────────────────────────────────────────────────────────
+
+def insert_import(cursor, study, source_file):
+    """Insert a row into ``iwrs_import`` and return its generated id.
+
+    The row stores the study, the current timestamp and the base name of
+    *source_file*.
+    """
+    statement = "INSERT INTO iwrs_import (study, imported_at, source_file) VALUES (%s, %s, %s)"
+    params = (study, datetime.datetime.now(), os.path.basename(source_file))
+    cursor.execute(statement, params)
+    return cursor.lastrowid
+
+
+def insert_uco3001_summary(cursor, import_id, df):
+    """Insert every row of the UCO3001 summary DataFrame into MySQL.
+
+    Each DataFrame row becomes one row of ``iwrs_uco3001_subject_summary``
+    tagged with *import_id*. Optional columns that are absent from *df* are
+    stored as NULL; a missing mandatory column raises ``KeyError``.
+    """
+    sql = """
+        INSERT INTO iwrs_uco3001_subject_summary (
+            import_id, subject, prior_subject_identifier, site, investigator, location,
+            cohort_per_irt, informed_consent_date, adolescent_assent_date, age, weight,
+            rescreened_subject, adt_ir, three_or_more_advanced_therapies,
+            only_oral_5asa_compounds, ustekinumab, isolated_proctitis,
+            clinical_responder_status_i12_m0, irt_subject_status,
+            i0_rand_date_local, last_irt_transaction,
+            last_irt_transaction_date_local, last_irt_transaction_date_utc,
+            next_irt_transaction, next_irt_transaction_date_local,
+            most_recent_med_assignment_date, days_since_last_med_assignment,
+            patient_forecast_status, patient_forecast_status_changed_date
+        ) VALUES (
+            %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s
+        )
+    """
+    # (source column, converter, optional) in the order of the SQL column
+    # list above (after import_id).
+    fields = (
+        ("Subject", to_str, False),
+        ("Prior Subject Identifier", to_str, True),
+        ("Site", to_str, False),
+        ("Investigator", to_str, False),
+        ("Location", to_str, False),
+        ("Cohort per IRT", to_str, False),
+        ("Informed Consent Date", to_date, False),
+        ("Adolescent Assent Date", to_date, True),
+        ("Subject's age collection", to_int, False),
+        ("Subject's weight collection", to_float, True),
+        ("Rescreened Subject", to_str, True),
+        ("ADT-IR", to_str, True),
+        ("3 or More Advanced Therapies", to_str, True),
+        ("Only Oral 5-ASA Compounds", to_str, True),
+        ("Ustekinumab", to_str, True),
+        ("Isolated Proctitis", to_str, True),
+        ("Clinical Responder Status at I-12 / M-0", to_str, True),
+        ("IRT Subject Status", to_str, False),
+        ("I0_RAND_TIMESTAMP_LOCAL [Local]", to_date, True),
+        ("Last Recorded IRT Transaction", to_str, False),
+        ("Last Recorded IRT Transaction Date [Local]", to_date, False),
+        ("Last Recorded IRT Transaction Date (UTC)", to_date, False),
+        ("Next Expected IRT Transaction", to_str, False),
+        ("Next Expected IRT Transaction Date [Local]", to_date, False),
+        ("Most Recent Medication Assignment Transaction [Local]", to_date, True),
+        ("Days Since Last Medication Assignment Transaction", to_int, True),
+        ("Patient Forecast Status", to_str, True),
+        ("Patient Forecast Status Changed Date (UTC)", to_date, True),
+    )
+    col = df.columns.tolist()
+
+    for _, r in df.iterrows():
+        values = [import_id]
+        for name, convert, optional in fields:
+            if optional and name not in col:
+                values.append(None)
+            else:
+                values.append(convert(r[name]))
+        cursor.execute(sql, tuple(values))
+
+
+def insert_mdd3003_summary(cursor, import_id, df):
+    """Insert every row of the MDD3003 summary DataFrame into MySQL.
+
+    Each DataFrame row becomes one row of ``iwrs_mdd3003_subject_summary``
+    tagged with *import_id*. Optional columns that are absent from *df* are
+    stored as NULL; a missing mandatory column raises ``KeyError``.
+    """
+    sql = """
+        INSERT INTO iwrs_mdd3003_subject_summary (
+            import_id, subject, prior_subject_identifier, site, investigator, location,
+            cohort_per_irt, madrs_criteria_integrated, informed_consent_date, age,
+            madrs_criteria_v15, madrs_criteria_v16, madrs_criteria_v17,
+            stratification_country, age_group, stable_remitters, irt_subject_status,
+            last_irt_transaction, last_irt_transaction_date_local,
+            last_irt_transaction_date_utc, next_irt_transaction,
+            next_irt_transaction_date_local, date_screened, date_screen_failed,
+            date_randomized_part1, date_early_withdraw_randomized_part1,
+            date_open_label_induction, date_early_withdraw_open_label_induction,
+            date_randomized_part2, date_early_withdraw_randomized_part2,
+            date_completed, date_unblinded
+        ) VALUES (
+            %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s
+        )
+    """
+    # (source column, converter, optional) in the order of the SQL column
+    # list above (after import_id).
+    fields = (
+        ("Subject", to_str, False),
+        ("Prior Subject Identifier", to_str, True),
+        ("Site", to_str, False),
+        ("Investigator", to_str, False),
+        ("Location", to_str, False),
+        ("Cohort per IRT", to_str, False),
+        ("MADRS response criteria integrated or manually entered", to_str, True),
+        ("Informed Consent Date", to_date, False),
+        ("Subject's age collection", to_int, False),
+        ("MADRS response criteria v1.5 from RAVE", to_str, True),
+        ("MADRS response criteria v1.6 from RAVE", to_str, True),
+        ("MADRS response criteria v1.7 from RAVE", to_str, True),
+        ("Stratification Country", to_str, True),
+        ("Age Group", to_str, True),
+        ("Stable Remitters vs. Non Stable Remitters", to_str, True),
+        ("IRT Subject Status", to_str, False),
+        ("Last Recorded IRT Transaction", to_str, False),
+        ("Last Recorded IRT Transaction Date [Local]", to_date, False),
+        ("Last Recorded IRT Transaction Date (UTC)", to_date, False),
+        ("Next Expected IRT Transaction", to_str, False),
+        ("Next Expected IRT Transaction Date [Local]", to_date, False),
+        ("Date Screened [Local]", to_date, True),
+        ("Date Screen Failed [Local]", to_date, True),
+        ("Date Randomized Part 1 [Local]", to_date, True),
+        ("Date Early Withdraw Randomized Part 1 [Local]", to_date, True),
+        ("Date Open Label Induction [Local]", to_date, True),
+        ("Date Early Withdraw Open Label Induction [Local]", to_date, True),
+        ("Date Randomized Part 2 [Local]", to_date, True),
+        ("Date Early Withdraw Randomized Part 2 [Local]", to_date, True),
+        ("Date Completed [Local]", to_date, True),
+        ("Date Unblinded [Local]", to_date, True),
+    )
+    col = df.columns.tolist()
+
+    for _, r in df.iterrows():
+        values = [import_id]
+        for name, convert, optional in fields:
+            if optional and name not in col:
+                values.append(None)
+            else:
+                values.append(convert(r[name]))
+        cursor.execute(sql, tuple(values))
+
+
+def insert_visits(cursor, import_id, study, subject, visits):
+    """Insert the visit dicts of one subject into ``iwrs_subject_visits``.
+
+    Does nothing when *visits* is empty. Each dict must provide the keys
+    listed in ``visit_keys`` below.
+    """
+    if not visits:
+        return
+    sql = """
+        INSERT INTO iwrs_subject_visits (
+            import_id, study, subject, visit_type, scheduled_date, window_days,
+            actual_date, irt_transaction_no, irt_transaction_description,
+            medication_assignment, quantity_assigned, medication_id
+        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+    """
+    # Dict keys in the order of the SQL columns after (import_id, study, subject).
+    visit_keys = (
+        "visit_type", "scheduled_date", "window_days",
+        "actual_date", "irt_transaction_no",
+        "irt_transaction_description", "medication_assignment",
+        "quantity_assigned", "medication_id",
+    )
+    prefix = (import_id, study, subject)
+    for visit in visits:
+        row = prefix + tuple(visit[key] for key in visit_keys)
+        cursor.execute(sql, row)
+
+
+# ── notifications ─────────────────────────────────────────────────────────────
+
+def find_notification_json_files(study):
+    """Return the sorted ``*.json`` notification files in the study's detail folder."""
+    pattern = os.path.join(DETAILS_DIR, study, "*.json")
+    json_files = glob.glob(pattern)
+    json_files.sort()
+    return json_files
+
+
+def import_notifications(conn, study):
+    """Import notification JSON/PDF pairs for *study* into ``iwrs_notifications``.
+
+    For every ``*.json`` file in the study's detail folder the metadata (and
+    the PDF with the same base name, if present) is upserted. The row is
+    committed *before* the files are moved to the "Zpracováno" subfolder, so
+    a failed commit never leaves files marked as processed without being in
+    the database. Errors are reported per file and do not stop the loop.
+
+    Returns:
+        Number of notifications stored.
+    """
+    import json as json_lib
+    import shutil
+
+    json_files = find_notification_json_files(study)
+    if not json_files:
+        print(f"  Žádné notifikace k importu pro {study}")
+        return 0
+
+    sql = """
+        INSERT INTO iwrs_notifications
+            (study, subject, pk, title, label, event, actual_date, text, pdf, source_file)
+        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+        ON DUPLICATE KEY UPDATE
+            label       = VALUES(label),
+            text        = VALUES(text),
+            pdf         = VALUES(pdf),
+            source_file = VALUES(source_file)
+    """
+
+    done_dir = os.path.join(DETAILS_DIR, study, "Zpracováno")
+    os.makedirs(done_dir, exist_ok=True)
+
+    cursor = conn.cursor()
+    count = 0
+    try:
+        for json_path in json_files:
+            try:
+                with open(json_path, "r", encoding="utf-8") as f:
+                    meta = json_lib.load(f)
+
+                # Swap only the extension; str.replace would also touch a
+                # ".json" occurring elsewhere in the path.
+                pdf_path = os.path.splitext(json_path)[0] + ".pdf"
+                pdf_data = None
+                if os.path.exists(pdf_path):
+                    with open(pdf_path, "rb") as f:
+                        pdf_data = f.read()
+
+                cursor.execute(sql, (
+                    meta.get("study", study),
+                    meta.get("subject"),
+                    meta.get("pk"),
+                    meta.get("title"),
+                    meta.get("label"),
+                    meta.get("event"),
+                    to_date(meta.get("actual_date")),
+                    meta.get("text"),
+                    pdf_data,
+                    os.path.basename(json_path),
+                ))
+                # Persist first, move afterwards.
+                conn.commit()
+                count += 1
+
+                # Move the processed files to "Zpracováno"
+                shutil.move(json_path, os.path.join(done_dir, os.path.basename(json_path)))
+                if os.path.exists(pdf_path):
+                    shutil.move(pdf_path, os.path.join(done_dir, os.path.basename(pdf_path)))
+
+            except Exception as e:
+                print(f"  CHYBA při importu {os.path.basename(json_path)}: {e}")
+    finally:
+        cursor.close()
+
+    print(f"  Notifikací uloženo/přesunuto: {count}")
+    return count
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def import_study(conn, study):
+    """Import the newest summary report and matching detail files for *study*.
+
+    A new ``iwrs_import`` row is created, the summary rows are inserted into
+    the study-specific table and the visits of every detail workbook into
+    ``iwrs_subject_visits``. Everything is committed as one transaction; on
+    any error the transaction is rolled back and the exception re-raised,
+    so a later commit on the same connection cannot persist a partial import.
+
+    Returns:
+        The ``import_id`` of the new import.
+    """
+    summary_path = find_summary_file(study)
+    print(f"  Summary: {os.path.basename(summary_path)}")
+
+    df_summary = read_summary_df(summary_path)
+    df_summary = df_summary.dropna(how="all")
+
+    detail_files = find_detail_files(study)
+    print(f"  Detail souborů: {len(detail_files)}")
+
+    # File name: "2026-05-04 77242113UCO3001 CZ100012001 Subject Detail.xlsx"
+    name_re = re.compile(r"\d{4}-\d{2}-\d{2} \S+ (\S+) Subject Detail\.xlsx")
+
+    cursor = conn.cursor()
+    try:
+        import_id = insert_import(cursor, study, summary_path)
+        print(f"  import_id = {import_id}")
+
+        if study == "77242113UCO3001":
+            insert_uco3001_summary(cursor, import_id, df_summary)
+        else:
+            insert_mdd3003_summary(cursor, import_id, df_summary)
+        print(f"  Summary řádků: {len(df_summary)}")
+
+        visited = 0
+        for path in detail_files:
+            m = name_re.search(os.path.basename(path))
+            subject = m.group(1) if m else "UNKNOWN"
+            visits = parse_detail_visits(path)
+            insert_visits(cursor, import_id, study, subject, visits)
+            visited += len(visits)
+
+        conn.commit()
+    except Exception:
+        # Discard the partial import before handing the error to the caller.
+        conn.rollback()
+        raise
+    finally:
+        cursor.close()
+
+    print(f"  Transakce uloženo: {visited}")
+    return import_id
+
+
+def main():
+    """Import reports and notifications for every study in STUDIES.
+
+    The report import and the notification import of a study are attempted
+    independently; a failure of either is printed and does not stop the run.
+    """
+    conn = get_conn()
+    print("Připojeno k MySQL.\n")
+
+    for study in STUDIES:
+        print(f"[{study}]")
+
+        try:
+            import_id = import_study(conn, study)
+        except Exception as e:
+            print(f"  CHYBA: {e}")
+        else:
+            print(f"  OK — import_id {import_id}")
+
+        try:
+            import_notifications(conn, study)
+        except Exception as e:
+            print(f"  CHYBA notifikace: {e}")
+
+        print()
+
+    conn.close()
+    print("Hotovo.")
+
+
+# Run only when executed as a script, so importing this module (e.g. to
+# reuse its helpers) does not start a database import.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,175 @@
+"""
+Kompletní pipeline:
+  1. Stažení Subject Summary Reportů (obě studie)
+  2. Stažení Subject Detail Reportů + notifikací (obě studie)
+  3. Import do MongoDB (subject_summary + visits + notifications)
+
+Spusť tento skript místo samostatných skriptů.
+"""
+
+import os
+import sys
+import datetime
+import glob
+
+from playwright.sync_api import sync_playwright
+
+import download_subject_details as dsd
+import import_to_mongo
+import import_notifications_to_mongo
+
+# ── CONFIG ───────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+# SECURITY: credentials do not belong in version control. The environment
+# variables IWRS_EMAIL / IWRS_PASSWORD take precedence; the literals below
+# are kept only as a backward-compatible fallback and should be removed
+# (and the password rotated) once the environment is configured.
+EMAIL    = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
+PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
+
+# Studies processed by the pipeline, in this order.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Download folders live next to this script.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+DETAILS_DIR  = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+def unique_path(directory, stem):
+    """Return a path for ``<stem>.xlsx`` in *directory* that does not exist yet.
+
+    The plain name is preferred. If it is taken, the current time (HHMM) is
+    appended; if that name is taken as well (second rerun within the same
+    minute) a numeric suffix is added so an earlier download is never
+    overwritten.
+    """
+    path = os.path.join(directory, f"{stem}.xlsx")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    tagged = f"{stem} {time_tag}"
+    path = os.path.join(directory, f"{tagged}.xlsx")
+    counter = 2
+    while os.path.exists(path):
+        path = os.path.join(directory, f"{tagged} ({counter}).xlsx")
+        counter += 1
+    return path
+
+
+def login(page, study):
+    """Sign in at BASE_URL with EMAIL/PASSWORD and select *study*.
+
+    The page is left in the state reached after the study selection has
+    settled ("networkidle").
+    """
+    # Credentials form
+    page.goto(BASE_URL)
+    page.wait_for_load_state("networkidle")
+    email_box = page.get_by_label("Email *")
+    email_box.fill(EMAIL)
+    password_box = page.get_by_label("Password *")
+    password_box.fill(PASSWORD)
+    page.locator("#login__submit").click()
+    page.wait_for_load_state("networkidle")
+
+    # Study selection dialog
+    page.get_by_label("Study *").click()
+    study_option = page.get_by_role("option", name=study)
+    study_option.click()
+    page.get_by_role("button", name="SELECT").click()
+    page.wait_for_load_state("networkidle")
+
+
+# ── KROK 1: Subject Summary ───────────────────────────────────────────────────
+
+def download_summary(page, study, today):
+    """Download the Subject Summary Report of the currently selected study.
+
+    The workbook is saved into INCOMING_DIR under a name built from *today*
+    and *study* (made unique by ``unique_path``). Returns the saved path.
+    """
+    print(f"  [{study}] Stahuji Subject Summary Report...")
+    report_url = f"{BASE_URL}/report/patient_summary_report"
+    page.goto(report_url)
+    page.wait_for_load_state("networkidle", timeout=120000)
+    target = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
+    with page.expect_download(timeout=120000) as pending:
+        page.get_by_role("button", name="Download XLS").click()
+    pending.value.save_as(target)
+    print(f"  [{study}] Summary OK -> {os.path.basename(target)}")
+    return target
+
+
+# ── KROK 2: Subject Details ───────────────────────────────────────────────────
+
+def get_subjects_from_summary(summary_path):
+    """Return the non-empty, stripped "Subject" values of a Summary workbook.
+
+    The header row is the first row containing a cell equal to "Subject".
+
+    Raises:
+        ValueError: no such row exists.
+    """
+    import pandas as pd
+    raw = pd.read_excel(summary_path, header=None)
+    header_row = next(
+        (
+            idx
+            for idx, cells in raw.iterrows()
+            if "Subject" in [str(cell).strip() for cell in cells]
+        ),
+        None,
+    )
+    if header_row is None:
+        raise ValueError("Hlavičkový řádek nenalezen")
+    table = pd.read_excel(summary_path, header=header_row)
+    subjects = table["Subject"].dropna().astype(str).str.strip()
+    return subjects.tolist()
+
+
+def download_details(page, study, summary_path, today):
+    """Download one Subject Detail workbook per subject listed in the summary.
+
+    Files are written to ``DETAILS_DIR/<study>`` and named
+    "<today> <study> <subject> Subject Detail.xlsx".
+    """
+    out_dir = os.path.join(DETAILS_DIR, study)
+    os.makedirs(out_dir, exist_ok=True)
+
+    subjects = get_subjects_from_summary(summary_path)
+    print(f"  [{study}] Subjektů k stažení: {len(subjects)}")
+
+    page.goto(f"{BASE_URL}/report/patient_detail_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+
+    search_selector = 'input[placeholder="search"], input[type="text"]'
+    for subject in subjects:
+        target = os.path.join(out_dir, f"{today} {study} {subject} Subject Detail.xlsx")
+
+        # Type the subject into the search box and pick the first suggestion.
+        search_box = page.locator(search_selector).first
+        search_box.click()
+        search_box.fill(subject)
+        page.wait_for_timeout(500)
+        page.locator("mat-option").first.dispatch_event("click")
+        page.wait_for_load_state("networkidle", timeout=120000)
+
+        with page.expect_download(timeout=120000) as pending:
+            page.get_by_role("button", name="Download XLS").click()
+        pending.value.save_as(target)
+        print(f"  [{study}] Detail {subject} OK")
+
+        # Reset the filter before the next subject.
+        page.get_by_role("button", name="Clear").click()
+        page.wait_for_load_state("networkidle", timeout=120000)
+
+
+# ── KROK 3: Import do MongoDB ────────────────────────────────────────────────
+
+def main():
+    """Run the whole pipeline: download reports, then import them.
+
+    Steps 1 and 2 (summary download, detail/notification download) run per
+    study in a separate browser. Step 3 imports every study whose download
+    succeeded, followed by the notification import for all studies.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    for folder in (INCOMING_DIR, DETAILS_DIR):
+        os.makedirs(folder, exist_ok=True)
+
+    rule = "=" * 60
+    summary_paths = {}
+
+    # Steps 1 + 2: downloads (Playwright, one browser per study because of the session)
+    with sync_playwright() as p:
+        for study in STUDIES:
+            print("\n" + rule)
+            print(f"[{study}] KROK 1: Subject Summary Report")
+            print(rule)
+            browser = p.chromium.launch(headless=False)
+            context = browser.new_context(accept_downloads=True)
+            page = context.new_page()
+
+            try:
+                login(page, study)
+                summary_paths[study] = download_summary(page, study, today)
+
+                print(f"\n[{study}] KROK 2: Subject Detail Reports + notifikace")
+                dsd.run(page, study)
+
+            except Exception as e:
+                print(f"  [{study}] CHYBA při stahování: {e}")
+                # A failure in either step excludes the study from the import.
+                summary_paths[study] = None
+            finally:
+                browser.close()
+
+    # Step 3: import into MongoDB
+    print("\n" + rule)
+    print("KROK 3: Import do MongoDB")
+    print(rule)
+
+    for study in STUDIES:
+        summary_path = summary_paths.get(study)
+        if summary_path:
+            try:
+                import_to_mongo.run(study, summary_path, DETAILS_DIR, today)
+            except Exception as e:
+                print(f"  [{study}] CHYBA při importu summary/visits: {e}")
+        else:
+            print(f"  [{study}] PŘESKOČENO — stahování selhalo")
+
+    # Notifications: PDF/JSON from disk straight into Mongo iwrs_notifications
+    print("\n  [notifikace] import PDF/JSON do Mongo...")
+    try:
+        import_notifications_to_mongo.main(STUDIES)
+    except Exception as e:
+        print(f"  CHYBA při importu notifikací: {e}")
+
+    print("\n" + rule)
+    print("Vše hotovo.")
+    print(rule)
+
+
+# Run only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,172 @@
+from playwright.sync_api import sync_playwright
+import re
+import os
+import datetime
+import mysql.connector
+import db_config
+
+
+def get_existing_pks(study):
+    """Return the set of notification pks already stored for *study*.
+
+    Best effort: if the database cannot be queried, a warning is printed and
+    an empty set is returned. The connection and cursor are closed even when
+    the query fails.
+    """
+    try:
+        conn = mysql.connector.connect(
+            host=db_config.DB_HOST, port=db_config.DB_PORT,
+            user=db_config.DB_USER, password=db_config.DB_PASSWORD,
+            database=db_config.DB_NAME,
+        )
+        try:
+            cursor = conn.cursor()
+            try:
+                cursor.execute("SELECT pk FROM iwrs_notifications WHERE study = %s", (study,))
+                return {row[0] for row in cursor.fetchall()}
+            finally:
+                cursor.close()
+        finally:
+            conn.close()
+    except Exception as e:
+        print(f"  UPOZORNĚNÍ: nelze načíst existující pk z DB ({e}), stahuji vše")
+        return set()
+
+BASE_URL = "https://janssen.4gclinical.com"
+# SECURITY: credentials do not belong in version control. The environment
+# variables IWRS_EMAIL / IWRS_PASSWORD take precedence; the literals below
+# are kept only as a backward-compatible fallback and should be removed
+# (and the password rotated) once the environment is configured.
+EMAIL    = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
+PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
+
+# The single study / subject whose notifications are fetched by main().
+STUDY   = "77242113UCO3001"
+SUBJECT = "CZ100222003"
+
+# Output folder lives next to this script.
+BASE_DIR    = os.path.dirname(os.path.abspath(__file__))
+DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+
+
+def strip_html(html):
+    """Return *html* as plain text.
+
+    ``<br>`` tags become newlines, all other tags are removed, runs of three
+    or more newlines are collapsed to two and the result is stripped.
+    None or an empty string yields "".
+    """
+    if not html:
+        return ""
+    text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
+    text = re.sub(r"<[^>]+>", "", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
+
+
+def main():
+    """Download new notifications (PDF + JSON metadata) for STUDY / SUBJECT.
+
+    Logs in, opens the patient detail report, selects SUBJECT and reads the
+    notifications from the captured ``table_1`` report response. Every
+    notification is printed; those whose pk is not yet in the database are
+    downloaded as PDF into ``DETAILS_DIR/<STUDY>`` together with a JSON file
+    describing them.
+
+    Raises:
+        ValueError: the JWT or the application instance for STUDY is missing.
+    """
+    import json
+    from urllib.parse import quote, urlencode
+
+    existing_pks = get_existing_pks(STUDY)
+    print(f"V DB již existuje {len(existing_pks)} notifikací pro {STUDY}")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False, args=["--start-maximized"])
+        context = browser.new_context(no_viewport=True)
+        page = context.new_page()
+
+        print("Přihlašuji se...")
+        page.goto(BASE_URL)
+        page.wait_for_load_state("networkidle")
+        page.get_by_label("Email *").fill(EMAIL)
+        page.get_by_label("Password *").fill(PASSWORD)
+        page.locator("#login__submit").click()
+        page.wait_for_load_state("networkidle")
+
+        page.get_by_label("Study *").click()
+        page.get_by_role("option", name=STUDY).click()
+        page.get_by_role("button", name="SELECT").click()
+        page.wait_for_load_state("networkidle")
+
+        page.goto(f"{BASE_URL}/report/patient_detail_report")
+        page.wait_for_load_state("networkidle", timeout=60000)
+
+        # JWT + api_base
+        jwt = page.evaluate("localStorage.getItem('JWT.access')")
+        if not jwt:
+            # Fail with a clear message instead of a TypeError on jwt[:30].
+            raise ValueError("JWT token nenalezen v localStorage")
+        print(f"JWT: {jwt[:30]}...")
+        instances = page.evaluate("""async (jwt) => {
+            const res = await fetch('/_/api/dispatch/app_instances/', {
+                headers: { 'Authorization': `Bearer ${jwt}` }
+            });
+            return res.json();
+        }""", jwt)
+        instance = next((i for i in instances if STUDY in i.get("label", "")), None)
+        if not instance:
+            raise ValueError(f"Instance pro {STUDY} nenalezena")
+        api_base = instance["api_base_url"]
+        print(f"API base: {api_base}")
+
+        # Select the subject and capture the table_1 response directly
+        print(f"Vybírám subjekt {SUBJECT}...")
+        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
+        input_field.click()
+        input_field.fill(SUBJECT)
+        page.wait_for_timeout(1000)
+
+        with page.expect_response(
+            lambda r: "report_data" in r.url and "table_1" in r.url,
+            timeout=60000
+        ) as resp_info:
+            page.locator("mat-option").first.dispatch_event("click")
+
+        response = resp_info.value
+        data = response.json()
+
+        out_dir = os.path.join(DETAILS_DIR, STUDY)
+        os.makedirs(out_dir, exist_ok=True)
+
+        print(f"\n{'='*60}")
+        print(f"Subjekt: {SUBJECT}  |  Studie: {STUDY}")
+        print(f"{'='*60}")
+
+        count = 0
+        for row in data.get("data", []):
+            for notif in (row.get("notification") or []):
+                item  = notif.get("item", {})
+                pk    = item.get("pk")
+                title = item.get("et_title")
+                label = (notif.get("label") or title or "").strip()
+                # Whole label, spaces -> underscores, characters not allowed
+                # in file names removed
+                safe_label = re.sub(r'[\\/*?:"<>|]', "", label).replace(" ", "_")
+                # A null body in the response would otherwise reach re.sub as None.
+                body = item.get("body") or ""
+                text = strip_html(body)
+                count += 1
+                print(f"\n--- Notifikace #{count}: {safe_label} (pk={pk}) | event: {row.get('event_event_id')} ---")
+                print(text)
+
+                if pk in existing_pks:
+                    print(f"  → pk={pk} již v DB, přeskakuji")
+                    continue
+
+                # Also covers an explicit null date in the response.
+                actual_date = row.get("actual_date_raw") or "0000-00-00"
+                base_name = f"{actual_date}_{safe_label}"
+                pdf_filename = os.path.join(out_dir, f"{base_name}.pdf")
+                if os.path.exists(pdf_filename):
+                    base_name = f"{base_name}_pk{pk}"
+                    pdf_filename = os.path.join(out_dir, f"{base_name}.pdf")
+
+                # Percent-encode the query: titles may contain spaces, "&" or "#".
+                query = urlencode(
+                    {"pk": pk, "title": "" if title is None else title, "html": "true"},
+                    quote_via=quote,
+                )
+                pdf_url = f"{BASE_URL}{api_base}/api/v1/meta_api/pdfnotification?{query}"
+                pdf_resp = page.request.get(pdf_url, headers={
+                    "Authorization": f"Bearer {jwt}",
+                    "lang": "en",
+                    "prancer_study": STUDY,
+                    "Accept": "application/json, text/plain, */*",
+                })
+                if pdf_resp.ok:
+                    with open(pdf_filename, "wb") as f:
+                        f.write(pdf_resp.body())
+                    print(f"  → PDF uloženo: {os.path.basename(pdf_filename)}")
+                    json_filename = os.path.join(out_dir, f"{base_name}.json")
+                    with open(json_filename, "w", encoding="utf-8") as f:
+                        json.dump({
+                            "pk": pk,
+                            "title": title,
+                            "label": label,
+                            "event": row.get("event_event_id"),
+                            "actual_date": actual_date,
+                            "subject": SUBJECT,
+                            "study": STUDY,
+                            "text": text,
+                        }, f, ensure_ascii=False, indent=2)
+                    print(f"  → JSON uloženo: {os.path.basename(json_filename)}")
+                else:
+                    print(f"  → PDF chyba: {pdf_resp.status}")
+                page.wait_for_timeout(300)
+
+        if count == 0:
+            print("Žádné notifikace nalezeny.")
+        else:
+            print(f"\n{'='*60}")
+            print(f"Celkem notifikací: {count}")
+
+        browser.close()
+
+
+# Run only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,90 @@
+"""
+Stažení reportů z IWRS portálu — vše do jednoho adresáře `Incoming/`.
+
+  1. Subject Summary Report (per studie)
+  2. Subject Detail Reports + notifikace (per subjekt)
+
+Import se spouští samostatně skriptem `import_all.py`.
+"""
+
+import os
+import datetime
+
+from playwright.sync_api import sync_playwright
+
+import download_subject_details as dsd
+
+# ── CONFIG ───────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+EMAIL    = "vbuzalka@its.jnj.com"
+PASSWORD = "Vlado123++-+"
+
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "Incoming")
+
+
+def unique_path(directory, stem, ext=".xlsx"):
+    path = os.path.join(directory, f"{stem}{ext}")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    return os.path.join(directory, f"{stem} {time_tag}{ext}")
+
+
+def login(page, study):
+    page.goto(BASE_URL)
+    page.wait_for_load_state("networkidle")
+    page.get_by_label("Email *").fill(EMAIL)
+    page.get_by_label("Password *").fill(PASSWORD)
+    page.locator("#login__submit").click()
+    page.wait_for_load_state("networkidle")
+    page.get_by_label("Study *").click()
+    page.get_by_role("option", name=study).click()
+    page.get_by_role("button", name="SELECT").click()
+    page.wait_for_load_state("networkidle")
+
+
+def download_summary(page, study, today):
+    print(f"  [{study}] Stahuji Subject Summary Report...")
+    page.goto(f"{BASE_URL}/report/patient_summary_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+    filename = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
+    with page.expect_download(timeout=120000) as dl:
+        page.get_by_role("button", name="Download XLS").click()
+    dl.value.save_as(filename)
+    print(f"  [{study}] Summary OK -> {os.path.basename(filename)}")
+    return filename
+
+
+def main():
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    os.makedirs(INCOMING_DIR, exist_ok=True)
+
+    with sync_playwright() as p:
+        for study in STUDIES:
+            print("\n" + "=" * 60)
+            print(f"[{study}] Stažení reportů")
+            print("=" * 60)
+            browser = p.chromium.launch(headless=False)
+            context = browser.new_context(accept_downloads=True)
+            page = context.new_page()
+            try:
+                login(page, study)
+                download_summary(page, study, today)
+                # detail XLSX + notifikace přímo do Incoming/
+                dsd.run(page, study, out_dir=INCOMING_DIR, subjects_source_dir=INCOMING_DIR)
+            except Exception as e:
+                print(f"  [{study}] CHYBA: {e}")
+            finally:
+                browser.close()
+
+    print("\n" + "=" * 60)
+    print(f"Stahování hotovo. Soubory v: {INCOMING_DIR}")
+    print("Pro import spusť: python import_all.py")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,107 @@
+"""
+Import všech čekajících reportů z `Incoming/` do MongoDB.
+
+Pořadí zpracování per typ + studie: nejstarší soubor podle mtime první
+(důležité pro chronologickou správnost snapshotů).
+
+Po úspěšném importu se soubor přesune do `Incoming/Zpracováno/`.
+Při chybě zůstane soubor v `Incoming/`.
+"""
+
+import os
+import sys
+import glob
+import shutil
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from common.mongo_writer import ensure_indexes
+
+import import_to_mongo
+import import_notifications_to_mongo
+
+# Incoming/ (next to this script) holds the reports waiting for import;
+# _move_done() archives imported files in Incoming/Zpracováno/.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "Incoming")
+DONE_DIR     = os.path.join(INCOMING_DIR, "Zpracováno")
+
+# Studies whose files main() imports.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+
+def _move_done(path):
+    os.makedirs(DONE_DIR, exist_ok=True)
+    dst = os.path.join(DONE_DIR, os.path.basename(path))
+    # kolize → přepiš (Mongo už má aktuální data, soubor je jen archiv)
+    if os.path.exists(dst):
+        os.remove(dst)
+    shutil.move(path, dst)
+
+
+def _sorted_by_mtime(paths):
+    """Nejstarší první."""
+    return sorted(
+        (p for p in paths if not os.path.basename(p).startswith("~$")),
+        key=os.path.getmtime,
+    )
+
+
+def import_summaries(study):
+    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
+    files = _sorted_by_mtime(glob.glob(pattern))
+    if not files:
+        print(f"  [{study}] summary: nic ke zpracování")
+        return
+    print(f"  [{study}] summary: {len(files)} soubor(ů) (oldest first)")
+    for path in files:
+        try:
+            import_to_mongo.import_subject_summary(study, path)
+            _move_done(path)
+        except Exception as e:
+            print(f"  [{study}] CHYBA summary {os.path.basename(path)}: {e}")
+
+
+def import_details(study):
+    pattern = os.path.join(INCOMING_DIR, f"* {study} * Subject Detail.xlsx")
+    files = _sorted_by_mtime(glob.glob(pattern))
+    if not files:
+        print(f"  [{study}] detail: nic ke zpracování")
+        return
+    print(f"  [{study}] detail: {len(files)} soubor(ů) (oldest first)")
+    for path in files:
+        parsed = import_to_mongo.parse_detail_filename(path)
+        if not parsed:
+            print(f"  [{study}] PŘESKAKUJI (nelze parsovat název): {os.path.basename(path)}")
+            continue
+        _, parsed_study, subject = parsed
+        if parsed_study != study:
+            continue  # patří jiné studii
+        try:
+            import_to_mongo.import_visits_single_file(study, subject, path)
+            _move_done(path)
+        except Exception as e:
+            print(f"  [{study}] CHYBA detail {os.path.basename(path)}: {e}")
+
+
+def main():
+    if not os.path.isdir(INCOMING_DIR):
+        print(f"Adresář neexistuje: {INCOMING_DIR}")
+        return
+    ensure_indexes()
+
+    print("=" * 60)
+    print("Import Subject Summary + Visits")
+    print("=" * 60)
+    for study in STUDIES:
+        import_summaries(study)
+        import_details(study)
+
+    print("\n" + "=" * 60)
+    print("Import notifikací")
+    print("=" * 60)
+    import_notifications_to_mongo.import_from_dir(INCOMING_DIR, DONE_DIR, STUDIES)
+
+    print("\n" + "=" * 60)
+    print(f"Hotovo. Zpracované soubory: {DONE_DIR}")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,23 @@
+# JustOpenOutlook_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+
+## Cíl
+Jen otevře Outlook OWA v Playwrightu pomocí už uloženého persistent profilu —
+žádný login, žádné ukládání.
+
+## Co dělá
+1. Načte profil `outlook_profile/` (vytvořený `outlook_login_v1.0.py`).
+2. Otevře `https://outlook.cloud.microsoft/mail/`.
+3. Čeká na Enter v konzoli.
+4. Zavře prohlížeč.
+
+## Spuštění
+```
+python JustOpenOutlook_v1.0.py
+```
+
+## Předpoklad
+Existuje `outlook_profile/` ve stejném adresáři.
+Pokud ne — nejprve spustit `outlook_login_v1.0.py`.
@@ -0,0 +1,50 @@
+"""
+=======================================================================
+ Název:   JustOpenOutlook_v1.0.py
+ Verze:   1.0
+ Datum:   2026-06-03
+ Popis:   Otevře Outlook OWA v persistent Chromium profilu vytvořeném
+          skriptem outlook_login_v1.0.py. Žádný login — pouze otevře
+          okno, počká, až uživatel stiskne Enter, a zavře.
+=======================================================================
+"""
+
+from pathlib import Path
+from playwright.sync_api import sync_playwright
+
+# The persistent Chromium profile lives next to this script.
+BASE_DIR = Path(__file__).resolve().parent
+PROFILE_DIR = BASE_DIR / "outlook_profile"
+# Page opened by main().
+START_URL = "https://outlook.cloud.microsoft/mail/"
+
+
+def main() -> None:
+    if not PROFILE_DIR.exists():
+        print(f" Profil nenalezen: {PROFILE_DIR}")
+        print(" Nejprve spusť outlook_login_v1.0.py a přihlas se.")
+        return
+
+    with sync_playwright() as p:
+        context = p.chromium.launch_persistent_context(
+            user_data_dir=str(PROFILE_DIR),
+            headless=False,
+            no_viewport=True,
+            args=[
+                "--disable-blink-features=AutomationControlled",
+                "--start-maximized",
+            ],
+        )
+
+        page = context.pages[0] if context.pages else context.new_page()
+        page.goto(START_URL)
+
+        print()
+        print("=" * 70)
+        print(" Outlook otevřen. Stiskni Enter pro zavření.")
+        print("=" * 70)
+        input()
+
+        context.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,54 @@
+# download_all_inbox_eml_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+
+## Cíl
+Stáhnout zprávy z Outlook Inboxu jako `.eml` soubory.
+
+## Klíčový princip — virtualizovaný seznam
+OWA drží v DOM jen ~16 viditelných řádků. `nth(19)` proto nefunguje.
+Řešení: **navigace klávesnicí** — vybrat první zprávu a opakovaně mačkat
+`ArrowDown`. Outlook sám scrolluje a dorenderovává. Aktuálně vybraná zpráva
+je vždy `[role="option"][aria-selected="true"]`.
+
+### Oddělovače sekcí (Today / Yesterday / This week)
+Jsou to `role="button"` `aria-expanded` prvky, ne zprávy. Když na nich kurzor
+po `ArrowDown` zastaví, **žádná** zpráva nemá `aria-selected`
+(`selected.count() == 0`). Takový krok se musí jen přeskočit (`ArrowDown` dál),
+NEpočítat jako zprávu a NEukončovat smyčku. Konec seznamu se pozná až podle
+toho, že se `aria-label` vybrané zprávy přestane měnit (`no_progress`).
+
+Alternativa: v OWA přepnout řazení na "Show as Messages" (bez seskupení podle
+data) — pak seznam žádné oddělovače nemá.
+
+## Postup
+1. Otevře OWA z persistent profilu (`outlook_profile/`).
+2. Přejde do Inboxu.
+3. Vybere první zprávu.
+4. Smyčka: stáhne vybranou (pravý klik → Download → Download as EML) →
+   `ArrowDown` → opakuje, dokud se výběr přestane hýbat (= konec seznamu).
+
+## Nastavení (v hlavičce skriptu)
+- `LIMIT` — max počet **uložených zpráv** (`None` = celý Inbox). Aktuálně `30`.
+- `SKIP_EXISTING` — `True` = soubor stejného jména v `downloads/` znovu neuloží;
+  `False` (aktuální) = existující soubor **smaže a uloží nový** (přepis).
+
+## Výstup
+`downloads/<název_z_OWA>.eml`. Při kolizi jmen:
+- `SKIP_EXISTING=False` → starý soubor se smaže a přepíše novým,
+- `SKIP_EXISTING=True` → soubor se ponechá, nový se neuloží.
+
+## Spuštění
+```
+python download_all_inbox_eml_v1.0.py
+```
+
+## Poznámky / omezení
+- Celý Inbox (tisíce zpráv) přes UI je pomalý a křehký — pro velký objem
+  nejdřív zúžit hledáním/filtrem v OWA. `LIMIT=30` je rozumný test.
+- `SKIP_EXISTING` nešetří čas: identitu zprávy známe až z názvu **po** stažení,
+  takže pravý klik + download proběhne pro každou zprávu; jen se nepřepíše soubor.
+- Konec seznamu se pozná tak, že se `aria-label` vybrané zprávy přestane měnit
+  (počítadlo `no_progress`, práh `NO_PROGRESS_MAX = 4`).
+- Okno se po doběhnutí nezavře, čeká na Enter.
@@ -0,0 +1,216 @@
+"""
+=======================================================================
+ Název:   download_all_inbox_eml_v1.0.py
+ Verze:   1.0
+ Datum:   2026-06-03
+ Popis:   Stáhne zprávy z Outlook Inboxu jako .eml. Virtualizovaný seznam
+          řeší navigací klávesnicí (ArrowDown) — Outlook sám scrolluje
+          a dorenderovává. Postup:
+            1. vybrat první zprávu
+            2. stáhnout vybranou (pravý klik → Download → Download as EML)
+            3. ArrowDown na další
+            4. opakovat, dokud se výběr (aria-selected) přestane hýbat
+
+          Používá persistent profil z outlook_login_v1.0.py.
+
+ Nastavení:
+   LIMIT          – max number of saved messages (None = whole Inbox)
+   SKIP_EXISTING  – True: keep an existing same-named EML in downloads/
+                    (the download itself still runs); False: overwrite it
+=======================================================================
+"""
+
+import re
+from pathlib import Path
+from playwright.sync_api import sync_playwright
+
+# Profile and output directories live next to this script.
+BASE_DIR = Path(__file__).resolve().parent
+PROFILE_DIR = BASE_DIR / "outlook_profile"
+OUT_DIR = BASE_DIR / "downloads"
+START_URL = "https://outlook.cloud.microsoft/mail/"
+
+LIMIT = 30            # max number of messages; None = whole Inbox
+SKIP_EXISTING = False  # False = overwrite an existing same-named file (delete + save new)
+
+
+def safe_name(name: str) -> str:
+    """Očistí název pro filesystem (Windows)."""
+    name = re.sub(r'[<>:"/\\|?*\r\n\t]', "_", name).strip().strip(".")
+    return name[:150] or "message"
+
+
+def download_selected(page, out_dir: Path) -> Path | None:
+    """Pravý klik na vybranou zprávu → Download as EML. Vrátí cestu nebo None."""
+    selected = page.locator('[role="option"][aria-selected="true"]').first
+    if selected.count() == 0:
+        return None
+
+    selected.click(button="right")
+    page.wait_for_timeout(600)
+
+    # Download (rodič submenu)
+    download_parent = None
+    for name in ("Download", "Stáhnout"):
+        loc = page.get_by_role("menuitem", name=name).first
+        if loc.count() and loc.is_visible():
+            download_parent = loc
+            break
+    if download_parent is None:
+        page.keyboard.press("Escape")
+        return None
+
+    download_parent.hover()
+    page.wait_for_timeout(500)
+
+    # Download as EML (submenu); fallback = klik přímo na Download
+    eml_item = None
+    for name in ("Download as EML", "Stáhnout jako EML", "Stáhnout jako .eml"):
+        loc = page.get_by_role("menuitem", name=name).first
+        if loc.count() and loc.is_visible():
+            eml_item = loc
+            break
+
+    try:
+        if eml_item is not None:
+            with page.expect_download(timeout=15_000) as dl:
+                eml_item.click()
+        else:
+            with page.expect_download(timeout=15_000) as dl:
+                download_parent.click()
+        download = dl.value
+    except Exception:
+        page.keyboard.press("Escape")
+        return None
+
+    fname = safe_name(download.suggested_filename or "message.eml")
+    if not fname.lower().endswith(".eml"):
+        fname += ".eml"
+    target = out_dir / fname
+
+    if target.exists():
+        if SKIP_EXISTING:
+            return target  # už máme — neukládat znovu
+        target.unlink()    # přepsat: smazat starou verzi a uložit novou
+
+    download.save_as(str(target))
+    return target
+
+
+def main() -> None:
+    """Download Inbox messages as .eml by walking the list with ArrowDown.
+
+    Prints a hint and returns when PROFILE_DIR does not exist. Otherwise
+    opens Outlook in the persistent profile, opens the Inbox, selects the
+    first message and then repeats "download selected → ArrowDown" until
+    LIMIT messages were saved (when LIMIT is not None) or the selection made
+    no progress NO_PROGRESS_MAX times in a row. Files go to OUT_DIR. Waits
+    for Enter before closing the browser.
+    """
+    if not PROFILE_DIR.exists():
+        print(f" Profil nenalezen: {PROFILE_DIR}")
+        print(" Nejprve spusť outlook_login_v1.0.py.")
+        return
+    OUT_DIR.mkdir(exist_ok=True)
+
+    with sync_playwright() as p:
+        context = p.chromium.launch_persistent_context(
+            user_data_dir=str(PROFILE_DIR),
+            headless=False,
+            no_viewport=True,
+            accept_downloads=True,
+            args=[
+                "--disable-blink-features=AutomationControlled",
+                "--start-maximized",
+            ],
+        )
+        page = context.pages[0] if context.pages else context.new_page()
+
+        # 1) Open Outlook; the search box appearing is the "UI is ready" signal
+        print(" 1/4  Otevírám Outlook...")
+        page.goto(START_URL)
+        page.wait_for_load_state("domcontentloaded")
+        search_selector = (
+            '[placeholder*="Search"], [aria-label*="Search"], '
+            '[placeholder*="Hledat"], [aria-label*="Hledat"]'
+        )
+        page.wait_for_selector(search_selector, timeout=30_000)
+
+        # 2) Inbox (English and Czech folder names); first visible candidate wins
+        print(" 2/4  Otevírám Inbox...")
+        inbox_candidates = [
+            'div[role="treeitem"]:has-text("Inbox")',
+            'div[role="treeitem"]:has-text("Doručená pošta")',
+            'text=Inbox',
+            'text=Doručená pošta',
+        ]
+        for sel in inbox_candidates:
+            loc = page.locator(sel).first
+            if loc.count() and loc.is_visible():
+                loc.click()
+                break
+        page.wait_for_selector('div[role="option"]', timeout=15_000)
+        page.wait_for_timeout(1000)
+
+        # 3) Select the first message
+        print(" 3/4  Vybírám první zprávu...")
+        page.locator('div[role="option"]').first.click()
+        page.wait_for_timeout(800)
+
+        # 4) Loop: download the selected message → ArrowDown → while the
+        # selection keeps moving.
+        # Note: section dividers (Today/Yesterday/...) are role="button"
+        # aria-expanded elements — the cursor STOPS on them and no message
+        # has aria-selected (selected.count()==0). Such a step is only
+        # stepped over (ArrowDown again); it is NOT counted as a message
+        # and does NOT end the loop.
+        print(" 4/4  Stahuji zprávy...\n")
+        saved = 0
+        dividers = 0
+        failed = 0
+        prev_label = None
+        no_progress = 0          # consecutive steps without the selection moving
+        NO_PROGRESS_MAX = 4      # that many = end of list / stuck
+
+        while LIMIT is None or saved < LIMIT:
+            selected = page.locator('[role="option"][aria-selected="true"]').first
+
+            # (a) standing on a section divider → step over it
+            if selected.count() == 0:
+                dividers += 1
+                no_progress += 1
+                if no_progress >= NO_PROGRESS_MAX:
+                    print(" Konec seznamu / zaseknutí — končím.")
+                    break
+                page.keyboard.press("ArrowDown")
+                page.wait_for_timeout(250)
+                continue
+
+            label = selected.get_attribute("aria-label") or ""
+
+            # (b) the selection did not move (end of list)
+            if label == prev_label:
+                no_progress += 1
+                if no_progress >= NO_PROGRESS_MAX:
+                    print(" Konec seznamu (výběr se nehýbe).")
+                    break
+                page.keyboard.press("ArrowDown")
+                page.wait_for_timeout(250)
+                continue
+
+            # (c) new message → download it
+            no_progress = 0
+            prev_label = label
+            target = download_selected(page, OUT_DIR)
+
+            if target is None:
+                failed += 1
+                print(f"   [!]   selhalo: {label[:70]}")
+            else:
+                saved += 1
+                print(f"   [{saved:>4}] {target.name}")
+
+            # refocus the list (click the message, not a divider) + move on;
+            # a failed click is ignored on purpose (best effort)
+            try:
+                selected.click()
+            except Exception:
+                pass
+            page.wait_for_timeout(200)
+            page.keyboard.press("ArrowDown")
+            page.wait_for_timeout(300)
+
+        print(f"\n Hotovo. Uloženo {saved}, oddělovačů přeskočeno {dividers}, "
+              f"selhalo {failed} → {OUT_DIR}")
+        input(" Stiskni Enter pro zavření okna... ")
+        context.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,30 @@
+# download_first_inbox_eml_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+
+## Cíl
+Otevřít Outlook OWA, vybrat první zprávu v Inboxu a stáhnout ji jako `.eml`.
+
+## Kroky
+1. Otevře OWA z persistent profilu (`outlook_profile/`).
+2. Přejde do Inboxu / Doručené pošty.
+3. Klikne na první zprávu v seznamu.
+4. **Pravý klik** na řádek zprávy → kontextové menu (patří celé zprávě, ne příloze)
+   → hover na **Download** → klik **Download as EML**, soubor uloží do `downloads/`.
+
+## Výstup
+`downloads/<původní_název_z_OWA>.eml`
+
+## Spuštění
+```
+python download_first_inbox_eml_v1.0.py
+```
+
+## Poznámky
+- **Pravý klik na řádek zprávy** je spolehlivější než "..." v toolbaru — kontextové
+  menu je vždy svázané s celou zprávou, takže odpadá riziko trefení "..." přílohy.
+- Na **Download** se najíždí `hover()` (otevře submenu), ne klikem.
+- Selektory mají EN i CZ varianty.
+- `accept_downloads=True` + `page.expect_download()` — bez toho Playwright stažení nezachytí.
+- Okno se po stažení nezavře, čeká na Enter.
@@ -0,0 +1,142 @@
+"""
+=======================================================================
+ Název:   download_first_inbox_eml_v1.0.py
+ Verze:   1.0
+ Datum:   2026-06-03
+ Popis:   Experimental script: opens Outlook OWA, goes to the Inbox, clicks
+          the first message and downloads it as .eml via the message row's
+          right-click context menu → Download → Download as EML.
+
+          Používá persistent profil z outlook_login_v1.0.py.
+=======================================================================
+"""
+
+from pathlib import Path
+from playwright.sync_api import sync_playwright
+
+# Profile and output directories live next to this script.
+BASE_DIR = Path(__file__).resolve().parent
+PROFILE_DIR = BASE_DIR / "outlook_profile"
+OUT_DIR = BASE_DIR / "downloads"
+START_URL = "https://outlook.cloud.microsoft/mail/"
+
+
+def main() -> None:
+    if not PROFILE_DIR.exists():
+        print(f" Profil nenalezen: {PROFILE_DIR}")
+        print(" Nejprve spusť outlook_login_v1.0.py.")
+        return
+    OUT_DIR.mkdir(exist_ok=True)
+
+    with sync_playwright() as p:
+        context = p.chromium.launch_persistent_context(
+            user_data_dir=str(PROFILE_DIR),
+            headless=False,
+            no_viewport=True,
+            accept_downloads=True,
+            args=[
+                "--disable-blink-features=AutomationControlled",
+                "--start-maximized",
+            ],
+        )
+        page = context.pages[0] if context.pages else context.new_page()
+
+        # 1) Otevřít Outlook
+        print(" 1/6  Otevírám Outlook...")
+        page.goto(START_URL)
+        page.wait_for_load_state("domcontentloaded")
+        search_selector = (
+            '[placeholder*="Search"], [aria-label*="Search"], '
+            '[placeholder*="Hledat"], [aria-label*="Hledat"]'
+        )
+        page.wait_for_selector(search_selector, timeout=30_000)
+
+        # 2) Inbox / Doručená pošta
+        print(" 2/6  Otevírám Inbox...")
+        inbox_candidates = [
+            'div[role="treeitem"]:has-text("Inbox")',
+            'div[role="treeitem"]:has-text("Doručená pošta")',
+            'text=Inbox',
+            'text=Doručená pošta',
+        ]
+        for sel in inbox_candidates:
+            loc = page.locator(sel).first
+            if loc.count() and loc.is_visible():
+                loc.click()
+                break
+        page.wait_for_selector('div[role="option"]', timeout=15_000)
+        page.wait_for_timeout(1000)
+
+        # 3) První zpráva v inboxu
+        print(" 3/4  Vybírám první zprávu...")
+        first_msg = page.locator('div[role="option"]').first
+        first_msg.click()
+        page.wait_for_timeout(1000)
+
+        # 4) PRAVÝ KLIK na řádek zprávy → kontextové menu patří CELÉ zprávě
+        #    (ne příloze). Na "Download" najet hoverem (otevře submenu), pak
+        #    kliknout na "Download as EML".
+        print(" 4/4  Pravý klik → Download → Download as EML...")
+        first_msg.click(button="right")
+        page.wait_for_timeout(700)
+
+        download_parent = None
+        for name in ("Download", "Stáhnout"):
+            loc = page.get_by_role("menuitem", name=name).first
+            if loc.count() and loc.is_visible():
+                download_parent = loc
+                break
+        if download_parent is None:
+            items = page.get_by_role("menuitem").all()
+            print(" ! Download položka v menu nenalezena. Obsah menu:")
+            for it in items:
+                try:
+                    txt = it.inner_text(timeout=500).strip().replace("\n", " | ")
+                    print(f"      - {txt[:100]}")
+                except Exception:
+                    pass
+            page.screenshot(path=str(OUT_DIR / "debug_menu.png"))
+            print(f"   screenshot: {OUT_DIR / 'debug_menu.png'}")
+            input(" Enter pro zavření... ")
+            context.close()
+            return
+
+        download_parent.hover()
+        page.wait_for_timeout(600)
+
+        eml_item = None
+        for name in ("Download as EML", "Stáhnout jako EML", "Stáhnout jako .eml"):
+            loc = page.get_by_role("menuitem", name=name).first
+            if loc.count() and loc.is_visible():
+                eml_item = loc
+                break
+
+        try:
+            if eml_item is not None:
+                with page.expect_download(timeout=15_000) as download_info:
+                    eml_item.click()
+            else:
+                # některé buildy OWA stahují EML přímo bez submenu
+                with page.expect_download(timeout=15_000) as download_info:
+                    download_parent.click()
+            download = download_info.value
+        except Exception as e:
+            page.screenshot(path=str(OUT_DIR / "debug_menu.png"))
+            print(f" ! Stažení selhalo: {e}")
+            print(f"   screenshot: {OUT_DIR / 'debug_menu.png'}")
+            input(" Enter pro zavření... ")
+            context.close()
+            return
+
+        target = OUT_DIR / (download.suggested_filename or "first_inbox.eml")
+        download.save_as(str(target))
+        print(f" Hotovo → {target}")
+
+        if not target.name.lower().endswith(".eml"):
+            print(f" ! POZOR: {target.name} nevypadá jako EML — možná stažena příloha!")
+
+        input(" Stiskni Enter pro zavření okna... ")
+        context.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,30 @@
+# forward_last_to_klucho_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+
+## Cíl
+Pokusný skript: najde poslední e-mail odeslaný na `klucho@gastroenterolog.com`
+a přepošle ho na adresu `vladimir.buzalka@buzalka.cz` s předmětem `Ahoj`
+a slovem `Ahoj` na prvním řádku těla.
+
+## Kroky
+1. Otevře OWA (persistent profil z `outlook_login_v1.0.py`).
+2. Přejde do Odeslané pošty.
+3. Vyhledá `to:klucho@gastroenterolog.com`.
+4. Otevře nejnovější výsledek.
+5. Klikne Forward / Přeposlat.
+6. Vyplní příjemce.
+7. Změní předmět na `Ahoj`.
+8. Vloží `Ahoj` na první řádek těla.
+9. Odešle (a počká na potvrzení Enterem před zavřením okna).
+
+## Poznámky
+- Selektory mají EN i CZ varianty (`Forward` / `Přeposlat`, `To` / `Komu`, …).
+- `headless=False` — schválně viditelné, aby šlo sledovat průběh.
+- POZOR: krok 9 reálně odešle e-mail. Pro suchý běh zakomentuj `send_btn.click()`.
+
+## Spuštění
+```
+python forward_last_to_klucho_v1.0.py
+```
@@ -0,0 +1,155 @@
+"""
+=======================================================================
+ Název:   forward_last_to_klucho_v1.0.py
+ Verze:   1.0
+ Datum:   2026-06-03
+ Popis:   Pokusný skript: v Outlook OWA najde poslední odeslaný e-mail
+          na adresu klucho@gastroenterolog.com, otevře Forward, vyplní
+          příjemce vladimir.buzalka@buzalka.cz, předmět "Ahoj", na
+          první řádek těla "Ahoj" a odešle.
+
+          Používá persistent profil z outlook_login_v1.0.py.
+          headless=False kvůli sledování průběhu.
+=======================================================================
+"""
+
+from pathlib import Path
+from playwright.sync_api import sync_playwright
+
+# The persistent Chromium profile lives next to this script.
+BASE_DIR = Path(__file__).resolve().parent
+PROFILE_DIR = BASE_DIR / "outlook_profile"
+START_URL = "https://outlook.cloud.microsoft/mail/"
+
+# Address searched for with "to:<address>" to find the message to forward.
+TARGET_RECIPIENT = "klucho@gastroenterolog.com"
+# Address the message is forwarded to.
+FORWARD_TO = "vladimir.buzalka@buzalka.cz"
+# Used both as the new subject and as the first line of the body.
+GREETING = "Ahoj"
+
+
+def main() -> None:
+    """Forward the first search hit for TARGET_RECIPIENT to FORWARD_TO.
+
+    Prints a hint and returns when PROFILE_DIR does not exist. Otherwise
+    opens Outlook in the persistent profile, opens Sent Items, searches for
+    "to:<TARGET_RECIPIENT>", opens the first result, clicks Forward, fills
+    in the recipient, replaces the subject with GREETING, types GREETING on
+    the first body line and clicks Send.
+
+    WARNING: step 9 really sends an e-mail. Returns early (after Enter) when
+    the Forward button is not found.
+    """
+    if not PROFILE_DIR.exists():
+        print(f" Profil nenalezen: {PROFILE_DIR}")
+        print(" Nejprve spusť outlook_login_v1.0.py.")
+        return
+
+    with sync_playwright() as p:
+        context = p.chromium.launch_persistent_context(
+            user_data_dir=str(PROFILE_DIR),
+            headless=False,
+            no_viewport=True,
+            args=[
+                "--disable-blink-features=AutomationControlled",
+                "--start-maximized",
+            ],
+        )
+        page = context.pages[0] if context.pages else context.new_page()
+
+        # 1) Open Outlook
+        print(" 1/9  Otevírám Outlook...")
+        page.goto(START_URL)
+        page.wait_for_load_state("domcontentloaded")
+        # The search box placeholder varies; try several variants
+        search_selector = (
+            '[placeholder*="Search"], [aria-label*="Search"], '
+            '[placeholder*="Hledat"], [aria-label*="Hledat"]'
+        )
+        page.wait_for_selector(search_selector, timeout=30_000)
+
+        # 2) Go to Sent Items (English and Czech folder names)
+        print(" 2/9  Otevírám Odeslanou poštu...")
+        sent_candidates = [
+            'div[role="treeitem"]:has-text("Sent Items")',
+            'div[role="treeitem"]:has-text("Odeslaná pošta")',
+            'text=Sent Items',
+            'text=Odeslaná pošta',
+        ]
+        for sel in sent_candidates:
+            loc = page.locator(sel).first
+            if loc.count() and loc.is_visible():
+                loc.click()
+                break
+        page.wait_for_timeout(1500)
+
+        # 3) Search for e-mails sent to the recipient
+        print(f" 3/9  Hledám e-maily na {TARGET_RECIPIENT}...")
+        search = page.locator(search_selector).first
+        search.click()
+        search.fill(f"to:{TARGET_RECIPIENT}")
+        search.press("Enter")
+        page.wait_for_timeout(2500)
+
+        # 4) Click the first result (assumed to be the newest — the code does
+        #    not verify the sort order)
+        print(" 4/9  Otevírám nejnovější výsledek...")
+        first_msg = page.locator('div[role="option"]').first
+        first_msg.wait_for(state="visible", timeout=15_000)
+        first_msg.click()
+        page.wait_for_timeout(2000)
+
+        # 5) Forward (first visible candidate wins)
+        print(" 5/9  Klikám Forward...")
+        forward_candidates = [
+            'button[aria-label="Forward"]',
+            'button[aria-label="Přeposlat"]',
+            'button:has-text("Forward")',
+            'button:has-text("Přeposlat")',
+        ]
+        clicked = False
+        for sel in forward_candidates:
+            btn = page.locator(sel).first
+            if btn.count() and btn.is_visible():
+                btn.click()
+                clicked = True
+                break
+        if not clicked:
+            print(" ! Tlačítko Forward nenalezeno — končím.")
+            input(" Stiskni Enter pro zavření... ")
+            context.close()
+            return
+
+        # 6) Recipient; Tab leaves the field after filling it
+        print(f" 6/9  Vyplňuji příjemce {FORWARD_TO}...")
+        to_field = page.locator(
+            '[aria-label="To"], [aria-label="Komu"], '
+            '[placeholder*="To"], [placeholder*="Komu"]'
+        ).first
+        to_field.wait_for(state="visible", timeout=10_000)
+        to_field.click()
+        to_field.fill(FORWARD_TO)
+        page.keyboard.press("Tab")
+        page.wait_for_timeout(500)
+
+        # 7) Subject
+        print(f" 7/9  Měním předmět na '{GREETING}'...")
+        subject = page.locator(
+            '[aria-label="Subject"], [aria-label="Předmět"]'
+        ).first
+        subject.click()
+        # select everything and overwrite it
+        page.keyboard.press("Control+A")
+        page.keyboard.type(GREETING)
+
+        # 8) Body — GREETING on the first line (Control+Home moves the caret
+        #    to the start, Enter pushes the existing content down)
+        print(f" 8/9  Vkládám '{GREETING}' na první řádek těla...")
+        body = page.locator(
+            '[aria-label="Message body"], [aria-label="Tělo zprávy"], '
+            'div[role="textbox"][contenteditable="true"]'
+        ).first
+        body.click()
+        page.keyboard.press("Control+Home")
+        page.keyboard.type(GREETING)
+        page.keyboard.press("Enter")
+
+        # 9) Send — WARNING: this really sends the e-mail
+        print(" 9/9  Odesílám...")
+        send_btn = page.locator(
+            'button[aria-label="Send"], button[aria-label="Odeslat"]'
+        ).first
+        send_btn.click()
+        page.wait_for_timeout(3000)
+
+        print(" Hotovo — e-mail odeslán.")
+        input(" Stiskni Enter pro zavření okna... ")
+        context.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,44 @@
+# outlook_login_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+
+## Cíl
+Jednorázové ruční přihlášení do Outlook OWA (`https://outlook.cloud.microsoft/mail/`)
+a uložení session pro pozdější neinteraktivní skripty.
+
+## Co dělá
+1. Spustí Chromium v **persistent contextu** (adresář `outlook_profile/` vedle skriptu).
+2. Otevře OWA.
+3. Čeká, až se uživatel ručně přihlásí (účet, heslo, MFA, "Stay signed in").
+4. V konzoli se zeptá `Hotovo? Napiš 'OK' pro uložení session:`.
+5. Po zadání `OK` uloží:
+   - `outlook_profile/` — persistent profil (cookies, IndexedDB, service workers)
+   - `outlook_auth.json` — `storage_state` (cookies + localStorage)
+6. Zavře prohlížeč.
+
+## Spuštění
+```
+python outlook_login_v1.0.py
+```
+
+## Závislosti
+- `playwright` (`pip install playwright && playwright install chromium`)
+
+## Použití session v dalším skriptu
+Persistent profil (doporučeno pro OWA):
+```python
+context = p.chromium.launch_persistent_context(
+    user_data_dir="./outlook_profile",
+    headless=False,
+)
+```
+
+Nebo `storage_state` (pokud ti stačí jen cookies + localStorage):
+```python
+context = browser.new_context(storage_state="outlook_auth.json")
+```
+
+## Poznámky
+- Při prvním přihlášení zaškrtnout **"Zůstat přihlášen"** — MFA cookie u J&J typicky vydrží ~30 dní.
+- Pokud session vyprší, stačí znovu spustit tento skript.
@@ -0,0 +1,62 @@
+"""
+=======================================================================
+ Název:   outlook_login_v1.0.py
+ Verze:   1.0
+ Datum:   2026-06-03
+ Popis:   Otevře Outlook OWA (https://outlook.cloud.microsoft/mail/)
+          v persistent Chromium profilu, počká na ruční přihlášení
+          uživatele (včetně MFA), po potvrzení v konzoli uloží
+          session (profile + storage_state) a zavře prohlížeč.
+
+          Další skripty mohou stejný profil znovu otevřít bez loginu.
+=======================================================================
+"""
+
+from pathlib import Path
+from playwright.sync_api import sync_playwright
+
+BASE_DIR = Path(__file__).resolve().parent
+PROFILE_DIR = BASE_DIR / "outlook_profile"
+STORAGE_STATE = BASE_DIR / "outlook_auth.json"
+START_URL = "https://outlook.cloud.microsoft/mail/"
+
+
+def main() -> None:
+    PROFILE_DIR.mkdir(exist_ok=True)
+
+    with sync_playwright() as p:
+        context = p.chromium.launch_persistent_context(
+            user_data_dir=str(PROFILE_DIR),
+            headless=False,
+            no_viewport=True,
+            args=[
+                "--disable-blink-features=AutomationControlled",
+                "--start-maximized",
+            ],
+        )
+
+        page = context.pages[0] if context.pages else context.new_page()
+        page.goto(START_URL)
+
+        print()
+        print("=" * 70)
+        print(" Přihlas se v otevřeném okně do Outlooku.")
+        print(" Až budeš v inboxu (vidíš seznam e-mailů), vrať se sem.")
+        print("=" * 70)
+        answer = input(" Hotovo? Napiš 'OK' pro uložení session: ").strip().lower()
+
+        if answer == "ok":
+            try:
+                context.storage_state(path=str(STORAGE_STATE))
+                print(f"  Uloženo: {STORAGE_STATE}")
+            except Exception as e:
+                print(f"  storage_state se neuložil: {e}")
+            print(f"  Persistent profil: {PROFILE_DIR}")
+        else:
+            print(" Zrušeno — session se neuloží (profil ale zůstává).")
+
+        context.close()
+
+
+# Start the interactive login flow only when run as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,80 @@
+# enrich_fulltext_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+**Skript:** `enrich_fulltext_v1.0.py`
+
+## Účel
+Pro každý dokument odkazovaný v MongoDB (`soubory.*`) vytáhne **plný text** a uloží do PostgreSQL s GIN `tsvector` indexem pro fulltext vyhledávání.
+
+## Cíl: PostgreSQL `MongoSoubory`
+- **host:** 192.168.1.76:5432
+- **db:** `MongoSoubory`
+- **user:** vladimir.buzalka
+- **extension:** `unaccent`, `pg_trgm`
+- **text search config:** `soubory` (= simple + unaccent → case- a diakritika-insensitivní)
+
+## Tabulka `documents`
+| sloupec | typ | popis |
+|---|---|---|
+| id | BIGSERIAL | PK |
+| mongo_id | TEXT | ObjectId z Mongo |
+| study | TEXT | kolekce v Mongo (`42847922MDD3003` / `77242113UCO3001`) |
+| path | TEXT | absolutní cesta (UNIQUE s study) |
+| rel_path, name, ext | TEXT | doplňková metadata |
+| sha256 | TEXT | pro inkrementální kontrolu |
+| size_bytes, mtime | | |
+| **body** | TEXT | plný extrahovaný text (max 5 MB) |
+| body_length | INT | délka v znacích |
+| **tsv** | tsvector GENERATED STORED | `to_tsvector('soubory', body)` |
+| extracted_at | TIMESTAMPTZ | čas extrakce |
+| extractor_version | TEXT | verze tohoto skriptu |
+| ok | BOOLEAN | true pokud extrakce proběhla |
+| error | TEXT | chybové hlášení |
+
+**Indexy:** GIN nad `tsv`, GIN trigram nad `name`, btree `sha256`, btree `(study, ext)`.
+
+## Podporované přípony
+`pdf`, `docx`, `xlsx`, `xlsm`, `pptx`, `eml`, `msg`, `txt`, `csv`
+
+## Inkrementální chování
+Soubor se přeskočí, pokud v PG už pro stejnou studii a cestu (`study`, `path`) existuje záznam s:
+- shodným `sha256`
+- shodnou `extractor_version`
+- `ok = true`
+
+Jinak se přeparsuje a UPSERT.
+
+## Limity (skip s `error=too_big_...`)
+- PDF nad 500 MB
+- XLSX nad 200 MB
+- ostatní nad 300 MB
+- `body` se vždy ořízne na 5 MB UTF-8
+
+## Příklady dotazů (psql)
+```sql
+-- fulltext (case+diakritika insensitivní)
+SELECT study, name, ts_rank_cd(tsv, q) AS rank,
+       ts_headline('soubory', body, q, 'MaxFragments=2,MinWords=5,MaxWords=15') AS snippet
+FROM documents, plainto_tsquery('soubory', 'amendment 3') q
+WHERE tsv @@ q
+ORDER BY rank DESC
+LIMIT 20;
+
+-- jméno obsahuje podřetězec (ILIKE, zrychleno trigram indexem)
+SELECT study, name FROM documents
+WHERE name ILIKE '%protokol%';
+
+-- 10 nejdelších dokumentů (napříč všemi studiemi)
+SELECT study, name, body_length
+FROM documents
+WHERE ok = true
+ORDER BY body_length DESC LIMIT 10;
+```
+
+## Spuštění
+```
+python U:\PythonProject\Janssen\Soubory\enrich_fulltext_v1.0.py
+```
+
+Průběh tiskne řádek na soubor: `[n/total] OK pdf  2.3MB  protokol.pdf  | 12340 znaku  'Protocol amendment ...'`
@@ -0,0 +1,416 @@
+"""
+==============================================================================
+Skript:   enrich_fulltext_v1.0.py
+Verze:    1.0
+Datum:    2026-06-03
+Autor:    vladimir.buzalka
+Popis:    Vytahne PLNY TEXT z dokumentu odkazovanych v MongoDB (db: soubory)
+          a ulozi ho do PostgreSQL (db: MongoSoubory) s GIN tsvector
+          fulltext indexem.
+
+          Zdroje:
+            - MongoDB    192.168.1.76 db=soubory  kolekce=42847922MDD3003, 77242113UCO3001
+            - PostgreSQL 192.168.1.76 db=MongoSoubory tabulka=documents
+
+          Podporovane pripony: pdf, docx, xlsx, xlsm, pptx, eml, msg, txt, csv
+
+          Inkrementalne: preskoci soubor, kde v PG existuje radek se shodnym
+          sha256 a extractor_version a ok=true.
+
+          Pri prvnim behu sam vytvori tabulku, indexy a textovou konfiguraci
+          'soubory' (unaccent + simple) - vyhleda case- a diakritika-insensitivni.
+==============================================================================
+"""
+
+from __future__ import annotations
+
+import email
+import email.policy
+import sys
+import time
+import traceback
+from datetime import datetime, timezone
+from pathlib import Path
+
+import psycopg
+from pymongo import MongoClient
+
+# --- configuration ----------------------------------------------------------
+# Source MongoDB server, database and the collections that main() iterates over
+# (each collection name is passed to process_collection as the study).
+MONGO_URI = "mongodb://192.168.1.76:27017"
+MONGO_DB = "soubory"
+MONGO_COLLECTIONS = ["42847922MDD3003", "77242113UCO3001"]
+
+# NOTE(review): the PostgreSQL password is committed here in plain text.
+# Rotate it and supply it from outside the repository (e.g. PGPASSWORD or
+# ~/.pgpass, which libpq reads when the DSN carries no password).
+PG_DSN = ("host=192.168.1.76 port=5432 dbname=MongoSoubory "
+          "user=vladimir.buzalka password=Vlado7309208104++")
+
+# Stored with every row; process_collection re-extracts rows whose stored
+# extractor_version differs from this value.
+EXTRACTOR_VERSION = "1.0"
+
+MAX_TEXT_BYTES = 5 * 1024 * 1024   # at most 5 MB of text per document
+# Input file size limits, paired with the extractors in EXTRACTORS.
+MAX_PDF_BYTES = 500 * 1024 * 1024
+MAX_XLSX_BYTES = 200 * 1024 * 1024
+MAX_GENERIC_BYTES = 300 * 1024 * 1024
+
+# NOTE(review): SUPPORTED is not referenced anywhere in this script; the keys
+# of EXTRACTORS carry the same list.
+SUPPORTED = ("pdf", "docx", "xlsx", "xlsm", "pptx", "eml", "msg", "txt", "csv")
+
+
+# --- SCHEMA -----------------------------------------------------------------
+
+# DDL executed by main() on every start.  Each statement is guarded
+# (IF NOT EXISTS / a pg_ts_config lookup), so re-running it is harmless.
+# It creates: the unaccent and pg_trgm extensions, the text search
+# configuration 'soubory' (copy of 'simple' with unaccent applied to word
+# tokens), the documents table with a generated tsvector column, and the
+# supporting indexes.
+SCHEMA_SQL = """
+CREATE EXTENSION IF NOT EXISTS unaccent;
+CREATE EXTENSION IF NOT EXISTS pg_trgm;
+
+DO $$
+BEGIN
+    IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'soubory') THEN
+        CREATE TEXT SEARCH CONFIGURATION soubory ( COPY = simple );
+        ALTER TEXT SEARCH CONFIGURATION soubory
+            ALTER MAPPING FOR hword, hword_part, word
+            WITH unaccent, simple;
+    END IF;
+END$$;
+
+CREATE TABLE IF NOT EXISTS documents (
+    id              BIGSERIAL PRIMARY KEY,
+    mongo_id        TEXT NOT NULL,
+    study           TEXT NOT NULL,
+    path            TEXT NOT NULL,
+    rel_path        TEXT,
+    name            TEXT,
+    ext             TEXT,
+    sha256          TEXT NOT NULL,
+    size_bytes      BIGINT,
+    mtime           TIMESTAMPTZ,
+    body            TEXT,
+    body_length     INT,
+    tsv             tsvector GENERATED ALWAYS AS (
+                        to_tsvector('soubory'::regconfig, coalesce(body, ''))
+                    ) STORED,
+    extracted_at    TIMESTAMPTZ DEFAULT now(),
+    extractor_version TEXT,
+    ok              BOOLEAN,
+    error           TEXT,
+    UNIQUE (study, path)
+);
+
+CREATE INDEX IF NOT EXISTS documents_tsv_gin       ON documents USING gin(tsv);
+CREATE INDEX IF NOT EXISTS documents_name_trgm     ON documents USING gin(name gin_trgm_ops);
+CREATE INDEX IF NOT EXISTS documents_sha256_idx    ON documents(sha256);
+CREATE INDEX IF NOT EXISTS documents_study_ext_idx ON documents(study, ext);
+"""
+
+
+# --- EXTRAKTORY (vraci string, max MAX_TEXT_BYTES) --------------------------
+
+def _truncate(s: str) -> str:
+    if not s:
+        return ""
+    b = s.encode("utf-8", errors="replace")
+    if len(b) <= MAX_TEXT_BYTES:
+        return s
+    return b[:MAX_TEXT_BYTES].decode("utf-8", errors="ignore")
+
+
+def extract_pdf(path: Path) -> str:
+    from pypdf import PdfReader
+    reader = PdfReader(str(path))
+    if reader.is_encrypted:
+        try:
+            reader.decrypt("")
+        except Exception:
+            return ""
+    parts = []
+    total = 0
+    for page in reader.pages:
+        try:
+            t = page.extract_text() or ""
+        except Exception:
+            continue
+        parts.append(t)
+        total += len(t)
+        if total > MAX_TEXT_BYTES:
+            break
+    return _truncate("\n".join(parts))
+
+
+def extract_docx(path: Path) -> str:
+    from docx import Document
+    doc = Document(str(path))
+    parts = [p.text for p in doc.paragraphs if p.text]
+    for tbl in doc.tables:
+        for row in tbl.rows:
+            parts.append(" | ".join(c.text for c in row.cells))
+    return _truncate("\n".join(parts))
+
+
+def extract_xlsx(path: Path) -> str:
+    from openpyxl import load_workbook
+    wb = load_workbook(str(path), read_only=True, data_only=True)
+    parts = []
+    total = 0
+    for ws in wb.worksheets:
+        parts.append(f"# {ws.title}")
+        for row in ws.iter_rows(values_only=True):
+            line = "\t".join("" if v is None else str(v) for v in row)
+            if line.strip():
+                parts.append(line)
+                total += len(line)
+                if total > MAX_TEXT_BYTES:
+                    break
+        if total > MAX_TEXT_BYTES:
+            break
+    wb.close()
+    return _truncate("\n".join(parts))
+
+
+def extract_pptx(path: Path) -> str:
+    from pptx import Presentation
+    prs = Presentation(str(path))
+    parts = []
+    for i, slide in enumerate(prs.slides, 1):
+        parts.append(f"# slide {i}")
+        for shape in slide.shapes:
+            if shape.has_text_frame:
+                for para in shape.text_frame.paragraphs:
+                    line = "".join(run.text for run in para.runs)
+                    if line.strip():
+                        parts.append(line)
+        if slide.has_notes_slide:
+            notes = slide.notes_slide.notes_text_frame.text
+            if notes:
+                parts.append(f"[notes] {notes}")
+    return _truncate("\n".join(parts))
+
+
+def extract_eml(path: Path) -> str:
+    with path.open("rb") as f:
+        msg = email.message_from_binary_file(f, policy=email.policy.default)
+    head = []
+    for k in ("From", "To", "Cc", "Subject", "Date"):
+        v = msg.get(k)
+        if v:
+            head.append(f"{k}: {v}")
+    parts = ["\n".join(head)]
+    if msg.is_multipart():
+        for part in msg.walk():
+            if part.get_content_type() == "text/plain" and not part.get_filename():
+                try:
+                    parts.append(part.get_content())
+                except Exception:
+                    pass
+    else:
+        try:
+            parts.append(msg.get_content())
+        except Exception:
+            pass
+    return _truncate("\n\n".join(parts))
+
+
+def extract_msg(path: Path) -> str:
+    import extract_msg
+    with extract_msg.openMsg(str(path)) as m:
+        head = []
+        if m.subject:  head.append(f"Subject: {m.subject}")
+        if m.sender:   head.append(f"From: {m.sender}")
+        if m.to:       head.append(f"To: {m.to}")
+        if m.cc:       head.append(f"Cc: {m.cc}")
+        if m.date:     head.append(f"Date: {m.date}")
+        return _truncate("\n".join(head) + "\n\n" + (m.body or ""))
+
+
+def extract_text(path: Path) -> str:
+    data = path.read_bytes()[:MAX_TEXT_BYTES]
+    for enc in ("utf-8-sig", "cp1250", "latin-1"):
+        try:
+            return data.decode(enc)
+        except UnicodeDecodeError:
+            continue
+    return data.decode("utf-8", errors="replace")
+
+
+# File extension -> (extractor function, maximum input file size in bytes).
+# process_collection uses the keys for the Mongo query and skips files whose
+# recorded size exceeds the limit.
+EXTRACTORS = {
+    "pdf":  (extract_pdf,   MAX_PDF_BYTES),
+    "docx": (extract_docx,  MAX_GENERIC_BYTES),
+    "xlsx": (extract_xlsx,  MAX_XLSX_BYTES),
+    "xlsm": (extract_xlsx,  MAX_XLSX_BYTES),
+    "pptx": (extract_pptx,  MAX_GENERIC_BYTES),
+    "eml":  (extract_eml,   MAX_GENERIC_BYTES),
+    "msg":  (extract_msg,   MAX_GENERIC_BYTES),
+    "txt":  (extract_text,  MAX_GENERIC_BYTES),
+    "csv":  (extract_text,  MAX_GENERIC_BYTES),
+}
+
+
+def _short(s, n=40):
+    if not s:
+        return ""
+    s = str(s).replace("\n", " ").replace("\r", " ").strip()
+    return s if len(s) <= n else s[:n] + "..."
+
+
+def _now() -> datetime:
+    return datetime.now(tz=timezone.utc)
+
+
+# --- HLAVNI SMYCKA ----------------------------------------------------------
+
+def process_collection(pg: psycopg.Connection, mongo_coll, study: str) -> dict:
+    # nactu z PG existujici sha256 + verzi
+    with pg.cursor() as cur:
+        cur.execute(
+            "SELECT path, sha256, extractor_version, ok FROM documents WHERE study = %s",
+            (study,),
+        )
+        existing = {row[0]: (row[1], row[2], row[3]) for row in cur.fetchall()}
+
+    cursor = mongo_coll.find(
+        {"ext": {"$in": list(EXTRACTORS.keys())}, "deleted_at": {"$exists": False}},
+        {"_id": 1, "path": 1, "rel_path": 1, "name": 1, "ext": 1,
+         "sha256": 1, "size_bytes": 1, "mtime": 1},
+        no_cursor_timeout=True,
+    )
+
+    processed = ok = errors = skipped = too_big = 0
+    queue = []
+    total_pending = mongo_coll.count_documents(
+        {"ext": {"$in": list(EXTRACTORS.keys())}, "deleted_at": {"$exists": False}}
+    )
+    print(f"[{study}] kandidatu v Mongo: {total_pending}")
+
+    n = 0
+    try:
+        for doc in cursor:
+            n += 1
+            prev = existing.get(doc["path"])
+            if prev and prev[0] == doc.get("sha256") and prev[1] == EXTRACTOR_VERSION and prev[2]:
+                skipped += 1
+                continue
+
+            ext = doc["ext"]
+            extractor, max_bytes = EXTRACTORS[ext]
+            path = Path(doc["path"])
+
+            row = {
+                "mongo_id":  str(doc["_id"]),
+                "study":     study,
+                "path":      doc["path"],
+                "rel_path":  doc.get("rel_path"),
+                "name":      doc.get("name"),
+                "ext":       ext,
+                "sha256":    doc.get("sha256"),
+                "size_bytes": doc.get("size_bytes"),
+                "mtime":     doc.get("mtime"),
+                "body":      None,
+                "body_length": 0,
+                "extracted_at": _now(),
+                "extractor_version": EXTRACTOR_VERSION,
+                "ok":        False,
+                "error":     None,
+            }
+
+            status = "OK "
+            detail = ""
+            size_mb = (doc.get("size_bytes") or 0) / 1024 / 1024
+
+            if not path.exists():
+                row["error"] = "file_missing"
+                status = "ERR"; detail = "file_missing"; errors += 1
+            elif (doc.get("size_bytes") or 0) > max_bytes:
+                row["error"] = f"too_big_>{max_bytes}"
+                status = "BIG"; detail = f"too_big_>{max_bytes//1024//1024}MB"; too_big += 1
+            else:
+                try:
+                    body = extractor(path) or ""
+                    row["body"] = body if body else None
+                    row["body_length"] = len(body)
+                    row["ok"] = True
+                    ok += 1
+                    detail = f"{len(body)} znaku  {_short(body, 60)!r}"
+                except Exception as e:
+                    row["error"] = f"{type(e).__name__}: {e}"[:500]
+                    status = "ERR"; detail = row["error"][:80]; errors += 1
+
+            queue.append(row)
+            processed += 1
+            print(f"  [{n:>4}/{total_pending}] {status} {ext:<4} {size_mb:6.1f}MB  "
+                  f"{path.name}  | {detail}", flush=True)
+
+            if len(queue) >= 50:
+                _flush(pg, queue); queue.clear()
+    finally:
+        cursor.close()
+
+    if queue:
+        _flush(pg, queue)
+
+    return {"study": study, "processed": processed, "ok": ok,
+            "errors": errors, "skipped": skipped, "too_big": too_big}
+
+
+# Insert one extraction result; on a (study, path) conflict overwrite every
+# other column of the existing row.  Uses named parameters, so _flush()
+# passes the row dicts built in process_collection directly.
+UPSERT_SQL = """
+INSERT INTO documents
+    (mongo_id, study, path, rel_path, name, ext, sha256, size_bytes, mtime,
+     body, body_length, extracted_at, extractor_version, ok, error)
+VALUES
+    (%(mongo_id)s, %(study)s, %(path)s, %(rel_path)s, %(name)s, %(ext)s, %(sha256)s,
+     %(size_bytes)s, %(mtime)s, %(body)s, %(body_length)s, %(extracted_at)s,
+     %(extractor_version)s, %(ok)s, %(error)s)
+ON CONFLICT (study, path) DO UPDATE SET
+    mongo_id          = EXCLUDED.mongo_id,
+    rel_path          = EXCLUDED.rel_path,
+    name              = EXCLUDED.name,
+    ext               = EXCLUDED.ext,
+    sha256            = EXCLUDED.sha256,
+    size_bytes        = EXCLUDED.size_bytes,
+    mtime             = EXCLUDED.mtime,
+    body              = EXCLUDED.body,
+    body_length       = EXCLUDED.body_length,
+    extracted_at      = EXCLUDED.extracted_at,
+    extractor_version = EXCLUDED.extractor_version,
+    ok                = EXCLUDED.ok,
+    error             = EXCLUDED.error
+"""
+
+
+def _flush(pg: psycopg.Connection, rows: list[dict]) -> None:
+    with pg.cursor() as cur:
+        cur.executemany(UPSERT_SQL, rows)
+    pg.commit()
+
+
+def main() -> int:
+    t0 = time.time()
+    print("Pripojuji se k PostgreSQL...")
+    pg = psycopg.connect(PG_DSN, connect_timeout=10)
+    with pg.cursor() as cur:
+        cur.execute(SCHEMA_SQL)
+    pg.commit()
+    print("Schema OK.")
+
+    print("Pripojuji se k MongoDB...")
+    mongo = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    mongo.admin.command("ping")
+    db = mongo[MONGO_DB]
+    print("Mongo OK.")
+
+    results = []
+    for name in MONGO_COLLECTIONS:
+        results.append(process_collection(pg, db[name], name))
+
+    pg.close()
+
+    print("\n=== SHRNUTI ===")
+    for r in results:
+        print(f"  {r['study']}: processed={r['processed']}  ok={r['ok']}  "
+              f"errors={r['errors']}  skipped={r['skipped']}  too_big={r['too_big']}")
+    print(f"\nCelkem trvalo: {time.time() - t0:.1f} s")
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        # main() returns the exit code; SystemExit is not an Exception
+        # subclass, so it passes through both handlers below
+        raise SystemExit(main())
+    except KeyboardInterrupt:
+        print("\nPreruseno uzivatelem")
+    except Exception:
+        # any other failure: print the traceback and exit with status 1
+        traceback.print_exc()
+        sys.exit(1)
@@ -0,0 +1,22 @@
+# enrich_fulltext_v1.1
+
+**Verze:** 1.1
+**Datum:** 2026-06-03
+**Skript:** `enrich_fulltext_v1.1.py`
+
+## Změny proti v1.0
+- **NUL bajty (0x00) v textu** — PG TEXT je odmítá. v1.1 odstraní všechny `\x00` a ostatní controly (kromě `\n \r \t`) ve společné funkci `_clean_for_pg`, navíc bezpečnostní strip i v `_flush` před UPSERT.
+- **DOCX fallback** — pokud python-docx hodí výjimku (typicky `"no tr above topmost tr in w:tbl"` u VTMF formulářů s rozbitými tabulkami), v1.1 sáhne přímo do `word/document.xml` v ZIPu a regexem vytáhne text z `<w:t>` elementů. Přijde o strukturu tabulek, ale text zachrání.
+- `extractor_version` zvýšena na `1.1` → všechny řádky z v1.0 se přeparsují (původní řádky pravděpodobně stejně chyběly kvůli pádu).
+
+## Vše ostatní
+Beze změny proti [v1.0](Trash/enrich_fulltext_v1.0.md):
+- Tabulka `documents` v PG `MongoSoubory` (192.168.1.76:5432)
+- Text search config `soubory` (simple + unaccent)
+- Limity: PDF 500 MB, XLSX 200 MB, ostatní 300 MB; text max 5 MB
+- Inkrementálně podle `sha256` + `extractor_version`
+
+## Spuštění
+```
+python U:\PythonProject\Janssen\Soubory\enrich_fulltext_v1.1.py
+```
@@ -0,0 +1,457 @@
+"""
+==============================================================================
+Skript:   enrich_fulltext_v1.1.py
+Verze:    1.1
+Datum:    2026-06-03
+Autor:    vladimir.buzalka
+Popis:    Vytahne PLNY TEXT z dokumentu odkazovanych v MongoDB (db: soubory)
+          a ulozi ho do PostgreSQL (db: MongoSoubory) s GIN tsvector indexem.
+
+Zmeny proti v1.0:
+  - PG odmita NUL (0x00) bajty v TEXT -> v _truncate se vsechny NULy odstrani
+    (i jine controly krome \\n \\r \\t)
+  - DOCX fallback: pokud python-docx selze (typicky "no tr above topmost tr
+    in w:tbl" u rozbitych tabulek), pokusi se primy raw extract z word/document.xml
+    pres regex - prijde o strukturu tabulek, ale zachrani text
+  - drobnost: posunul jsem extractor_version na "1.1" -> stare radky se preparsuji
+
+Cilove ulozeni:
+  - MongoDB    192.168.1.76 db=soubory  kolekce=42847922MDD3003, 77242113UCO3001
+  - PostgreSQL 192.168.1.76 db=MongoSoubory tabulka=documents
+
+Podporovane pripony: pdf, docx, xlsx, xlsm, pptx, eml, msg, txt, csv
+==============================================================================
+"""
+
+from __future__ import annotations
+
+import email
+import email.policy
+import re
+import sys
+import time
+import traceback
+import zipfile
+from datetime import datetime, timezone
+from pathlib import Path
+
+import psycopg
+from pymongo import MongoClient
+
+# --- configuration ----------------------------------------------------------
+# Source MongoDB server, database and the collections to scan.
+MONGO_URI = "mongodb://192.168.1.76:27017"
+MONGO_DB = "soubory"
+MONGO_COLLECTIONS = ["42847922MDD3003", "77242113UCO3001"]
+
+# NOTE(review): the PostgreSQL password is committed here in plain text.
+# Rotate it and supply it from outside the repository (e.g. PGPASSWORD or
+# ~/.pgpass, which libpq reads when the DSN carries no password).
+PG_DSN = ("host=192.168.1.76 port=5432 dbname=MongoSoubory "
+          "user=vladimir.buzalka password=Vlado7309208104++")
+
+# Stored with every row; process_collection re-extracts rows whose stored
+# extractor_version differs from this value.
+EXTRACTOR_VERSION = "1.1"
+
+MAX_TEXT_BYTES = 5 * 1024 * 1024   # at most 5 MB of text per document
+# Input file size limits, paired with the extractors in EXTRACTORS.
+MAX_PDF_BYTES = 500 * 1024 * 1024
+MAX_XLSX_BYTES = 200 * 1024 * 1024
+MAX_GENERIC_BYTES = 300 * 1024 * 1024
+
+# NOTE(review): SUPPORTED is not referenced in the part of the file reviewed;
+# the keys of EXTRACTORS carry the same list - confirm before removing.
+SUPPORTED = ("pdf", "docx", "xlsx", "xlsm", "pptx", "eml", "msg", "txt", "csv")
+
+
+# --- SCHEMA -----------------------------------------------------------------
+
+# DDL for the target database.  Each statement is guarded (IF NOT EXISTS /
+# a pg_ts_config lookup), so running it repeatedly is harmless.
+# It creates: the unaccent and pg_trgm extensions, the text search
+# configuration 'soubory' (copy of 'simple' with unaccent applied to word
+# tokens), the documents table with a generated tsvector column, and the
+# supporting indexes.
+SCHEMA_SQL = """
+CREATE EXTENSION IF NOT EXISTS unaccent;
+CREATE EXTENSION IF NOT EXISTS pg_trgm;
+
+DO $$
+BEGIN
+    IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'soubory') THEN
+        CREATE TEXT SEARCH CONFIGURATION soubory ( COPY = simple );
+        ALTER TEXT SEARCH CONFIGURATION soubory
+            ALTER MAPPING FOR hword, hword_part, word
+            WITH unaccent, simple;
+    END IF;
+END$$;
+
+CREATE TABLE IF NOT EXISTS documents (
+    id              BIGSERIAL PRIMARY KEY,
+    mongo_id        TEXT NOT NULL,
+    study           TEXT NOT NULL,
+    path            TEXT NOT NULL,
+    rel_path        TEXT,
+    name            TEXT,
+    ext             TEXT,
+    sha256          TEXT NOT NULL,
+    size_bytes      BIGINT,
+    mtime           TIMESTAMPTZ,
+    body            TEXT,
+    body_length     INT,
+    tsv             tsvector GENERATED ALWAYS AS (
+                        to_tsvector('soubory'::regconfig, coalesce(body, ''))
+                    ) STORED,
+    extracted_at    TIMESTAMPTZ DEFAULT now(),
+    extractor_version TEXT,
+    ok              BOOLEAN,
+    error           TEXT,
+    UNIQUE (study, path)
+);
+
+CREATE INDEX IF NOT EXISTS documents_tsv_gin       ON documents USING gin(tsv);
+CREATE INDEX IF NOT EXISTS documents_name_trgm     ON documents USING gin(name gin_trgm_ops);
+CREATE INDEX IF NOT EXISTS documents_sha256_idx    ON documents(sha256);
+CREATE INDEX IF NOT EXISTS documents_study_ext_idx ON documents(study, ext);
+"""
+
+
+# --- HELPERY ----------------------------------------------------------------
+
+# odstrani 0x00 a ostatni controly krome whitespace
+_CTRL_RX = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
+
+
+def _clean_for_pg(s: str) -> str:
+    if not s:
+        return ""
+    return _CTRL_RX.sub("", s)
+
+
+def _truncate(s: str) -> str:
+    s = _clean_for_pg(s or "")
+    if not s:
+        return ""
+    b = s.encode("utf-8", errors="replace")
+    if len(b) <= MAX_TEXT_BYTES:
+        return s
+    return b[:MAX_TEXT_BYTES].decode("utf-8", errors="ignore")
+
+
+# --- EXTRAKTORY -------------------------------------------------------------
+
+def extract_pdf(path: Path) -> str:
+    from pypdf import PdfReader
+    reader = PdfReader(str(path))
+    if reader.is_encrypted:
+        try:
+            reader.decrypt("")
+        except Exception:
+            return ""
+    parts = []
+    total = 0
+    for page in reader.pages:
+        try:
+            t = page.extract_text() or ""
+        except Exception:
+            continue
+        parts.append(t)
+        total += len(t)
+        if total > MAX_TEXT_BYTES:
+            break
+    return _truncate("\n".join(parts))
+
+
+# regex pro DOCX fallback - vytahne <w:t>...</w:t>
+_DOCX_WT_RX = re.compile(r"<w:t[^>]*>([^<]*)</w:t>", re.DOTALL)
+_DOCX_WP_END_RX = re.compile(r"</w:p>")
+
+
+def _docx_raw_text(path: Path) -> str:
+    """Fallback - cte primo word/document.xml ze ZIPu."""
+    with zipfile.ZipFile(str(path)) as z:
+        try:
+            xml = z.read("word/document.xml").decode("utf-8", errors="replace")
+        except KeyError:
+            return ""
+    xml = _DOCX_WP_END_RX.sub("\n", xml)
+    return "\n".join(m.group(1) for m in _DOCX_WT_RX.finditer(xml))
+
+
+def extract_docx(path: Path) -> str:
+    from docx import Document
+    try:
+        doc = Document(str(path))
+        parts = [p.text for p in doc.paragraphs if p.text]
+        for tbl in doc.tables:
+            for row in tbl.rows:
+                parts.append(" | ".join(c.text for c in row.cells))
+        return _truncate("\n".join(parts))
+    except Exception:
+        # fallback - raw XML extract
+        return _truncate(_docx_raw_text(path))
+
+
+def extract_xlsx(path: Path) -> str:
+    from openpyxl import load_workbook
+    wb = load_workbook(str(path), read_only=True, data_only=True)
+    parts = []
+    total = 0
+    for ws in wb.worksheets:
+        parts.append(f"# {ws.title}")
+        for row in ws.iter_rows(values_only=True):
+            line = "\t".join("" if v is None else str(v) for v in row)
+            if line.strip():
+                parts.append(line)
+                total += len(line)
+                if total > MAX_TEXT_BYTES:
+                    break
+        if total > MAX_TEXT_BYTES:
+            break
+    wb.close()
+    return _truncate("\n".join(parts))
+
+
+def extract_pptx(path: Path) -> str:
+    from pptx import Presentation
+    prs = Presentation(str(path))
+    parts = []
+    for i, slide in enumerate(prs.slides, 1):
+        parts.append(f"# slide {i}")
+        for shape in slide.shapes:
+            if shape.has_text_frame:
+                for para in shape.text_frame.paragraphs:
+                    line = "".join(run.text for run in para.runs)
+                    if line.strip():
+                        parts.append(line)
+        if slide.has_notes_slide:
+            notes = slide.notes_slide.notes_text_frame.text
+            if notes:
+                parts.append(f"[notes] {notes}")
+    return _truncate("\n".join(parts))
+
+
+def extract_eml(path: Path) -> str:
+    with path.open("rb") as f:
+        msg = email.message_from_binary_file(f, policy=email.policy.default)
+    head = []
+    for k in ("From", "To", "Cc", "Subject", "Date"):
+        v = msg.get(k)
+        if v:
+            head.append(f"{k}: {v}")
+    parts = ["\n".join(head)]
+    if msg.is_multipart():
+        for part in msg.walk():
+            if part.get_content_type() == "text/plain" and not part.get_filename():
+                try:
+                    parts.append(part.get_content())
+                except Exception:
+                    pass
+    else:
+        try:
+            parts.append(msg.get_content())
+        except Exception:
+            pass
+    return _truncate("\n\n".join(parts))
+
+
+def extract_msg(path: Path) -> str:
+    import extract_msg
+    with extract_msg.openMsg(str(path)) as m:
+        head = []
+        if m.subject:  head.append(f"Subject: {m.subject}")
+        if m.sender:   head.append(f"From: {m.sender}")
+        if m.to:       head.append(f"To: {m.to}")
+        if m.cc:       head.append(f"Cc: {m.cc}")
+        if m.date:     head.append(f"Date: {m.date}")
+        return _truncate("\n".join(head) + "\n\n" + (m.body or ""))
+
+
+def extract_text(path: Path) -> str:
+    data = path.read_bytes()[:MAX_TEXT_BYTES]
+    for enc in ("utf-8-sig", "cp1250", "latin-1"):
+        try:
+            return _truncate(data.decode(enc))
+        except UnicodeDecodeError:
+            continue
+    return _truncate(data.decode("utf-8", errors="replace"))
+
+
+# File extension -> (extractor function, maximum input file size in bytes).
+# process_collection uses the keys for the Mongo query and skips files whose
+# recorded size exceeds the limit.
+EXTRACTORS = {
+    "pdf":  (extract_pdf,   MAX_PDF_BYTES),
+    "docx": (extract_docx,  MAX_GENERIC_BYTES),
+    "xlsx": (extract_xlsx,  MAX_XLSX_BYTES),
+    "xlsm": (extract_xlsx,  MAX_XLSX_BYTES),
+    "pptx": (extract_pptx,  MAX_GENERIC_BYTES),
+    "eml":  (extract_eml,   MAX_GENERIC_BYTES),
+    "msg":  (extract_msg,   MAX_GENERIC_BYTES),
+    "txt":  (extract_text,  MAX_GENERIC_BYTES),
+    "csv":  (extract_text,  MAX_GENERIC_BYTES),
+}
+
+
+def _short(s, n=40):
+    if not s:
+        return ""
+    s = str(s).replace("\n", " ").replace("\r", " ").strip()
+    return s if len(s) <= n else s[:n] + "..."
+
+
+def _now() -> datetime:
+    return datetime.now(tz=timezone.utc)
+
+
+# --- HLAVNI SMYCKA ----------------------------------------------------------
+
+def process_collection(pg: psycopg.Connection, mongo_coll, study: str) -> dict:
+    with pg.cursor() as cur:
+        cur.execute(
+            "SELECT path, sha256, extractor_version, ok FROM documents WHERE study = %s",
+            (study,),
+        )
+        existing = {row[0]: (row[1], row[2], row[3]) for row in cur.fetchall()}
+
+    cursor = mongo_coll.find(
+        {"ext": {"$in": list(EXTRACTORS.keys())}, "deleted_at": {"$exists": False}},
+        {"_id": 1, "path": 1, "rel_path": 1, "name": 1, "ext": 1,
+         "sha256": 1, "size_bytes": 1, "mtime": 1},
+        no_cursor_timeout=True,
+    )
+
+    processed = ok = errors = skipped = too_big = 0
+    queue: list[dict] = []
+    total_pending = mongo_coll.count_documents(
+        {"ext": {"$in": list(EXTRACTORS.keys())}, "deleted_at": {"$exists": False}}
+    )
+    print(f"[{study}] kandidatu v Mongo: {total_pending}")
+
+    n = 0
+    try:
+        for doc in cursor:
+            n += 1
+            prev = existing.get(doc["path"])
+            if prev and prev[0] == doc.get("sha256") and prev[1] == EXTRACTOR_VERSION and prev[2]:
+                skipped += 1
+                continue
+
+            ext = doc["ext"]
+            extractor, max_bytes = EXTRACTORS[ext]
+            path = Path(doc["path"])
+
+            row = {
+                "mongo_id":  str(doc["_id"]),
+                "study":     study,
+                "path":      doc["path"],
+                "rel_path":  doc.get("rel_path"),
+                "name":      doc.get("name"),
+                "ext":       ext,
+                "sha256":    doc.get("sha256"),
+                "size_bytes": doc.get("size_bytes"),
+                "mtime":     doc.get("mtime"),
+                "body":      None,
+                "body_length": 0,
+                "extracted_at": _now(),
+                "extractor_version": EXTRACTOR_VERSION,
+                "ok":        False,
+                "error":     None,
+            }
+
+            status = "OK "
+            detail = ""
+            size_mb = (doc.get("size_bytes") or 0) / 1024 / 1024
+
+            if not path.exists():
+                row["error"] = "file_missing"
+                status = "ERR"; detail = "file_missing"; errors += 1
+            elif (doc.get("size_bytes") or 0) > max_bytes:
+                row["error"] = f"too_big_>{max_bytes}"
+                status = "BIG"; detail = f"too_big_>{max_bytes//1024//1024}MB"; too_big += 1
+            else:
+                try:
+                    body = extractor(path) or ""
+                    row["body"] = body if body else None
+                    row["body_length"] = len(body)
+                    row["ok"] = True
+                    ok += 1
+                    detail = f"{len(body)} znaku  {_short(body, 60)!r}"
+                except Exception as e:
+                    row["error"] = f"{type(e).__name__}: {e}"[:500]
+                    status = "ERR"; detail = row["error"][:80]; errors += 1
+
+            queue.append(row)
+            processed += 1
+            print(f"  [{n:>4}/{total_pending}] {status} {ext:<4} {size_mb:6.1f}MB  "
+                  f"{path.name}  | {detail}", flush=True)
+
+            if len(queue) >= 50:
+                _flush(pg, queue); queue.clear()
+    finally:
+        cursor.close()
+
+    if queue:
+        _flush(pg, queue)
+
+    return {"study": study, "processed": processed, "ok": ok,
+            "errors": errors, "skipped": skipped, "too_big": too_big}
+
+
+# Upsert keyed on the (study, path) conflict target: an unknown path inserts a
+# row, a known path has every other column overwritten with the freshly
+# extracted values. The named placeholders are filled from the row dicts that
+# process_collection() builds.
+UPSERT_SQL = """
+INSERT INTO documents
+    (mongo_id, study, path, rel_path, name, ext, sha256, size_bytes, mtime,
+     body, body_length, extracted_at, extractor_version, ok, error)
+VALUES
+    (%(mongo_id)s, %(study)s, %(path)s, %(rel_path)s, %(name)s, %(ext)s, %(sha256)s,
+     %(size_bytes)s, %(mtime)s, %(body)s, %(body_length)s, %(extracted_at)s,
+     %(extractor_version)s, %(ok)s, %(error)s)
+ON CONFLICT (study, path) DO UPDATE SET
+    mongo_id          = EXCLUDED.mongo_id,
+    rel_path          = EXCLUDED.rel_path,
+    name              = EXCLUDED.name,
+    ext               = EXCLUDED.ext,
+    sha256            = EXCLUDED.sha256,
+    size_bytes        = EXCLUDED.size_bytes,
+    mtime             = EXCLUDED.mtime,
+    body              = EXCLUDED.body,
+    body_length       = EXCLUDED.body_length,
+    extracted_at      = EXCLUDED.extracted_at,
+    extractor_version = EXCLUDED.extractor_version,
+    ok                = EXCLUDED.ok,
+    error             = EXCLUDED.error
+"""
+
+
+def _flush(pg: psycopg.Connection, rows: list[dict]) -> None:
+    # posledni pojistka - jeste jednou strip NUL (kdyby se necim prokrouzil)
+    for r in rows:
+        if r.get("body"):
+            r["body"] = _clean_for_pg(r["body"])
+        if r.get("error"):
+            r["error"] = _clean_for_pg(r["error"])
+    with pg.cursor() as cur:
+        cur.executemany(UPSERT_SQL, rows)
+    pg.commit()
+
+
+def main() -> int:
+    t0 = time.time()
+    print("Pripojuji se k PostgreSQL...")
+    pg = psycopg.connect(PG_DSN, connect_timeout=10)
+    with pg.cursor() as cur:
+        cur.execute(SCHEMA_SQL)
+    pg.commit()
+    print("Schema OK.")
+
+    print("Pripojuji se k MongoDB...")
+    mongo = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    mongo.admin.command("ping")
+    db = mongo[MONGO_DB]
+    print("Mongo OK.")
+
+    results = []
+    for name in MONGO_COLLECTIONS:
+        results.append(process_collection(pg, db[name], name))
+
+    pg.close()
+
+    print("\n=== SHRNUTI ===")
+    for r in results:
+        print(f"  {r['study']}: processed={r['processed']}  ok={r['ok']}  "
+              f"errors={r['errors']}  skipped={r['skipped']}  too_big={r['too_big']}")
+    print(f"\nCelkem trvalo: {time.time() - t0:.1f} s")
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        raise SystemExit(main())
+    except KeyboardInterrupt:
+        print("\nPreruseno uzivatelem")
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
@@ -0,0 +1,46 @@
+# enrich_files_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+**Skript:** `enrich_files_v1.0.py`
+
+## Účel
+Doplnit do existujících záznamů v MongoDB `soubory.*` pole `content.*` parsovaná z obsahu souborů.
+
+Spouští se **až po** [scan_files_v1.0.py](scan_files_v1.0.md).
+
+## Podporované přípony a pole
+
+| ext | knihovna | pole v `content` |
+|---|---|---|
+| pdf | pypdf | pages, encrypted, author, title, subject, creator, producer, created, modified, text_head |
+| docx | python-docx | author, title, subject, last_modified_by, paragraphs, words, created, modified, text_head |
+| xlsx, xlsm | openpyxl | total_sheets, sheets[{name,rows,cols}], author, title, subject, last_modified_by, created, modified |
+| pptx | python-pptx | slides, author, title, subject, last_modified_by, created, modified, text_head (z prvních 3 snímků) |
+| eml | stdlib email | subject, from, to, cc, date, has_attachments, attachments[], body_head |
+| msg | extract_msg | totéž co eml |
+
+Společná pole vždy: `ok` (bool), `parsed_at`, `parser_version`, `sha256_at_parse`. Při chybě `error` (název výjimky + zpráva).
+
+## Inkrementální chování
+Zpracují se jen dokumenty kde:
+- `content` chybí, NEBO
+- `content.parser_version` != aktuální verze (1.0), NEBO
+- `content.sha256_at_parse` != aktuální `sha256` (soubor se změnil)
+
+Při dalším spuštění **přidá** jen nové/změněné. Při zvýšení verze parseru přeparsuje vše.
+
+## Limity (skip)
+- PDF nad 500 MB → ok=False, error="too_big_..."
+- XLSX nad 200 MB → ok=False
+- ostatní nad 300 MB → ok=False
+
+`text_head` i `body_head` jsou oříznuty na max. 2000 znaků (`TEXT_HEAD_LIMIT`).
+
+## Spuštění
+```
+python U:\PythonProject\Janssen\Soubory\enrich_files_v1.0.py
+```
+
+## Plán
+Po doběhnutí ověřit `content.ok` rate, případně doladit (chybové vzory) a teprve pak stavět `MCP_SOUBORY` server.
@@ -0,0 +1,388 @@
+"""
+==============================================================================
+Skript:   enrich_files_v1.0.py
+Verze:    1.0
+Datum:    2026-06-03
+Autor:    vladimir.buzalka
+Popis:    Doplni metadata z obsahu souboru (PDF/DOCX/XLSX/PPTX/EML/MSG)
+          do existujicich zaznamu v MongoDB (db: soubory).
+
+          Pole se uklada do podobjektu `content`:
+            - common: ok (bool), error (str|None), parsed_at, parser_version, sha256_at_parse
+            - pdf:    pages, author, title, subject, creator, producer,
+                      created, modified, encrypted, text_head (prvni stranka, max 2000 znaku)
+            - docx:   author, title, subject, last_modified_by, paragraphs,
+                      words, created, modified, text_head
+            - xlsx:   sheets [{name, rows, cols}], total_sheets,
+                      author, title, subject, last_modified_by, created, modified
+            - pptx:   slides, author, title, subject, last_modified_by,
+                      created, modified, text_head (text z prvnich 3 snimku)
+            - eml:    subject, from, to, cc, date, has_attachments,
+                      attachments [filenames], body_head
+            - msg:    same as eml
+
+          Inkrementalni:
+            - preskaci soubor, kde content.sha256_at_parse == aktualni sha256
+              a content.parser_version == aktualni verze
+            - pri zmene obsahu (jiny sha256) prepocita
+            - pri chybe ulozi content.error a content.ok=False
+
+MongoDB:  192.168.1.76:27017
+DB:       soubory
+==============================================================================
+"""
+
+from __future__ import annotations
+
+import email
+import email.policy
+import sys
+import time
+import traceback
+from datetime import datetime, timezone
+from pathlib import Path
+
+from pymongo import MongoClient, UpdateOne
+
+# --- configuration ---
+MONGO_URI = "mongodb://192.168.1.76:27017"
+DB_NAME = "soubory"
+# collections to enrich; the collection name is also passed on as the study label
+COLLECTIONS = ["42847922MDD3003", "77242113UCO3001"]
+# stored as content.parser_version; documents carrying another value are re-parsed
+PARSER_VERSION = "1.0"
+# default character cap applied by _truncate() to text_head / body_head
+TEXT_HEAD_LIMIT = 2000
+
+# size limits for large files - so the script does not get stuck on a 1 GB PDF
+MAX_PDF_BYTES = 500 * 1024 * 1024   # 500 MB
+MAX_XLSX_BYTES = 200 * 1024 * 1024  # 200 MB
+MAX_GENERIC_BYTES = 300 * 1024 * 1024  # 300 MB, all other supported types
+
+
+def _now() -> datetime:
+    return datetime.now(tz=timezone.utc)
+
+
+def _truncate(s: str | None, n: int = TEXT_HEAD_LIMIT) -> str | None:
+    if s is None:
+        return None
+    s = s.strip()
+    return s if len(s) <= n else s[:n]
+
+
+def _to_dt(value):
+    if isinstance(value, datetime):
+        return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
+    if isinstance(value, str) and value:
+        try:
+            return datetime.fromisoformat(value.replace("Z", "+00:00"))
+        except ValueError:
+            return None
+    return None
+
+
+# --- PARSERY ----------------------------------------------------------------
+
+def parse_pdf(path: Path) -> dict:
+    from pypdf import PdfReader
+    reader = PdfReader(str(path))
+    info = reader.metadata or {}
+    out = {
+        "pages": len(reader.pages),
+        "encrypted": reader.is_encrypted,
+        "author": getattr(info, "author", None),
+        "title": getattr(info, "title", None),
+        "subject": getattr(info, "subject", None),
+        "creator": getattr(info, "creator", None),
+        "producer": getattr(info, "producer", None),
+        "created": _to_dt(getattr(info, "creation_date", None)),
+        "modified": _to_dt(getattr(info, "modification_date", None)),
+    }
+    text_head = None
+    try:
+        if not reader.is_encrypted and reader.pages:
+            text_head = reader.pages[0].extract_text()
+    except Exception:
+        text_head = None
+    out["text_head"] = _truncate(text_head)
+    return out
+
+
+def parse_docx(path: Path) -> dict:
+    from docx import Document
+    doc = Document(str(path))
+    core = doc.core_properties
+    paragraphs = doc.paragraphs
+    text = "\n".join(p.text for p in paragraphs if p.text)
+    words = len(text.split())
+    return {
+        "author": core.author,
+        "title": core.title,
+        "subject": core.subject,
+        "last_modified_by": core.last_modified_by,
+        "paragraphs": len(paragraphs),
+        "words": words,
+        "created": _to_dt(core.created),
+        "modified": _to_dt(core.modified),
+        "text_head": _truncate(text),
+    }
+
+
+def parse_xlsx(path: Path) -> dict:
+    from openpyxl import load_workbook
+    wb = load_workbook(str(path), read_only=True, data_only=False)
+    sheets = []
+    for ws in wb.worksheets:
+        sheets.append({
+            "name": ws.title,
+            "rows": ws.max_row,
+            "cols": ws.max_column,
+        })
+    props = wb.properties
+    out = {
+        "total_sheets": len(sheets),
+        "sheets": sheets,
+        "author": props.creator,
+        "title": props.title,
+        "subject": props.subject,
+        "last_modified_by": props.lastModifiedBy,
+        "created": _to_dt(props.created),
+        "modified": _to_dt(props.modified),
+    }
+    wb.close()
+    return out
+
+
+def parse_pptx(path: Path) -> dict:
+    from pptx import Presentation
+    prs = Presentation(str(path))
+    core = prs.core_properties
+    head_parts = []
+    for slide in list(prs.slides)[:3]:
+        for shape in slide.shapes:
+            if shape.has_text_frame:
+                for para in shape.text_frame.paragraphs:
+                    for run in para.runs:
+                        if run.text:
+                            head_parts.append(run.text)
+    return {
+        "slides": len(prs.slides),
+        "author": core.author,
+        "title": core.title,
+        "subject": core.subject,
+        "last_modified_by": core.last_modified_by,
+        "created": _to_dt(core.created),
+        "modified": _to_dt(core.modified),
+        "text_head": _truncate(" ".join(head_parts)),
+    }
+
+
+def parse_eml(path: Path) -> dict:
+    with path.open("rb") as f:
+        msg = email.message_from_binary_file(f, policy=email.policy.default)
+    attachments = []
+    body_parts = []
+    if msg.is_multipart():
+        for part in msg.walk():
+            disp = (part.get("Content-Disposition") or "").lower()
+            ctype = part.get_content_type()
+            if "attachment" in disp or part.get_filename():
+                fname = part.get_filename()
+                if fname:
+                    attachments.append(fname)
+            elif ctype == "text/plain":
+                try:
+                    body_parts.append(part.get_content())
+                except Exception:
+                    pass
+    else:
+        try:
+            body_parts.append(msg.get_content())
+        except Exception:
+            pass
+
+    def _addrs(field):
+        v = msg.get(field)
+        return v if v else None
+
+    return {
+        "subject": msg.get("Subject"),
+        "from": _addrs("From"),
+        "to": _addrs("To"),
+        "cc": _addrs("Cc"),
+        "date": msg.get("Date"),
+        "has_attachments": bool(attachments),
+        "attachments": attachments,
+        "body_head": _truncate("\n".join(body_parts)),
+    }
+
+
+def parse_msg(path: Path) -> dict:
+    import extract_msg
+    with extract_msg.openMsg(str(path)) as msg:
+        attachments = []
+        for att in msg.attachments or []:
+            try:
+                fname = att.longFilename or att.shortFilename
+                if fname:
+                    attachments.append(fname)
+            except Exception:
+                continue
+        return {
+            "subject": msg.subject,
+            "from": msg.sender,
+            "to": msg.to,
+            "cc": msg.cc,
+            "date": str(msg.date) if msg.date else None,
+            "has_attachments": bool(attachments),
+            "attachments": attachments,
+            "body_head": _truncate(msg.body or ""),
+        }
+
+
+# Dispatch table: file extension -> (parser function, maximum file size in bytes).
+# enrich_collection() selects documents by these keys and records a "too_big"
+# error instead of parsing when size_bytes exceeds the limit.
+PARSERS = {
+    "pdf": (parse_pdf, MAX_PDF_BYTES),
+    "docx": (parse_docx, MAX_GENERIC_BYTES),
+    "xlsx": (parse_xlsx, MAX_XLSX_BYTES),
+    "xlsm": (parse_xlsx, MAX_XLSX_BYTES),
+    "pptx": (parse_pptx, MAX_GENERIC_BYTES),
+    "eml": (parse_eml, MAX_GENERIC_BYTES),
+    "msg": (parse_msg, MAX_GENERIC_BYTES),
+}
+
+
+# --- SUMMARY PRO KONZOLI ----------------------------------------------------
+
+def _short(s, n=40):
+    if not s:
+        return ""
+    s = str(s).replace("\n", " ").replace("\r", " ").strip()
+    return s if len(s) <= n else s[:n] + "..."
+
+
+def _summary(content: dict, ext: str) -> str:
+    if not content.get("ok"):
+        return f"chyba: {_short(content.get('error'), 80)}"
+    parts = []
+    if ext == "pdf":
+        parts.append(f"{content.get('pages')}p")
+        if content.get("encrypted"): parts.append("enc")
+        if content.get("author"): parts.append(f"by={_short(content['author'], 25)}")
+        if content.get("title"): parts.append(f"t={_short(content['title'], 30)}")
+    elif ext == "docx":
+        parts.append(f"{content.get('paragraphs')}para")
+        parts.append(f"{content.get('words')}w")
+        if content.get("author"): parts.append(f"by={_short(content['author'], 25)}")
+    elif ext in ("xlsx", "xlsm"):
+        n = content.get("total_sheets", 0)
+        sheets = content.get("sheets") or []
+        names = ",".join(_short(s["name"], 12) for s in sheets[:3])
+        if n > 3:
+            names += f",+{n-3}"
+        parts.append(f"{n}sh[{names}]")
+        if content.get("author"): parts.append(f"by={_short(content['author'], 20)}")
+    elif ext == "pptx":
+        parts.append(f"{content.get('slides')}slides")
+        if content.get("author"): parts.append(f"by={_short(content['author'], 25)}")
+        if content.get("title"): parts.append(f"t={_short(content['title'], 25)}")
+    elif ext in ("eml", "msg"):
+        if content.get("from"): parts.append(f"from={_short(content['from'], 25)}")
+        if content.get("subject"): parts.append(f"subj={_short(content['subject'], 40)}")
+        if content.get("has_attachments"):
+            parts.append(f"att={len(content.get('attachments') or [])}")
+    return " ".join(parts) if parts else "ok"
+
+
+# --- HLAVNI SMYCKA ----------------------------------------------------------
+
+def enrich_collection(coll, study: str) -> dict:
+    supported = list(PARSERS.keys())
+    query = {
+        "ext": {"$in": supported},
+        "deleted_at": {"$exists": False},
+        "$or": [
+            {"content": {"$exists": False}},
+            {"content.parser_version": {"$ne": PARSER_VERSION}},
+            {"$expr": {"$ne": ["$content.sha256_at_parse", "$sha256"]}},
+        ],
+    }
+    total_pending = coll.count_documents(query)
+    print(f"[{study}] k zpracovani: {total_pending} souboru")
+
+    ops: list[UpdateOne] = []
+    processed = 0
+    ok = 0
+    errors = 0
+    too_big = 0
+
+    cursor = coll.find(query, {"path": 1, "ext": 1, "size_bytes": 1, "sha256": 1}, no_cursor_timeout=True)
+    try:
+        for doc in cursor:
+            ext = doc["ext"]
+            parser, max_bytes = PARSERS[ext]
+            path = Path(doc["path"])
+            content: dict = {
+                "parser_version": PARSER_VERSION,
+                "parsed_at": _now(),
+                "sha256_at_parse": doc.get("sha256"),
+            }
+            if not path.exists():
+                content.update(ok=False, error="file_missing")
+                errors += 1
+            elif doc.get("size_bytes", 0) > max_bytes:
+                content.update(ok=False, error=f"too_big_>{max_bytes}")
+                too_big += 1
+            else:
+                try:
+                    payload = parser(path)
+                    content["ok"] = True
+                    content.update(payload)
+                    ok += 1
+                except Exception as e:
+                    content["ok"] = False
+                    content["error"] = f"{type(e).__name__}: {e}"[:500]
+                    errors += 1
+
+            ops.append(UpdateOne({"_id": doc["_id"]}, {"$set": {"content": content}}))
+            processed += 1
+
+            status = "OK " if content.get("ok") else ("BIG" if "too_big" in (content.get("error") or "") else "ERR")
+            size_mb = (doc.get("size_bytes", 0) or 0) / 1024 / 1024
+            detail = _summary(content, ext)
+            print(f"  [{processed:>4}/{total_pending}] {status} {ext:<4} {size_mb:6.1f}MB  {path.name}  | {detail}", flush=True)
+
+            if len(ops) >= 50:
+                coll.bulk_write(ops, ordered=False)
+                ops.clear()
+    finally:
+        cursor.close()
+
+    if ops:
+        coll.bulk_write(ops, ordered=False)
+
+    return {"study": study, "processed": processed, "ok": ok, "errors": errors, "too_big": too_big}
+
+
+def main() -> int:
+    t0 = time.time()
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    client.admin.command("ping")
+    db = client[DB_NAME]
+
+    results = []
+    for name in COLLECTIONS:
+        results.append(enrich_collection(db[name], name))
+
+    print("\n=== SHRNUTI ===")
+    for r in results:
+        print(f"  {r['study']}: processed={r['processed']}  ok={r['ok']}  "
+              f"errors={r['errors']}  too_big={r['too_big']}")
+    print(f"\nCelkem trvalo: {time.time() - t0:.1f} s")
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        raise SystemExit(main())
+    except KeyboardInterrupt:
+        print("\nPreruseno uzivatelem")
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
@@ -0,0 +1,51 @@
+# enrich_fulltext_v1.2
+
+**Verze:** 1.2
+**Datum:** 2026-06-03
+**Skript:** `enrich_fulltext_v1.2.py`
+
+## Změna proti v1.1
+Velký XLSX (`#400 MDD3003_EAT detail report_30jun25.xlsx`, 5 242 128 znaků textu) způsobil pád:
+
+```
+psycopg.errors.ProgramLimitExceeded:
+  string is too long for tsvector (1114090 bytes, max 1048575 bytes)
+```
+
+PostgreSQL `tsvector` má **tvrdý limit ~1 MB** binární velikosti — nelze obejít.
+
+**Řešení:** `tsv` se generuje z prvních **800 000 znaků** sloupce `body`:
+
+```sql
+tsv tsvector GENERATED ALWAYS AS (
+    to_tsvector('soubory'::regconfig, left(coalesce(body, ''), 800000))
+) STORED
+```
+
+- sloupec `body` zůstává **plný** (až 5 MB) — pro náhledy, snippet, `ts_headline`
+- vyhledávání (`tsv @@ q`) ignoruje obsah za 800 000. znakem
+- u rozsáhlých XLSX/PDF (např. data exporty) je 800 000 znaků stále víc než 100 000 slov — pro fulltext bohatě stačí
+
+## Migrace
+`SCHEMA_SQL` při startu zkontroluje, zda současný výraz `tsv` obsahuje `left(`. Pokud ne (starý sloupec z v1.0/v1.1):
+1. dropne `documents_tsv_gin` index
+2. dropne sloupec `tsv`
+3. přidá nový s `left(body, 800000)`
+4. index se vytvoří znovu na konci `SCHEMA_SQL`
+
+Bezpečné spustit opakovaně.
+
+## extractor_version
+Posunuto na `1.2` → všechny řádky z v1.0/v1.1 se přeparsují (potřebné už proto, že migrace tsv změnila co je v indexu).
+
+## Vše ostatní
+Beze změny proti [v1.1](Trash/enrich_fulltext_v1.1.md):
+- DOCX fallback přes raw `word/document.xml`
+- NUL byte strip
+- Limity souborů (PDF 500 MB, XLSX 200 MB, ostatní 300 MB), text max 5 MB
+- Inkrementálně podle `sha256` + `extractor_version`
+
+## Spuštění
+```
+python U:\PythonProject\Janssen\Soubory\enrich_fulltext_v1.2.py
+```
@@ -0,0 +1,481 @@
+"""
+==============================================================================
+Skript:   enrich_fulltext_v1.2.py
+Verze:    1.2
+Datum:    2026-06-03
+Autor:    vladimir.buzalka
+Popis:    Vytahne PLNY TEXT z dokumentu odkazovanych v MongoDB (db: soubory)
+          a ulozi ho do PostgreSQL (db: MongoSoubory) s GIN tsvector indexem.
+
+Zmeny proti v1.1:
+  - PG tsvector ma tvrdy limit ~1 MB binarne -> velky XLSX (5 MB textu) ho prekrocil.
+    v1.2 generuje tsv z prvnich 800 000 znaku body: left(body, 800000).
+    Sloupec body zustava plny (max 5 MB pro nahled / snippet).
+  - SCHEMA_SQL provadi migraci sloupce tsv: pokud uz existuje stara verze
+    (bez `left`), dropne index+sloupec a vytvori znovu s truncated vyrazem.
+  - extractor_version = "1.2" -> preparsuji se vsechny radky z v1.0/v1.1.
+
+Zachovano z v1.1:
+  - NUL bajty (0x00) se strippuji z body i error
+  - DOCX fallback na raw XML pres regex pri padu python-docx
+
+Cilove ulozeni:
+  - MongoDB    192.168.1.76 db=soubory  kolekce=42847922MDD3003, 77242113UCO3001
+  - PostgreSQL 192.168.1.76 db=MongoSoubory tabulka=documents
+
+Podporovane pripony: pdf, docx, xlsx, xlsm, pptx, eml, msg, txt, csv
+==============================================================================
+"""
+
+from __future__ import annotations
+
+import email
+import email.policy
+import re
+import sys
+import time
+import traceback
+import zipfile
+from datetime import datetime, timezone
+from pathlib import Path
+
+import psycopg
+from pymongo import MongoClient
+
+# --- configuration ----------------------------------------------------------
+MONGO_URI = "mongodb://192.168.1.76:27017"
+MONGO_DB = "soubory"
+MONGO_COLLECTIONS = ["42847922MDD3003", "77242113UCO3001"]
+
+# SECURITY(review): this DSN embeds a plaintext database password that is now
+# committed to version control - rotate it and load the DSN from the
+# environment or a secrets store instead.
+PG_DSN = ("host=192.168.1.76 port=5432 dbname=MongoSoubory "
+          "user=vladimir.buzalka password=Vlado7309208104++")
+
+# stored with every row; rows carrying another version are extracted again
+EXTRACTOR_VERSION = "1.2"
+
+# cap on the extracted text, enforced in UTF-8 bytes by _truncate()
+MAX_TEXT_BYTES = 5 * 1024 * 1024
+# per-type limits on the source file size (see EXTRACTORS)
+MAX_PDF_BYTES = 500 * 1024 * 1024
+MAX_XLSX_BYTES = 200 * 1024 * 1024
+MAX_GENERIC_BYTES = 300 * 1024 * 1024
+
+# NOTE(review): mirrors the EXTRACTORS keys; the code visible here queries by
+# EXTRACTORS.keys() instead - confirm this tuple is still used.
+SUPPORTED = ("pdf", "docx", "xlsx", "xlsm", "pptx", "eml", "msg", "txt", "csv")
+
+
+# --- SCHEMA -----------------------------------------------------------------
+
+# Bootstrap DDL, written to be re-runnable (IF NOT EXISTS / guarded DO blocks):
+#   1. extensions unaccent + pg_trgm and the text search configuration
+#      `soubory` (a copy of `simple` with unaccent mapped in front of it)
+#   2. table `documents` with a stored generated `tsv` column built from the
+#      first 800 000 characters of `body`
+#   3. migration: an existing `tsv` whose expression does not contain `left`
+#      is dropped together with its GIN index and re-added with the
+#      truncating expression
+#   4. indexes: GIN on tsv, trigram GIN on name, plain indexes on sha256 and
+#      on (study, ext)
+# NOTE(review): the migration finds `documents` by relname only, without a
+# schema filter - confirm no other schema holds a table of that name.
+SCHEMA_SQL = """
+CREATE EXTENSION IF NOT EXISTS unaccent;
+CREATE EXTENSION IF NOT EXISTS pg_trgm;
+
+DO $$
+BEGIN
+    IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'soubory') THEN
+        CREATE TEXT SEARCH CONFIGURATION soubory ( COPY = simple );
+        ALTER TEXT SEARCH CONFIGURATION soubory
+            ALTER MAPPING FOR hword, hword_part, word
+            WITH unaccent, simple;
+    END IF;
+END$$;
+
+CREATE TABLE IF NOT EXISTS documents (
+    id              BIGSERIAL PRIMARY KEY,
+    mongo_id        TEXT NOT NULL,
+    study           TEXT NOT NULL,
+    path            TEXT NOT NULL,
+    rel_path        TEXT,
+    name            TEXT,
+    ext             TEXT,
+    sha256          TEXT NOT NULL,
+    size_bytes      BIGINT,
+    mtime           TIMESTAMPTZ,
+    body            TEXT,
+    body_length     INT,
+    tsv             tsvector GENERATED ALWAYS AS (
+                        to_tsvector('soubory'::regconfig, left(coalesce(body, ''), 800000))
+                    ) STORED,
+    extracted_at    TIMESTAMPTZ DEFAULT now(),
+    extractor_version TEXT,
+    ok              BOOLEAN,
+    error           TEXT,
+    UNIQUE (study, path)
+);
+
+-- migrace tsv sloupce ze stareho vyrazu (bez `left`) na novy (s `left(..,800000)`)
+DO $$
+DECLARE
+    cur_expr TEXT;
+BEGIN
+    SELECT pg_get_expr(d.adbin, d.adrelid)
+      INTO cur_expr
+      FROM pg_attribute a
+      JOIN pg_class c     ON c.oid = a.attrelid
+      JOIN pg_attrdef d   ON d.adrelid = a.attrelid AND d.adnum = a.attnum
+     WHERE c.relname = 'documents' AND a.attname = 'tsv';
+
+    IF cur_expr IS NOT NULL AND position('left' in cur_expr) = 0 THEN
+        EXECUTE 'DROP INDEX IF EXISTS documents_tsv_gin';
+        EXECUTE 'ALTER TABLE documents DROP COLUMN tsv';
+        EXECUTE 'ALTER TABLE documents ADD COLUMN tsv tsvector GENERATED ALWAYS AS '
+             || '(to_tsvector(''soubory''::regconfig, left(coalesce(body, ''''), 800000))) STORED';
+    END IF;
+END$$;
+
+CREATE INDEX IF NOT EXISTS documents_tsv_gin       ON documents USING gin(tsv);
+CREATE INDEX IF NOT EXISTS documents_name_trgm     ON documents USING gin(name gin_trgm_ops);
+CREATE INDEX IF NOT EXISTS documents_sha256_idx    ON documents(sha256);
+CREATE INDEX IF NOT EXISTS documents_study_ext_idx ON documents(study, ext);
+"""
+
+
+# --- HELPERY ----------------------------------------------------------------
+
+# odstrani 0x00 a ostatni controly krome whitespace
+_CTRL_RX = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
+
+
+def _clean_for_pg(s: str) -> str:
+    if not s:
+        return ""
+    return _CTRL_RX.sub("", s)
+
+
+def _truncate(s: str) -> str:
+    s = _clean_for_pg(s or "")
+    if not s:
+        return ""
+    b = s.encode("utf-8", errors="replace")
+    if len(b) <= MAX_TEXT_BYTES:
+        return s
+    return b[:MAX_TEXT_BYTES].decode("utf-8", errors="ignore")
+
+
+# --- EXTRAKTORY -------------------------------------------------------------
+
+def extract_pdf(path: Path) -> str:
+    from pypdf import PdfReader
+    reader = PdfReader(str(path))
+    if reader.is_encrypted:
+        try:
+            reader.decrypt("")
+        except Exception:
+            return ""
+    parts = []
+    total = 0
+    for page in reader.pages:
+        try:
+            t = page.extract_text() or ""
+        except Exception:
+            continue
+        parts.append(t)
+        total += len(t)
+        if total > MAX_TEXT_BYTES:
+            break
+    return _truncate("\n".join(parts))
+
+
+# regex pro DOCX fallback - vytahne <w:t>...</w:t>
+_DOCX_WT_RX = re.compile(r"<w:t[^>]*>([^<]*)</w:t>", re.DOTALL)
+_DOCX_WP_END_RX = re.compile(r"</w:p>")
+
+
+def _docx_raw_text(path: Path) -> str:
+    """Fallback - cte primo word/document.xml ze ZIPu."""
+    with zipfile.ZipFile(str(path)) as z:
+        try:
+            xml = z.read("word/document.xml").decode("utf-8", errors="replace")
+        except KeyError:
+            return ""
+    xml = _DOCX_WP_END_RX.sub("\n", xml)
+    return "\n".join(m.group(1) for m in _DOCX_WT_RX.finditer(xml))
+
+
+def extract_docx(path: Path) -> str:
+    from docx import Document
+    try:
+        doc = Document(str(path))
+        parts = [p.text for p in doc.paragraphs if p.text]
+        for tbl in doc.tables:
+            for row in tbl.rows:
+                parts.append(" | ".join(c.text for c in row.cells))
+        return _truncate("\n".join(parts))
+    except Exception:
+        # fallback - raw XML extract
+        return _truncate(_docx_raw_text(path))
+
+
+def extract_xlsx(path: Path) -> str:
+    from openpyxl import load_workbook
+    wb = load_workbook(str(path), read_only=True, data_only=True)
+    parts = []
+    total = 0
+    for ws in wb.worksheets:
+        parts.append(f"# {ws.title}")
+        for row in ws.iter_rows(values_only=True):
+            line = "\t".join("" if v is None else str(v) for v in row)
+            if line.strip():
+                parts.append(line)
+                total += len(line)
+                if total > MAX_TEXT_BYTES:
+                    break
+        if total > MAX_TEXT_BYTES:
+            break
+    wb.close()
+    return _truncate("\n".join(parts))
+
+
+def extract_pptx(path: Path) -> str:
+    from pptx import Presentation
+    prs = Presentation(str(path))
+    parts = []
+    for i, slide in enumerate(prs.slides, 1):
+        parts.append(f"# slide {i}")
+        for shape in slide.shapes:
+            if shape.has_text_frame:
+                for para in shape.text_frame.paragraphs:
+                    line = "".join(run.text for run in para.runs)
+                    if line.strip():
+                        parts.append(line)
+        if slide.has_notes_slide:
+            notes = slide.notes_slide.notes_text_frame.text
+            if notes:
+                parts.append(f"[notes] {notes}")
+    return _truncate("\n".join(parts))
+
+
+def extract_eml(path: Path) -> str:
+    with path.open("rb") as f:
+        msg = email.message_from_binary_file(f, policy=email.policy.default)
+    head = []
+    for k in ("From", "To", "Cc", "Subject", "Date"):
+        v = msg.get(k)
+        if v:
+            head.append(f"{k}: {v}")
+    parts = ["\n".join(head)]
+    if msg.is_multipart():
+        for part in msg.walk():
+            if part.get_content_type() == "text/plain" and not part.get_filename():
+                try:
+                    parts.append(part.get_content())
+                except Exception:
+                    pass
+    else:
+        try:
+            parts.append(msg.get_content())
+        except Exception:
+            pass
+    return _truncate("\n\n".join(parts))
+
+
+def extract_msg(path: Path) -> str:
+    import extract_msg
+    with extract_msg.openMsg(str(path)) as m:
+        head = []
+        if m.subject:  head.append(f"Subject: {m.subject}")
+        if m.sender:   head.append(f"From: {m.sender}")
+        if m.to:       head.append(f"To: {m.to}")
+        if m.cc:       head.append(f"Cc: {m.cc}")
+        if m.date:     head.append(f"Date: {m.date}")
+        return _truncate("\n".join(head) + "\n\n" + (m.body or ""))
+
+
+def extract_text(path: Path) -> str:
+    data = path.read_bytes()[:MAX_TEXT_BYTES]
+    for enc in ("utf-8-sig", "cp1250", "latin-1"):
+        try:
+            return _truncate(data.decode(enc))
+        except UnicodeDecodeError:
+            continue
+    return _truncate(data.decode("utf-8", errors="replace"))
+
+
+# Dispatch table: file extension -> (extractor function, maximum file size in bytes).
+# process_collection() queries Mongo by these keys and looks up doc["ext"] here.
+# xlsm shares the xlsx extractor; txt and csv share extract_text.
+EXTRACTORS = {
+    "pdf":  (extract_pdf,   MAX_PDF_BYTES),
+    "docx": (extract_docx,  MAX_GENERIC_BYTES),
+    "xlsx": (extract_xlsx,  MAX_XLSX_BYTES),
+    "xlsm": (extract_xlsx,  MAX_XLSX_BYTES),
+    "pptx": (extract_pptx,  MAX_GENERIC_BYTES),
+    "eml":  (extract_eml,   MAX_GENERIC_BYTES),
+    "msg":  (extract_msg,   MAX_GENERIC_BYTES),
+    "txt":  (extract_text,  MAX_GENERIC_BYTES),
+    "csv":  (extract_text,  MAX_GENERIC_BYTES),
+}
+
+
+def _short(s, n=40):
+    if not s:
+        return ""
+    s = str(s).replace("\n", " ").replace("\r", " ").strip()
+    return s if len(s) <= n else s[:n] + "..."
+
+
+def _now() -> datetime:
+    return datetime.now(tz=timezone.utc)
+
+
+# --- HLAVNI SMYCKA ----------------------------------------------------------
+
+def process_collection(pg: psycopg.Connection, mongo_coll, study: str) -> dict:
+    with pg.cursor() as cur:
+        cur.execute(
+            "SELECT path, sha256, extractor_version, ok FROM documents WHERE study = %s",
+            (study,),
+        )
+        existing = {row[0]: (row[1], row[2], row[3]) for row in cur.fetchall()}
+
+    cursor = mongo_coll.find(
+        {"ext": {"$in": list(EXTRACTORS.keys())}, "deleted_at": {"$exists": False}},
+        {"_id": 1, "path": 1, "rel_path": 1, "name": 1, "ext": 1,
+         "sha256": 1, "size_bytes": 1, "mtime": 1},
+        no_cursor_timeout=True,
+    )
+
+    processed = ok = errors = skipped = too_big = 0
+    queue: list[dict] = []
+    total_pending = mongo_coll.count_documents(
+        {"ext": {"$in": list(EXTRACTORS.keys())}, "deleted_at": {"$exists": False}}
+    )
+    print(f"[{study}] kandidatu v Mongo: {total_pending}")
+
+    n = 0
+    try:
+        for doc in cursor:
+            n += 1
+            prev = existing.get(doc["path"])
+            if prev and prev[0] == doc.get("sha256") and prev[1] == EXTRACTOR_VERSION and prev[2]:
+                skipped += 1
+                continue
+
+            ext = doc["ext"]
+            extractor, max_bytes = EXTRACTORS[ext]
+            path = Path(doc["path"])
+
+            row = {
+                "mongo_id":  str(doc["_id"]),
+                "study":     study,
+                "path":      doc["path"],
+                "rel_path":  doc.get("rel_path"),
+                "name":      doc.get("name"),
+                "ext":       ext,
+                "sha256":    doc.get("sha256"),
+                "size_bytes": doc.get("size_bytes"),
+                "mtime":     doc.get("mtime"),
+                "body":      None,
+                "body_length": 0,
+                "extracted_at": _now(),
+                "extractor_version": EXTRACTOR_VERSION,
+                "ok":        False,
+                "error":     None,
+            }
+
+            status = "OK "
+            detail = ""
+            size_mb = (doc.get("size_bytes") or 0) / 1024 / 1024
+
+            if not path.exists():
+                row["error"] = "file_missing"
+                status = "ERR"; detail = "file_missing"; errors += 1
+            elif (doc.get("size_bytes") or 0) > max_bytes:
+                row["error"] = f"too_big_>{max_bytes}"
+                status = "BIG"; detail = f"too_big_>{max_bytes//1024//1024}MB"; too_big += 1
+            else:
+                try:
+                    body = extractor(path) or ""
+                    row["body"] = body if body else None
+                    row["body_length"] = len(body)
+                    row["ok"] = True
+                    ok += 1
+                    detail = f"{len(body)} znaku  {_short(body, 60)!r}"
+                except Exception as e:
+                    row["error"] = f"{type(e).__name__}: {e}"[:500]
+                    status = "ERR"; detail = row["error"][:80]; errors += 1
+
+            queue.append(row)
+            processed += 1
+            print(f"  [{n:>4}/{total_pending}] {status} {ext:<4} {size_mb:6.1f}MB  "
+                  f"{path.name}  | {detail}", flush=True)
+
+            if len(queue) >= 50:
+                _flush(pg, queue); queue.clear()
+    finally:
+        cursor.close()
+
+    if queue:
+        _flush(pg, queue)
+
+    return {"study": study, "processed": processed, "ok": ok,
+            "errors": errors, "skipped": skipped, "too_big": too_big}
+
+
+# Upsert keyed on (study, path): inserts a new row, or overwrites every other
+# column of the existing row with the freshly extracted values. The named
+# placeholders match the keys of the row dicts passed to _flush().
+UPSERT_SQL = """
+INSERT INTO documents
+    (mongo_id, study, path, rel_path, name, ext, sha256, size_bytes, mtime,
+     body, body_length, extracted_at, extractor_version, ok, error)
+VALUES
+    (%(mongo_id)s, %(study)s, %(path)s, %(rel_path)s, %(name)s, %(ext)s, %(sha256)s,
+     %(size_bytes)s, %(mtime)s, %(body)s, %(body_length)s, %(extracted_at)s,
+     %(extractor_version)s, %(ok)s, %(error)s)
+ON CONFLICT (study, path) DO UPDATE SET
+    mongo_id          = EXCLUDED.mongo_id,
+    rel_path          = EXCLUDED.rel_path,
+    name              = EXCLUDED.name,
+    ext               = EXCLUDED.ext,
+    sha256            = EXCLUDED.sha256,
+    size_bytes        = EXCLUDED.size_bytes,
+    mtime             = EXCLUDED.mtime,
+    body              = EXCLUDED.body,
+    body_length       = EXCLUDED.body_length,
+    extracted_at      = EXCLUDED.extracted_at,
+    extractor_version = EXCLUDED.extractor_version,
+    ok                = EXCLUDED.ok,
+    error             = EXCLUDED.error
+"""
+
+
+def _flush(pg: psycopg.Connection, rows: list[dict]) -> None:
+    # posledni pojistka - jeste jednou strip NUL (kdyby se necim prokrouzil)
+    for r in rows:
+        if r.get("body"):
+            r["body"] = _clean_for_pg(r["body"])
+        if r.get("error"):
+            r["error"] = _clean_for_pg(r["error"])
+    with pg.cursor() as cur:
+        cur.executemany(UPSERT_SQL, rows)
+    pg.commit()
+
+
+def main() -> int:
+    t0 = time.time()
+    print("Pripojuji se k PostgreSQL...")
+    pg = psycopg.connect(PG_DSN, connect_timeout=10)
+    with pg.cursor() as cur:
+        cur.execute(SCHEMA_SQL)
+    pg.commit()
+    print("Schema OK.")
+
+    print("Pripojuji se k MongoDB...")
+    mongo = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    mongo.admin.command("ping")
+    db = mongo[MONGO_DB]
+    print("Mongo OK.")
+
+    results = []
+    for name in MONGO_COLLECTIONS:
+        results.append(process_collection(pg, db[name], name))
+
+    pg.close()
+
+    print("\n=== SHRNUTI ===")
+    for r in results:
+        print(f"  {r['study']}: processed={r['processed']}  ok={r['ok']}  "
+              f"errors={r['errors']}  skipped={r['skipped']}  too_big={r['too_big']}")
+    print(f"\nCelkem trvalo: {time.time() - t0:.1f} s")
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        raise SystemExit(main())
+    except KeyboardInterrupt:
+        print("\nPreruseno uzivatelem")
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
@@ -0,0 +1,63 @@
+# scan_files_v1.0
+
+**Verze:** 1.0
+**Datum:** 2026-06-03
+**Skript:** `scan_files_v1.0.py`
+
+## Účel
+Rekurzivní sken dvou Dropbox složek studií (`!!42847922MDD3003`, `!77242113UCO3001`) a zápis metadat všech souborů do MongoDB.
+
+## Konfigurace
+- **MongoDB:** `mongodb://192.168.1.76:27017` (bez autentizace)
+- **DB:** `soubory`
+- **Kolekce:** `42847922MDD3003`, `77242113UCO3001` (jedna kolekce na studii)
+- **Cesta k Dropboxu:** zjištěna pomocí `Knihovny/najdi_dropbox.py` (přenositelné mezi PC)
+
+## Struktura dokumentu v MongoDB
+| pole | popis |
+|---|---|
+| `path` | absolutní cesta (unikátní klíč) |
+| `study` | kód studie (= název kolekce) |
+| `rel_path` | relativní cesta od kořene studie |
+| `dir`, `rel_dir` | nadřazený adresář (absolutní/relativní) |
+| `parent_folders` | pole názvů složek (pro filtrování) |
+| `name`, `stem`, `ext` | jméno, jméno bez přípony, přípona (lower-case) |
+| `size_bytes` | velikost |
+| `mtime`, `ctime`, `atime` | časové údaje (UTC) |
+| `sha256` | hash obsahu |
+| `mime` | mimetype dle přípony |
+| `tokens` | jméno rozparsované na slova/čísla (lower-case) |
+| `dates_in_name` | datumy nalezené v názvu, formát `YYYY-MM-DD` |
+| `first_seen_at` | první sken, kdy byl soubor viděn |
+| `last_seen_at` | poslední sken, kdy byl viděn |
+| `deleted_at` | nastaveno, pokud soubor v posledním skenu už nebyl nalezen |
+
+## Datumy v názvu
+Skript hledá tři varianty:
+- `12JAN2026`, `12Jan2026` (den + 3-písm. zkratka měsíce + rok)
+- `2026-01-12`, `2026_01_12`, `2026.01.12`
+- `12-01-2026`, `12_01_2026`, `12.01.2026`
+
+Všechny se normalizují do ISO `YYYY-MM-DD` v poli `dates_in_name`.
+
+## Inkrementální chování
+- `size_bytes` + `mtime` souhlasí se záznamem v DB → SHA256 se nepřepočítává, jen se aktualizuje `last_seen_at`
+- nový soubor → vloží se s `first_seen_at`
+- chybějící v aktuálním běhu → `deleted_at` se nastaví na čas běhu
+
+## Co se ignoruje
+- `.dropbox*`, `Thumbs.db`, `desktop.ini`, `~$*` (zamykací soubory Office), `.DS_Store`
+- adresář `.dropbox.cache`
+
+## Spuštění
+```
+python U:\PythonProject\Janssen\Soubory\scan_files_v1.0.py
+```
+
+## Indexovaná pole pro rychlé dotazy
+`path` (unique), `ext`, `dates_in_name`, `tokens`, `sha256`
+
+## Plán pokračování
+1. Spustit první sken → zjistit profil dat (přípony, hloubku stromů)
+2. Doplnit dle potřeby (např. počet stran PDF, autor DOCX, listy XLSX)
+3. Postavit `MCP_SOUBORY` server nad touto kolekcí
@@ -0,0 +1,272 @@
+"""
+==============================================================================
+Skript:   scan_files_v1.0.py
+Verze:    1.0
+Datum:    2026-06-03
+Autor:    vladimir.buzalka
+Popis:    Rekurzivni sken Dropbox slozek dvou studii a zapis metadat
+          vsech souboru do MongoDB (db: soubory, kolekce = nazev studie).
+
+          - cesty k Dropboxu se zjisti pres Knihovny.najdi_dropbox
+          - pro kazdy soubor: stat, sha256, mime (podle pripony),
+            parsing data v nazvu (12JAN2026, 2026-01-12, 12-01-2026 ...)
+          - inkrementalni: pokud size+mtime souhlasi se zaznamem v DB,
+            sha256 se nepocita znovu (jen se aktualizuje last_seen_at)
+          - smazane soubory dostanou deleted_at pri behu, ve kterem
+            uz nebyly videny
+          - vynechavaji se: .dropbox*, Thumbs.db, desktop.ini,
+            ~$* (Office lock), .DS_Store a adresar .dropbox.cache
+
+MongoDB:  192.168.1.76:27017, bez autentizace
+DB:       soubory
+Kolekce:  42847922MDD3003, 77242113UCO3001 (extrahovano z rootu cesty)
+==============================================================================
+"""
+
+from __future__ import annotations
+
+import hashlib
+import mimetypes
+import os
+import re
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+
+from pymongo import MongoClient, UpdateOne, ASCENDING
+
+# --- prida Knihovny do path -------------------------------------------------
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE.parent))
+from Knihovny.najdi_dropbox import get_dropbox_root  # noqa: E402
+
+# --- configuration ----------------------------------------------------------
+# MongoDB server and database that receive the file metadata.
+MONGO_URI = "mongodb://192.168.1.76:27017"
+DB_NAME = "soubory"
+
+# Study code (also used as the Mongo collection name) -> name of the study
+# folder directly under the Dropbox root.
+STUDIES = {
+    "42847922MDD3003": "!!42847922MDD3003",
+    "77242113UCO3001": "!77242113UCO3001",
+}
+
+# A file is ignored when its name matches any of these patterns; matching is
+# anchored at the start of the name and case-insensitive.
+SKIP_NAME_PATTERNS = [
+    re.compile(r"^\.dropbox.*", re.IGNORECASE),
+    re.compile(r"^Thumbs\.db$", re.IGNORECASE),
+    re.compile(r"^desktop\.ini$", re.IGNORECASE),
+    re.compile(r"^~\$.*", re.IGNORECASE),
+    re.compile(r"^\.DS_Store$", re.IGNORECASE),
+]
+
+# Directory names that the recursive walk does not descend into.
+SKIP_DIR_NAMES = {".dropbox.cache"}
+
+# Read size used when hashing file content.
+HASH_CHUNK = 1024 * 1024  # 1 MiB
+
+# --- parsovani datumu v nazvu ----------------------------------------------
+MONTHS = {
+    "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6,
+    "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12,
+}
+
+DATE_PATTERNS = [
+    # 12JAN2026 / 12Jan2026
+    (re.compile(r"(\d{1,2})([A-Za-z]{3})(\d{4})"), "dmonth"),
+    # 2026-01-12 / 2026_01_12 / 2026.01.12
+    (re.compile(r"(20\d{2})[-_.](\d{1,2})[-_.](\d{1,2})"), "ymd"),
+    # 12-01-2026 / 12_01_2026 / 12.01.2026
+    (re.compile(r"(\d{1,2})[-_.](\d{1,2})[-_.](20\d{2})"), "dmy"),
+]
+
+
+def extract_dates(name: str) -> list[str]:
+    """Vraci unikatni ISO datumy (YYYY-MM-DD) nalezene v nazvu."""
+    found: set[str] = set()
+    for rx, kind in DATE_PATTERNS:
+        for m in rx.finditer(name):
+            try:
+                if kind == "dmonth":
+                    d = int(m.group(1))
+                    mo = MONTHS.get(m.group(2).upper())
+                    y = int(m.group(3))
+                    if not mo:
+                        continue
+                elif kind == "ymd":
+                    y, mo, d = int(m.group(1)), int(m.group(2)), int(m.group(3))
+                else:  # dmy
+                    d, mo, y = int(m.group(1)), int(m.group(2)), int(m.group(3))
+                datetime(y, mo, d)
+                found.add(f"{y:04d}-{mo:02d}-{d:02d}")
+            except ValueError:
+                continue
+    return sorted(found)
+
+
+TOKEN_RX = re.compile(r"[A-Za-z0-9]+")
+
+
+def tokenize(name: str) -> list[str]:
+    return [t.lower() for t in TOKEN_RX.findall(name)]
+
+
+def should_skip(name: str) -> bool:
+    return any(p.match(name) for p in SKIP_NAME_PATTERNS)
+
+
+def sha256_of(path: Path) -> str:
+    h = hashlib.sha256()
+    with path.open("rb") as f:
+        while True:
+            chunk = f.read(HASH_CHUNK)
+            if not chunk:
+                break
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def to_dt(ts: float) -> datetime:
+    return datetime.fromtimestamp(ts, tz=timezone.utc)
+
+
+def scan_study(study_code: str, study_root: Path, db, scan_started_at: datetime) -> dict:
+    coll = db[study_code]
+    coll.create_index([("path", ASCENDING)], unique=True)
+    coll.create_index([("ext", ASCENDING)])
+    coll.create_index([("dates_in_name", ASCENDING)])
+    coll.create_index([("tokens", ASCENDING)])
+    coll.create_index([("sha256", ASCENDING)])
+
+    # existujici zaznamy -> mapa path -> (size, mtime_iso, sha256)
+    existing = {
+        d["path"]: (d.get("size_bytes"), d.get("mtime"), d.get("sha256"))
+        for d in coll.find({}, {"path": 1, "size_bytes": 1, "mtime": 1, "sha256": 1})
+    }
+
+    ops: list[UpdateOne] = []
+    seen = 0
+    rehashed = 0
+    skipped = 0
+    errors: list[tuple[str, str]] = []
+
+    print(f"[{study_code}] sken: {study_root}")
+    for root, dirs, files in os.walk(study_root):
+        # vyrad skip-dirs in-place
+        dirs[:] = [d for d in dirs if d not in SKIP_DIR_NAMES]
+        for fname in files:
+            if should_skip(fname):
+                skipped += 1
+                continue
+            fpath = Path(root) / fname
+            try:
+                st = fpath.stat()
+            except OSError as e:
+                errors.append((str(fpath), f"stat: {e}"))
+                continue
+
+            path_str = str(fpath)
+            size = st.st_size
+            mtime = to_dt(st.st_mtime)
+
+            prev = existing.get(path_str)
+            if prev and prev[0] == size and prev[1] == mtime and prev[2]:
+                # bez zmeny - jen last_seen_at + clear deleted_at
+                ops.append(UpdateOne(
+                    {"path": path_str},
+                    {"$set": {"last_seen_at": scan_started_at},
+                     "$unset": {"deleted_at": ""}},
+                ))
+            else:
+                try:
+                    digest = sha256_of(fpath)
+                except OSError as e:
+                    errors.append((path_str, f"hash: {e}"))
+                    continue
+                rehashed += 1
+
+                rel = fpath.relative_to(study_root)
+                doc = {
+                    "path": path_str,
+                    "study": study_code,
+                    "rel_path": str(rel),
+                    "dir": str(fpath.parent),
+                    "rel_dir": str(rel.parent) if str(rel.parent) != "." else "",
+                    "parent_folders": list(rel.parts[:-1]),
+                    "name": fname,
+                    "stem": fpath.stem,
+                    "ext": fpath.suffix.lower().lstrip("."),
+                    "size_bytes": size,
+                    "mtime": mtime,
+                    "ctime": to_dt(st.st_ctime),
+                    "atime": to_dt(st.st_atime),
+                    "sha256": digest,
+                    "mime": mimetypes.guess_type(fname)[0],
+                    "tokens": tokenize(fpath.stem),
+                    "dates_in_name": extract_dates(fname),
+                    "last_seen_at": scan_started_at,
+                }
+                ops.append(UpdateOne(
+                    {"path": path_str},
+                    {"$set": doc, "$unset": {"deleted_at": ""},
+                     "$setOnInsert": {"first_seen_at": scan_started_at}},
+                    upsert=True,
+                ))
+
+            seen += 1
+            if len(ops) >= 500:
+                coll.bulk_write(ops, ordered=False)
+                ops.clear()
+                print(f"  ... {seen} souboru zpracovano")
+
+    if ops:
+        coll.bulk_write(ops, ordered=False)
+
+    # oznac smazane
+    res = coll.update_many(
+        {"last_seen_at": {"$lt": scan_started_at}, "deleted_at": {"$exists": False}},
+        {"$set": {"deleted_at": scan_started_at}},
+    )
+
+    return {
+        "study": study_code,
+        "seen": seen,
+        "rehashed": rehashed,
+        "unchanged": seen - rehashed,
+        "skipped": skipped,
+        "marked_deleted": res.modified_count,
+        "errors": errors,
+    }
+
+
+def main() -> int:
+    t0 = time.time()
+    dropbox_root = Path(get_dropbox_root())
+    print(f"Dropbox root: {dropbox_root}")
+
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    client.admin.command("ping")
+    db = client[DB_NAME]
+
+    scan_started_at = datetime.now(tz=timezone.utc)
+
+    results = []
+    for study_code, folder in STUDIES.items():
+        study_root = dropbox_root / folder
+        if not study_root.is_dir():
+            print(f"[!] {study_root} neexistuje, preskakuji")
+            continue
+        results.append(scan_study(study_code, study_root, db, scan_started_at))
+
+    print("\n=== SHRNUTI ===")
+    for r in results:
+        print(f"  {r['study']}: seen={r['seen']}  rehashed={r['rehashed']}  "
+              f"unchanged={r['unchanged']}  skipped={r['skipped']}  "
+              f"deleted={r['marked_deleted']}  errors={len(r['errors'])}")
+        for path, err in r["errors"][:5]:
+            print(f"    ! {err}  ({path})")
+        if len(r["errors"]) > 5:
+            print(f"    ... +{len(r['errors']) - 5} dalsich chyb")
+    print(f"\nCelkem trvalo: {time.time() - t0:.1f} s")
+    return 0
+
+
+# Script entry point: run the scan and use main()'s return value as the
+# process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())