Compare commits

..

2 Commits

Author SHA1 Message Date
administrator ec187e673a z230 2026-06-02 17:19:44 +02:00
administrator dd39339497 z230 2026-06-02 17:19:43 +02:00
17 changed files with 199 additions and 160 deletions
+2 -1
View File
@@ -3,7 +3,8 @@
"sel_proto": "77242113UCO3001",
"sel_role": [
"Principal Investigator",
"Sub-Investigator"
"Sub-Investigator",
"Study Coordinator"
],
"sel_site": []
}
+22 -5
View File
@@ -1,14 +1,16 @@
"""
import_to_mongo.py
Verze: 1.1
Datum: 2026-06-01
Verze: 1.2
Datum: 2026-06-02
Import Clario CSV do MongoDB (databáze: Clario).
Kolekce: Clario.MayoDiary / Clario.MayoScore (dle názvu souboru)
Kolekce: Clario.MayoDiary / Clario.MayoScore / Clario.eCOA_DCRs / Clario.ECG_DCRs
Filtr: pouze řádky s Country == "Czech Republic"
Klíč: MayoDiary → Subject ID + Form Number
MayoScore → Participant ID + Visit
eCOA_DCRs → Data Correction ID
ECG_DCRs → Data Correction ID
Historie: při změně fields se stará verze uloží do pole history[]
Po importu přesune zpracované CSV do downloads/Zpracovano/
@@ -58,6 +60,16 @@ COLLECTION_CONFIG = {
"Partial Mayo Response for Clinical Non-Responders",
),
},
"eCOA DCRs": {
"collection": "Clario.eCOA_DCRs",
"subject_col": "Subject ID",
"key_cols": ("Data Correction ID",),
},
"ECG DCRs": {
"collection": "Clario.ECG_DCRs",
"subject_col": "Subject Number",
"key_cols": ("Data Correction ID",),
},
}
DATE_FORMATS = [
@@ -120,7 +132,9 @@ def map_row(row: dict, col_type: str) -> dict:
subject_col = cfg["subject_col"]
doc["subject"] = {"id": cleaned.get(subject_col, "")}
doc["site"] = {"name": cleaned.get("Site", "")}
# ECG DCRs používají "Site ID" místo "Site"
site_name = cleaned.get("Site") or cleaned.get("Site ID", "")
doc["site"] = {"name": site_name}
doc["country"] = cleaned.get("Country", "")
doc["study"] = cleaned.get("Protocol", "")
@@ -173,7 +187,7 @@ def import_file(csv_path: str, db) -> dict:
for row in reader:
cleaned_row = {clean_colname(k): v for k, v in row.items()}
country = cleaned_row.get("Country", "").strip()
if country != COUNTRY_FILTER:
if COUNTRY_FILTER not in country:
filtered_out += 1
continue
@@ -221,6 +235,9 @@ def import_file(csv_path: str, db) -> dict:
collection.create_index([("site.name", ASCENDING)])
if col_type == "MayoScore":
collection.create_index([("Site Action", ASCENDING)])
if col_type in ("eCOA DCRs", "ECG DCRs"):
collection.create_index([("fields.Status", ASCENDING)])
collection.create_index([("fields.Type", ASCENDING)])
stats = {
"collection": col_name,
+170
View File
@@ -13,3 +13,173 @@
2026-06-01 15:22:48 | open failed [899C0000BE168C140000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 15:24:07 | open failed [899C0000C0CF55D50000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 15:24:18 | open failed [899C0000C1CA96890000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 15:40:07 | open failed [899C0000DB9693FF0000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 15:57:37 | open failed [899C0000FDE653340000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 16:02:27 | open failed [899C0001086D6C480000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 16:12:41 | open failed [899C0001140535280000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 16:22:18 | open failed [899C00012E0882410000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 16:26:43 | open failed [899C0001E06482F10000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 16:32:44 | open failed [899C0001ED5302060000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 16:39:45 | open failed [899C0001FCC6C2910000.msg]: Attachment method missing on attachment __attach_version1.0_#00000002, and it could not be determined automatically.
2026-06-01 17:07:04 | open failed [899C0002323988230000.msg]: Attachment method missing on attachment __attach_version1.0_#00000002, and it could not be determined automatically.
2026-06-01 17:16:10 | open failed [899C000242F1A76E0000.msg]: File was confirmed to be an olefile, but was not an MSG file.
2026-06-01 17:19:43 | open failed [899C00024C8DDE840000.msg]: File was confirmed to be an olefile, but was not an MSG file.
2026-06-01 17:56:09 | open failed [899C000265F79E010000.msg]: File was confirmed to be an olefile, but was not an MSG file.
2026-06-01 18:55:26 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 41, in decode
return variableByteDecode(codecName, data, errors, decodingTable)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 273, in variableByteDecode
rep = errorHandler(err)
^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'windows-950' codec can't decode bytes in position 2404-2405: character maps to <undefined>
decoding with 'windows-950' codec failed
2026-06-01 18:55:39 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 41, in decode
return variableByteDecode(codecName, data, errors, decodingTable)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 273, in variableByteDecode
rep = errorHandler(err)
^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'windows-950' codec can't decode bytes in position 2244-2245: character maps to <undefined>
decoding with 'windows-950' codec failed
2026-06-01 18:55:50 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 41, in decode
return variableByteDecode(codecName, data, errors, decodingTable)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 273, in variableByteDecode
rep = errorHandler(err)
^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'windows-950' codec can't decode bytes in position 496-497: character maps to <undefined>
decoding with 'windows-950' codec failed
2026-06-01 18:59:04 | open failed [899C0002A03911800000.msg]: Attachment method missing on attachment __attach_version1.0_#0000000A, and it could not be determined automatically.
2026-06-01 18:59:05 | open failed [899C0002A03911810000.msg]: Attachment method missing on attachment __attach_version1.0_#0000000A, and it could not be determined automatically.
2026-06-01 18:59:05 | open failed [899C0002A03911820000.msg]: Attachment method missing on attachment __attach_version1.0_#0000000A, and it could not be determined automatically.
2026-06-01 19:00:31 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Lib\encodings\cp1258.py", line 15, in decode
return codecs.charmap_decode(input,errors,decoding_table)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9a in position 1: character maps to <undefined>
decoding with 'windows-1258' codec failed
2026-06-01 19:07:35 | open failed [899C0002A81D82F30000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 19:17:04 | open failed [899C0002AF8653970000.msg]: 'gb2312' codec can't decode byte 0xe9 in position 7: illegal multibyte sequence
2026-06-01 19:29:07 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'gb2312' codec can't decode byte 0x96 in position 402: illegal multibyte sequence
decoding with 'gb2312' codec failed
2026-06-01 19:32:42 | open failed [899C0002E47C3A1C0000.msg]: Attachment method missing on attachment __attach_version1.0_#00000004, and it could not be determined automatically.
2026-06-01 19:33:10 | open failed [899C0002E69328E00000.msg]: Attachment method missing on attachment __attach_version1.0_#00000004, and it could not be determined automatically.
2026-06-01 19:34:13 | open failed [899C0002E950197E0000.msg]: Attachment method missing on attachment __attach_version1.0_#00000004, and it could not be determined automatically.
2026-06-01 19:39:28 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 41, in decode
return variableByteDecode(codecName, data, errors, decodingTable)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\encoding\utils.py", line 273, in variableByteDecode
rep = errorHandler(err)
^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'windows-950' codec can't decode bytes in position 969-970: character maps to <undefined>
decoding with 'windows-950' codec failed
2026-06-01 19:45:55 | open failed [899C0003073E12A00000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 19:45:57 | open failed [899C0003073E12A90000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 19:48:04 | open failed [899C00030F081AA20000.msg]: Attachment method missing on attachment __attach_version1.0_#00000001, and it could not be determined automatically.
2026-06-01 19:48:54 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'shift_jis' codec can't decode byte 0x92 in position 679: illegal multibyte sequence
decoding with 'shift_jis' codec failed
2026-06-01 19:58:26 | Critical error accessing the body. File opened but accessing the body will throw an exception.
Traceback (most recent call last):
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 96, in __init__
self.body
File "C:\Python\Lib\functools.py", line 998, in __get__
val = self.func(instance)
^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\message_base.py", line 947, in body
if (body := self.getStringStream('__substg1.0_1000')) is not None:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "U:\PythonProject\Janssen\.venv\Lib\site-packages\extract_msg\msg_classes\msg.py", line 759, in getStringStream
return None if tmp is None else tmp.decode(self.stringEncoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'shift_jis' codec can't decode byte 0x92 in position 1240: illegal multibyte sequence
decoding with 'shift_jis' codec failed
+1 -1
View File
File diff suppressed because one or more lines are too long
+2 -2
View File
@@ -25,8 +25,8 @@ DB_NAME = "edc"
STUDY_FULL = "77242113UCO3001"
VERSION = "1.0"
OUTPUT_DIR = Path(__file__).parent / "reports"
TRASH_DIR = OUTPUT_DIR / "TRASH"
OUTPUT_DIR = Path(r"U:\Dropbox\!!!Days\Downloads Z230")
TRASH_DIR = Path(__file__).parent / "reports" / "TRASH"
COLLECTIONS = [
"UCO3001.DateofVisit",
Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.6 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 249 KiB

After

Width:  |  Height:  |  Size: 550 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 54 KiB

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long