Initial commit — clean history (removed large test files, browser profiles, Medidata/Clario downloads)
This commit is contained in:
@@ -0,0 +1,50 @@
"""
Import 77242113UCO3002_CZ_SK_Investigators_with_internet_summary.xlsx
into the MongoDB database 'feasibility', collection 'investigators'.

The target collection is dropped and rebuilt from the worksheet on every run.

Run: python import_to_mongo.py
Dependencies: pip install openpyxl pymongo
"""

import os

import openpyxl
from pymongo import MongoClient

XLSX = os.path.join(os.path.dirname(__file__),
                    "77242113UCO3002_CZ_SK_Investigators_with_internet_summary.xlsx")
MONGO_URI = "mongodb://192.168.1.76:27017/"
DB_NAME = "feasibility"
COLLECTION = "investigators"


def _split_lines(value):
    """Return the stripped, non-empty lines of a multi-line cell value.

    Empty cells (``None``, ``""``, ``0``) give an empty list. Any other value
    is converted with ``str()`` first, so a cell that the spreadsheet stored
    as a number does not fail on a missing ``.split`` method.
    """
    if not value:
        return []
    return [part.strip() for part in str(value).split("\n") if part.strip()]


# data_only=True is passed so that formula cells are read as values.
wb = openpyxl.load_workbook(XLSX, data_only=True)
ws = wb["CZ + SK Investigators"]
rows = list(ws.iter_rows(values_only=True))

docs = []
# The first row is skipped (presumably the header row - confirm against the sheet).
for row in rows[1:]:
    if not any(row):
        # Skip rows in which every cell is empty/falsy.
        continue
    # Only the first eight columns are used; any further trailing columns the
    # worksheet reports are ignored. A row with fewer than eight columns still
    # raises ValueError here.
    (zeme, prijmeni, jmeno, email,
     pracoviste, studie_raw, zdroje_raw, summary) = row[:8]
    docs.append({
        "zeme": zeme,
        "prijmeni": prijmeni,
        "jmeno": jmeno,
        "email": email,
        "pracoviste": pracoviste,
        "studie": _split_lines(studie_raw),
        "zdroje": _split_lines(zdroje_raw),
        "internet_summary": summary,
    })

if not docs:
    # insert_many() rejects an empty list; abort *before* dropping the
    # collection so an empty or unreadable sheet cannot wipe existing data.
    raise SystemExit(
        f"Žádné řádky k importu – kolekce {DB_NAME}.{COLLECTION} zůstala beze změny."
    )

client = MongoClient(MONGO_URI)
try:
    db = client[DB_NAME]
    col = db[COLLECTION]
    col.drop()  # clean start
    result = col.insert_many(docs)
    print(f"Vloženo {len(result.inserted_ids)} dokumentů do {DB_NAME}.{COLLECTION}")

    col.create_index("studie")
    col.create_index("zeme")
    col.create_index("prijmeni")
    print("Indexy vytvořeny: studie, zeme, prijmeni")
finally:
    # Close the connection even when drop/insert/index creation fails.
    client.close()
|
||||
Reference in New Issue
Block a user