#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Report which messages in selected mailbox folders are missing from Mongo.

For every mail folder whose display name is listed in ``TARGETS``, the
messages returned by Microsoft Graph (online) are compared, by
``internetMessageId``, with the ``_id`` values stored in the Mongo collection
under the same ``folder_path`` (archive).  For each such folder a short
characterisation of the missing messages is printed: their date range, the
most frequent senders and a sample of the five newest subjects.
"""
import collections
import os
import sys

import msal
import requests
from pymongo import MongoClient

TENANT = "7d269944-37a4-43a1-8140-c7517dc426e9"
CLIENT = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
# NOTE(review): this client secret is committed in source.  It should be
# rotated and supplied only through the environment; the literal is kept as a
# fallback solely so the script keeps running unchanged until that happens.
SECRET = os.environ.get(
    "GRAPH_CLIENT_SECRET", "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
)
GRAPH = "https://graph.microsoft.com/v1.0"
MBOX = "michaela.buzalkova@buzalka.cz"
# Folders to inspect, matched against each folder's displayName.
TARGETS = ["Inbox", "Odloženo", "Deleted Items", "Sent Items"]
MONGO_URI = "mongodb://192.168.1.76:27017"

# Headers sent with every Graph request; main() adds the bearer token.
H = {}


def _acquire_token():
    """Return an app-only Graph access token.

    Raises:
        RuntimeError: if MSAL does not return an ``access_token``; the message
            carries the ``error`` and ``error_description`` MSAL reported.
    """
    app = msal.ConfidentialClientApplication(
        CLIENT,
        authority=f"https://login.microsoftonline.com/{TENANT}",
        client_credential=SECRET,
    )
    result = app.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )
    if "access_token" not in result:
        raise RuntimeError(
            "Token acquisition failed: "
            f"{result.get('error')}: {result.get('error_description')}"
        )
    return result["access_token"]


def _get_json(url, params=None):
    """GET *url* with the shared headers and return the decoded JSON body.

    Raises:
        requests.HTTPError: on a 4xx/5xx response, so that an error payload is
            never mistaken for an empty result page.
    """
    resp = requests.get(url, headers=H, params=params, timeout=60)
    resp.raise_for_status()
    return resp.json()


def _paged(url, params):
    """Yield every item of a paged Graph collection, following nextLink."""
    while url:
        page = _get_json(url, params)
        yield from page.get("value", [])
        url = page.get("@odata.nextLink")
        # The nextLink URL is requested as-is; the initial query parameters
        # are only sent with the first request.
        params = None


def allfolders(parent=None, ppath=""):
    """Return all mail folders of MBOX below *parent*, depth first.

    Args:
        parent: id of the folder whose children are listed; ``None`` lists the
            top-level mail folders.
        ppath: slash-separated path of *parent*, used as prefix for the paths
            of the folders found.

    Returns:
        A list of ``(path, displayName, id)`` tuples; each folder is followed
        directly by the folders of its subtree.
    """
    if parent is None:
        url = f"{GRAPH}/users/{MBOX}/mailFolders"
    else:
        url = f"{GRAPH}/users/{MBOX}/mailFolders/{parent}/childFolders"
    params = {"$top": 100, "$select": "id,displayName,childFolderCount"}
    out = []
    for f in _paged(url, params):
        path = f"{ppath}/{f['displayName']}".lstrip("/")
        out.append((path, f["displayName"], f["id"]))
        if f.get("childFolderCount", 0) > 0:
            out += allfolders(f["id"], path)
    return out


def _online_messages(fid):
    """Return ``{internetMessageId: message}`` for the folder with id *fid*.

    Messages without an ``internetMessageId`` are skipped.
    """
    url = f"{GRAPH}/users/{MBOX}/mailFolders/{fid}/messages"
    params = {
        "$select": "internetMessageId,receivedDateTime,sentDateTime,from,subject",
        "$top": 100,
    }
    online = {}
    for m in _paged(url, params):
        mid = m.get("internetMessageId")
        if mid:
            online[mid] = m
    return online


def _stamp(m):
    """Return the received (or, failing that, sent) timestamp of *m*, or ''."""
    return m.get("receivedDateTime") or m.get("sentDateTime") or ""


def _sender(m):
    """Return the sender address of *m*, or '?' when the key is absent."""
    return ((m.get('from') or {}).get('emailAddress') or {}).get('address', '?')


def _describe_missing(missing):
    """Print date range, top senders and the five newest of *missing*."""
    dts = sorted(_stamp(m)[:10] for m in missing)
    print(f" datumove rozpeti chybejicich: {dts[0]} .. {dts[-1]}")
    by = collections.Counter(_sender(m) for m in missing)
    print(" top odesilatele chybejicich:")
    for a, n in by.most_common(8):
        print(f" {n:>4} {a}")
    print(" ukazka 5 nejnovejsich:")
    # Sort by the same timestamp that is displayed, so messages that only
    # have a sentDateTime are ordered by it rather than sorted last.
    for m in sorted(missing, key=_stamp, reverse=True)[:5]:
        d = _stamp(m)[:16]
        s = (m.get('subject') or '')[:55]
        print(f" {d} {s}")


def main():
    """Compare the target folders online vs. Mongo and print the report."""
    H["Authorization"] = f"Bearer {_acquire_token()}"
    folders = allfolders()
    # The context manager closes the client when the report is done.
    with MongoClient(MONGO_URI) as cli:
        col = cli["emaily"][MBOX]
        for path, dname, fid in folders:
            if dname not in TARGETS:
                continue
            online = _online_messages(fid)
            have = {x["_id"] for x in col.find({"folder_path": path}, {"_id": 1})}
            missing = [m for mid, m in online.items() if mid not in have]
            print(f"\n=== {path} (online {len(online)}, mongo {len(have)}, chybi {len(missing)}) ===")
            if not missing:
                continue
            _describe_missing(missing)


if __name__ == "__main__":
    main()