notebook
This commit is contained in:
@@ -20,6 +20,7 @@ Spusteni:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
from datetime import datetime, timezone, timedelta
|
||||
@@ -178,6 +179,8 @@ def search(
|
||||
mailbox: Optional[Union[str, list]] = None,
|
||||
since: Optional[str] = None,
|
||||
until: Optional[str] = None,
|
||||
days: Optional[int] = None,
|
||||
inflect: bool = False,
|
||||
folder_contains: Optional[str] = None,
|
||||
sender_contains: Optional[str] = None,
|
||||
has_attachments: Optional[bool] = None,
|
||||
@@ -186,15 +189,23 @@ def search(
|
||||
"""PRIMARY TOOL — fulltext search across all indexed emails.
|
||||
|
||||
Index includes: subject, sender (email + name), recipients (to/cc),
|
||||
attachment filenames, AND full body text.
|
||||
attachment filenames, AND full body text. Diacritics are stripped at index
|
||||
time, so search is already accent-insensitive (recept == řecept == RECEPT).
|
||||
|
||||
query: websearch_to_tsquery syntax:
|
||||
query: websearch_to_tsquery syntax (only when inflect=False):
|
||||
invoice payment -> AND
|
||||
"lot expiration" -> phrase
|
||||
SAE OR "serious adverse" -> OR
|
||||
urgent -newsletter -> exclude
|
||||
mailbox: one mailbox string or list (e.g. "vbuzalka@its.jnj.com"). None = all.
|
||||
since/until: ISO date "YYYY-MM-DD" on received_at
|
||||
days: convenience window — only the last N days (overrides `since`). "za posledních X dní".
|
||||
inflect: Czech declension. The index uses no stemmer, so a plain search for
|
||||
"recept" misses "recepty/receptu/receptem/...". With inflect=True each word
|
||||
in `query` is prefix-matched (recept -> recept:*) and AND-ed, catching the
|
||||
other grammatical cases. Set this for Czech-word searches. Trade-off: a
|
||||
prefix also matches unrelated longer words (recept:* also hits "receptor").
|
||||
In this mode the query is treated as plain words (operators/quotes ignored).
|
||||
folder_contains: substring match against folder_path (case-insensitive)
|
||||
sender_contains: substring match against sender_email OR sender_name (case-insensitive)
|
||||
has_attachments: True / False / None (any)
|
||||
@@ -207,11 +218,23 @@ def search(
|
||||
mboxes = normalize_mailbox(mailbox)
|
||||
since_dt = parse_since(since)
|
||||
until_dt = parse_since(until)
|
||||
if days and days > 0:
|
||||
since_dt = datetime.now(timezone.utc) - timedelta(days=days)
|
||||
limit = min(max(1, limit), 100)
|
||||
|
||||
sql = """
|
||||
# Build the tsquery. inflect=True → prefix-match each word (Czech cases)
|
||||
# via to_tsquery; otherwise use websearch_to_tsquery for full operator support.
|
||||
tsq_func = "websearch_to_tsquery"
|
||||
tsq_text = query
|
||||
if inflect:
|
||||
tokens = re.findall(r"\w+", query, flags=re.UNICODE)
|
||||
if tokens:
|
||||
tsq_func = "to_tsquery"
|
||||
tsq_text = " & ".join(f"{t}:*" for t in tokens)
|
||||
|
||||
sql = f"""
|
||||
WITH q AS (
|
||||
SELECT websearch_to_tsquery('soubory'::regconfig, %(query)s) AS tsq
|
||||
SELECT {tsq_func}('soubory'::regconfig, %(query)s) AS tsq
|
||||
)
|
||||
SELECT
|
||||
e.id, e.mailbox, e.message_id, e.conversation_id, e.folder_path,
|
||||
@@ -573,14 +596,26 @@ def find_attachment(
|
||||
def top_senders(
|
||||
mailbox: Optional[Union[str, list]] = None,
|
||||
since: Optional[str] = None,
|
||||
days: Optional[int] = None,
|
||||
folder_contains: Optional[str] = None,
|
||||
limit: int = 20,
|
||||
) -> dict:
|
||||
"""Top senders by volume (count of received emails). Optionally limit by mailbox or date window.
|
||||
Use for "who emails me most" or "top senders this month".
|
||||
"""Unique senders grouped by sender_email, counted, sorted by count DESC.
|
||||
Use for "who emails me most" / "top senders this month".
|
||||
|
||||
mailbox: one mailbox string or list. None = all.
|
||||
since: ISO date "YYYY-MM-DD" lower bound on received_at.
|
||||
days: convenience window — count only the last N days (overrides `since`
|
||||
when both given). Use for "za posledních X dní".
|
||||
folder_contains: substring match against folder_path (case-insensitive).
|
||||
Pass "Inbox" to count ONLY received/incoming mail and exclude the
|
||||
mailbox owner's own Sent Items, Drafts, etc. Default None = all folders.
|
||||
"""
|
||||
try:
|
||||
mboxes = normalize_mailbox(mailbox)
|
||||
since_dt = parse_since(since)
|
||||
if days and days > 0:
|
||||
since_dt = datetime.now(timezone.utc) - timedelta(days=days)
|
||||
limit = min(max(1, limit), 100)
|
||||
sql = """
|
||||
SELECT sender_email, count(*) AS c, max(received_at) AS last_at
|
||||
@@ -588,12 +623,18 @@ def top_senders(
|
||||
WHERE ok = TRUE AND sender_email IS NOT NULL
|
||||
AND (%(mboxes)s::text[] IS NULL OR mailbox = ANY(%(mboxes)s::text[]))
|
||||
AND (%(since)s::timestamptz IS NULL OR received_at >= %(since)s::timestamptz)
|
||||
AND (%(folder)s::text IS NULL OR folder_path ILIKE %(folder_like)s)
|
||||
GROUP BY sender_email
|
||||
ORDER BY c DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
with pg_conn() as pg, pg.cursor() as cur:
|
||||
cur.execute(sql, {"mboxes": mboxes, "since": since_dt, "limit": limit})
|
||||
cur.execute(sql, {
|
||||
"mboxes": mboxes, "since": since_dt,
|
||||
"folder": folder_contains,
|
||||
"folder_like": f"%{folder_contains}%" if folder_contains else None,
|
||||
"limit": limit,
|
||||
})
|
||||
rows = [{"sender_email": s, "count": c, "last_at": serialize(t)}
|
||||
for s, c, t in cur.fetchall()]
|
||||
return {"count": len(rows), "results": rows}
|
||||
|
||||
Reference in New Issue
Block a user