""" Reusable login flow for Rohlik.cz: 1. Load saved session (auth_state.json) if it exists. 2. Open the site and check whether we're already logged in. 3. If yes -> continue. 4. If no -> log in via the JSON API, accept cookies, save the session, continue. """ import json from pathlib import Path from playwright.sync_api import sync_playwright, BrowserContext, Page from config import BASE_URL, AUTH_STATE_PATH, ROHLIK_EMAIL, ROHLIK_PASSWORD LOGIN_URL = f"{BASE_URL}/services/frontend-service/login" def is_logged_in(page: Page) -> bool: return page.locator('text="Přihlásit se"').count() == 0 def accept_cookies(page: Page): """Accept the Usercentrics consent banner via its official JS API.""" result = page.evaluate('''async () => { for (let i = 0; i < 20; i++) { if (window.UC_UI && window.UC_UI.isInitialized && window.UC_UI.isInitialized()) break; await new Promise(r => setTimeout(r, 250)); } if (window.UC_UI && typeof window.UC_UI.acceptAllConsents === 'function') { await window.UC_UI.acceptAllConsents(); await window.UC_UI.closeCMP(); return "accepted"; } return "UC_UI not available"; }''') # Wait for the banner to actually detach from the DOM (close animation ~1s) try: page.wait_for_selector('#usercentrics-cmp-ui', state='detached', timeout=5000) except Exception: pass return result def api_login(context: BrowserContext) -> int: resp = context.request.post( LOGIN_URL, data=json.dumps({"email": ROHLIK_EMAIL, "password": ROHLIK_PASSWORD}), headers={"Content-Type": "application/json", "Accept": "application/json"}, ) return resp.status def ensure_logged_in(pw, headless=False) -> tuple[BrowserContext, Page]: auth_path = Path(AUTH_STATE_PATH) have_state = auth_path.exists() browser = pw.chromium.launch(headless=headless, args=["--start-maximized"]) ctx_args = {"no_viewport": True} if have_state: ctx_args["storage_state"] = AUTH_STATE_PATH context = browser.new_context(**ctx_args) page = context.new_page() print(f"1) Opening site (saved session: {have_state}) ...") page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000) page.wait_for_timeout(3000) if is_logged_in(page): print("2) Already logged in from saved session — continuing.") return context, page print("2) Not logged in — logging in via API ...") status = api_login(context) print(f" Login API status: {status}") page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000) page.wait_for_timeout(3000) print(f"3) Accepting cookies: {accept_cookies(page)}") if is_logged_in(page): context.storage_state(path=AUTH_STATE_PATH) print("4) Logged in and session saved.") else: print("4) Login FAILED — check API status above.") return context, page if __name__ == "__main__": with sync_playwright() as pw: context, page = ensure_logged_in(pw) print(f"\n -> logged in: {is_logged_in(page)}") print(f" -> cookie banner present: {page.locator('#usercentrics-cmp-ui').count() > 0}") print("\nReady to scrape. Press Enter to close browser...") input() context.browser.close()