100 lines
3.3 KiB
Python
100 lines
3.3 KiB
Python
"""
Reusable login flow for Rohlik.cz:

1. Load saved session (auth_state.json) if it exists.
2. Open the site and check whether we're already logged in.
3. If yes -> continue.
4. If no -> log in via the JSON API, accept cookies, save the session, continue.
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from playwright.sync_api import sync_playwright, BrowserContext, Page
|
|
from config import BASE_URL, AUTH_STATE_PATH, ROHLIK_EMAIL, ROHLIK_PASSWORD
|
|
|
|
# Login endpoint under BASE_URL; api_login() POSTs the JSON credentials here.
LOGIN_URL = f"{BASE_URL}/services/frontend-service/login"
|
|
|
|
|
|
def is_logged_in(page: Page) -> bool:
    """Return True when no element with the text "Přihlásit se" is on *page*.

    The check is a snapshot: it counts matching elements at call time and
    does not wait for the page to finish rendering, so callers should give
    the page time to settle first.
    """
    login_prompts = page.locator('text="Přihlásit se"')
    # Zero matches of the login prompt is treated as "already logged in".
    return not login_prompts.count()
|
|
|
|
|
|
def accept_cookies(page: Page):
    """Accept the Usercentrics consent banner via its official JS API.

    Runs a script in the page that polls up to 20 times, 250 ms apart, until
    ``window.UC_UI.isInitialized()`` is true, then calls
    ``UC_UI.acceptAllConsents()`` followed by ``UC_UI.closeCMP()``.
    Afterwards it waits up to 5 seconds for ``#usercentrics-cmp-ui`` to be
    detached from the DOM.

    Args:
        page: The page on which the consent banner may be showing.

    Returns:
        ``"accepted"`` when the consent API was found and called, otherwise
        ``"UC_UI not available"``.

    Raises:
        playwright.sync_api.Error: For failures other than the banner wait
            timing out (for example the page being closed mid-call).
    """
    # Imported locally so this function carries its own dependency; aliased
    # to avoid shadowing the builtin TimeoutError.
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    result = page.evaluate('''async () => {
        for (let i = 0; i < 20; i++) {
            if (window.UC_UI && window.UC_UI.isInitialized && window.UC_UI.isInitialized()) break;
            await new Promise(r => setTimeout(r, 250));
        }
        if (window.UC_UI && typeof window.UC_UI.acceptAllConsents === 'function') {
            await window.UC_UI.acceptAllConsents();
            await window.UC_UI.closeCMP();
            return "accepted";
        }
        return "UC_UI not available";
    }''')

    # Wait for the banner to actually detach from the DOM (close animation ~1s)
    try:
        page.wait_for_selector('#usercentrics-cmp-ui', state='detached', timeout=5000)
    except PlaywrightTimeoutError:
        # Best effort: a banner that lingers past the timeout is not fatal,
        # so only the timeout is ignored. Any other error still propagates.
        pass
    return result
|
|
|
|
|
|
def api_login(context: BrowserContext) -> int:
    """POST the configured credentials to LOGIN_URL and return the HTTP status.

    The request is sent through ``context.request`` rather than a separate
    HTTP client -- presumably so the session cookies set by the response end
    up in the same browser context (confirm against the Playwright
    APIRequestContext documentation).

    Args:
        context: Browser context whose request client sends the login call.

    Returns:
        The HTTP status code of the login response.
    """
    credentials = {"email": ROHLIK_EMAIL, "password": ROHLIK_PASSWORD}
    json_headers = {"Content-Type": "application/json", "Accept": "application/json"}
    response = context.request.post(
        LOGIN_URL,
        data=json.dumps(credentials),
        headers=json_headers,
    )
    return response.status
|
|
|
|
|
|
def ensure_logged_in(pw, headless=False) -> tuple[BrowserContext, Page]:
    """Open the site in Chromium and make sure the session is logged in.

    Steps:
        1. Launch Chromium and create a context, seeded with the saved
           storage state at AUTH_STATE_PATH when that file exists.
        2. Open BASE_URL; if ``is_logged_in`` reports True, return right away.
        3. Otherwise call ``api_login``, reload the site and accept the
           cookie banner.
        4. If the page now looks logged in, save the storage state to
           AUTH_STATE_PATH; otherwise print a failure notice.

    Args:
        pw: A started Playwright instance (as yielded by ``sync_playwright()``).
        headless: Whether to launch the browser without a visible window.

    Returns:
        The ``(context, page)`` pair. It is returned even when login failed,
        so callers that need certainty should re-check ``is_logged_in(page)``.

    Raises:
        Any exception raised while setting up or navigating is re-raised
        after the launched browser has been closed.
    """
    auth_path = Path(AUTH_STATE_PATH)
    have_state = auth_path.exists()

    browser = pw.chromium.launch(headless=headless, args=["--start-maximized"])
    try:
        # no_viewport lets the page follow the maximized window size.
        ctx_args = {"no_viewport": True}
        if have_state:
            ctx_args["storage_state"] = auth_path
        context = browser.new_context(**ctx_args)
        page = context.new_page()

        print(f"1) Opening site (saved session: {have_state}) ...")
        page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000)
        # Fixed pause so the page can render before the login check,
        # which itself does not wait.
        page.wait_for_timeout(3000)

        if is_logged_in(page):
            print("2) Already logged in from saved session — continuing.")
            return context, page

        print("2) Not logged in — logging in via API ...")
        status = api_login(context)
        print(f" Login API status: {status}")

        # Reload so the page is rendered with the freshly obtained session.
        page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000)
        page.wait_for_timeout(3000)

        print(f"3) Accepting cookies: {accept_cookies(page)}")

        if is_logged_in(page):
            # Make sure the target directory exists before writing the state.
            auth_path.parent.mkdir(parents=True, exist_ok=True)
            context.storage_state(path=auth_path)
            print("4) Logged in and session saved.")
        else:
            print("4) Login FAILED — check API status above.")
    except BaseException:
        # Do not leak the browser process when any step above fails; the
        # caller never receives a handle it could use to close it.
        browser.close()
        raise

    return context, page
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: log in, report the resulting state, and keep the
    # browser open until the user presses Enter.
    with sync_playwright() as playwright:
        ctx, tab = ensure_logged_in(playwright)

        logged_in = is_logged_in(tab)
        print(f"\n -> logged in: {logged_in}")

        banner_present = tab.locator('#usercentrics-cmp-ui').count() > 0
        print(f" -> cookie banner present: {banner_present}")

        print("\nReady to scrape. Press Enter to close browser...")
        input()

        # Close the whole browser that owns the context, not just the context.
        owning_browser = ctx.browser
        owning_browser.close()
|