Files
rohlik/10PriceScraping/Rohlik/test_login.py
T
2026-06-01 07:24:46 +02:00

100 lines
3.3 KiB
Python

"""
Reusable login flow for Rohlik.cz:
1. Load saved session (auth_state.json) if it exists.
2. Open the site and check whether we're already logged in.
3. If yes -> continue.
4. If no -> log in via the JSON API, accept cookies, save the session, continue.
"""
import json
from pathlib import Path

from playwright.sync_api import (
    BrowserContext,
    Page,
    TimeoutError as PlaywrightTimeoutError,
    sync_playwright,
)

from config import BASE_URL, AUTH_STATE_PATH, ROHLIK_EMAIL, ROHLIK_PASSWORD
# Login endpoint under BASE_URL; api_login() POSTs the JSON credentials here.
LOGIN_URL = f"{BASE_URL}/services/frontend-service/login"
def is_logged_in(page: Page) -> bool:
    """Report whether *page* looks like an authenticated session.

    This is a heuristic: the page counts as logged in when it contains no
    element matching the "Přihlásit se" (Czech for "Log in") text selector.
    """
    login_prompt = page.locator('text="Přihlásit se"')
    return login_prompt.count() == 0
def accept_cookies(page: Page) -> str:
    """Accept the Usercentrics consent banner via its official JS API.

    Runs a script in the page that polls ``window.UC_UI`` up to 20 times at
    250 ms intervals until it reports being initialized, then calls
    ``acceptAllConsents()`` followed by ``closeCMP()``. Afterwards waits up
    to 5 seconds for the ``#usercentrics-cmp-ui`` element to leave the DOM.

    Args:
        page: The page on which the consent banner may be shown.

    Returns:
        ``"accepted"`` when the consent API was found and invoked, otherwise
        ``"UC_UI not available"``.
    """
    result = page.evaluate('''async () => {
        for (let i = 0; i < 20; i++) {
            if (window.UC_UI && window.UC_UI.isInitialized && window.UC_UI.isInitialized()) break;
            await new Promise(r => setTimeout(r, 250));
        }
        if (window.UC_UI && typeof window.UC_UI.acceptAllConsents === 'function') {
            await window.UC_UI.acceptAllConsents();
            await window.UC_UI.closeCMP();
            return "accepted";
        }
        return "UC_UI not available";
    }''')
    # Wait for the banner to actually detach from the DOM (close animation ~1s).
    try:
        page.wait_for_selector('#usercentrics-cmp-ui', state='detached', timeout=5000)
    except PlaywrightTimeoutError:
        # Best effort: a banner that is still attached after the timeout is
        # tolerated. Any other failure (e.g. a closed page) is a real error
        # and is allowed to propagate instead of being silently swallowed.
        pass
    return result
def api_login(context: BrowserContext) -> int:
    """POST the configured credentials to LOGIN_URL and return the HTTP status.

    The request is sent through ``context.request`` rather than a separate
    HTTP client (presumably so that the session cookies end up in the browser
    context; the caller reloads the page afterwards and expects to be logged
    in).

    Args:
        context: Browser context whose request client performs the POST.

    Returns:
        The HTTP status code of the login response.
    """
    credentials = {"email": ROHLIK_EMAIL, "password": ROHLIK_PASSWORD}
    request_headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    response = context.request.post(
        LOGIN_URL,
        data=json.dumps(credentials),
        headers=request_headers,
    )
    return response.status
def ensure_logged_in(pw, headless=False) -> tuple[BrowserContext, Page]:
    """Launch Chromium and return a ``(context, page)`` pair for the site.

    If a saved session file exists at AUTH_STATE_PATH it is loaded into the
    new browser context. When the opened page does not look logged in, the
    function logs in through the JSON API, reloads the page, accepts the
    cookie banner and, if the login took effect, saves the session state
    back to AUTH_STATE_PATH. A failed login is only reported on stdout; the
    context and page are returned either way.

    Args:
        pw: Playwright instance, as yielded by ``sync_playwright()``.
        headless: Passed through to ``chromium.launch``.

    Returns:
        The browser context and the page opened on BASE_URL. The caller owns
        the browser and is responsible for closing it.

    Raises:
        Exception: Any error raised while opening the site or logging in is
            re-raised after the launched browser has been closed.
    """
    auth_path = Path(AUTH_STATE_PATH)
    have_state = auth_path.exists()

    browser = pw.chromium.launch(headless=headless, args=["--start-maximized"])
    try:
        # no_viewport lets the page follow the (maximized) window size.
        ctx_args = {"no_viewport": True}
        if have_state:
            ctx_args["storage_state"] = AUTH_STATE_PATH
        context = browser.new_context(**ctx_args)
        page = context.new_page()

        print(f"1) Opening site (saved session: {have_state}) ...")
        page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000)
        # Fixed pause before inspecting the page for the login prompt.
        page.wait_for_timeout(3000)

        if is_logged_in(page):
            print("2) Already logged in from saved session — continuing.")
            return context, page

        print("2) Not logged in — logging in via API ...")
        status = api_login(context)
        print(f" Login API status: {status}")

        # Reload so the page reflects the session established by the API call.
        page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000)
        page.wait_for_timeout(3000)
        print(f"3) Accepting cookies: {accept_cookies(page)}")

        if is_logged_in(page):
            # Make sure the target directory exists so that saving the session
            # cannot fail right after a successful login.
            auth_path.parent.mkdir(parents=True, exist_ok=True)
            context.storage_state(path=AUTH_STATE_PATH)
            print("4) Logged in and session saved.")
        else:
            print("4) Login FAILED — check API status above.")
        return context, page
    except Exception:
        # The caller never receives a handle on failure, so close the browser
        # here instead of leaking it.
        browser.close()
        raise
# Manual smoke test: run the login flow, report the resulting state, and keep
# the browser open until the user presses Enter.
if __name__ == "__main__":
    with sync_playwright() as pw:
        context, page = ensure_logged_in(pw)
        try:
            print(f"\n -> logged in: {is_logged_in(page)}")
            print(f" -> cookie banner present: {page.locator('#usercentrics-cmp-ui').count() > 0}")
            print("\nReady to scrape. Press Enter to close browser...")
            input()
        finally:
            # Close the browser even if the checks above or input() raise
            # (e.g. EOFError on non-interactive stdin, or Ctrl-C).
            context.browser.close()