From aa6562c921f34bf47b91819399a116804a20e285 Mon Sep 17 00:00:00 2001 From: Vladimir Buzalka Date: Sun, 31 May 2026 14:37:57 +0200 Subject: [PATCH] notebookVB --- .gitignore | 2 ++ 10PriceScraping/Rohlik/config.py | 3 +- 10PriceScraping/Rohlik/test_login.py | 49 ++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 10PriceScraping/Rohlik/test_login.py diff --git a/.gitignore b/.gitignore index 0e649de..ad9504e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ .idea/ __pycache__/ *.pyc +.env +auth_state.json diff --git a/10PriceScraping/Rohlik/config.py b/10PriceScraping/Rohlik/config.py index a9e09e9..e809694 100644 --- a/10PriceScraping/Rohlik/config.py +++ b/10PriceScraping/Rohlik/config.py @@ -9,9 +9,10 @@ Defines scraping parameters (scroll behavior, timeouts) and URL constants. """ import os +from pathlib import Path from dotenv import load_dotenv -load_dotenv() +load_dotenv(Path(__file__).parent / ".env") MONGO_URI = os.getenv("MONGO_URI", "mongodb://192.168.1.76:27017") MONGO_DB = os.getenv("MONGO_DB", "rohlik") diff --git a/10PriceScraping/Rohlik/test_login.py b/10PriceScraping/Rohlik/test_login.py new file mode 100644 index 0000000..dc8fd24 --- /dev/null +++ b/10PriceScraping/Rohlik/test_login.py @@ -0,0 +1,49 @@ +""" +Reuse saved browser state (cookies + localStorage) so the Usercentrics cookie +banner never appears and we stay logged in — same situation as a returning user. + +- If auth_state.json is MISSING: opens a browser, you accept cookies + log in + manually, then press Enter to save the state. +- If auth_state.json EXISTS: loads it and just verifies (no banner, logged in). 
+"""
+
+from pathlib import Path
+from playwright.sync_api import sync_playwright
+from config import BASE_URL, AUTH_STATE_PATH
+
+
+def _probe_page(page):
+    """Return ``(banner_present, is_logged_in)`` for the page's current DOM.
+
+    The banner counts as present when ``#usercentrics-cmp-ui`` matches at
+    least one element. The session counts as logged in when no element
+    matches the quoted text selector "Přihlásit se" (Czech for "Log in").
+
+    NOTE(review): both checks only count matching elements, so a page that
+    failed to render also reports "no banner, logged in".
+    """
+    banner_present = page.locator('#usercentrics-cmp-ui').count() > 0
+    is_logged_in = page.locator('text="Přihlásit se"').count() == 0
+    return banner_present, is_logged_in
+
+
+def main():
+    """Open BASE_URL in a headed browser and report banner/login status.
+
+    If AUTH_STATE_PATH exists, it is loaded into the browser context as
+    storage state. Otherwise the script waits for Enter, writes the
+    context's storage state to AUTH_STATE_PATH, and reports the status
+    again. In both cases it then waits for Enter before closing the browser.
+    """
+    have_state = Path(AUTH_STATE_PATH).exists()
+
+    ctx_args = {"no_viewport": True}
+    if have_state:
+        ctx_args["storage_state"] = AUTH_STATE_PATH
+
+    with sync_playwright() as pw:
+        browser = pw.chromium.launch(headless=False, args=["--start-maximized"])
+        # Close the browser even if navigation raises or a prompt is
+        # interrupted (e.g. Ctrl-C while waiting on input()).
+        try:
+            context = browser.new_context(**ctx_args)
+            page = context.new_page()
+
+            print(f"Opening {BASE_URL} (state loaded: {have_state}) ...")
+            page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000)
+            # Fixed pause after DOMContentLoaded before probing the DOM.
+            page.wait_for_timeout(4000)
+
+            banner_present, is_logged_in = _probe_page(page)
+            print(f"Cookie banner present: {banner_present}")
+            print(f"Logged in: {is_logged_in}")
+
+            if not have_state:
+                print("\n" + "=" * 60)
+                print("No saved state. Accept cookies + log in manually,")
+                print("then press Enter here to save the state.")
+                print("=" * 60)
+                input()
+                context.storage_state(path=AUTH_STATE_PATH)
+                print(f"Saved state to {AUTH_STATE_PATH}")
+                banner_present, is_logged_in = _probe_page(page)
+                print(f" -> banner present now: {banner_present}, logged in: {is_logged_in}")
+
+            print("\nPress Enter to close browser...")
+            input()
+        finally:
+            browser.close()
+
+
+# Guarded entry point: the file name matches pytest's default ``test_*.py``
+# pattern, so importing it must not launch a browser or block on input().
+if __name__ == "__main__":
+    main()