notebookVB
This commit is contained in:
@@ -3,3 +3,5 @@
|
|||||||
.idea/
|
.idea/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
|
.env
|
||||||
|
auth_state.json
|
||||||
|
|||||||
@@ -9,9 +9,10 @@ Defines scraping parameters (scroll behavior, timeouts) and URL constants.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv(Path(__file__).parent / ".env")
|
||||||
|
|
||||||
MONGO_URI = os.getenv("MONGO_URI", "mongodb://192.168.1.76:27017")
|
MONGO_URI = os.getenv("MONGO_URI", "mongodb://192.168.1.76:27017")
|
||||||
MONGO_DB = os.getenv("MONGO_DB", "rohlik")
|
MONGO_DB = os.getenv("MONGO_DB", "rohlik")
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
"""
|
||||||
|
Reuse saved browser state (cookies + localStorage) so the Usercentrics cookie
|
||||||
|
banner never appears and we stay logged in — same situation as a returning user.
|
||||||
|
|
||||||
|
- If auth_state.json is MISSING: opens a browser, you accept cookies + log in
|
||||||
|
manually, then press Enter to save the state.
|
||||||
|
- If auth_state.json EXISTS: loads it and just verifies (no banner, logged in).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
from config import BASE_URL, AUTH_STATE_PATH
|
||||||
|
|
||||||
|
def _probe_page(page):
    """Return ``(banner_present, is_logged_in)`` for the currently loaded page.

    The cookie banner counts as present when at least one element matches
    ``#usercentrics-cmp-ui``. The session counts as logged in when no element
    with the exact text "Přihlásit se" (Czech for "Log in") is found.
    """
    banner_present = page.locator('#usercentrics-cmp-ui').count() > 0
    is_logged_in = page.locator('text="Přihlásit se"').count() == 0
    return banner_present, is_logged_in


auth_path = Path(AUTH_STATE_PATH)
# Decided once, before the browser starts: this flag selects between the
# "load and verify" flow and the "log in manually, then save" flow below.
have_state = auth_path.exists()

with sync_playwright() as pw:
    browser = pw.chromium.launch(headless=False, args=["--start-maximized"])
    try:
        # no_viewport disables Playwright's fixed viewport so the page follows
        # the size of the (maximized) window.
        ctx_args = {"no_viewport": True}
        if have_state:
            ctx_args["storage_state"] = AUTH_STATE_PATH
        context = browser.new_context(**ctx_args)
        page = context.new_page()

        print(f"Opening {BASE_URL} (state loaded: {have_state}) ...")
        page.goto(BASE_URL, wait_until="domcontentloaded", timeout=60000)
        # Fixed 4 s pause before inspecting the DOM — presumably to give the
        # consent banner and the login widget time to render.
        page.wait_for_timeout(4000)

        banner_present, is_logged_in = _probe_page(page)
        print(f"Cookie banner present: {banner_present}")
        print(f"Logged in: {is_logged_in}")

        if not have_state:
            print("\n" + "=" * 60)
            print("No saved state. Accept cookies + log in manually,")
            print("then press Enter here to save the state.")
            print("=" * 60)
            input()
            context.storage_state(path=AUTH_STATE_PATH)
            print(f"Saved state to {AUTH_STATE_PATH}")
            # Probe again so the operator can see whether the manual steps
            # actually took effect in the state that was just saved.
            banner_present, is_logged_in = _probe_page(page)
            print(f" -> banner present now: {banner_present}, logged in: {is_logged_in}")

        print("\nPress Enter to close browser...")
        input()
    finally:
        # Close the browser even when navigation times out or the operator
        # aborts at one of the prompts (Ctrl-C / EOF).
        browser.close()
|
||||||
Reference in New Issue
Block a user