W22
This commit is contained in:
42
Medevio5_ReadNamesFromKartoteka_html.py
Normal file
42
Medevio5_ReadNamesFromKartoteka_html.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import mysql.connector
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
|
||||
# ---------- CONFIG ----------
|
||||
# MySQL connection settings (fill in)
|
||||
# Keyword arguments passed verbatim to mysql.connector.connect().
# NOTE(review): a root password is committed in plain text here. Consider
# loading credentials from the environment or an untracked config file and
# rotating this password.
MYSQL_CFG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
}
|
||||
|
||||
# Load the most recently fetched page snapshot from the kartoteka_html table.
conn = mysql.connector.connect(**MYSQL_CFG)
try:
    cur = conn.cursor()
    try:
        # Backticks quote the column identifier. The previous single quotes
        # made MySQL compare the string literal 'fetched-at' with itself,
        # which is always true, so an arbitrary row was returned.
        # ORDER BY ... LIMIT 1 yields exactly one row, so no unread results
        # are left on the cursor when it is closed.
        # NOTE(review): assumes the column is literally named `fetched-at`;
        # if it is actually `fetched_at`, adjust the identifier.
        cur.execute(
            "SELECT html FROM kartoteka_html "
            "ORDER BY `fetched-at` DESC LIMIT 1"
        )
        row = cur.fetchone()
    finally:
        cur.close()
finally:
    conn.close()

# fetchone() returns None when the table is empty; fail with a clear message
# instead of a TypeError on subscripting None.
if row is None:
    raise SystemExit("kartoteka_html contains no rows; nothing to parse")

html = row[0]
|
||||
|
||||
|
||||
# `html` is the string containing the entire web page.
soup = BeautifulSoup(html, "html.parser")

# CSS classes carried by the <button> elements whose text is collected below.
NAME_BUTTON_CLASSES = (
    "MuiTypography-root",
    "MuiTypography-body2",
    "MuiLink-root",
    "MuiLink-underlineAlways",
    "MuiLink-button",
    "css-xf7pf8",
)

# A CSS selector matches the classes as a set (any order, extra classes
# allowed). Passing the space-separated string via class_= only matches when
# the attribute value is exactly that string in exactly that order.
buttons = soup.select("button." + ".".join(NAME_BUTTON_CLASSES))

# Visible text of every matching button, with surrounding whitespace stripped.
names = [btn.get_text(strip=True) for btn in buttons]

for name in names:
    print(name)

print(names)
|
||||
Reference in New Issue
Block a user