"""Streamlit report exploring the FotkyBuzalkovi photo database.

The script runs top to bottom on every Streamlit rerun.  Every section issues
one or more read-only SQL queries against PostgreSQL through :func:`q` and
renders the result as metrics, tables or Plotly charts.
"""

import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import psycopg2
import streamlit as st

# Connection settings.  Each value can be overridden through an environment
# variable; the fallbacks are the values this script has always used, so an
# unconfigured deployment keeps working unchanged.
DB_HOST = os.environ.get("FOTKY_DB_HOST", "192.168.1.76")
DB_PORT = int(os.environ.get("FOTKY_DB_PORT", "5432"))
DB_NAME = os.environ.get("FOTKY_DB_NAME", "fotky_buzalkovi")
DB_USER = os.environ.get("FOTKY_DB_USER", "vladimir.buzalka")
# SECURITY(review): a real password is committed to source control here.
# Set FOTKY_DB_PASSWORD in the environment, rotate this password and then
# delete the literal fallback.
DB_PASSWORD = os.environ.get("FOTKY_DB_PASSWORD", "Vlado7309208104++")

# How long a query result is reused before the database is asked again.
QUERY_TTL_SECONDS = 600

st.set_page_config(page_title="FotkyBuzalkovi - Report", layout="wide", page_icon="📷")


@st.cache_resource
def get_conn():
    """Return the PostgreSQL connection, created once and then reused.

    Returns:
        An open ``psycopg2`` connection in autocommit mode.
    """
    conn = psycopg2.connect(
        host=DB_HOST,
        port=DB_PORT,
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD,
    )
    # The report only reads.  Autocommit prevents one failed query from
    # leaving the cached connection stuck in an aborted transaction, which
    # would make every later query fail as well.
    conn.autocommit = True
    return conn


@st.cache_data(ttl=QUERY_TTL_SECONDS, show_spinner=False)
def q(sql, params=None):
    """Run a SQL query and return the result as a DataFrame.

    Results are cached per ``(sql, params)`` for ``QUERY_TTL_SECONDS`` so that
    widget interactions, which rerun the whole script, do not repeat every
    query against the database.

    Args:
        sql: SQL text to execute.
        params: Optional query parameters passed through to ``pd.read_sql``.

    Returns:
        A ``pandas.DataFrame`` with one column per selected expression.
    """
    conn = get_conn()
    if conn.closed:
        # The cached connection was closed or found broken; drop it from the
        # resource cache and open a fresh one.
        get_conn.clear()
        conn = get_conn()
    return pd.read_sql(sql, conn, params=params)


def fmt_count(value):
    """Format a count with thousands separators, treating NULL/NaN as zero.

    Casting to ``int`` first keeps counts that arrived as floats (for example
    a row that mixes integer and numeric columns) from rendering as ``12.0``.
    """
    if value is None or pd.isna(value):
        return "0"
    return f"{int(value):,}"


st.title("📷 FotkyBuzalkovi — Průzkum dat")

# --- Overall statistics ---
st.header("Celkové statistiky")

c1, c2, c3, c4 = st.columns(4)

counts = q("""
    SELECT
        (SELECT COUNT(*) FROM photos) as photos,
        (SELECT COUNT(*) FROM photos WHERE exif_raw IS NOT NULL AND exif_raw != '{}') as s_exif,
        (SELECT COUNT(*) FROM photos WHERE gps_lat IS NOT NULL) as s_gps,
        (SELECT COUNT(*) FROM photos WHERE camera_model IS NOT NULL) as s_camera
""").iloc[0]

c1.metric("Celkem fotek", fmt_count(counts["photos"]))
c2.metric("S EXIF daty", fmt_count(counts["s_exif"]))
c3.metric("S GPS", fmt_count(counts["s_gps"]))
c4.metric("S kamerou", fmt_count(counts["s_camera"]))

# --- Backup pipeline ---
st.subheader("Zálohovací pipeline (sběr fotek)")
z1, z2, z3 = st.columns(3)

zcounts = q("""
    SELECT
        (SELECT COUNT(*) FROM zaloha_obrazku) as zalohy,
        (SELECT COUNT(*) FROM zdrojove_soubory) as zdroje,
        (SELECT COUNT(*) FROM zdrojove_soubory) - (SELECT COUNT(*) FROM zaloha_obrazku) as duplikaty
""").iloc[0]

z1.metric("Unikátních záloh", fmt_count(zcounts["zalohy"]))
z2.metric("Zdrojových souborů", fmt_count(zcounts["zdroje"]))
z3.metric("Duplikátních výskytů", fmt_count(zcounts["duplikaty"]))

st.divider()

# --- Photos per year ---
st.header("📅 Fotky po letech")

df_years = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok, COUNT(*) as pocet
    FROM photos
    WHERE taken_at IS NOT NULL
    GROUP BY rok
    ORDER BY rok
""")

fig = px.bar(df_years, x="rok", y="pocet", text="pocet",
             labels={"rok": "Rok", "pocet": "Počet fotek"})
fig.update_traces(textposition="outside", texttemplate="%{text:,}")
fig.update_layout(height=450)
st.plotly_chart(fig, use_container_width=True)

# --- Cameras per year ---
st.header("📸 Fotoaparáty po letech")

df_cam = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok,
           COALESCE(camera_model, '(neznámý)') as model,
           COUNT(*) as pocet
    FROM photos
    WHERE taken_at IS NOT NULL
    GROUP BY rok, model
    ORDER BY rok, pocet DESC
""")

years = sorted(df_cam["rok"].unique())
# Preselect the fifth most recent year.  Clamp at 0: with fewer than five
# years the raw index would be negative, which st.selectbox rejects.
selected_year = st.selectbox("Vyber rok:", years, index=max(0, len(years) - 5))
df_year_cam = df_cam[df_cam["rok"] == selected_year].head(15)

fig_cam = px.bar(df_year_cam, x="model", y="pocet", text="pocet",
                 labels={"model": "Fotoaparát", "pocet": "Počet fotek"},
                 title=f"Fotoaparáty v roce {selected_year}")
fig_cam.update_traces(textposition="outside")
fig_cam.update_layout(xaxis_tickangle=-45, height=500)
st.plotly_chart(fig_cam, use_container_width=True)

# Heatmap camera x year (top 15 cameras overall)
st.subheader("Heatmapa: top kamery × roky")

top_cameras = q("""
    SELECT camera_model, COUNT(*) as cnt
    FROM photos
    WHERE camera_model IS NOT NULL
    GROUP BY camera_model
    ORDER BY cnt DESC
    LIMIT 15
""")["camera_model"].tolist()

# Counts are additive, so aggregate with "sum" rather than pivot_table's
# default "mean"; for unique (model, year) pairs the result is the same.
df_heat = df_cam[df_cam["model"].isin(top_cameras)].pivot_table(
    index="model", columns="rok", values="pocet", aggfunc="sum", fill_value=0
)

if df_heat.empty:
    st.info("Žádná data pro heatmapu.")
else:
    fig_heat = px.imshow(df_heat,
                         labels=dict(x="Rok", y="Fotoaparát", color="Fotek"),
                         aspect="auto", color_continuous_scale="YlOrRd")
    fig_heat.update_layout(height=500)
    st.plotly_chart(fig_heat, use_container_width=True)

st.divider()

# --- Duplicates ---
st.header("🔄 Duplikáty")

d1, d2 = st.columns(2)

with d1:
    st.subheader("Identické pixely (sha256_pixels)")
    # COALESCE: SUM over zero groups is NULL, which cannot be formatted.
    df_dup_px = q("""
        SELECT COUNT(*) as skupin, COALESCE(SUM(cnt), 0) as fotek
        FROM (
            SELECT COUNT(*) as cnt
            FROM photos
            WHERE sha256_pixels IS NOT NULL
            GROUP BY sha256_pixels
            HAVING COUNT(*) > 1
        ) x
    """).iloc[0]
    st.metric("Skupin duplikátů", fmt_count(df_dup_px["skupin"]))
    st.metric("Fotek v duplikátech", fmt_count(df_dup_px["fotek"]))

with d2:
    st.subheader("Vizuálně podobné (phash)")
    df_dup_ph = q("""
        SELECT COUNT(*) as skupin, COALESCE(SUM(cnt), 0) as fotek
        FROM (
            SELECT COUNT(*) as cnt
            FROM photos
            WHERE phash IS NOT NULL
            GROUP BY phash
            HAVING COUNT(*) > 1
        ) x
    """).iloc[0]
    st.metric("Skupin podobných", fmt_count(df_dup_ph["skupin"]))
    st.metric("Fotek v podobných skupinách", fmt_count(df_dup_ph["fotek"]))

st.divider()

# --- GPS map ---
st.header("🗺️ GPS lokace")

df_gps = q("""
    SELECT gps_lat as lat, gps_lon as lon
    FROM photos
    WHERE gps_lat IS NOT NULL AND gps_lon IS NOT NULL
""")

if not df_gps.empty:
    # Cast explicitly so st.map always receives plain float columns.
    df_gps["lat"] = df_gps["lat"].astype(float)
    df_gps["lon"] = df_gps["lon"].astype(float)
    st.map(df_gps, size=2)

    st.subheader("Top lokace (zaokrouhleno na 0.1°)")
    df_gps_top = q("""
        SELECT ROUND(gps_lat::numeric, 1) as lat,
               ROUND(gps_lon::numeric, 1) as lon,
               COUNT(*) as pocet
        FROM photos
        WHERE gps_lat IS NOT NULL AND gps_lon IS NOT NULL
        GROUP BY lat, lon
        ORDER BY pocet DESC
        LIMIT 20
    """)
    st.dataframe(df_gps_top, use_container_width=True)

st.divider()

# --- Technical parameters ---
st.header("⚙️ Technické parametry")

tab_iso, tab_clona, tab_exp, tab_lens = st.tabs(["ISO", "Clona", "Expoziční čas", "Objektivy"])

with tab_iso:
    df_iso = q("""
        SELECT iso, COUNT(*) as pocet
        FROM photos
        WHERE iso IS NOT NULL
        GROUP BY iso
        ORDER BY pocet DESC
        LIMIT 20
    """)
    fig_iso = px.bar(df_iso, x="iso", y="pocet", text="pocet",
                     labels={"iso": "ISO", "pocet": "Počet fotek"})
    fig_iso.update_traces(textposition="outside")
    st.plotly_chart(fig_iso, use_container_width=True)

with tab_clona:
    df_ap = q("""
        SELECT aperture, COUNT(*) as pocet
        FROM photos
        WHERE aperture IS NOT NULL
        GROUP BY aperture
        ORDER BY pocet DESC
        LIMIT 20
    """)
    df_ap["label"] = "f/" + df_ap["aperture"].astype(str)
    fig_ap = px.bar(df_ap, x="label", y="pocet", text="pocet",
                    labels={"label": "Clona", "pocet": "Počet fotek"})
    fig_ap.update_traces(textposition="outside")
    st.plotly_chart(fig_ap, use_container_width=True)

with tab_exp:
    df_exp = q("""
        SELECT exposure_time, COUNT(*) as pocet
        FROM photos
        WHERE exposure_time IS NOT NULL
        GROUP BY exposure_time
        ORDER BY pocet DESC
        LIMIT 20
    """)
    fig_exp = px.bar(df_exp, x="exposure_time", y="pocet", text="pocet",
                     labels={"exposure_time": "Expoziční čas", "pocet": "Počet fotek"})
    fig_exp.update_traces(textposition="outside")
    fig_exp.update_layout(xaxis_tickangle=-45)
    st.plotly_chart(fig_exp, use_container_width=True)

with tab_lens:
    df_lens = q("""
        SELECT COALESCE(lens_model, '(neuvedeno)') as objektiv, COUNT(*) as pocet
        FROM photos
        GROUP BY objektiv
        ORDER BY pocet DESC
        LIMIT 15
    """)
    st.dataframe(df_lens, use_container_width=True)

st.divider()

# --- Resolution per year ---
st.header("📐 Megapixely po letech")

df_mp = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok,
           ROUND(AVG(megapixels)::numeric, 1) as prumer,
           ROUND(MAX(megapixels)::numeric, 1) as maximum
    FROM photos
    WHERE taken_at IS NOT NULL AND megapixels IS NOT NULL
    GROUP BY rok
    ORDER BY rok
""")

fig_mp = go.Figure()
fig_mp.add_trace(go.Scatter(x=df_mp["rok"], y=df_mp["prumer"],
                            mode="lines+markers", name="Průměr MP"))
fig_mp.add_trace(go.Scatter(x=df_mp["rok"], y=df_mp["maximum"],
                            mode="lines+markers", name="Maximum MP"))
fig_mp.update_layout(yaxis_title="Megapixely", xaxis_title="Rok", height=400)
st.plotly_chart(fig_mp, use_container_width=True)

st.divider()

# --- File formats and colour modes ---
st.header("🎨 Formáty a barvy")

f1, f2 = st.columns(2)

with f1:
    st.subheader("Přípony")
    df_ext = q("""
        SELECT COALESCE(file_ext, '(none)') as pripona, COUNT(*) as pocet
        FROM photos
        GROUP BY pripona
        ORDER BY pocet DESC
    """)
    st.dataframe(df_ext, use_container_width=True)

with f2:
    st.subheader("Barevné módy")
    df_mode = q("""
        SELECT COALESCE(mode, '(none)') as mod, COUNT(*) as pocet
        FROM photos
        GROUP BY mod
        ORDER BY pocet DESC
    """)
    fig_mode = px.pie(df_mode, values="pocet", names="mod")
    st.plotly_chart(fig_mode, use_container_width=True)

st.divider()

# --- Photos without a camera model ---
st.header("❓ Fotky bez kamery — analýza názvů")

tab_2015, tab_2022 = st.tabs(["2015–2016", "2022"])

with tab_2015:
    df_unk15 = q("""
        SELECT file_name, file_size, taken_at, taken_at_source
        FROM photos
        WHERE camera_model IS NULL
          AND EXTRACT(YEAR FROM taken_at) BETWEEN 2015 AND 2016
        ORDER BY taken_at
        LIMIT 50
    """)
    st.dataframe(df_unk15, use_container_width=True)
    st.info("Přejmenované importním skriptem — vzor: `[NO MODEL] [MD5...]`")

with tab_2022:
    df_unk22 = q("""
        SELECT file_name, file_size, taken_at, taken_at_source
        FROM photos
        WHERE camera_model IS NULL
          AND EXTRACT(YEAR FROM taken_at) = 2022
        ORDER BY taken_at
        LIMIT 50
    """)
    st.dataframe(df_unk22, use_container_width=True)

    df_prefix = q("""
        SELECT LEFT(file_name, 10) as prefix, COUNT(*) as pocet
        FROM photos
        WHERE camera_model IS NULL
          AND EXTRACT(YEAR FROM taken_at) = 2022
        GROUP BY prefix
        ORDER BY pocet DESC
        LIMIT 10
    """)
    st.subheader("Prefixes")
    st.dataframe(df_prefix, use_container_width=True)
    # NOTE(review): the figures in this message are hard-coded, not queried.
    st.info("4 194 z 4 210 importováno najednou 25.9.2023 — pravděpodobně hromadný export z iCloudu/Google Photos")

st.divider()

# --- Time patterns ---
st.header("⏰ Časové vzory")

tab_month, tab_dow, tab_hour, tab_topdays = st.tabs(["Měsíce", "Dny v týdnu", "Hodiny", "Top dny (události)"])

with tab_month:
    df_month = q("""
        SELECT EXTRACT(MONTH FROM taken_at)::INT as mesic, COUNT(*) as pocet
        FROM photos
        WHERE taken_at IS NOT NULL
        GROUP BY mesic
        ORDER BY mesic
    """)
    nazvy = {1:'Leden',2:'Únor',3:'Březen',4:'Duben',5:'Květen',6:'Červen',
             7:'Červenec',8:'Srpen',9:'Září',10:'Říjen',11:'Listopad',12:'Prosinec'}
    df_month["nazev"] = df_month["mesic"].map(nazvy)
    fig_m = px.bar(df_month, x="nazev", y="pocet", text="pocet",
                   labels={"nazev": "Měsíc", "pocet": "Počet fotek"})
    fig_m.update_traces(textposition="outside")
    st.plotly_chart(fig_m, use_container_width=True)

with tab_dow:
    df_dow = q("""
        SELECT EXTRACT(DOW FROM taken_at)::INT as den, COUNT(*) as pocet
        FROM photos
        WHERE taken_at IS NOT NULL
        GROUP BY den
        ORDER BY den
    """)
    # PostgreSQL's DOW numbers days 0 (Sunday) through 6 (Saturday).
    dny = {0:'Neděle',1:'Pondělí',2:'Úterý',3:'Středa',4:'Čtvrtek',5:'Pátek',6:'Sobota'}
    df_dow["nazev"] = df_dow["den"].map(dny)
    fig_d = px.bar(df_dow, x="nazev", y="pocet", text="pocet",
                   labels={"nazev": "Den", "pocet": "Počet fotek"})
    fig_d.update_traces(textposition="outside")
    st.plotly_chart(fig_d, use_container_width=True)

with tab_hour:
    df_hour = q("""
        SELECT EXTRACT(HOUR FROM taken_at)::INT as hodina, COUNT(*) as pocet
        FROM photos
        WHERE taken_at IS NOT NULL
        GROUP BY hodina
        ORDER BY hodina
    """)
    fig_h = px.bar(df_hour, x="hodina", y="pocet", text="pocet",
                   labels={"hodina": "Hodina", "pocet": "Počet fotek"})
    fig_h.update_traces(textposition="outside", texttemplate="%{text:,}")
    fig_h.update_layout(xaxis=dict(dtick=1))
    st.plotly_chart(fig_h, use_container_width=True)

with tab_topdays:
    df_topdays = q("""
        SELECT taken_at::date as den, COUNT(*) as pocet
        FROM photos
        WHERE taken_at IS NOT NULL
        GROUP BY den
        ORDER BY pocet DESC
        LIMIT 30
    """)
    fig_td = px.bar(df_topdays, x="den", y="pocet", text="pocet",
                    labels={"den": "Datum", "pocet": "Počet fotek"})
    fig_td.update_traces(textposition="outside")
    fig_td.update_layout(xaxis_tickangle=-45, height=500)
    st.plotly_chart(fig_td, use_container_width=True)

st.divider()

# --- EXIF coverage ---
st.header("📊 EXIF pokrytí")

df_coverage = q("""
    SELECT
        COUNT(*) FILTER (WHERE exif_raw IS NOT NULL AND exif_raw != '{}') as s_exif,
        COUNT(*) FILTER (WHERE taken_at IS NOT NULL) as s_taken_at,
        COUNT(*) FILTER (WHERE camera_model IS NOT NULL) as s_camera,
        COUNT(*) FILTER (WHERE iso IS NOT NULL) as s_iso,
        COUNT(*) FILTER (WHERE gps_lat IS NOT NULL) as s_gps,
        COUNT(*) FILTER (WHERE aperture IS NOT NULL) as s_aperture,
        COUNT(*) FILTER (WHERE lens_model IS NOT NULL) as s_lens,
        COUNT(*) as celkem
    FROM photos
""").iloc[0]

# Labels paired with the result column each one reports; keeping them side
# by side prevents the two lists from drifting out of order.
coverage_fields = [
    ("EXIF data", "s_exif"),
    ("Datum pořízení", "s_taken_at"),
    ("Model kamery", "s_camera"),
    ("ISO", "s_iso"),
    ("Clona", "s_aperture"),
    ("GPS", "s_gps"),
    ("Objektiv", "s_lens"),
]
categories = [label for label, _ in coverage_fields]
values = [int(df_coverage[column]) for _, column in coverage_fields]
total = int(df_coverage["celkem"])
# An empty photos table would otherwise divide by zero.
pct = [round(v / total * 100, 1) if total else 0.0 for v in values]

fig_cov = go.Figure(go.Bar(
    x=pct, y=categories, orientation='h',
    text=[f"{v:,} ({p}%)" for v, p in zip(values, pct)],
    textposition="auto"
))
fig_cov.update_layout(xaxis_title="% fotek", height=350)
st.plotly_chart(fig_cov, use_container_width=True)

st.divider()
st.caption(f"FotkyBuzalkovi — data z PostgreSQL {DB_HOST} / {DB_NAME}")