Files
fotkyBuzalkovi/20 PrůzkumFotek/report.py
T
administrator 662c890257 notebookVb
2026-05-24 07:45:59 +02:00

362 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import psycopg2
import streamlit as st
# Browser tab title and icon; the report uses the wide page layout.
st.set_page_config(
    page_title="FotkyBuzalkovi - Report",
    layout="wide",
    page_icon="📷",
)
@st.cache_resource
def get_conn():
    """Open the PostgreSQL connection shared by every query in this report.

    Connection settings are read from environment variables so that no
    credentials live in the source file:

    * ``FOTKY_DB_HOST``     (default ``192.168.1.76``)
    * ``FOTKY_DB_PORT``     (default ``5432``)
    * ``FOTKY_DB_NAME``     (default ``fotky_buzalkovi``)
    * ``FOTKY_DB_USER``     (default ``vladimir.buzalka``)
    * ``FOTKY_DB_PASSWORD`` (required, no default)

    Returns:
        An open ``psycopg2`` connection. The ``st.cache_resource`` decorator
        caches the returned object, so callers share one connection.

    Raises:
        RuntimeError: if ``FOTKY_DB_PASSWORD`` is not set.
    """
    # NOTE(review): a password used to be committed here in plain text;
    # it should be rotated on the database server.
    password = os.environ.get("FOTKY_DB_PASSWORD")
    if not password:
        raise RuntimeError(
            "FOTKY_DB_PASSWORD is not set; export the database password "
            "in the environment before starting the report."
        )
    return psycopg2.connect(
        host=os.environ.get("FOTKY_DB_HOST", "192.168.1.76"),
        port=int(os.environ.get("FOTKY_DB_PORT", "5432")),
        dbname=os.environ.get("FOTKY_DB_NAME", "fotky_buzalkovi"),
        user=os.environ.get("FOTKY_DB_USER", "vladimir.buzalka"),
        password=password,
    )
def q(sql, params=None):
    """Run a read-only SQL query and return the result as a DataFrame.

    Args:
        sql: SQL text to execute.
        params: optional query parameters, passed through to
            ``pd.read_sql``.

    Returns:
        ``pandas.DataFrame`` with one column per selected expression.

    Raises:
        Whatever ``pd.read_sql`` raises for a failing query; the error is
        propagated unchanged.
    """
    conn = get_conn()
    try:
        return pd.read_sql(sql, conn, params=params)
    finally:
        # The connection is cached and reused for every query. Without
        # ending the transaction, one failed statement would leave it in
        # the "transaction aborted" state and break all later queries;
        # successful reads would keep it idle in an open transaction.
        # All queries here are SELECTs, so rolling back loses nothing.
        if not conn.closed:
            conn.rollback()
st.title("📷 FotkyBuzalkovi — Průzkum dat")

# --- Overall statistics ---
st.header("Celkové statistiky")
c1, c2, c3, c4 = st.columns(4)

# A single round trip: each scalar subquery becomes one column of the
# one-row result, and .iloc[0] turns that row into a Series keyed by alias.
counts = q("""
    SELECT
        (SELECT COUNT(*) FROM photos) as photos,
        (SELECT COUNT(*) FROM photos WHERE exif_raw IS NOT NULL AND exif_raw != '{}') as s_exif,
        (SELECT COUNT(*) FROM photos WHERE gps_lat IS NOT NULL) as s_gps,
        (SELECT COUNT(*) FROM photos WHERE camera_model IS NOT NULL) as s_camera
""").iloc[0]

# (column widget, metric label, key in `counts`), rendered left to right.
overall_metrics = (
    (c1, "Celkem fotek", "photos"),
    (c2, "S EXIF daty", "s_exif"),
    (c3, "S GPS", "s_gps"),
    (c4, "S kamerou", "s_camera"),
)
for column, label, key in overall_metrics:
    column.metric(label, f"{counts[key]:,}")
# --- Backup pipeline ---
st.subheader("Zálohovací pipeline (sběr fotek)")
z1, z2, z3 = st.columns(3)

# "duplikaty" is the number of source files minus the number of backups.
zcounts = q("""
    SELECT
        (SELECT COUNT(*) FROM zaloha_obrazku) as zalohy,
        (SELECT COUNT(*) FROM zdrojove_soubory) as zdroje,
        (SELECT COUNT(*) FROM zdrojove_soubory) - (SELECT COUNT(*) FROM zaloha_obrazku) as duplikaty
""").iloc[0]

# (column widget, metric label, key in `zcounts`), rendered left to right.
backup_metrics = (
    (z1, "Unikátních záloh", "zalohy"),
    (z2, "Zdrojových souborů", "zdroje"),
    (z3, "Duplikátních výskytů", "duplikaty"),
)
for column, label, key in backup_metrics:
    column.metric(label, f"{zcounts[key]:,}")

st.divider()
# --- Photos per year ---
st.header("📅 Fotky po letech")

# Photos without a capture timestamp are left out of the yearly counts.
df_years = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok, COUNT(*) as pocet
    FROM photos WHERE taken_at IS NOT NULL
    GROUP BY rok ORDER BY rok
""")

year_axis_labels = {"rok": "Rok", "pocet": "Počet fotek"}
fig = px.bar(
    df_years,
    x="rok",
    y="pocet",
    text="pocet",
    labels=year_axis_labels,
)
# Print the count above each bar with a thousands separator.
fig.update_traces(textposition="outside", texttemplate="%{text:,}")
fig.update_layout(height=450)
st.plotly_chart(fig, use_container_width=True)
# --- Cameras per year ---
st.header("📸 Fotoaparáty po letech")

# One row per (year, camera model); photos with no model are grouped under
# the '(neznámý)' placeholder. Rows are ordered by year, then by count
# descending, so .head(15) below yields the top 15 models of a year.
df_cam = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok,
           COALESCE(camera_model, '(neznámý)') as model,
           COUNT(*) as pocet
    FROM photos WHERE taken_at IS NOT NULL
    GROUP BY rok, model ORDER BY rok, pocet DESC
""")

available_years = sorted(df_cam["rok"].unique())
# Preselect the fifth year from the end, as before. Clamp to 0 because a
# negative index (fewer than five distinct years) is rejected by
# st.selectbox.
default_year_index = max(0, len(available_years) - 5)
selected_year = st.selectbox("Vyber rok:", available_years, index=default_year_index)

df_year_cam = df_cam[df_cam["rok"] == selected_year].head(15)
fig_cam = px.bar(
    df_year_cam,
    x="model",
    y="pocet",
    text="pocet",
    labels={"model": "Fotoaparát", "pocet": "Počet fotek"},
    title=f"Fotoaparáty v roce {selected_year}",
)
fig_cam.update_traces(textposition="outside")
fig_cam.update_layout(xaxis_tickangle=-45, height=500)
st.plotly_chart(fig_cam, use_container_width=True)
# Heatmap camera × year (top 15 cameras overall)
st.subheader("Heatmapa: top kamery × roky")

# The 15 camera models with the most photos across all years.
top_cameras_df = q("""
    SELECT camera_model, COUNT(*) as cnt FROM photos
    WHERE camera_model IS NOT NULL
    GROUP BY camera_model ORDER BY cnt DESC LIMIT 15
""")
top_cameras = top_cameras_df["camera_model"].tolist()

# Reuse the per-year camera counts, keep only the top models, and spread
# them into a model × year grid; missing combinations become 0.
is_top_camera = df_cam["model"].isin(top_cameras)
df_heat = df_cam[is_top_camera].pivot_table(
    index="model",
    columns="rok",
    values="pocet",
    fill_value=0,
)

heat_labels = {"x": "Rok", "y": "Fotoaparát", "color": "Fotek"}
fig_heat = px.imshow(
    df_heat,
    labels=heat_labels,
    aspect="auto",
    color_continuous_scale="YlOrRd",
)
fig_heat.update_layout(height=500)
st.plotly_chart(fig_heat, use_container_width=True)

st.divider()
# --- Duplicates ---
st.header("🔄 Duplikáty")
d1, d2 = st.columns(2)

# In both queries SUM(cnt) is NULL when no group has more than one photo
# and NUMERIC otherwise. COALESCE(...)::BIGINT makes the value a plain
# integer in every case, so the metric always renders as a whole number.
with d1:
    st.subheader("Identické pixely (sha256_pixels)")
    # Groups of photos that share the same sha256_pixels value.
    df_dup_px = q("""
        SELECT COUNT(*) as skupin, COALESCE(SUM(cnt), 0)::BIGINT as fotek FROM (
            SELECT COUNT(*) as cnt FROM photos WHERE sha256_pixels IS NOT NULL
            GROUP BY sha256_pixels HAVING COUNT(*) > 1
        ) x
    """).iloc[0]
    st.metric("Skupin duplikátů", f"{df_dup_px['skupin']:,}")
    st.metric("Fotek v duplikátech", f"{df_dup_px['fotek']:,}")

with d2:
    st.subheader("Vizuálně podobné (phash)")
    # Groups of photos that share the same phash value.
    df_dup_ph = q("""
        SELECT COUNT(*) as skupin, COALESCE(SUM(cnt), 0)::BIGINT as fotek FROM (
            SELECT COUNT(*) as cnt FROM photos WHERE phash IS NOT NULL
            GROUP BY phash HAVING COUNT(*) > 1
        ) x
    """).iloc[0]
    st.metric("Skupin podobných", f"{df_dup_ph['skupin']:,}")
    st.metric("Fotek v podobných skupinách", f"{df_dup_ph['fotek']:,}")

st.divider()
# --- GPS map ---
st.header("🗺️ GPS lokace")

# Only photos that have both coordinates.
df_gps = q("""
    SELECT gps_lat as lat, gps_lon as lon
    FROM photos WHERE gps_lat IS NOT NULL AND gps_lon IS NOT NULL
""")

# NOTE(review): the reviewed copy of this file had lost its indentation;
# the guarded body is assumed to be the float conversion plus st.map only.
# Confirm against the original.
if not df_gps.empty:
    # Convert both coordinate columns to plain floats before plotting.
    df_gps = df_gps.astype({"lat": float, "lon": float})
    st.map(df_gps, size=2)

st.subheader("Top lokace (zaokrouhleno na 0.1°)")
# Bucket coordinates to one decimal place and list the 20 fullest buckets.
df_gps_top = q("""
    SELECT ROUND(gps_lat::numeric, 1) as lat, ROUND(gps_lon::numeric, 1) as lon,
           COUNT(*) as pocet
    FROM photos WHERE gps_lat IS NOT NULL AND gps_lon IS NOT NULL
    GROUP BY lat, lon ORDER BY pocet DESC LIMIT 20
""")
st.dataframe(df_gps_top, use_container_width=True)

st.divider()
# --- Technical parameters ---
st.header("⚙️ Technické parametry")
tech_tab_titles = ["ISO", "Clona", "Expoziční čas", "Objektivy"]
tab_iso, tab_clona, tab_exp, tab_lens = st.tabs(tech_tab_titles)

with tab_iso:
    # The 20 most frequent ISO values.
    df_iso = q("""
        SELECT iso, COUNT(*) as pocet FROM photos WHERE iso IS NOT NULL
        GROUP BY iso ORDER BY pocet DESC LIMIT 20
    """)
    iso_labels = {"iso": "ISO", "pocet": "Počet fotek"}
    fig_iso = px.bar(df_iso, x="iso", y="pocet", text="pocet", labels=iso_labels)
    fig_iso.update_traces(textposition="outside")
    st.plotly_chart(fig_iso, use_container_width=True)

with tab_clona:
    # The 20 most frequent aperture values.
    df_ap = q("""
        SELECT aperture, COUNT(*) as pocet FROM photos WHERE aperture IS NOT NULL
        GROUP BY aperture ORDER BY pocet DESC LIMIT 20
    """)
    # Text label such as "f/2.8", used as the x value of each bar.
    df_ap = df_ap.assign(label="f/" + df_ap["aperture"].astype(str))
    aperture_labels = {"label": "Clona", "pocet": "Počet fotek"}
    fig_ap = px.bar(df_ap, x="label", y="pocet", text="pocet", labels=aperture_labels)
    fig_ap.update_traces(textposition="outside")
    st.plotly_chart(fig_ap, use_container_width=True)

with tab_exp:
    # The 20 most frequent exposure times.
    df_exp = q("""
        SELECT exposure_time, COUNT(*) as pocet FROM photos WHERE exposure_time IS NOT NULL
        GROUP BY exposure_time ORDER BY pocet DESC LIMIT 20
    """)
    exposure_labels = {"exposure_time": "Expoziční čas", "pocet": "Počet fotek"}
    fig_exp = px.bar(
        df_exp,
        x="exposure_time",
        y="pocet",
        text="pocet",
        labels=exposure_labels,
    )
    fig_exp.update_traces(textposition="outside")
    fig_exp.update_layout(xaxis_tickangle=-45)
    st.plotly_chart(fig_exp, use_container_width=True)

with tab_lens:
    # The 15 most frequent lenses; photos with no lens recorded are
    # counted under the '(neuvedeno)' placeholder.
    df_lens = q("""
        SELECT COALESCE(lens_model, '(neuvedeno)') as objektiv, COUNT(*) as pocet
        FROM photos GROUP BY objektiv ORDER BY pocet DESC LIMIT 15
    """)
    st.dataframe(df_lens, use_container_width=True)

st.divider()
# --- Resolution per year ---
st.header("📐 Megapixely po letech")

# Average and maximum megapixels per capture year, rounded to one decimal.
df_mp = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok,
           ROUND(AVG(megapixels)::numeric, 1) as prumer,
           ROUND(MAX(megapixels)::numeric, 1) as maximum
    FROM photos WHERE taken_at IS NOT NULL AND megapixels IS NOT NULL
    GROUP BY rok ORDER BY rok
""")

fig_mp = go.Figure()
# One line per statistic: (result column, legend name).
for mp_column, trace_name in (("prumer", "Průměr MP"), ("maximum", "Maximum MP")):
    fig_mp.add_trace(
        go.Scatter(
            x=df_mp["rok"],
            y=df_mp[mp_column],
            mode="lines+markers",
            name=trace_name,
        )
    )
fig_mp.update_layout(yaxis_title="Megapixely", xaxis_title="Rok", height=400)
st.plotly_chart(fig_mp, use_container_width=True)

st.divider()
# --- File formats and colour modes ---
st.header("🎨 Formáty a barvy")
f1, f2 = st.columns(2)

with f1:
    st.subheader("Přípony")
    # Photo count per file extension; missing extensions show as '(none)'.
    df_ext = q("""
        SELECT COALESCE(file_ext, '(none)') as pripona, COUNT(*) as pocet
        FROM photos GROUP BY pripona ORDER BY pocet DESC
    """)
    st.dataframe(df_ext, use_container_width=True)

with f2:
    st.subheader("Barevné módy")
    # Photo count per colour mode; missing modes show as '(none)'.
    df_mode = q("""
        SELECT COALESCE(mode, '(none)') as mod, COUNT(*) as pocet
        FROM photos GROUP BY mod ORDER BY pocet DESC
    """)
    fig_mode = px.pie(
        df_mode,
        values="pocet",
        names="mod",
    )
    st.plotly_chart(fig_mode, use_container_width=True)

st.divider()
# --- Photos without a camera model ---
st.header("❓ Fotky bez kamery — analýza názvů")
# The first tab covers the years 2015 through 2016 (see the BETWEEN filter
# below), so its label is written as a range.
tab_2015, tab_2022 = st.tabs(["2015–2016", "2022"])

with tab_2015:
    # First 50 photos (by capture time) from 2015-2016 with no camera model.
    df_unk15 = q("""
        SELECT file_name, file_size, taken_at, taken_at_source
        FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) BETWEEN 2015 AND 2016
        ORDER BY taken_at LIMIT 50
    """)
    st.dataframe(df_unk15, use_container_width=True)
    st.info("Přejmenované importním skriptem — vzor: `[NO MODEL] [MD5...]`")

with tab_2022:
    # First 50 photos (by capture time) from 2022 with no camera model.
    df_unk22 = q("""
        SELECT file_name, file_size, taken_at, taken_at_source
        FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) = 2022
        ORDER BY taken_at LIMIT 50
    """)
    st.dataframe(df_unk22, use_container_width=True)

    # The ten most common 10-character file-name prefixes among those photos.
    df_prefix = q("""
        SELECT LEFT(file_name, 10) as prefix, COUNT(*) as pocet FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) = 2022
        GROUP BY prefix ORDER BY pocet DESC LIMIT 10
    """)
    st.subheader("Prefixes")
    st.dataframe(df_prefix, use_container_width=True)
    st.info("4 194 z 4 210 importováno najednou 25.9.2023 — pravděpodobně hromadný export z iCloudu/Google Photos")

st.divider()
# --- Time patterns ---
st.header("⏰ Časové vzory")
time_tab_titles = ["Měsíce", "Dny v týdnu", "Hodiny", "Top dny (události)"]
tab_month, tab_dow, tab_hour, tab_topdays = st.tabs(time_tab_titles)

with tab_month:
    # Photo count per calendar month (1-12), all years combined.
    df_month = q("""
        SELECT EXTRACT(MONTH FROM taken_at)::INT as mesic, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL GROUP BY mesic ORDER BY mesic
    """)
    # Month number (1-12) -> Czech month name.
    nazvy = dict(enumerate(
        ['Leden', 'Únor', 'Březen', 'Duben', 'Květen', 'Červen',
         'Červenec', 'Srpen', 'Září', 'Říjen', 'Listopad', 'Prosinec'],
        start=1,
    ))
    df_month["nazev"] = df_month["mesic"].map(nazvy)
    month_labels = {"nazev": "Měsíc", "pocet": "Počet fotek"}
    fig_m = px.bar(df_month, x="nazev", y="pocet", text="pocet", labels=month_labels)
    fig_m.update_traces(textposition="outside")
    st.plotly_chart(fig_m, use_container_width=True)

with tab_dow:
    # Photo count per day of week as numbered by EXTRACT(DOW ...).
    df_dow = q("""
        SELECT EXTRACT(DOW FROM taken_at)::INT as den, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL GROUP BY den ORDER BY den
    """)
    # Day number -> Czech day name; 0 maps to Sunday ('Neděle').
    dny = dict(enumerate(
        ['Neděle', 'Pondělí', 'Úterý', 'Středa', 'Čtvrtek', 'Pátek', 'Sobota']
    ))
    df_dow["nazev"] = df_dow["den"].map(dny)
    dow_labels = {"nazev": "Den", "pocet": "Počet fotek"}
    fig_d = px.bar(df_dow, x="nazev", y="pocet", text="pocet", labels=dow_labels)
    fig_d.update_traces(textposition="outside")
    st.plotly_chart(fig_d, use_container_width=True)

with tab_hour:
    # Photo count per hour of day.
    df_hour = q("""
        SELECT EXTRACT(HOUR FROM taken_at)::INT as hodina, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL GROUP BY hodina ORDER BY hodina
    """)
    hour_labels = {"hodina": "Hodina", "pocet": "Počet fotek"}
    fig_h = px.bar(df_hour, x="hodina", y="pocet", text="pocet", labels=hour_labels)
    fig_h.update_traces(textposition="outside", texttemplate="%{text:,}")
    # One tick per hour on the x axis.
    fig_h.update_layout(xaxis={"dtick": 1})
    st.plotly_chart(fig_h, use_container_width=True)

with tab_topdays:
    # The 30 calendar days with the most photos.
    df_topdays = q("""
        SELECT taken_at::date as den, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL
        GROUP BY den ORDER BY pocet DESC LIMIT 30
    """)
    topdays_labels = {"den": "Datum", "pocet": "Počet fotek"}
    fig_td = px.bar(df_topdays, x="den", y="pocet", text="pocet", labels=topdays_labels)
    fig_td.update_traces(textposition="outside")
    fig_td.update_layout(xaxis_tickangle=-45, height=500)
    st.plotly_chart(fig_td, use_container_width=True)

st.divider()
# --- EXIF coverage ---
st.header("📊 EXIF pokrytí")

# One pass over photos: each FILTER clause counts the rows where the given
# field is present; "celkem" is the total row count.
df_coverage = q("""
    SELECT
        COUNT(*) FILTER (WHERE exif_raw IS NOT NULL AND exif_raw != '{}') as s_exif,
        COUNT(*) FILTER (WHERE taken_at IS NOT NULL) as s_taken_at,
        COUNT(*) FILTER (WHERE camera_model IS NOT NULL) as s_camera,
        COUNT(*) FILTER (WHERE iso IS NOT NULL) as s_iso,
        COUNT(*) FILTER (WHERE gps_lat IS NOT NULL) as s_gps,
        COUNT(*) FILTER (WHERE aperture IS NOT NULL) as s_aperture,
        COUNT(*) FILTER (WHERE lens_model IS NOT NULL) as s_lens,
        COUNT(*) as celkem
    FROM photos
""").iloc[0]

# (chart label, result column), kept as pairs so a label cannot drift away
# from the column it describes.
coverage_fields = [
    ("EXIF data", "s_exif"),
    ("Datum pořízení", "s_taken_at"),
    ("Model kamery", "s_camera"),
    ("ISO", "s_iso"),
    ("Clona", "s_aperture"),
    ("GPS", "s_gps"),
    ("Objektiv", "s_lens"),
]
categories = [label for label, _ in coverage_fields]
values = [int(df_coverage[column]) for _, column in coverage_fields]
total = int(df_coverage["celkem"])
# An empty photos table gives total == 0; report 0 % rather than dividing
# by zero.
pct = [round(v / total * 100, 1) if total else 0.0 for v in values]

fig_cov = go.Figure(go.Bar(
    x=pct,
    y=categories,
    orientation='h',
    text=[f"{v:,} ({p}%)" for v, p in zip(values, pct)],
    textposition="auto",
))
fig_cov.update_layout(xaxis_title="% fotek", height=350)
st.plotly_chart(fig_cov, use_container_width=True)

st.divider()
st.caption("FotkyBuzalkovi — data z PostgreSQL 192.168.1.76 / fotky_buzalkovi")