notebookVb
This commit is contained in:
@@ -0,0 +1,164 @@
|
|||||||
|
"""Print an exploratory text report about the ``photos`` table.

The script connects to the ``fotky_buzalkovi`` PostgreSQL database, runs a
series of aggregate queries (duplicates, EXIF parameters, GPS, file formats,
time patterns) and prints the results to stdout.
"""
import contextlib
import io
import sys

import psycopg2

# Re-wrap stdout as UTF-8 so the accented headings below can be printed even
# when the console's default encoding is not UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')


def print_rows(cur, sql, fmt):
    """Execute ``sql`` on ``cur`` and print ``fmt(row)`` for every result row.

    Args:
        cur: Open database cursor.
        sql: Query text to execute.
        fmt: Callable turning one result row (a tuple) into the line to print.
    """
    cur.execute(sql)
    for row in cur.fetchall():
        print(fmt(row))


def key_count(row):
    """Format a ``(key, count)`` row the way most sections print it."""
    return f' {row[0]}: {row[1]}'


# SECURITY: no password in the source. libpq (used by psycopg2) reads it from
# the PGPASSWORD environment variable or from ~/.pgpass. The password that was
# committed previously is in the repository history and should be rotated.
# closing() + the cursor context manager release both objects even when one of
# the queries raises.
with contextlib.closing(psycopg2.connect(
        host='192.168.1.76', port=5432, dbname='fotky_buzalkovi',
        user='vladimir.buzalka')) as conn, conn.cursor() as cur:

    # 1. Duplicates by sha256_pixels
    print('=== DUPLIKÁTY sha256_pixels ===')
    print_rows(cur, '''SELECT sha256_pixels, COUNT(*) as cnt FROM photos
        WHERE sha256_pixels IS NOT NULL GROUP BY sha256_pixels HAVING COUNT(*) > 1
        ORDER BY cnt DESC LIMIT 10''',
               lambda r: f' {r[0][:16]}... : {r[1]}x')
    # COALESCE: SUM() is NULL when no group qualifies; report 0 instead of None.
    cur.execute('''SELECT COUNT(*), COALESCE(SUM(cnt), 0) FROM (
        SELECT COUNT(*) as cnt FROM photos WHERE sha256_pixels IS NOT NULL
        GROUP BY sha256_pixels HAVING COUNT(*) > 1) x''')
    r = cur.fetchone()
    print(f'Celkem skupin duplikátů: {r[0]}, fotek v duplikátech: {r[1]}')

    # 2. Identical phash
    print('\n=== IDENTICKÉ phash ===')
    print_rows(cur, '''SELECT phash, COUNT(*) as cnt FROM photos
        WHERE phash IS NOT NULL GROUP BY phash HAVING COUNT(*) > 1
        ORDER BY cnt DESC LIMIT 10''',
               lambda r: f' phash={r[0]}: {r[1]}x')
    cur.execute('''SELECT COUNT(*), COALESCE(SUM(cnt), 0) FROM (
        SELECT COUNT(*) as cnt FROM photos WHERE phash IS NOT NULL
        GROUP BY phash HAVING COUNT(*) > 1) x''')
    r = cur.fetchone()
    print(f'Celkem skupin: {r[0]}, fotek: {r[1]}')

    # 3. Screenshots
    print('\n=== SCREENSHOTY ===')
    cur.execute('SELECT COUNT(*) FROM photos WHERE is_screenshot = true')
    print(f'is_screenshot=true: {cur.fetchone()[0]}')

    # 4. Lenses
    print('\n=== TOP OBJEKTIVY ===')
    print_rows(cur, '''SELECT COALESCE(lens_model, '(neuvedeno)') as lens, COUNT(*) as cnt
        FROM photos GROUP BY lens ORDER BY cnt DESC LIMIT 15''',
               lambda r: f' {r[1]:>6} {r[0]}')

    # 5. ISO distribution
    print('\n=== ISO DISTRIBUCE ===')
    print_rows(cur, '''SELECT iso, COUNT(*) as cnt FROM photos WHERE iso IS NOT NULL
        GROUP BY iso ORDER BY cnt DESC LIMIT 15''',
               lambda r: f' ISO {r[0]:>6}: {r[1]}')

    # 6. Aperture
    print('\n=== CLONA (aperture) TOP ===')
    print_rows(cur, '''SELECT aperture, COUNT(*) as cnt FROM photos WHERE aperture IS NOT NULL
        GROUP BY aperture ORDER BY cnt DESC LIMIT 15''',
               lambda r: f' f/{r[0]}: {r[1]}')

    # 7. Exposure time
    print('\n=== EXPOZIČNÍ ČAS TOP ===')
    print_rows(cur, '''SELECT exposure_time, COUNT(*) as cnt FROM photos WHERE exposure_time IS NOT NULL
        GROUP BY exposure_time ORDER BY cnt DESC LIMIT 15''',
               key_count)

    # 8. Top GPS locations
    print('\n=== GPS TOP LOKACE (zaokrouhleno na 0.1 stupne) ===')
    print_rows(cur, '''SELECT ROUND(gps_lat::numeric, 1) as lat, ROUND(gps_lon::numeric, 1) as lon, COUNT(*) as cnt
        FROM photos WHERE gps_lat IS NOT NULL AND gps_lon IS NOT NULL
        GROUP BY lat, lon ORDER BY cnt DESC LIMIT 20''',
               lambda r: f' [{r[0]}, {r[1]}]: {r[2]} fotek')

    cur.execute('''SELECT MIN(gps_lat), MAX(gps_lat), MIN(gps_lon), MAX(gps_lon)
        FROM photos WHERE gps_lat IS NOT NULL''')
    r = cur.fetchone()
    print(f' Rozsah Lat: {r[0]} .. {r[1]}')
    print(f' Rozsah Lon: {r[2]} .. {r[3]}')

    # 9. Megapixels per year
    print('\n=== PRUMERNE MEGAPIXELY PO LETECH ===')
    print_rows(cur, '''SELECT EXTRACT(YEAR FROM taken_at)::INT as rok,
        ROUND(AVG(megapixels)::numeric, 1) as avg_mp,
        ROUND(MAX(megapixels)::numeric, 1) as max_mp, COUNT(*) as cnt
        FROM photos WHERE taken_at IS NOT NULL AND megapixels IS NOT NULL
        GROUP BY rok ORDER BY rok''',
               lambda r: f' {r[0]}: avg={r[1]} MP, max={r[2]} MP ({r[3]} fotek)')

    # 10. File formats
    print('\n=== FORMATY ===')
    print_rows(cur, '''SELECT COALESCE(file_ext, '(none)') as ext, COUNT(*) as cnt
        FROM photos GROUP BY ext ORDER BY cnt DESC''',
               key_count)

    # 11. Transparency
    print('\n=== PRUHLEDNOST ===')
    cur.execute('SELECT COUNT(*) FROM photos WHERE has_transparency = true')
    print(f' S pruhlednosti: {cur.fetchone()[0]}')

    # 12. Colour modes
    print('\n=== BAREVNE MODY ===')
    print_rows(cur, '''SELECT COALESCE(mode, '(none)') as m, COUNT(*) as cnt
        FROM photos GROUP BY m ORDER BY cnt DESC''',
               key_count)

    # 13. Photos without a camera model - file name patterns
    print('\n=== NEZNAME FOTKY 2015-2016 (bez kamery) - vzory nazvu ===')
    print_rows(cur, '''SELECT file_name FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) BETWEEN 2015 AND 2016
        LIMIT 30''',
               lambda r: f' {r[0]}')

    print('\n=== NEZNAME FOTKY 2022 - vzory nazvu ===')
    print_rows(cur, '''SELECT file_name FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) = 2022
        LIMIT 30''',
               lambda r: f' {r[0]}')

    print('\n=== 2022 PREFIXES ===')
    print_rows(cur, '''SELECT LEFT(file_name, 10) as prefix, COUNT(*) as cnt FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) = 2022
        GROUP BY prefix ORDER BY cnt DESC LIMIT 15''',
               key_count)

    # 14. Months (index 0 is a placeholder so month numbers 1-12 index directly)
    print('\n=== FOTKY PO MESICICH ===')
    nazvy = ['', 'Leden', 'Unor', 'Brezen', 'Duben', 'Kveten', 'Cerven',
             'Cervenec', 'Srpen', 'Zari', 'Rijen', 'Listopad', 'Prosinec']
    print_rows(cur, '''SELECT EXTRACT(MONTH FROM taken_at)::INT as mesic, COUNT(*) as cnt
        FROM photos WHERE taken_at IS NOT NULL GROUP BY mesic ORDER BY mesic''',
               lambda r: f' {nazvy[r[0]]}: {r[1]}')

    # 15. Days of week (list starts with Sunday, indexed by the DOW value)
    print('\n=== FOTKY PO DNECH V TYDNU ===')
    dny = ['Nedele', 'Pondeli', 'Utery', 'Streda', 'Ctvrtek', 'Patek', 'Sobota']
    print_rows(cur, '''SELECT EXTRACT(DOW FROM taken_at)::INT as den, COUNT(*) as cnt
        FROM photos WHERE taken_at IS NOT NULL GROUP BY den ORDER BY den''',
               lambda r: f' {dny[r[0]]}: {r[1]}')

    # 16. Hours
    print('\n=== FOTKY PO HODINACH ===')
    print_rows(cur, '''SELECT EXTRACT(HOUR FROM taken_at)::INT as hod, COUNT(*) as cnt
        FROM photos WHERE taken_at IS NOT NULL GROUP BY hod ORDER BY hod''',
               lambda r: f' {r[0]:02d}:00 - {r[1]}')

    # 17. Busiest days (events)
    print('\n=== TOP 20 DNU (nejvic fotek = udalosti) ===')
    print_rows(cur, '''SELECT taken_at::date as den, COUNT(*) as cnt
        FROM photos WHERE taken_at IS NOT NULL GROUP BY den ORDER BY cnt DESC LIMIT 20''',
               lambda r: f' {r[0]}: {r[1]} fotek')
|
||||||
@@ -0,0 +1,361 @@
|
|||||||
|
import streamlit as st
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import plotly.express as px
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
# Set the browser-tab title and icon and switch the page to the wide layout.
st.set_page_config(page_title="FotkyBuzalkovi - Report", layout="wide", page_icon="📷")
|
||||||
|
|
||||||
|
@st.cache_resource
def get_conn():
    """Open the PostgreSQL connection used by the report.

    The function is wrapped in ``st.cache_resource`` so the connection object
    is reused instead of being reopened on every call.

    No password is passed here: libpq (which psycopg2 wraps) reads it from the
    ``PGPASSWORD`` environment variable or from ``~/.pgpass``. The password
    that used to be hard-coded is in the repository history and should be
    rotated.

    Returns:
        An open psycopg2 connection in autocommit mode.
    """
    conn = psycopg2.connect(
        host="192.168.1.76", port=5432, dbname="fotky_buzalkovi",
        user="vladimir.buzalka",
    )
    # The report only runs SELECTs. Autocommit keeps this long-lived shared
    # connection from sitting "idle in transaction" and from staying in an
    # aborted transaction after a single failing query.
    conn.autocommit = True
    return conn
|
||||||
|
|
||||||
|
def q(sql, params=None):
    """Run a SQL query and return the result as a pandas DataFrame.

    Args:
        sql: SQL text to execute.
        params: Optional query parameters, forwarded to ``pd.read_sql``.

    Returns:
        The DataFrame produced by ``pd.read_sql``.
    """
    conn = get_conn()
    try:
        return pd.read_sql(sql, conn, params=params)
    finally:
        # get_conn() hands out one shared connection. End the implicit
        # transaction after every read so a failed statement cannot leave the
        # connection in an aborted state for all later queries.
        if not conn.closed:
            conn.rollback()
|
||||||
|
|
||||||
|
|
||||||
|
st.title("📷 FotkyBuzalkovi — Průzkum dat")

# --- Overall statistics: one metric tile per headline count ---
st.header("Celkové statistiky")
overview_cols = st.columns(4)
overview = q("""
    SELECT
    (SELECT COUNT(*) FROM photos) as photos,
    (SELECT COUNT(*) FROM photos WHERE exif_raw IS NOT NULL AND exif_raw != '{}') as s_exif,
    (SELECT COUNT(*) FROM photos WHERE gps_lat IS NOT NULL) as s_gps,
    (SELECT COUNT(*) FROM photos WHERE camera_model IS NOT NULL) as s_camera
""").iloc[0]
overview_tiles = (
    ("Celkem fotek", "photos"),
    ("S EXIF daty", "s_exif"),
    ("S GPS", "s_gps"),
    ("S kamerou", "s_camera"),
)
for tile, (tile_label, tile_key) in zip(overview_cols, overview_tiles):
    tile.metric(tile_label, f"{overview[tile_key]:,}")

# --- Backup pipeline: backups vs. source files and their difference ---
st.subheader("Zálohovací pipeline (sběr fotek)")
pipeline_cols = st.columns(3)
pipeline = q("""
    SELECT
    (SELECT COUNT(*) FROM zaloha_obrazku) as zalohy,
    (SELECT COUNT(*) FROM zdrojove_soubory) as zdroje,
    (SELECT COUNT(*) FROM zdrojove_soubory) - (SELECT COUNT(*) FROM zaloha_obrazku) as duplikaty
""").iloc[0]
pipeline_tiles = (
    ("Unikátních záloh", "zalohy"),
    ("Zdrojových souborů", "zdroje"),
    ("Duplikátních výskytů", "duplikaty"),
)
for tile, (tile_label, tile_key) in zip(pipeline_cols, pipeline_tiles):
    tile.metric(tile_label, f"{pipeline[tile_key]:,}")

st.divider()
|
||||||
|
|
||||||
|
# --- Photos per year: bar chart with the count printed above each bar ---
st.header("📅 Fotky po letech")
photos_per_year = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok, COUNT(*) as pocet
    FROM photos WHERE taken_at IS NOT NULL
    GROUP BY rok ORDER BY rok
""")
year_chart = px.bar(
    photos_per_year,
    x="rok",
    y="pocet",
    text="pocet",
    labels={"rok": "Rok", "pocet": "Počet fotek"},
)
year_chart.update_traces(textposition="outside", texttemplate="%{text:,}")
year_chart.update_layout(height=450)
st.plotly_chart(year_chart, use_container_width=True)
|
||||||
|
|
||||||
|
# --- Cameras per year ---
st.header("📸 Fotoaparáty po letech")
df_cam = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok,
    COALESCE(camera_model, '(neznámý)') as model,
    COUNT(*) as pocet
    FROM photos WHERE taken_at IS NOT NULL
    GROUP BY rok, model ORDER BY rok, pocet DESC
""")

# Preselect the fifth most recent year. The index is clamped at 0 because a
# negative index (fewer than five distinct years) is rejected by st.selectbox.
years = sorted(df_cam["rok"].unique())
selected_year = st.selectbox("Vyber rok:", years, index=max(0, len(years) - 5))

# Rows are already ordered by count within a year, so head(15) is the top 15.
df_year_cam = df_cam[df_cam["rok"] == selected_year].head(15)
fig_cam = px.bar(df_year_cam, x="model", y="pocet", text="pocet",
                 labels={"model": "Fotoaparát", "pocet": "Počet fotek"},
                 title=f"Fotoaparáty v roce {selected_year}")
fig_cam.update_traces(textposition="outside")
fig_cam.update_layout(xaxis_tickangle=-45, height=500)
st.plotly_chart(fig_cam, use_container_width=True)

# Heatmap camera x year (15 most used cameras overall)
st.subheader("Heatmapa: top kamery × roky")
top_cameras = q("""
    SELECT camera_model, COUNT(*) as cnt FROM photos
    WHERE camera_model IS NOT NULL
    GROUP BY camera_model ORDER BY cnt DESC LIMIT 15
""")["camera_model"].tolist()

# One row per camera, one column per year; missing combinations become 0.
df_heat = df_cam[df_cam["model"].isin(top_cameras)].pivot_table(
    index="model", columns="rok", values="pocet", fill_value=0
)
fig_heat = px.imshow(df_heat, labels=dict(x="Rok", y="Fotoaparát", color="Fotek"),
                     aspect="auto", color_continuous_scale="YlOrRd")
fig_heat.update_layout(height=500)
st.plotly_chart(fig_heat, use_container_width=True)

st.divider()
|
||||||
|
|
||||||
|
# --- Duplicates ---
# In both queries SUM() over the per-group counts is NUMERIC in PostgreSQL and
# NULL when no group qualifies. COALESCE plus the BIGINT cast (and int() below)
# keep the values plain integers, so the "," format cannot fail on a missing
# value and does not render the count as a float.
st.header("🔄 Duplikáty")
d1, d2 = st.columns(2)

with d1:
    st.subheader("Identické pixely (sha256_pixels)")
    df_dup_px = q("""
        SELECT COUNT(*) as skupin, COALESCE(SUM(cnt), 0)::BIGINT as fotek FROM (
        SELECT COUNT(*) as cnt FROM photos WHERE sha256_pixels IS NOT NULL
        GROUP BY sha256_pixels HAVING COUNT(*) > 1
        ) x
    """).iloc[0]
    st.metric("Skupin duplikátů", f"{int(df_dup_px['skupin']):,}")
    st.metric("Fotek v duplikátech", f"{int(df_dup_px['fotek']):,}")

with d2:
    st.subheader("Vizuálně podobné (phash)")
    df_dup_ph = q("""
        SELECT COUNT(*) as skupin, COALESCE(SUM(cnt), 0)::BIGINT as fotek FROM (
        SELECT COUNT(*) as cnt FROM photos WHERE phash IS NOT NULL
        GROUP BY phash HAVING COUNT(*) > 1
        ) x
    """).iloc[0]
    st.metric("Skupin podobných", f"{int(df_dup_ph['skupin']):,}")
    st.metric("Fotek v podobných skupinách", f"{int(df_dup_ph['fotek']):,}")

st.divider()
|
||||||
|
|
||||||
|
# --- GPS map ---
st.header("🗺️ GPS lokace")
gps_points = q("""
    SELECT gps_lat as lat, gps_lon as lon
    FROM photos WHERE gps_lat IS NOT NULL AND gps_lon IS NOT NULL
""")
if not gps_points.empty:
    # Coordinates are converted to plain floats before being handed to st.map.
    gps_points["lat"], gps_points["lon"] = (
        gps_points["lat"].astype(float),
        gps_points["lon"].astype(float),
    )
    st.map(gps_points, size=2)

# Most frequent locations, bucketed by rounding both coordinates to 0.1 degree.
st.subheader("Top lokace (zaokrouhleno na 0.1°)")
top_locations = q("""
    SELECT ROUND(gps_lat::numeric, 1) as lat, ROUND(gps_lon::numeric, 1) as lon,
    COUNT(*) as pocet
    FROM photos WHERE gps_lat IS NOT NULL AND gps_lon IS NOT NULL
    GROUP BY lat, lon ORDER BY pocet DESC LIMIT 20
""")
st.dataframe(top_locations, use_container_width=True)

st.divider()
|
||||||
|
|
||||||
|
# --- Technical parameters: one tab per EXIF parameter ---
st.header("⚙️ Technické parametry")
tab_iso, tab_clona, tab_exp, tab_lens = st.tabs(["ISO", "Clona", "Expoziční čas", "Objektivy"])


def _count_bar(frame, x_column, x_label):
    """Return a bar chart of ``pocet`` per ``x_column`` with counts above the bars."""
    chart = px.bar(frame, x=x_column, y="pocet", text="pocet",
                   labels={x_column: x_label, "pocet": "Počet fotek"})
    chart.update_traces(textposition="outside")
    return chart


with tab_iso:
    iso_counts = q("""
        SELECT iso, COUNT(*) as pocet FROM photos WHERE iso IS NOT NULL
        GROUP BY iso ORDER BY pocet DESC LIMIT 20
    """)
    st.plotly_chart(_count_bar(iso_counts, "iso", "ISO"), use_container_width=True)

with tab_clona:
    aperture_counts = q("""
        SELECT aperture, COUNT(*) as pocet FROM photos WHERE aperture IS NOT NULL
        GROUP BY aperture ORDER BY pocet DESC LIMIT 20
    """)
    # Text label ("f/<value>") so the x axis is categorical rather than numeric.
    aperture_counts["label"] = "f/" + aperture_counts["aperture"].astype(str)
    st.plotly_chart(_count_bar(aperture_counts, "label", "Clona"), use_container_width=True)

with tab_exp:
    exposure_counts = q("""
        SELECT exposure_time, COUNT(*) as pocet FROM photos WHERE exposure_time IS NOT NULL
        GROUP BY exposure_time ORDER BY pocet DESC LIMIT 20
    """)
    exposure_chart = _count_bar(exposure_counts, "exposure_time", "Expoziční čas")
    exposure_chart.update_layout(xaxis_tickangle=-45)
    st.plotly_chart(exposure_chart, use_container_width=True)

with tab_lens:
    lens_counts = q("""
        SELECT COALESCE(lens_model, '(neuvedeno)') as objektiv, COUNT(*) as pocet
        FROM photos GROUP BY objektiv ORDER BY pocet DESC LIMIT 15
    """)
    st.dataframe(lens_counts, use_container_width=True)

st.divider()
|
||||||
|
|
||||||
|
# --- Resolution per year: average and maximum megapixels as two line traces ---
st.header("📐 Megapixely po letech")
mp_by_year = q("""
    SELECT EXTRACT(YEAR FROM taken_at)::INT as rok,
    ROUND(AVG(megapixels)::numeric, 1) as prumer,
    ROUND(MAX(megapixels)::numeric, 1) as maximum
    FROM photos WHERE taken_at IS NOT NULL AND megapixels IS NOT NULL
    GROUP BY rok ORDER BY rok
""")
mp_chart = go.Figure()
for mp_column, trace_name in (("prumer", "Průměr MP"), ("maximum", "Maximum MP")):
    mp_chart.add_trace(
        go.Scatter(x=mp_by_year["rok"], y=mp_by_year[mp_column],
                   mode="lines+markers", name=trace_name)
    )
mp_chart.update_layout(yaxis_title="Megapixely", xaxis_title="Rok", height=400)
st.plotly_chart(mp_chart, use_container_width=True)

st.divider()
|
||||||
|
|
||||||
|
# --- File formats and colour modes, side by side ---
st.header("🎨 Formáty a barvy")
ext_col, mode_col = st.columns(2)

with ext_col:
    st.subheader("Přípony")
    ext_counts = q("""
        SELECT COALESCE(file_ext, '(none)') as pripona, COUNT(*) as pocet
        FROM photos GROUP BY pripona ORDER BY pocet DESC
    """)
    st.dataframe(ext_counts, use_container_width=True)

with mode_col:
    st.subheader("Barevné módy")
    mode_counts = q("""
        SELECT COALESCE(mode, '(none)') as mod, COUNT(*) as pocet
        FROM photos GROUP BY mod ORDER BY pocet DESC
    """)
    mode_pie = px.pie(mode_counts, values="pocet", names="mod")
    st.plotly_chart(mode_pie, use_container_width=True)

st.divider()
|
||||||
|
|
||||||
|
# --- Photos with no camera model: sample rows to inspect file-name patterns ---
st.header("❓ Fotky bez kamery — analýza názvů")

tab_2015, tab_2022 = st.tabs(["2015–2016", "2022"])

with tab_2015:
    unknown_2015 = q("""
        SELECT file_name, file_size, taken_at, taken_at_source
        FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) BETWEEN 2015 AND 2016
        ORDER BY taken_at LIMIT 50
    """)
    st.dataframe(unknown_2015, use_container_width=True)
    st.info("Přejmenované importním skriptem — vzor: `[NO MODEL] [MD5...]`")

with tab_2022:
    unknown_2022 = q("""
        SELECT file_name, file_size, taken_at, taken_at_source
        FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) = 2022
        ORDER BY taken_at LIMIT 50
    """)
    st.dataframe(unknown_2022, use_container_width=True)

    # Most common 10-character file-name prefixes among the same 2022 photos.
    prefix_counts = q("""
        SELECT LEFT(file_name, 10) as prefix, COUNT(*) as pocet FROM photos
        WHERE camera_model IS NULL AND EXTRACT(YEAR FROM taken_at) = 2022
        GROUP BY prefix ORDER BY pocet DESC LIMIT 10
    """)
    st.subheader("Prefixes")
    st.dataframe(prefix_counts, use_container_width=True)
    # NOTE(review): the figures in this message are hard-coded text, not
    # computed from the query results above.
    st.info("4 194 z 4 210 importováno najednou 25.9.2023 — pravděpodobně hromadný export z iCloudu/Google Photos")

st.divider()
|
||||||
|
|
||||||
|
# --- Time patterns: photo counts by month, weekday, hour and busiest days ---
st.header("⏰ Časové vzory")

tab_month, tab_dow, tab_hour, tab_topdays = st.tabs(["Měsíce", "Dny v týdnu", "Hodiny", "Top dny (události)"])

with tab_month:
    per_month = q("""
        SELECT EXTRACT(MONTH FROM taken_at)::INT as mesic, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL GROUP BY mesic ORDER BY mesic
    """)
    month_names = {1:'Leden',2:'Únor',3:'Březen',4:'Duben',5:'Květen',6:'Červen',
                   7:'Červenec',8:'Srpen',9:'Září',10:'Říjen',11:'Listopad',12:'Prosinec'}
    # Month number -> Czech month name for the x axis.
    per_month["nazev"] = per_month["mesic"].map(month_names)
    month_chart = px.bar(
        per_month, x="nazev", y="pocet", text="pocet",
        labels={"nazev": "Měsíc", "pocet": "Počet fotek"},
    )
    month_chart.update_traces(textposition="outside")
    st.plotly_chart(month_chart, use_container_width=True)

with tab_dow:
    per_weekday = q("""
        SELECT EXTRACT(DOW FROM taken_at)::INT as den, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL GROUP BY den ORDER BY den
    """)
    # The mapping below starts at 0 = Sunday.
    weekday_names = {0:'Neděle',1:'Pondělí',2:'Úterý',3:'Středa',4:'Čtvrtek',5:'Pátek',6:'Sobota'}
    per_weekday["nazev"] = per_weekday["den"].map(weekday_names)
    weekday_chart = px.bar(
        per_weekday, x="nazev", y="pocet", text="pocet",
        labels={"nazev": "Den", "pocet": "Počet fotek"},
    )
    weekday_chart.update_traces(textposition="outside")
    st.plotly_chart(weekday_chart, use_container_width=True)

with tab_hour:
    per_hour = q("""
        SELECT EXTRACT(HOUR FROM taken_at)::INT as hodina, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL GROUP BY hodina ORDER BY hodina
    """)
    hour_chart = px.bar(
        per_hour, x="hodina", y="pocet", text="pocet",
        labels={"hodina": "Hodina", "pocet": "Počet fotek"},
    )
    hour_chart.update_traces(textposition="outside", texttemplate="%{text:,}")
    # One tick per hour on the x axis.
    hour_chart.update_layout(xaxis=dict(dtick=1))
    st.plotly_chart(hour_chart, use_container_width=True)

with tab_topdays:
    busiest_days = q("""
        SELECT taken_at::date as den, COUNT(*) as pocet
        FROM photos WHERE taken_at IS NOT NULL
        GROUP BY den ORDER BY pocet DESC LIMIT 30
    """)
    busiest_chart = px.bar(
        busiest_days, x="den", y="pocet", text="pocet",
        labels={"den": "Datum", "pocet": "Počet fotek"},
    )
    busiest_chart.update_traces(textposition="outside")
    busiest_chart.update_layout(xaxis_tickangle=-45, height=500)
    st.plotly_chart(busiest_chart, use_container_width=True)

st.divider()
|
||||||
|
|
||||||
|
# --- EXIF coverage: share of photos that have each metadata field filled ---
st.header("📊 EXIF pokrytí")
df_coverage = q("""
    SELECT
    COUNT(*) FILTER (WHERE exif_raw IS NOT NULL AND exif_raw != '{}') as s_exif,
    COUNT(*) FILTER (WHERE taken_at IS NOT NULL) as s_taken_at,
    COUNT(*) FILTER (WHERE camera_model IS NOT NULL) as s_camera,
    COUNT(*) FILTER (WHERE iso IS NOT NULL) as s_iso,
    COUNT(*) FILTER (WHERE gps_lat IS NOT NULL) as s_gps,
    COUNT(*) FILTER (WHERE aperture IS NOT NULL) as s_aperture,
    COUNT(*) FILTER (WHERE lens_model IS NOT NULL) as s_lens,
    COUNT(*) as celkem
    FROM photos
""").iloc[0]

# Label and result column are kept together so the bar labels cannot drift out
# of step with the values; the list order is the display order.
coverage_fields = [
    ("EXIF data", "s_exif"),
    ("Datum pořízení", "s_taken_at"),
    ("Model kamery", "s_camera"),
    ("ISO", "s_iso"),
    ("Clona", "s_aperture"),
    ("GPS", "s_gps"),
    ("Objektiv", "s_lens"),
]
categories = [label for label, _ in coverage_fields]
values = [int(df_coverage[column]) for _, column in coverage_fields]
total = int(df_coverage["celkem"])
# An empty photos table gives total == 0; report 0 % instead of dividing by zero.
pct = [round(v / total * 100, 1) if total else 0.0 for v in values]

fig_cov = go.Figure(go.Bar(
    x=pct, y=categories, orientation='h',
    text=[f"{v:,} ({p}%)" for v, p in zip(values, pct)],
    textposition="auto"
))
fig_cov.update_layout(xaxis_title="% fotek", height=350)
st.plotly_chart(fig_cov, use_container_width=True)

st.divider()
st.caption("FotkyBuzalkovi — data z PostgreSQL 192.168.1.76 / fotky_buzalkovi")
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
# Databázové schéma — fotky_buzalkovi
|
||||||
|
|
||||||
|
PostgreSQL 192.168.1.76:5432, databáze `fotky_buzalkovi`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Skupina 1: Zpracované fotky
|
||||||
|
|
||||||
|
Tyto tabulky obsahují naparsované informace o fotkách — EXIF, hashe, metadata, tagy.
|
||||||
|
Jsou základem pro veškerou další práci (vyhledávání, deduplikace, organizace).
|
||||||
|
|
||||||
|
### photos (85 833 záznamů)
|
||||||
|
|
||||||
|
Hlavní tabulka. Každý řádek = jedna unikátní fotka identifikovaná hashem `sha256_file`.
|
||||||
|
|
||||||
|
| Sloupec | Typ | Nullable | Default | Popis |
|
||||||
|
|---------|-----|----------|---------|-------|
|
||||||
|
| **id** | BIGSERIAL | NO | autoincrement | PK |
|
||||||
|
| **sha256_file** | CHAR(64) | NO | — | SHA-256 celého souboru (UNIQUE) |
|
||||||
|
| sha256_pixels | CHAR(64) | YES | — | SHA-256 pixelových dat (odhalí soubory, které se liší jen v metadatech) |
|
||||||
|
| phash | BIGINT | YES | — | Perceptuální hash (vizuální podobnost) |
|
||||||
|
| dhash | BIGINT | YES | — | Difference hash (vizuální podobnost) |
|
||||||
|
| **file_path** | VARCHAR(2000) | NO | — | Absolutní cesta k souboru |
|
||||||
|
| file_path_relative | VARCHAR(2000) | YES | — | Relativní cesta |
|
||||||
|
| **file_name** | VARCHAR(500) | NO | — | Název souboru |
|
||||||
|
| file_stem | VARCHAR(500) | YES | — | Název bez přípony |
|
||||||
|
| file_ext | VARCHAR(20) | YES | — | Přípona (.jpg, .png, …) |
|
||||||
|
| file_size | BIGINT | YES | — | Velikost v bajtech |
|
||||||
|
| mime_type | VARCHAR(50) | YES | — | MIME typ (image/jpeg, …) |
|
||||||
|
| format | VARCHAR(20) | YES | — | Formát obrázku (JPEG, PNG, …) |
|
||||||
|
| mode | VARCHAR(20) | YES | — | Barevný mód (RGB, L, RGBA, …) |
|
||||||
|
| width | INT | YES | — | Šířka v pixelech |
|
||||||
|
| height | INT | YES | — | Výška v pixelech |
|
||||||
|
| megapixels | NUMERIC | YES | — | Rozlišení v megapixelech |
|
||||||
|
| has_transparency | BOOLEAN | YES | false | Má alfa kanál |
|
||||||
|
| icc_profile | BOOLEAN | YES | false | Obsahuje ICC profil |
|
||||||
|
| embedded_thumbnail | BOOLEAN | YES | false | Obsahuje vložený náhled |
|
||||||
|
| taken_at | TIMESTAMPTZ | YES | — | Datum pořízení fotky |
|
||||||
|
| taken_at_source | VARCHAR(20) | YES | — | Zdroj datumu (exif / mtime / …) |
|
||||||
|
| mtime | TIMESTAMPTZ | YES | — | Datum poslední modifikace souboru |
|
||||||
|
| collected_at | TIMESTAMPTZ | YES | — | Datum sběru/importu do pipeline |
|
||||||
|
| camera_make | VARCHAR(100) | YES | — | Výrobce fotoaparátu |
|
||||||
|
| camera_model | VARCHAR(255) | YES | — | Model fotoaparátu |
|
||||||
|
| lens_model | VARCHAR(255) | YES | — | Model objektivu |
|
||||||
|
| iso | INT | YES | — | ISO citlivost |
|
||||||
|
| aperture | NUMERIC | YES | — | Clona (f/2.8, …) |
|
||||||
|
| exposure_time | VARCHAR(30) | YES | — | Expoziční čas (1/250, …) |
|
||||||
|
| focal_length_mm | NUMERIC | YES | — | Ohnisková vzdálenost v mm |
|
||||||
|
| gps_lat | NUMERIC | YES | — | GPS šířka |
|
||||||
|
| gps_lon | NUMERIC | YES | — | GPS délka |
|
||||||
|
| gps_altitude | NUMERIC | YES | — | GPS nadmořská výška |
|
||||||
|
| is_screenshot | BOOLEAN | YES | false | Detekováno jako screenshot |
|
||||||
|
| face_count | INT | YES | — | Počet detekovaných obličejů |
|
||||||
|
| exif_raw | JSONB | YES | — | Kompletní surová EXIF data |
|
||||||
|
| iptc_raw | JSONB | YES | — | Kompletní surová IPTC data |
|
||||||
|
| xmp_raw | JSONB | YES | — | Kompletní surová XMP data |
|
||||||
|
| imported_at | TIMESTAMPTZ | YES | now() | Kdy byl záznam vložen do DB |
|
||||||
|
| processing_status | VARCHAR(50) | YES | 'pending' | Stav zpracování |
|
||||||
|
|
||||||
|
**Indexy:**
|
||||||
|
- `photos_pkey` — PK (id)
|
||||||
|
- `photos_sha256_file_key` — UNIQUE (sha256_file)
|
||||||
|
- `idx_photos_sha256_pixels` — (sha256_pixels)
|
||||||
|
- `idx_photos_phash` — (phash)
|
||||||
|
- `idx_photos_taken_at` — (taken_at)
|
||||||
|
- `idx_photos_camera_model` — (camera_model)
|
||||||
|
- `idx_photos_file_name` — (file_name)
|
||||||
|
- `idx_photos_file_ext` — (file_ext)
|
||||||
|
- `idx_photos_exif_gin` — GIN (exif_raw)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### tags
|
||||||
|
|
||||||
|
Hierarchická tabulka tagů. Podporuje stromovou strukturu přes `parent_tag_id`.
|
||||||
|
|
||||||
|
| Sloupec | Typ | Nullable | Default | Popis |
|
||||||
|
|---------|-----|----------|---------|-------|
|
||||||
|
| **id** | SERIAL | NO | autoincrement | PK |
|
||||||
|
| **name** | VARCHAR(100) | NO | — | Název tagu |
|
||||||
|
| parent_tag_id | INT | YES | — | Rodičovský tag (FK → tags.id) |
|
||||||
|
|
||||||
|
**Constrainty:**
|
||||||
|
- PK (id)
|
||||||
|
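- UNIQUE (name, parent_tag_id) — stejný název může existovat pod různými rodiči. Pozor: PostgreSQL ve výchozím nastavení považuje hodnoty NULL v UNIQUE omezení za různé, takže u kořenových tagů (`parent_tag_id IS NULL`) toto omezení duplicitním názvům nezabrání.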
|
||||||
|
- FK parent_tag_id → tags(id)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### photo_tags
|
||||||
|
|
||||||
|
Vazební tabulka M:N mezi `photos` a `tags`.
|
||||||
|
|
||||||
|
| Sloupec | Typ | Nullable | Default | Popis |
|
||||||
|
|---------|-----|----------|---------|-------|
|
||||||
|
| **photo_id** | BIGINT | NO | — | FK → photos(id) ON DELETE CASCADE |
|
||||||
|
| **tag_id** | INT | NO | — | FK → tags(id) ON DELETE CASCADE |
|
||||||
|
| source | VARCHAR(20) | YES | — | Zdroj tagu (manual / auto / …) |
|
||||||
|
| created_at | TIMESTAMPTZ | YES | now() | Kdy byl tag přiřazen |
|
||||||
|
|
||||||
|
**Constrainty:**
|
||||||
|
- PK (photo_id, tag_id)
|
||||||
|
- FK photo_id → photos(id) ON DELETE CASCADE
|
||||||
|
- FK tag_id → tags(id) ON DELETE CASCADE
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Skupina 2: Sběr a záloha fotek
|
||||||
|
|
||||||
|
Tyto tabulky slouží **výhradně** pro proces sběru fotek ze všech počítačů na jedno centrální
|
||||||
|
úložiště (Tower1). Neobsahují žádné informace o obsahu fotek — jen evidenci, odkud byly
|
||||||
|
soubory sebrány a kam byly zálohovány. S dalším zpracováním (EXIF, tagy, organizace) nemají
|
||||||
|
nic společného.
|
||||||
|
|
||||||
|
Skript: `00 PictureCollector/collect_pictures.py` (Linux) / `collect_pictures_windows.py` (Windows)
|
||||||
|
|
||||||
|
### zaloha_obrazku (39 961 záznamů)
|
||||||
|
|
||||||
|
Každý řádek = jeden unikátní soubor fyzicky uložený v záloze (identifikovaný BLAKE3 hashem).
|
||||||
|
|
||||||
|
| Sloupec | Typ | Nullable | Default | Popis |
|
||||||
|
|---------|-----|----------|---------|-------|
|
||||||
|
| **id** | SERIAL | NO | autoincrement | PK |
|
||||||
|
| **blake3_hash** | VARCHAR(64) | NO | — | BLAKE3 hash souboru (UNIQUE) |
|
||||||
|
| **cesta_zalohy** | TEXT | NO | — | Cesta k záloze na Tower1 |
|
||||||
|
| **nazev_souboru** | VARCHAR(512) | NO | — | Název souboru |
|
||||||
|
| velikost | BIGINT | YES | — | Velikost v bajtech |
|
||||||
|
| datum_kopirovani | TIMESTAMP | YES | now() | Kdy byl soubor zkopírován |
|
||||||
|
|
||||||
|
**Indexy:**
|
||||||
|
- `zaloha_obrazku_pkey` — PK (id)
|
||||||
|
- `zaloha_obrazku_blake3_hash_key` — UNIQUE (blake3_hash)
|
||||||
|
- `idx_zaloha_hash` — (blake3_hash); duplicitní s unikátním indexem `zaloha_obrazku_blake3_hash_key` nad stejným sloupcem
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### zdrojove_soubory (45 090 záznamů)
|
||||||
|
|
||||||
|
Každý řádek = jeden nalezený zdrojový soubor na nějakém počítači. Stejný soubor (stejný
|
||||||
|
BLAKE3 hash) může mít více záznamů, pokud existuje na různých místech/počítačích.
|
||||||
|
|
||||||
|
| Sloupec | Typ | Nullable | Default | Popis |
|
||||||
|
|---------|-----|----------|---------|-------|
|
||||||
|
| **id** | SERIAL | NO | autoincrement | PK |
|
||||||
|
| **hostname** | VARCHAR(255) | NO | — | Název počítače, kde byl soubor nalezen |
|
||||||
|
| **cesta_zdroje** | TEXT | NO | — | Původní cesta k souboru |
|
||||||
|
| **nazev_souboru** | VARCHAR(512) | NO | — | Název souboru |
|
||||||
|
| velikost | BIGINT | YES | — | Velikost v bajtech |
|
||||||
|
| datum_nalezeni | TIMESTAMP | YES | now() | Kdy byl soubor nalezen |
|
||||||
|
| **blake3_hash** | VARCHAR(64) | NO | — | BLAKE3 hash souboru |
|
||||||
|
| zaloha_id | INT | YES | — | FK → zaloha_obrazku(id) |
|
||||||
|
|
||||||
|
**Constrainty:**
|
||||||
|
- PK (id)
|
||||||
|
- UNIQUE (hostname, cesta_zdroje) — každý soubor z každého PC jen jednou
|
||||||
|
- FK zaloha_id → zaloha_obrazku(id)
|
||||||
|
|
||||||
|
**Indexy:**
|
||||||
|
- `idx_zdroj_hash` — (blake3_hash)
|
||||||
|
- `idx_zdroj_zaloha` — (zaloha_id)
|
||||||
|
- `idx_zdroj_host` — (hostname)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Poznámky
|
||||||
|
|
||||||
|
- Počty záznamů jsou k datu 2026-05-24.
|
||||||
|
- Tabulka `cameras` z původního `create_schema.py` v DB neexistuje — informace o kameře
|
||||||
|
jsou přímo ve sloupcích `camera_make` / `camera_model` v tabulce `photos`.
|
||||||
|
- EXIF parser: ExifRead (Pillow má bug v GPS).
|
||||||
|
- Tabulky skupiny 1 a skupiny 2 zatím nejsou propojené (žádný FK mezi `photos` a `zaloha_obrazku`).
|
||||||
Reference in New Issue
Block a user