Building Interactive Project Dashboards with Streamlit and Plotly

I. Why Streamlit for Dashboards?

Streamlit turns Python scripts into interactive web apps with zero frontend code. For project tracking dashboards that pull from Google Sheets, process data with Pandas, and visualize with Plotly, it's the fastest path from script to shareable tool. This guide covers the key patterns for building production-quality dashboards.

II. Project Structure

A well-organized Streamlit dashboard separates data loading, processing, and rendering. Here's the minimal structure:

python

# app.py - Main dashboard entry point
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
from io import StringIO
import requests

# Page config must be the first Streamlit command
st.set_page_config(
    page_title="Project Dashboard",
    layout="wide"  # Use full browser width
)

# Cache data loading to avoid re-fetching on every interaction
@st.cache_data(ttl=30)  # Cache for 30 seconds
def load_sheet(url: str) -> pd.DataFrame:
    """Download a Google Sheet as CSV and return a DataFrame."""
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    return pd.read_csv(StringIO(r.text))

III. Data Ingestion from Google Sheets

Google Sheets can be published as CSV via a shareable link. Store URLs as Streamlit secrets (in .streamlit/secrets.toml) or environment variables. The @st.cache_data decorator prevents re-downloading on every widget interaction:

python

# Read URLs from secrets or environment
def get_url(name: str) -> str:
    """Get a URL from Streamlit secrets or env vars."""
    try:
        return st.secrets.get(name, os.getenv(name, ''))
    except Exception:
        return os.getenv(name, '')

# Load multiple data sources
assignments_df = load_sheet(get_url('SHEET_URL'))
stage1_df = load_sheet(get_url('STAGE1_SHEET_URL'))
stage2_df = load_sheet(get_url('STAGE2_SHEET_URL'))

# Secrets file (.streamlit/secrets.toml):
# SHEET_URL = "https://docs.google.com/spreadsheets/d/.../export?format=csv"
# STAGE1_SHEET_URL = "https://docs.google.com/..."

IV. Multi-Tab Layout with Sidebar Filters

Streamlit's tab and sidebar APIs create a clean dashboard layout. Define filters in the sidebar so they persist across tabs:

python

# Sidebar filters (shared across all tabs)
st.sidebar.header("Filters")
stage_filter = st.sidebar.multiselect(
    "Stage",
    ["Stage 1", "Stage 2", "Stage 3"],
    default=["Stage 1", "Stage 2", "Stage 3"]
)
coder_filter = st.sidebar.multiselect("Coder", coder_options)
paper_search = st.sidebar.text_input("Search Paper ID")
assigned_only = st.sidebar.toggle("Assigned only", value=True)

# Force refresh button
if st.sidebar.button("Force refresh"):
    st.cache_data.clear()
    st.rerun()

# Tab layout
tab_overview, tab_coders, tab_supervisors = st.tabs(
    ["Overview", "Coders", "Supervisors"]
)

with tab_overview:
    st.subheader("Progress Overview")
    # ... overview content

with tab_coders:
    st.subheader("Coder Summary")
    # ... coder content

V. KPI Cards and Metrics

Streamlit's st.metric renders clean KPI cards with optional delta indicators. Use st.columns for horizontal layout:

python

def render_kpi_cards(metrics: dict):
    """Render KPI cards in a responsive grid."""
    # Row 1: high-level counts
    r1 = st.columns(3)
    r1[0].metric("Total Entries", f"{metrics['total']:,}")
    r1[1].metric("Papers Coded", f"{metrics['coded']:,}")
    r1[2].metric("Papers Reviewed", f"{metrics['reviewed']:,}")

    # Row 2: detailed breakdown
    r2 = st.columns(4)
    r2[0].metric("Stage 1", metrics['s1_coded'])
    r2[1].metric("Stage 2", metrics['s2_coded'])
    r2[2].metric("Stage 3", metrics['s3_coded'])
    r2[3].metric(
        "Review Rate",
        f"{metrics['review_rate']:.1f}%",
        delta=f"{metrics['review_delta']:+.1f}%"  # Shows green/red arrow
    )

VI. Interactive Plotly Charts

Plotly integrates natively with Streamlit via st.plotly_chart. Here's a reusable pattern for a progress bar chart with annotations:

python

def build_progress_chart(
    stages: list[str],
    coded_counts: list[int],
    reviewed_counts: list[int],
    total: int
) -> go.Figure:
    """Overlaid bar chart: total vs coded vs reviewed."""
    fig = go.Figure()

    # Background: total capacity
    fig.add_trace(go.Bar(
        x=stages, y=[total] * len(stages),
        name=f'Total ({total})',
        marker_color='lightgrey', opacity=0.5
    ))
    # Coded papers
    fig.add_trace(go.Bar(
        x=stages, y=coded_counts,
        name='Coded', marker_color='lightblue'
    ))
    # Reviewed papers
    fig.add_trace(go.Bar(
        x=stages, y=reviewed_counts,
        name='Reviewed', marker_color='orange'
    ))

    # Add percentage annotations
    for i, stage in enumerate(stages):
        pct = coded_counts[i] / total * 100 if total else 0
        fig.add_annotation(
            x=stage, y=coded_counts[i],
            text=f"{coded_counts[i]} ({pct:.1f}%)",
            showarrow=False, yshift=10
        )

    fig.update_layout(
        barmode='overlay',
        template='plotly_white',
        height=500,
        legend=dict(orientation='h', y=1.02)
    )
    return fig

# Render in Streamlit
st.plotly_chart(
    build_progress_chart(stages, coded, reviewed, total),
    use_container_width=True  # Stretch to fill container
)

VII. Styled DataFrames

Pandas Styler objects render directly in Streamlit with conditional formatting. This is powerful for summary tables with color-coded metrics:

python

def style_summary_table(df: pd.DataFrame):
    """Apply conditional formatting to a summary DataFrame."""
    def extract_pct(val: str) -> float:
        """Extract percentage from '42 (85.3%)' format."""
        try:
            return float(str(val).split('(')[1].split('%')[0])
        except Exception:
            return 0.0

    # Identify percentage columns
    pct_cols = [c for c in df.columns if 'Coded' in c or 'Reviewed' in c]

    def highlight(row):
        styles = [''] * len(row)
        for col in pct_cols:
            if col in df.columns:
                pct = extract_pct(row[col])
                idx = df.columns.get_loc(col)
                if pct == 100.0:
                    styles[idx] = 'background-color: lightgreen'
                elif pct == 0.0:
                    styles[idx] = 'background-color: lightcoral'
        return styles

    return df.style.apply(highlight, axis=1)

# Render styled table
st.dataframe(
    style_summary_table(summary_df),
    width='stretch',
    hide_index=True
)

VIII. Donut Charts for Multi-Stage Progress

Plotly subplots with pie charts create effective multi-stage donut visualizations. Each stage gets its own ring showing coded vs not-coded vs reviewed:

python

from plotly.subplots import make_subplots

def build_donut_charts(
    stages: list[str],
    counts: dict,  # {stage: {'coded': int, 'reviewed': int}}
    total: int
) -> go.Figure:
    """One donut per stage in a horizontal row."""
    n = len(stages)
    fig = make_subplots(
        rows=1, cols=n,
        specs=[[{'type': 'pie'}] * n]
    )

    colors = {
        'reviewed': '#38B2AC',   # Deep teal
        'coded': '#4ECDC480',    # Light teal
        'not_coded': '#E8E8E8',  # Grey
    }

    for i, stage in enumerate(stages):
        coded = counts[stage]['coded']
        reviewed = counts[stage]['reviewed']
        not_coded = total - coded
        coded_only = coded - reviewed

        fig.add_trace(go.Pie(
            values=[reviewed, coded_only, not_coded],
            labels=['Reviewed', 'Coded', 'Not Coded'],
            hole=0.4,
            marker=dict(colors=[
                colors['reviewed'],
                colors['coded'],
                colors['not_coded']
            ]),
            textinfo='value',
            showlegend=(i == 0),  # Legend only on first
        ), row=1, col=i+1)

    fig.update_layout(
        template='plotly_white',
        height=400,
        showlegend=True
    )
    return fig

IX. Session State and Performance

Streamlit reruns the entire script on every interaction. Use session state to persist data across reruns, and @st.cache_data to avoid redundant computation:

python

# Session state for cross-rerun persistence
if 'prev_metrics' not in st.session_state:
    st.session_state['prev_metrics'] = None
if 'run_id' not in st.session_state:
    st.session_state['run_id'] = 0

# Cache expensive computations
@st.cache_data(ttl=60)
def process_all_stages(s1_url, s2_url, s3_url):
    """Load and process all stage data. Cached for 60s."""
    s1 = load_sheet(s1_url)
    s2 = load_sheet(s2_url)
    s3 = load_sheet(s3_url)
    # ... expensive processing ...
    return processed_data

# Use run_id as chart key to force re-render
rid = st.session_state['run_id']
st.plotly_chart(fig, key=f"chart_{rid}")

# Compare with previous metrics for delta display
prev = st.session_state['prev_metrics']
st.session_state['prev_metrics'] = current_metrics

X. Deployment

Streamlit Community Cloud deploys directly from a GitHub repo. Add a requirements.txt and secrets via the dashboard settings. For private deployments, Docker works well:

yaml

# requirements.txt
streamlit>=1.30
pandas>=2.0
plotly>=5.18
requests>=2.31

dockerfile

FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8501
CMD ["streamlit", "run", "app.py", "--server.port=8501"]

The patterns above scale from simple single-page dashboards to complex multi-tab applications with dozens of charts and tables. Streamlit handles the reactivity, Plotly handles the visualization, and Pandas handles the data. Focus your effort on the processing logic and let the framework handle the rest.