Source code for diff_diff.stacked_did_results

Name: diff-diff
Author: diff-diff contributors
"""
Result containers for the Stacked DiD estimator.

This module contains StackedDiDResults dataclass for Wing, Freedman &
Hollingsworth (2024) stacked difference-in-differences estimation.
"""

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

from diff_diff.results import _format_survey_block, _get_significance_stars
from diff_diff.results_base import BaseResults

__all__ = [
    "StackedDiDResults",
]



[docs]
@dataclass
class StackedDiDResults(BaseResults):
    """
    Results from Stacked DiD estimation (Wing, Freedman & Hollingsworth 2024).

    The ``att``, ``se``, ``t_stat``, ``p_value`` and ``conf_int`` properties
    are read-only aliases of the corresponding ``overall_*`` fields.

    Attributes
    ----------
    overall_att : float
        Overall average treatment effect on the treated (average of
        post-treatment event-study coefficients).
    overall_se : float
        Standard error of overall ATT (delta method on VCV).
    overall_t_stat : float
        T-statistic for overall ATT.
    overall_p_value : float
        P-value for overall ATT.
    overall_conf_int : tuple
        Confidence interval for overall ATT.
    event_study_effects : dict, optional
        Dictionary mapping event time h to effect dict with keys:
        'effect', 'se', 't_stat', 'p_value', 'conf_int', 'n_obs'.
    group_effects : dict, optional
        Dictionary mapping cohort g to effect dict.
    stacked_data : pd.DataFrame
        Full stacked dataset with _sub_exp, _event_time, _D_sa,
        _Q_weight columns. Accessible for custom analysis.
    groups : list
        Adoption events in the trimmed set (Omega_kappa).
    trimmed_groups : list
        Adoption events excluded by IC1/IC2.
    time_periods : list
        All time periods in the original data.
    n_obs : int
        Number of observations in the original data.
    n_stacked_obs : int
        Number of observations in the stacked dataset.
    n_sub_experiments : int
        Number of sub-experiments in the stack.
    n_treated_units : int
        Distinct treated units across trimmed set.
    n_control_units : int
        Distinct control units across trimmed set.
    kappa_pre : int
        Pre-treatment event-time window size.
    kappa_post : int
        Post-treatment event-time window size.
    weighting : str
        Weighting scheme used.
    clean_control : str
        Clean control definition used.
    alpha : float
        Significance level used.
    anticipation : int
        Anticipation setting stored with the results (default 0).
        NOTE(review): presumably the number of anticipation periods used at
        fit time -- confirm against the estimator.
    vcov_type : str
        Analytical variance family configured at fit time (default "hc1").
        When a survey design is supplied, the survey TSL/replicate variance
        overrides the analytical family; this field still records the
        configured value.
    cluster_name : str, optional
        Cluster identity at fit time ("unit" or "unit_subexp"). Passed to
        the variance-label formatter by ``summary()``.
    n_clusters : int, optional
        Realized cluster count at fit time. Passed to the variance-label
        formatter by ``summary()``.
    survey_metadata : object, optional
        Survey design metadata (SurveyMetadata instance from
        diff_diff.survey). When set, ``summary()`` renders a survey block
        instead of the analytical variance label.
    balance : str
        Covariate balancing mode: "none" (default, plain weighted stacked
        DID) or "entropy".
    covariates : list of str, optional
        Balanced columns when ``balance="entropy"``.
    balance_diagnostics : dict, optional
        When ``balance="entropy"``, maps each sub-experiment to a dict with
        keys n_treated, n_control, effective_control_mass, ess,
        max_imbalance_pre, max_imbalance_post, balance_solver.
    event_study_vcov : np.ndarray, optional
        Full event-study variance-covariance matrix: the sub-block of the
        pooled stacked-regression coefficient covariance over the estimated
        ``D_sa x event-time`` interaction columns, ordered by
        ``event_study_vcov_index``. The reported per-event-time SEs are
        exactly ``sqrt(diag())`` of this matrix in every inference mode
        (analytical hc1/hc2_bm sandwich, survey replicate refit, and survey
        TSL all produce the coefficient covariance the SEs are read from).
        The reference period is synthesized, never a regression column, so
        it is absent from the index. None when no event study was requested.
    event_study_vcov_index : list of int, optional
        Event-time labels ordering ``event_study_vcov``'s rows/columns
        (the estimated event times, reference excluded).
    event_study_df : dict, optional
        Per-event-time inference degrees of freedom PROVENANCE: maps each
        estimated event time to the df actually passed to
        ``safe_inference`` for its stored p-value/CI (per-event
        Bell-McCaffrey Satterthwaite df under ``hc2_bm``; the scalar survey
        df under survey designs), or NaN when the row used normal theory,
        the df was undefined, or hc2_bm failed closed. None when no event
        study was requested.
    """

    # --- Required fields (no defaults; must be supplied at construction) ---
    overall_att: float
    overall_se: float
    overall_t_stat: float
    overall_p_value: float
    overall_conf_int: Tuple[float, float]
    event_study_effects: Optional[Dict[int, Dict[str, Any]]]
    group_effects: Optional[Dict[Any, Dict[str, Any]]]
    # Excluded from the dataclass repr: the stacked frame can be large.
    stacked_data: pd.DataFrame = field(repr=False)
    # --- Sample / design bookkeeping ---
    groups: List[Any] = field(default_factory=list)
    trimmed_groups: List[Any] = field(default_factory=list)
    time_periods: List[Any] = field(default_factory=list)
    n_obs: int = 0
    n_stacked_obs: int = 0
    n_sub_experiments: int = 0
    n_treated_units: int = 0
    n_control_units: int = 0
    kappa_pre: int = 1
    kappa_post: int = 1
    weighting: str = "aggregate"
    clean_control: str = "not_yet_treated"
    alpha: float = 0.05
    anticipation: int = 0
    # Analytical variance family configured at fit time (Phase 1b 2/8). When
    # survey_design= is supplied the survey TSL/replicate variance overrides
    # the analytical family; this field still records the configured value.
    vcov_type: str = "hc1"
    # Cluster identity ("unit" or "unit_subexp") and realized cluster count
    # at fit time. Used by summary() to render the correct CR1/CR2-BM label
    # via `_format_vcov_label(cluster_name=, n_clusters=)`. Per CI codex R2
    # P2: passing cluster_name=None mislabelled clustered StackedDiD fits
    # as one-way HC1/HC2-BM. StackedDiD is intrinsically clustered.
    cluster_name: Optional[str] = None
    n_clusters: Optional[int] = None
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None)
    # --- Covariate balancing (CBWSDID, Ustyuzhanin 2026) ---
    # balance: "none" (default, plain weighted stacked DID) or "entropy". When
    # "entropy", `covariates` lists the balanced columns and `balance_diagnostics`
    # maps each sub-experiment a to {n_treated, n_control, effective_control_mass
    # (Ñ^C_a), ess, max_imbalance_pre, max_imbalance_post, balance_solver}. When
    # balancing, `stacked_data` carries `_b_sa` (raw design weights) and the
    # `_Q_weight` column holds the composed final weights W_sa.
    balance: str = "none"
    covariates: Optional[List[str]] = None
    balance_diagnostics: Optional[Dict[Any, Dict[str, Any]]] = field(default=None)
    # Unified event-study surface support (spec section 5, row M-092): the
    # full ES VCV sub-block + ordered horizon index + per-event df actually
    # used. See the class docstring for semantics.
    event_study_vcov: Optional[np.ndarray] = field(default=None, repr=False)
    event_study_vcov_index: Optional[List[int]] = field(default=None, repr=False)
    event_study_df: Optional[Dict[int, float]] = field(default=None, repr=False)

    # --- Inference-field aliases (balance/external-adapter compatibility) ---
    @property
    def att(self) -> float:
        """Alias for ``overall_att``."""
        return self.overall_att

    @property
    def se(self) -> float:
        """Alias for ``overall_se``."""
        return self.overall_se

    @property
    def conf_int(self) -> Tuple[float, float]:
        """Alias for ``overall_conf_int``."""
        return self.overall_conf_int

    @property
    def p_value(self) -> float:
        """Alias for ``overall_p_value``."""
        return self.overall_p_value

    @property
    def t_stat(self) -> float:
        """Alias for ``overall_t_stat``."""
        return self.overall_t_stat


[docs]
    def __repr__(self) -> str:
        """Return a one-line summary: ATT (with stars), SE and stack sizes."""
        stars = _get_significance_stars(self.overall_p_value)
        parts = [
            f"ATT={self.overall_att:.4f}{stars}",
            f"SE={self.overall_se:.4f}",
            f"n_sub_exp={self.n_sub_experiments}",
            f"n_stacked_obs={self.n_stacked_obs}",
        ]
        return "StackedDiDResults(" + ", ".join(parts) + ")"


    @property
    def coef_var(self) -> float:
        """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite."""
        if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
            return np.nan
        if not np.isfinite(self.overall_att) or self.overall_att == 0:
            return np.nan
        return self.overall_se / abs(self.overall_att)


[docs]
    def summary(self, alpha: Optional[float] = None) -> str:
        """
        Generate formatted summary of estimation results.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-level label. Defaults
            to the alpha used in estimation. Only the label is affected: the
            interval printed is the stored ``overall_conf_int``; it is not
            recomputed here.

        Returns
        -------
        str
            Formatted summary (85 columns wide, newline-joined).
        """
        # `is None` rather than `or`: an explicitly passed alpha must not be
        # replaced by the stored one merely because it is falsy.
        if alpha is None:
            alpha = self.alpha
        # round() rather than int(): (1 - 0.07) * 100 evaluates to
        # 92.99999999999999, which int() would truncate to 92.
        conf_level = int(round((1 - alpha) * 100))
        # NOTE(review): the interval printed below is the stored one, so
        # passing an alpha different from self.alpha relabels the interval
        # without changing its endpoints.

        width = 85
        rule = "-" * width
        banner = "=" * width

        def fmt_stat(value: float, precision: int) -> str:
            """Right-align a statistic in 10 columns; non-finite prints 'NaN'."""
            if np.isfinite(value):
                return f"{value:>10.{precision}f}"
            return f"{'NaN':>10}"

        def table_header(title: str, first_column: str) -> List[str]:
            """Return the five-line header shared by all effect tables."""
            return [
                rule,
                title.center(width),
                rule,
                f"{first_column:<15} {'Estimate':>12} {'Std. Err.':>12} "
                f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
                rule,
            ]

        def effect_row(label: Any, eff: Dict[str, Any]) -> str:
            """Format one event-study / cohort row, or an all-NaN row."""
            # str() first: keys such as tuples reject the '<15' format spec.
            name = str(label)
            if np.isnan(eff["effect"]):
                return f"{name:<15} {'NaN':>12} {'NaN':>12} {'NaN':>10} {'NaN':>10} {'':>6}"
            stars = _get_significance_stars(eff["p_value"])
            return (
                f"{name:<15} {eff['effect']:>12.4f} {eff['se']:>12.4f} "
                f"{fmt_stat(eff['t_stat'], 3)} {fmt_stat(eff['p_value'], 4)} {stars:>6}"
            )

        window = f"[{-self.kappa_pre}, {self.kappa_post}]"
        lines = [
            banner,
            "Stacked DiD Estimator Results (Wing, Freedman & Hollingsworth 2024)".center(width),
            banner,
            "",
            f"{'Original observations:':<30} {self.n_obs:>10}",
            f"{'Stacked observations:':<30} {self.n_stacked_obs:>10}",
            f"{'Sub-experiments:':<30} {self.n_sub_experiments:>10}",
            f"{'Treated units:':<30} {self.n_treated_units:>10}",
            f"{'Control units:':<30} {self.n_control_units:>10}",
            f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
            f"{'Trimmed cohorts:':<30} {len(self.trimmed_groups):>10}",
            f"{'Event window:':<30} {window:>10}",
            f"{'Weighting:':<30} {self.weighting:>10}",
            f"{'Clean control:':<30} {self.clean_control:>10}",
            "",
        ]

        if self.survey_metadata is not None:
            # Survey design info replaces the analytical variance label.
            lines.extend(_format_survey_block(self.survey_metadata, width))
        elif self.vcov_type:
            # Variance family label (per CI codex R1 P2): surface the
            # analytical vcov_type when the survey path didn't override. Per
            # R2 P2: pass cluster_name + n_clusters so the label renders as
            # "CR1 cluster-robust at unit, G=N" rather than the one-way "HC1
            # heteroskedasticity-robust" -- StackedDiD is intrinsically
            # clustered.
            from diff_diff.results import _format_vcov_label

            label = _format_vcov_label(
                self.vcov_type,
                cluster_name=self.cluster_name,
                n_clusters=self.n_clusters,
                n_obs=self.n_stacked_obs,
            )
            if label is not None:
                lines.append(f"{'Variance:':<30} {label:>50}")
                lines.append("")

        # Overall ATT. The estimate and SE are formatted unconditionally
        # here, so a NaN prints as Python's 'nan' in those two columns.
        overall_stars = _get_significance_stars(self.overall_p_value)
        lines.extend(
            table_header("Overall Average Treatment Effect on the Treated", "Parameter")
        )
        lines.extend(
            [
                f"{'ATT':<15} {self.overall_att:>12.4f} {self.overall_se:>12.4f} "
                f"{fmt_stat(self.overall_t_stat, 3)} {fmt_stat(self.overall_p_value, 4)} "
                f"{overall_stars:>6}",
                rule,
                "",
                f"{conf_level}% Confidence Interval: "
                f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
            ]
        )

        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/abs(ATT)):':<25} {cv:>10.4f}")

        lines.append("")

        # Event study effects
        if self.event_study_effects:
            lines.extend(table_header("Event Study (Dynamic) Effects", "Rel. Period"))
            for h in sorted(self.event_study_effects.keys()):
                eff = self.event_study_effects[h]
                if eff.get("n_obs", 1) == 0:
                    # Reference period marker
                    lines.append(
                        f"[ref: {h}]" f"{'0.0000':>17} {'---':>12} {'---':>10} {'---':>10} {'':>6}"
                    )
                else:
                    lines.append(effect_row(h, eff))
            lines.extend([rule, ""])

        # Group effects
        if self.group_effects:
            lines.extend(table_header("Group (Cohort) Effects", "Cohort"))
            for g in sorted(self.group_effects.keys()):
                lines.append(effect_row(g, self.group_effects[g]))
            lines.extend([rule, ""])

        lines.extend(
            [
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                banner,
            ]
        )

        return "\n".join(lines)



[docs]
    def print_summary(self, alpha: Optional[float] = None) -> None:
        """Write the text produced by ``summary(alpha)`` to stdout."""
        report = self.summary(alpha)
        print(report)



[docs]
    def to_dict(self) -> Dict[str, Any]:
        """
        Convert headline results to a dictionary.

        Returns
        -------
        Dict[str, Any]
            Canonical inference row plus scalar metadata. Detailed
            event-study / group tables are available via
            ``to_dataframe(level=...)``.
        """
        result = {
            "att": self.att,
            "se": self.se,
            "t_stat": self.t_stat,
            "p_value": self.p_value,
            "conf_int_lower": self.overall_conf_int[0],
            "conf_int_upper": self.overall_conf_int[1],
            "n_obs": self.n_obs,
            "n_stacked_obs": self.n_stacked_obs,
            "n_sub_experiments": self.n_sub_experiments,
            "n_treated_units": self.n_treated_units,
            "n_control_units": self.n_control_units,
            "kappa_pre": self.kappa_pre,
            "kappa_post": self.kappa_post,
            "weighting": self.weighting,
            "clean_control": self.clean_control,
            "anticipation": self.anticipation,
            "alpha": self.alpha,
            "vcov_type": self.vcov_type,
        }
        if self.cluster_name is not None:
            result["cluster_name"] = self.cluster_name
        if self.n_clusters is not None:
            result["n_clusters"] = self.n_clusters
        return result



[docs]
    def to_dataframe(self, level: str = "event_study") -> pd.DataFrame:
        """
        Convert results to DataFrame.

        Parameters
        ----------
        level : str, default="event_study"
            Level of aggregation:
            - "event_study": Event study effects by relative time
            - "group": Group (cohort) effects

        Returns
        -------
        pd.DataFrame
            Results as DataFrame.
        """
        if level == "event_study":
            if self.event_study_effects is None:
                raise ValueError(
                    "Event study effects not computed. " "Use aggregate='event_study'."
                )
            rows = []
            for h, data in sorted(self.event_study_effects.items()):
                rows.append(
                    {
                        "relative_period": h,
                        "effect": data["effect"],
                        "se": data["se"],
                        "t_stat": data["t_stat"],
                        "p_value": data["p_value"],
                        "conf_int_lower": data["conf_int"][0],
                        "conf_int_upper": data["conf_int"][1],
                        "n_obs": data.get("n_obs", np.nan),
                    }
                )
            return pd.DataFrame(rows)

        elif level == "group":
            raise ValueError(
                "Group aggregation is not supported by StackedDiD. "
                "The pooled stacked regression cannot produce cohort-specific "
                "effects. Use CallawaySantAnna or ImputationDiD for "
                "cohort-level estimates."
            )

        else:
            raise ValueError(f"Unknown level: {level}. Use 'event_study' or 'group'.")


    @property
    def is_significant(self) -> bool:
        """Check if overall ATT is significant."""
        return bool(self.overall_p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Star marker returned by ``_get_significance_stars`` for the overall p-value."""
        stars = _get_significance_stars(self.overall_p_value)
        return stars