Source code for diff_diff.continuous_did_results

Name: diff-diff
Author: diff-diff contributors
"""
Result container classes for Continuous Difference-in-Differences estimator.

Provides dataclass containers for dose-response curves, group-time effects,
and aggregated estimation results.
"""

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

from diff_diff.results import _format_survey_block, _get_significance_stars
from diff_diff.results_base import BaseResults

__all__ = ["ContinuousDiDResults", "DoseResponseCurve"]



[docs]
@dataclass
class DoseResponseCurve:
    """
    Dose-response curve from continuous DiD estimation.

    Attributes
    ----------
    dose_grid : np.ndarray
        Evaluation points, shape ``(n_grid,)``.
    effects : np.ndarray
        ATT(d) or ACRT(d) values, shape ``(n_grid,)``.
    se : np.ndarray
        Standard errors, shape ``(n_grid,)``.
    conf_int_lower : np.ndarray
        Lower CI bounds, shape ``(n_grid,)``.
    conf_int_upper : np.ndarray
        Upper CI bounds, shape ``(n_grid,)``.
    target : str
        ``"att"`` or ``"acrt"``.
    p_value : np.ndarray or None
        Stored per-point p-values. ``to_dataframe`` reports them as-is when
        ``n_bootstrap > 0``; otherwise p-values are derived from ``effects``
        and ``se``.
    n_bootstrap : int
        Number of bootstrap iterations; ``0`` (the default) makes
        ``to_dataframe`` take the analytic-inference path.
    df_survey : int or None
        Degrees of freedom forwarded as ``df`` to ``safe_inference`` on the
        analytic-inference path of ``to_dataframe``.
    """

    dose_grid: np.ndarray
    effects: np.ndarray
    se: np.ndarray
    conf_int_lower: np.ndarray
    conf_int_upper: np.ndarray
    target: str
    p_value: Optional[np.ndarray] = None
    n_bootstrap: int = 0
    df_survey: Optional[int] = None


[docs]
    def to_dataframe(self) -> pd.DataFrame:
        """
        Tabulate the curve, one row per dose-grid point.

        Returns
        -------
        pd.DataFrame
            Columns ``dose``, ``effect``, ``se``, ``conf_int_lower``,
            ``conf_int_upper``, ``t_stat`` and ``p_value``. With bootstrap
            inference (``n_bootstrap > 0`` and stored ``p_value``) the stored
            p-values are reported and ``t_stat`` is all-NaN; otherwise both
            are computed point by point with ``safe_inference``.
        """
        n_points = len(self.effects)
        # The t-statistic column starts as NaN in both modes; only the
        # analytic path below overwrites it.
        t_values = np.full(n_points, np.nan)

        bootstrap_mode = self.n_bootstrap > 0 and self.p_value is not None
        if bootstrap_mode:
            # Bootstrap inference: trust the stored p-values.
            p_values = self.p_value
        else:
            # Analytic inference: imported lazily, only when actually needed.
            from diff_diff.utils import safe_inference

            p_values = np.full(n_points, np.nan)
            for pos in range(n_points):
                t_values[pos], p_values[pos], _ = safe_inference(
                    self.effects[pos], self.se[pos], df=self.df_survey
                )

        columns = {
            "dose": self.dose_grid,
            "effect": self.effects,
            "se": self.se,
            "conf_int_lower": self.conf_int_lower,
            "conf_int_upper": self.conf_int_upper,
            "t_stat": t_values,
            "p_value": p_values,
        }
        return pd.DataFrame(columns)





[docs]
@dataclass
class ContinuousDiDResults(BaseResults):
    """
    Results from Continuous Difference-in-Differences estimation.

    Implements Callaway, Goodman-Bacon & Sant'Anna (2024).

    Attributes
    ----------
    dose_response_att : DoseResponseCurve
        ATT(d) dose-response curve.
    dose_response_acrt : DoseResponseCurve
        ACRT(d) dose-response curve.
    overall_att : float
        Binarized overall ATT (ATT^{loc} under PT, equals ATT^{glob} under SPT).
    overall_att_se, overall_att_t_stat, overall_att_p_value : float
        Standard error, t-statistic and p-value reported with ``overall_att``.
    overall_att_conf_int : tuple of float
        ``(lower, upper)`` confidence bounds for ``overall_att``.
    overall_acrt : float
        Plug-in overall ACRT^{glob}.
    overall_acrt_se, overall_acrt_t_stat, overall_acrt_p_value : float
        Standard error, t-statistic and p-value reported with ``overall_acrt``.
    overall_acrt_conf_int : tuple of float
        ``(lower, upper)`` confidence bounds for ``overall_acrt``.
    group_time_effects : dict
        Per (g,t) cell results.
    dose_grid : np.ndarray
        Dose evaluation points; holds the distinct dose levels when
        ``treatment_type`` is ``"discrete"``.
    groups : list
        Treatment cohorts.
    time_periods : list
        Time periods.
    n_obs : int
        Total number of observations.
    n_treated_units, n_control_units : int
        Number of treated and control units.
    alpha : float
        Significance level; used by ``is_significant`` and as the default for
        the confidence-level label in ``summary``.
    control_group : str
        Control group label (default ``"never_treated"``;
        ``"lowest_dose"`` is the case in which ``reference_dose`` is set).
    degree : int
        B-spline degree (reported for the continuous treatment type).
    num_knots : int
        Number of interior knots (reported for the continuous treatment type).
    base_period : str
        Base period strategy (``"varying"`` or ``"universal"``).
    anticipation : int
        Number of anticipation periods.
    n_bootstrap : int
        Number of bootstrap iterations used.
    bootstrap_weights : str
        Bootstrap weight type (``"rademacher"``, ``"mammen"``, or ``"webb"``).
    seed : int or None
        Random seed used for bootstrap.
    rank_deficient_action : str
        How rank deficiency is handled (``"warn"``, ``"error"``, ``"silent"``).
    covariates : list of str or None
        Covariate names; ``None`` for the unconditional path.
    estimation_method : str
        ``"reg"`` or ``"dr"``; only meaningful when covariates are used.
    pscore_trim, epv_threshold, pscore_fallback
        Estimator settings stored on the result; they are not read anywhere
        in this class. NOTE(review): semantics are defined by the estimator,
        confirm there.
    treatment_type : str
        ``"continuous"`` (B-spline sieve dose-response) or ``"discrete"``
        (saturated per-dose-level regression).
    reference_dose : float or None
        Lowest-dose reference d_L for ``control_group="lowest_dose"``;
        ``None`` for the never/not-yet-treated paths.
    event_study_effects : dict or None
        Per relative-period results keyed by integer relative period; each
        entry is read with keys ``effect``, ``se``, ``t_stat``, ``p_value``
        and ``conf_int``. ``None`` when not computed.
    survey_metadata : object or None
        Survey design metadata; rendered in ``summary`` when present.
    """

    dose_response_att: DoseResponseCurve
    dose_response_acrt: DoseResponseCurve
    overall_att: float
    overall_att_se: float
    overall_att_t_stat: float
    overall_att_p_value: float
    overall_att_conf_int: Tuple[float, float]
    overall_acrt: float
    overall_acrt_se: float
    overall_acrt_t_stat: float
    overall_acrt_p_value: float
    overall_acrt_conf_int: Tuple[float, float]
    group_time_effects: Dict[Tuple[Any, Any], Dict[str, Any]]
    dose_grid: np.ndarray
    groups: List[Any]
    time_periods: List[Any]
    n_obs: int
    n_treated_units: int
    n_control_units: int
    alpha: float = 0.05
    control_group: str = "never_treated"
    degree: int = 3
    num_knots: int = 0
    base_period: str = "varying"
    anticipation: int = 0
    n_bootstrap: int = 0
    bootstrap_weights: str = "rademacher"
    seed: Optional[int] = None
    rank_deficient_action: str = "warn"
    # Covariate adjustment (conditional parallel trends). ``covariates`` is None
    # for the unconditional path; ``estimation_method`` is only meaningful when
    # covariates are used (``"reg"`` or ``"dr"``).
    covariates: Optional[List[str]] = field(default=None)
    estimation_method: str = "dr"
    pscore_trim: float = 0.01
    epv_threshold: float = 10.0
    pscore_fallback: str = "error"
    # "continuous" (B-spline sieve dose-response) or "discrete" (saturated
    # per-dose-level regression); the ``dose_grid`` holds the distinct dose
    # levels when discrete.
    treatment_type: str = "continuous"
    # Lowest-dose reference d_L for ``control_group="lowest_dose"`` (Remark 3.1);
    # the estimand is ``ATT(d) - ATT(d_L)`` and ``ATT(d_L) = 0`` by construction.
    # ``None`` for the never/not-yet-treated (D=0 control) paths.
    reference_dose: Optional[float] = None
    event_study_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None)
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None)

    # --- Inference-field aliases (balance/external-adapter compatibility) ---
    # ATT-side is the headline contract; ACRT remains accessible via overall_acrt_*.
    @property
    def att(self) -> float:
        """Alias for ``overall_att`` (headline point estimate)."""
        return self.overall_att

    @property
    def se(self) -> float:
        """Alias for ``overall_att_se``."""
        return self.overall_att_se

    @property
    def conf_int(self) -> Tuple[float, float]:
        """Alias for ``overall_att_conf_int``, a ``(lower, upper)`` pair."""
        return self.overall_att_conf_int

    @property
    def p_value(self) -> float:
        """Alias for ``overall_att_p_value``."""
        return self.overall_att_p_value

    @property
    def t_stat(self) -> float:
        """Alias for ``overall_att_t_stat``."""
        return self.overall_att_t_stat

    # `overall_*` aliases for naming consistency with the rest of the staggered family.
    @property
    def overall_se(self) -> float:
        """``overall_``-prefixed alias for ``overall_att_se``."""
        return self.overall_att_se

    @property
    def overall_conf_int(self) -> Tuple[float, float]:
        """``overall_``-prefixed alias for ``overall_att_conf_int``."""
        return self.overall_att_conf_int

    @property
    def overall_p_value(self) -> float:
        """``overall_``-prefixed alias for ``overall_att_p_value``."""
        return self.overall_att_p_value

    @property
    def overall_t_stat(self) -> float:
        """``overall_``-prefixed alias for ``overall_att_t_stat``."""
        return self.overall_att_t_stat

    def __repr__(self) -> str:
        """
        Return a compact one-line representation.

        Shows the overall ATT and ACRT estimates (four decimals, each followed
        by its significance stars) plus the number of groups and time periods.
        """
        att_stars = _get_significance_stars(self.overall_att_p_value)
        acrt_stars = _get_significance_stars(self.overall_acrt_p_value)
        pieces = [
            f"ContinuousDiDResults(",
            f"ATT_glob={self.overall_att:.4f}{att_stars}, ",
            f"ACRT_glob={self.overall_acrt:.4f}{acrt_stars}, ",
            f"n_groups={len(self.groups)}, ",
            f"n_periods={len(self.time_periods)})",
        ]
        return "".join(pieces)

    @property
    def coef_var(self) -> float:
        """
        Coefficient of variation of the overall ATT: ``SE / abs(ATT)``.

        Returns NaN when the standard error is non-finite or negative, or when
        the overall ATT is non-finite or exactly zero.
        """
        std_err = self.overall_att_se
        if np.isfinite(std_err) and std_err >= 0:
            estimate = self.overall_att
            if np.isfinite(estimate) and estimate != 0:
                return std_err / abs(estimate)
        # Any failed check above falls through to the undefined case.
        return np.nan


[docs]
    def summary(self, alpha: Optional[float] = None) -> str:
        """
        Build a fixed-width, human-readable text report of the results.

        The report lists sample and design metadata, the overall ATT / ACRT
        table with confidence intervals, the coefficient of variation (when
        finite), up to five selected points of the dose-response curves, and
        the event-study table when ``event_study_effects`` is non-empty.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-level label. Defaults
            to ``self.alpha`` when ``None``. Only the printed label depends on
            this value: the interval bounds shown are the stored
            ``overall_att_conf_int`` / ``overall_acrt_conf_int`` values and are
            not recomputed here.

        Returns
        -------
        str
            The report, lines joined with ``"\\n"``.
        """
        # ``is None`` rather than ``or`` so an explicit ``alpha=0.0`` is not
        # silently replaced by ``self.alpha``.
        if alpha is None:
            alpha = self.alpha
        # ``:g`` instead of ``int(...)``: truncation turns float noise into an
        # off-by-one label (``int((1 - 0.9) * 100) == 9``) and drops fractions
        # (97.5 -> 97). Standard levels still print as "95", "90", "99".
        conf_level = f"{(1 - alpha) * 100:g}"
        w = 85

        def stat_row(label: Any, est: float, se: float, t: float, p: float) -> str:
            # One row shared by the overall and event-study tables; non-finite
            # t / p values are shown as the literal "NaN".
            t_str = f"{t:>10.3f}" if np.isfinite(t) else f"{'NaN':>10}"
            p_str = f"{p:>10.4f}" if np.isfinite(p) else f"{'NaN':>10}"
            sig = _get_significance_stars(p)
            return f"{label:<15} {est:>12.4f} {se:>12.4f} {t_str} {p_str} {sig:>6}"

        lines = [
            "=" * w,
            "Continuous Difference-in-Differences Results".center(w),
            "(Callaway, Goodman-Bacon & Sant'Anna 2024)".center(w),
            "=" * w,
            "",
            f"{'Total observations:':<30} {self.n_obs:>10}",
            f"{'Treated units:':<30} {self.n_treated_units:>10}",
            f"{'Control units:':<30} {self.n_control_units:>10}",
            f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
            f"{'Time periods:':<30} {len(self.time_periods):>10}",
            f"{'Control group:':<30} {self.control_group:>10}",
            f"{'Treatment type:':<30} {self.treatment_type:>10}",
        ]
        # Lowest-dose reference (Remark 3.1): show d_L when it is the control.
        if self.reference_dose is not None:
            lines.append(f"{'Reference dose (d_L):':<30} {self.reference_dose:>10.4g}")
        # Basis metadata: B-spline degree/knots (continuous) or the number of
        # saturated dose levels (discrete).
        if self.treatment_type == "discrete":
            lines.append(f"{'Dose levels:':<30} {len(self.dose_grid):>10}")
        else:
            lines.append(f"{'B-spline degree:':<30} {self.degree:>10}")
            lines.append(f"{'Interior knots:':<30} {self.num_knots:>10}")
        lines.append(f"{'Base period:':<30} {self.base_period:>10}")
        lines.append(f"{'Anticipation:':<30} {self.anticipation:>10}")
        if self.covariates:
            lines.append(f"{'Covariates:':<30} {', '.join(self.covariates):>10}")
            lines.append(f"{'Estimation method:':<30} {self.estimation_method:>10}")
        lines.append("")

        # Add survey design info
        if self.survey_metadata is not None:
            lines.extend(_format_survey_block(self.survey_metadata, w))

        # Overall summary parameters
        lines.extend(
            [
                "-" * w,
                "Overall Summary Parameters".center(w),
                "-" * w,
                f"{'Parameter':<15} {'Estimate':>12} {'Std. Err.':>12} "
                f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
                "-" * w,
            ]
        )
        lines.append(
            stat_row(
                "ATT_glob",
                self.overall_att,
                self.overall_att_se,
                self.overall_att_t_stat,
                self.overall_att_p_value,
            )
        )
        lines.append(
            stat_row(
                "ACRT_glob",
                self.overall_acrt,
                self.overall_acrt_se,
                self.overall_acrt_t_stat,
                self.overall_acrt_p_value,
            )
        )
        lines.extend(
            [
                "-" * w,
                "",
                f"{conf_level}% CI for ATT_glob: "
                f"[{self.overall_att_conf_int[0]:.4f}, {self.overall_att_conf_int[1]:.4f}]",
                f"{conf_level}% CI for ACRT_glob: "
                f"[{self.overall_acrt_conf_int[0]:.4f}, {self.overall_acrt_conf_int[1]:.4f}]",
            ]
        )

        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/abs(ATT)):':<25} {cv:>10.4f}")

        lines.append("")

        # Dose-response curve summary (first/quartile/mid/last points)
        n_grid = len(self.dose_grid)
        if n_grid > 0:
            lines.extend(
                [
                    "-" * w,
                    "Dose-Response Curve (selected points)".center(w),
                    "-" * w,
                    f"{'Dose':>10} {'ATT(d)':>12} {'SE':>10} {'ACRT(d)':>12} {'SE':>10}",
                    "-" * w,
                ]
            )
            # The set removes duplicates on short grids. Every candidate lies
            # in ``[0, n_grid)`` because ``n_grid >= 1`` here, so no bounds
            # check is needed.
            indices = sorted({0, n_grid // 4, n_grid // 2, 3 * n_grid // 4, n_grid - 1})
            for idx in indices:
                lines.append(
                    f"{self.dose_grid[idx]:>10.3f} "
                    f"{self.dose_response_att.effects[idx]:>12.4f} "
                    f"{self.dose_response_att.se[idx]:>10.4f} "
                    f"{self.dose_response_acrt.effects[idx]:>12.4f} "
                    f"{self.dose_response_acrt.se[idx]:>10.4f}"
                )
            lines.extend(["-" * w, ""])

        # Event study effects if available
        if self.event_study_effects:
            lines.extend(
                [
                    "-" * w,
                    "Event Study (Dynamic) Effects (Binarized ATT)".center(w),
                    "-" * w,
                    f"{'Rel. Period':<15} {'Estimate':>12} {'Std. Err.':>12} "
                    f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
                    "-" * w,
                ]
            )
            for rel_t in sorted(self.event_study_effects.keys()):
                eff = self.event_study_effects[rel_t]
                lines.append(
                    stat_row(rel_t, eff["effect"], eff["se"], eff["t_stat"], eff["p_value"])
                )
            lines.extend(["-" * w, ""])

        lines.extend(
            [
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                "=" * w,
            ]
        )
        return "\n".join(lines)



[docs]
    def print_summary(self, alpha: Optional[float] = None) -> None:
        """
        Print the text report produced by ``summary`` to stdout.

        Parameters
        ----------
        alpha : float, optional
            Forwarded unchanged to ``summary``.
        """
        print(self.summary(alpha))



[docs]
    def to_dict(self) -> Dict[str, Any]:
        """
        Export the headline results as a flat dictionary.

        Returns
        -------
        Dict[str, Any]
            In insertion order: the canonical ATT inference row (``att``,
            ``se``, ``t_stat``, ``p_value``, CI bounds), the ACRT companion
            estimand under ``acrt*`` keys, then scalar metadata. Detailed
            dose-response / event-study tables are available via
            ``to_dataframe(level=...)``.
        """
        att_bounds = self.overall_att_conf_int
        acrt_bounds = self.overall_acrt_conf_int

        # Canonical ATT row, read through the alias properties.
        payload: Dict[str, Any] = {
            "att": self.att,
            "se": self.se,
            "t_stat": self.t_stat,
            "p_value": self.p_value,
            "conf_int_lower": att_bounds[0],
            "conf_int_upper": att_bounds[1],
        }

        # ACRT companion estimand.
        payload.update(
            {
                "acrt": self.overall_acrt,
                "acrt_se": self.overall_acrt_se,
                "acrt_t_stat": self.overall_acrt_t_stat,
                "acrt_p_value": self.overall_acrt_p_value,
                "acrt_conf_int_lower": acrt_bounds[0],
                "acrt_conf_int_upper": acrt_bounds[1],
            }
        )

        # Scalar metadata: each key mirrors the attribute of the same name.
        for attr_name in (
            "n_obs",
            "n_treated_units",
            "n_control_units",
            "control_group",
            "treatment_type",
            "estimation_method",
            "degree",
            "num_knots",
            "base_period",
            "anticipation",
            "n_bootstrap",
            "alpha",
        ):
            payload[attr_name] = getattr(self, attr_name)
        return payload



[docs]
    def to_dataframe(self, level: str = "dose_response") -> pd.DataFrame:
        """
        Convert results to DataFrame.

        Parameters
        ----------
        level : str, default="dose_response"
            ``"dose_response"``, ``"group_time"``, or ``"event_study"``.

        Returns
        -------
        pd.DataFrame
            * ``"dose_response"``: one row per dose-grid point with ATT(d)
              and ACRT(d) estimates, standard errors and CI bounds.
            * ``"group_time"``: one row per (group, time) cell, sorted by key;
              missing ``att_glob`` / ``acrt_glob`` become NaN and missing
              counts become 0.
            * ``"event_study"``: one row per relative period, sorted.

            The ``"group_time"`` and ``"event_study"`` frames always carry
            their full column set, even when there are no rows.

        Raises
        ------
        ValueError
            If ``level`` is not recognised, or if ``level="event_study"`` and
            ``event_study_effects`` is ``None``.
        """
        if level == "dose_response":
            att_df = self.dose_response_att.to_dataframe()
            acrt_df = self.dose_response_acrt.to_dataframe()
            return pd.DataFrame(
                {
                    "dose": att_df["dose"],
                    "att": att_df["effect"],
                    "att_se": att_df["se"],
                    "att_ci_lower": att_df["conf_int_lower"],
                    "att_ci_upper": att_df["conf_int_upper"],
                    "acrt": acrt_df["effect"],
                    "acrt_se": acrt_df["se"],
                    "acrt_ci_lower": acrt_df["conf_int_lower"],
                    "acrt_ci_upper": acrt_df["conf_int_upper"],
                }
            )

        if level == "group_time":
            rows = [
                {
                    "group": g,
                    "time": t,
                    "att_glob": data.get("att_glob", np.nan),
                    "acrt_glob": data.get("acrt_glob", np.nan),
                    "n_treated": data.get("n_treated", 0),
                    "n_control": data.get("n_control", 0),
                }
                for (g, t), data in sorted(self.group_time_effects.items())
            ]
            # Explicit columns keep the schema stable when ``rows`` is empty;
            # ``pd.DataFrame([])`` alone would have no columns at all.
            return pd.DataFrame(
                rows,
                columns=["group", "time", "att_glob", "acrt_glob", "n_treated", "n_control"],
            )

        if level == "event_study":
            if self.event_study_effects is None:
                raise ValueError("Event study effects not computed. Use aggregate='eventstudy'.")
            rows = [
                {
                    "relative_period": rel_t,
                    "att_glob": data["effect"],
                    "se": data["se"],
                    "t_stat": data["t_stat"],
                    "p_value": data["p_value"],
                    "conf_int_lower": data["conf_int"][0],
                    "conf_int_upper": data["conf_int"][1],
                }
                for rel_t, data in sorted(self.event_study_effects.items())
            ]
            # Same schema guarantee as above for an empty (non-None) mapping.
            return pd.DataFrame(
                rows,
                columns=[
                    "relative_period",
                    "att_glob",
                    "se",
                    "t_stat",
                    "p_value",
                    "conf_int_lower",
                    "conf_int_upper",
                ],
            )

        raise ValueError(
            f"Unknown level: {level}. Use 'dose_response', 'group_time', or 'event_study'."
        )


    @property
    def is_significant(self) -> bool:
        """
        Whether the overall ATT p-value is strictly below ``alpha``.

        A NaN p-value compares as not-less-than, so the result is ``False``.
        """
        return bool(self.overall_att_p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Significance stars for the overall ATT p-value, as returned by ``_get_significance_stars``."""
        return _get_significance_stars(self.overall_att_p_value)